Merge 5.6-rc3 into usb-next

We need the USB fixes in here as well.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
Greg Kroah-Hartman 2020-02-24 08:48:49 +01:00
commit 24e6aea480
756 changed files with 13241 additions and 9767 deletions

View File

@ -16,3 +16,5 @@ In addition, other licenses may also apply. Please see:
Documentation/process/license-rules.rst Documentation/process/license-rules.rst
for more details. for more details.
All contributions to the Linux Kernel are subject to this COPYING file.

View File

@ -129,7 +129,7 @@ this logic.
As a single binary will need to support both 48-bit and 52-bit VA As a single binary will need to support both 48-bit and 52-bit VA
spaces, the VMEMMAP must be sized large enough for 52-bit VAs and spaces, the VMEMMAP must be sized large enough for 52-bit VAs and
also must be sized large enought to accommodate a fixed PAGE_OFFSET. also must be sized large enough to accommodate a fixed PAGE_OFFSET.
Most code in the kernel should not need to consider the VA_BITS, for Most code in the kernel should not need to consider the VA_BITS, for
code that does need to know the VA size the variables are code that does need to know the VA size the variables are

View File

@ -44,8 +44,15 @@ The AArch64 Tagged Address ABI has two stages of relaxation depending
how the user addresses are used by the kernel: how the user addresses are used by the kernel:
1. User addresses not accessed by the kernel but used for address space 1. User addresses not accessed by the kernel but used for address space
management (e.g. ``mmap()``, ``mprotect()``, ``madvise()``). The use management (e.g. ``mprotect()``, ``madvise()``). The use of valid
of valid tagged pointers in this context is always allowed. tagged pointers in this context is allowed with the exception of
``brk()``, ``mmap()`` and the ``new_address`` argument to
``mremap()`` as these have the potential to alias with existing
user addresses.
NOTE: This behaviour changed in v5.6 and so some earlier kernels may
incorrectly accept valid tagged pointers for the ``brk()``,
``mmap()`` and ``mremap()`` system calls.
2. User addresses accessed by the kernel (e.g. ``write()``). This ABI 2. User addresses accessed by the kernel (e.g. ``write()``). This ABI
relaxation is disabled by default and the application thread needs to relaxation is disabled by default and the application thread needs to

View File

@ -43,9 +43,13 @@ properties:
- enum: - enum:
- allwinner,sun8i-h3-tcon-tv - allwinner,sun8i-h3-tcon-tv
- allwinner,sun50i-a64-tcon-tv - allwinner,sun50i-a64-tcon-tv
- allwinner,sun50i-h6-tcon-tv
- const: allwinner,sun8i-a83t-tcon-tv - const: allwinner,sun8i-a83t-tcon-tv
- items:
- enum:
- allwinner,sun50i-h6-tcon-tv
- const: allwinner,sun8i-r40-tcon-tv
reg: reg:
maxItems: 1 maxItems: 1

View File

@ -1,9 +1,10 @@
Ilitek ILI210x/ILI2117/ILI251x touchscreen controller Ilitek ILI210x/ILI2117/ILI2120/ILI251x touchscreen controller
Required properties: Required properties:
- compatible: - compatible:
ilitek,ili210x for ILI210x ilitek,ili210x for ILI210x
ilitek,ili2117 for ILI2117 ilitek,ili2117 for ILI2117
ilitek,ili2120 for ILI2120
ilitek,ili251x for ILI251x ilitek,ili251x for ILI251x
- reg: The I2C address of the device - reg: The I2C address of the device

View File

@ -33,24 +33,40 @@ properties:
maxItems: 1 maxItems: 1
clocks: clocks:
minItems: 2 oneOf:
maxItems: 3 - items:
items: - description: The CSI interface clock
- description: The CSI interface clock - description: The CSI DRAM clock
- description: The CSI ISP clock
- description: The CSI DRAM clock - items:
- description: The CSI interface clock
- description: The CSI ISP clock
- description: The CSI DRAM clock
clock-names: clock-names:
minItems: 2 oneOf:
maxItems: 3 - items:
items: - const: bus
- const: bus - const: ram
- const: isp
- const: ram - items:
- const: bus
- const: isp
- const: ram
resets: resets:
maxItems: 1 maxItems: 1
# FIXME: This should be made required eventually once every SoC will
# have the MBUS declared.
interconnects:
maxItems: 1
# FIXME: This should be made required eventually once every SoC will
# have the MBUS declared.
interconnect-names:
const: dma-mem
# See ./video-interfaces.txt for details # See ./video-interfaces.txt for details
port: port:
type: object type: object

View File

@ -347,6 +347,7 @@ examples:
interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>; interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
#iommu-cells = <1>; #iommu-cells = <1>;
#reset-cells = <1>;
}; };
external-memory-controller@7001b000 { external-memory-controller@7001b000 {
@ -363,20 +364,23 @@ examples:
timing-0 { timing-0 {
clock-frequency = <12750000>; clock-frequency = <12750000>;
nvidia,emc-zcal-cnt-long = <0x00000042>;
nvidia,emc-auto-cal-interval = <0x001fffff>;
nvidia,emc-ctt-term-ctrl = <0x00000802>;
nvidia,emc-cfg = <0x73240000>;
nvidia,emc-cfg-2 = <0x000008c5>;
nvidia,emc-sel-dpd-ctrl = <0x00040128>;
nvidia,emc-bgbias-ctl0 = <0x00000008>;
nvidia,emc-auto-cal-config = <0xa1430000>; nvidia,emc-auto-cal-config = <0xa1430000>;
nvidia,emc-auto-cal-config2 = <0x00000000>; nvidia,emc-auto-cal-config2 = <0x00000000>;
nvidia,emc-auto-cal-config3 = <0x00000000>; nvidia,emc-auto-cal-config3 = <0x00000000>;
nvidia,emc-mode-reset = <0x80001221>; nvidia,emc-auto-cal-interval = <0x001fffff>;
nvidia,emc-bgbias-ctl0 = <0x00000008>;
nvidia,emc-cfg = <0x73240000>;
nvidia,emc-cfg-2 = <0x000008c5>;
nvidia,emc-ctt-term-ctrl = <0x00000802>;
nvidia,emc-mode-1 = <0x80100003>; nvidia,emc-mode-1 = <0x80100003>;
nvidia,emc-mode-2 = <0x80200008>; nvidia,emc-mode-2 = <0x80200008>;
nvidia,emc-mode-4 = <0x00000000>; nvidia,emc-mode-4 = <0x00000000>;
nvidia,emc-mode-reset = <0x80001221>;
nvidia,emc-mrs-wait-cnt = <0x000e000e>;
nvidia,emc-sel-dpd-ctrl = <0x00040128>;
nvidia,emc-xm2dqspadctrl2 = <0x0130b118>;
nvidia,emc-zcal-cnt-long = <0x00000042>;
nvidia,emc-zcal-interval = <0x00000000>;
nvidia,emc-configuration = < nvidia,emc-configuration = <
0x00000000 /* EMC_RC */ 0x00000000 /* EMC_RC */

View File

@ -124,7 +124,7 @@ not every application needs SDIO irq, e.g. MMC cards.
pinctrl-1 = <&mmc1_idle>; pinctrl-1 = <&mmc1_idle>;
pinctrl-2 = <&mmc1_sleep>; pinctrl-2 = <&mmc1_sleep>;
... ...
interrupts-extended = <&intc 64 &gpio2 28 GPIO_ACTIVE_LOW>; interrupts-extended = <&intc 64 &gpio2 28 IRQ_TYPE_LEVEL_LOW>;
}; };
mmc1_idle : pinmux_cirq_pin { mmc1_idle : pinmux_cirq_pin {

View File

@ -56,7 +56,6 @@ patternProperties:
examples: examples:
- | - |
davinci_mdio: mdio@5c030000 { davinci_mdio: mdio@5c030000 {
compatible = "ti,davinci_mdio";
reg = <0x5c030000 0x1000>; reg = <0x5c030000 0x1000>;
#address-cells = <1>; #address-cells = <1>;
#size-cells = <0>; #size-cells = <0>;

View File

@ -71,9 +71,13 @@ b) Example for device tree::
ipmb@10 { ipmb@10 {
compatible = "ipmb-dev"; compatible = "ipmb-dev";
reg = <0x10>; reg = <0x10>;
i2c-protocol;
}; };
}; };
If data is to be transmitted using raw I2C block transfers instead of SMBus,
then the "i2c-protocol" property needs to be defined as above.
2) Manually from Linux:: 2) Manually from Linux::
modprobe ipmb-dev-int modprobe ipmb-dev-int

View File

@ -134,7 +134,7 @@ Sequential zone files can only be written sequentially, starting from the file
end, that is, write operations can only be append writes. Zonefs makes no end, that is, write operations can only be append writes. Zonefs makes no
attempt at accepting random writes and will fail any write request that has a attempt at accepting random writes and will fail any write request that has a
start offset not corresponding to the end of the file, or to the end of the last start offset not corresponding to the end of the file, or to the end of the last
write issued and still in-flight (for asynchrnous I/O operations). write issued and still in-flight (for asynchronous I/O operations).
Since dirty page writeback by the page cache does not guarantee a sequential Since dirty page writeback by the page cache does not guarantee a sequential
write pattern, zonefs prevents buffered writes and writeable shared mappings write pattern, zonefs prevents buffered writes and writeable shared mappings
@ -142,7 +142,7 @@ on sequential files. Only direct I/O writes are accepted for these files.
zonefs relies on the sequential delivery of write I/O requests to the device zonefs relies on the sequential delivery of write I/O requests to the device
implemented by the block layer elevator. An elevator implementing the sequential implemented by the block layer elevator. An elevator implementing the sequential
write feature for zoned block device (ELEVATOR_F_ZBD_SEQ_WRITE elevator feature) write feature for zoned block device (ELEVATOR_F_ZBD_SEQ_WRITE elevator feature)
must be used. This type of elevator (e.g. mq-deadline) is the set by default must be used. This type of elevator (e.g. mq-deadline) is set by default
for zoned block devices on device initialization. for zoned block devices on device initialization.
There are no restrictions on the type of I/O used for read operations in There are no restrictions on the type of I/O used for read operations in
@ -196,7 +196,7 @@ additional conditions that result in I/O errors.
may still happen in the case of a partial failure of a very large direct I/O may still happen in the case of a partial failure of a very large direct I/O
operation split into multiple BIOs/requests or asynchronous I/O operations. operation split into multiple BIOs/requests or asynchronous I/O operations.
If one of the write requests within the set of sequential write requests If one of the write requests within the set of sequential write requests
issued to the device fails, all write requests after queued after it will issued to the device fails, all write requests queued after it will
become unaligned and fail. become unaligned and fail.
* Delayed write errors: similarly to regular block devices, if the device side * Delayed write errors: similarly to regular block devices, if the device side
@ -207,7 +207,7 @@ additional conditions that result in I/O errors.
causing all data to be dropped after the sector that caused the error. causing all data to be dropped after the sector that caused the error.
All I/O errors detected by zonefs are notified to the user with an error code All I/O errors detected by zonefs are notified to the user with an error code
return for the system call that trigered or detected the error. The recovery return for the system call that triggered or detected the error. The recovery
actions taken by zonefs in response to I/O errors depend on the I/O type (read actions taken by zonefs in response to I/O errors depend on the I/O type (read
vs write) and on the reason for the error (bad sector, unaligned writes or zone vs write) and on the reason for the error (bad sector, unaligned writes or zone
condition change). condition change).
@ -222,7 +222,7 @@ condition change).
* A zone condition change to read-only or offline also always triggers zonefs * A zone condition change to read-only or offline also always triggers zonefs
I/O error recovery. I/O error recovery.
Zonefs minimal I/O error recovery may change a file size and a file access Zonefs minimal I/O error recovery may change a file size and file access
permissions. permissions.
* File size changes: * File size changes:
@ -237,7 +237,7 @@ permissions.
A file size may also be reduced to reflect a delayed write error detected on A file size may also be reduced to reflect a delayed write error detected on
fsync(): in this case, the amount of data effectively written in the zone may fsync(): in this case, the amount of data effectively written in the zone may
be less than originally indicated by the file inode size. After such I/O be less than originally indicated by the file inode size. After such I/O
error, zonefs always fixes a file inode size to reflect the amount of data error, zonefs always fixes the file inode size to reflect the amount of data
persistently stored in the file zone. persistently stored in the file zone.
* Access permission changes: * Access permission changes:
@ -281,11 +281,11 @@ Further notes:
permissions to read-only applies to all files. The file system is remounted permissions to read-only applies to all files. The file system is remounted
read-only. read-only.
* Access permission and file size changes due to the device transitioning zones * Access permission and file size changes due to the device transitioning zones
to the offline condition are permanent. Remounting or reformating the device to the offline condition are permanent. Remounting or reformatting the device
with mkfs.zonefs (mkzonefs) will not change back offline zone files to a good with mkfs.zonefs (mkzonefs) will not change back offline zone files to a good
state. state.
* File access permission changes to read-only due to the device transitioning * File access permission changes to read-only due to the device transitioning
zones to the read-only condition are permanent. Remounting or reformating zones to the read-only condition are permanent. Remounting or reformatting
the device will not re-enable file write access. the device will not re-enable file write access.
* File access permission changes implied by the remount-ro, zone-ro and * File access permission changes implied by the remount-ro, zone-ro and
zone-offline mount options are temporary for zones in a good condition. zone-offline mount options are temporary for zones in a good condition.
@ -301,13 +301,13 @@ Mount options
zonefs defines the "errors=<behavior>" mount option to allow the user to specify zonefs defines the "errors=<behavior>" mount option to allow the user to specify
zonefs behavior in response to I/O errors, inode size inconsistencies or zone zonefs behavior in response to I/O errors, inode size inconsistencies or zone
condition chages. The defined behaviors are as follows: condition changes. The defined behaviors are as follows:
* remount-ro (default) * remount-ro (default)
* zone-ro * zone-ro
* zone-offline * zone-offline
* repair * repair
The I/O error actions defined for each behavior is detailed in the previous The I/O error actions defined for each behavior are detailed in the previous
section. section.
Zonefs User Space Tools Zonefs User Space Tools

View File

@ -24,6 +24,7 @@ This driver implements support for Infineon Multi-phase XDPE122 family
dual loop voltage regulators. dual loop voltage regulators.
The family includes XDPE12284 and XDPE12254 devices. The family includes XDPE12284 and XDPE12254 devices.
The devices from this family comply with: The devices from this family comply with:
- Intel VR13 and VR13HC rev 1.3, IMVP8 rev 1.2 and IMVP9 rev 1.3 DC-DC - Intel VR13 and VR13HC rev 1.3, IMVP8 rev 1.2 and IMVP9 rev 1.3 DC-DC
converter specification. converter specification.
- Intel SVID rev 1.9. protocol. - Intel SVID rev 1.9. protocol.

View File

@ -244,23 +244,23 @@ disclosure of a particular issue, unless requested by a response team or by
an involved disclosed party. The current ambassadors list: an involved disclosed party. The current ambassadors list:
============= ======================================================== ============= ========================================================
ARM ARM Grant Likely <grant.likely@arm.com>
AMD Tom Lendacky <tom.lendacky@amd.com> AMD Tom Lendacky <tom.lendacky@amd.com>
IBM IBM
Intel Tony Luck <tony.luck@intel.com> Intel Tony Luck <tony.luck@intel.com>
Qualcomm Trilok Soni <tsoni@codeaurora.org> Qualcomm Trilok Soni <tsoni@codeaurora.org>
Microsoft Sasha Levin <sashal@kernel.org> Microsoft James Morris <jamorris@linux.microsoft.com>
VMware VMware
Xen Andrew Cooper <andrew.cooper3@citrix.com> Xen Andrew Cooper <andrew.cooper3@citrix.com>
Canonical Tyler Hicks <tyhicks@canonical.com> Canonical John Johansen <john.johansen@canonical.com>
Debian Ben Hutchings <ben@decadent.org.uk> Debian Ben Hutchings <ben@decadent.org.uk>
Oracle Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Oracle Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Red Hat Josh Poimboeuf <jpoimboe@redhat.com> Red Hat Josh Poimboeuf <jpoimboe@redhat.com>
SUSE Jiri Kosina <jkosina@suse.cz> SUSE Jiri Kosina <jkosina@suse.cz>
Amazon Peter Bowen <pzb@amzn.com> Amazon
Google Kees Cook <keescook@chromium.org> Google Kees Cook <keescook@chromium.org>
============= ======================================================== ============= ========================================================

View File

@ -183,7 +183,7 @@ CVE分配
VMware VMware
Xen Andrew Cooper <andrew.cooper3@citrix.com> Xen Andrew Cooper <andrew.cooper3@citrix.com>
Canonical Tyler Hicks <tyhicks@canonical.com> Canonical John Johansen <john.johansen@canonical.com>
Debian Ben Hutchings <ben@decadent.org.uk> Debian Ben Hutchings <ben@decadent.org.uk>
Oracle Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Oracle Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Red Hat Josh Poimboeuf <jpoimboe@redhat.com> Red Hat Josh Poimboeuf <jpoimboe@redhat.com>

View File

@ -1,9 +1,11 @@
==================
Guest halt polling Guest halt polling
================== ==================
The cpuidle_haltpoll driver, with the haltpoll governor, allows The cpuidle_haltpoll driver, with the haltpoll governor, allows
the guest vcpus to poll for a specified amount of time before the guest vcpus to poll for a specified amount of time before
halting. halting.
This provides the following benefits to host side polling: This provides the following benefits to host side polling:
1) The POLL flag is set while polling is performed, which allows 1) The POLL flag is set while polling is performed, which allows
@ -29,18 +31,21 @@ Module Parameters
The haltpoll governor has 5 tunable module parameters: The haltpoll governor has 5 tunable module parameters:
1) guest_halt_poll_ns: 1) guest_halt_poll_ns:
Maximum amount of time, in nanoseconds, that polling is Maximum amount of time, in nanoseconds, that polling is
performed before halting. performed before halting.
Default: 200000 Default: 200000
2) guest_halt_poll_shrink: 2) guest_halt_poll_shrink:
Division factor used to shrink per-cpu guest_halt_poll_ns when Division factor used to shrink per-cpu guest_halt_poll_ns when
wakeup event occurs after the global guest_halt_poll_ns. wakeup event occurs after the global guest_halt_poll_ns.
Default: 2 Default: 2
3) guest_halt_poll_grow: 3) guest_halt_poll_grow:
Multiplication factor used to grow per-cpu guest_halt_poll_ns Multiplication factor used to grow per-cpu guest_halt_poll_ns
when event occurs after per-cpu guest_halt_poll_ns when event occurs after per-cpu guest_halt_poll_ns
but before global guest_halt_poll_ns. but before global guest_halt_poll_ns.
@ -48,6 +53,7 @@ but before global guest_halt_poll_ns.
Default: 2 Default: 2
4) guest_halt_poll_grow_start: 4) guest_halt_poll_grow_start:
The per-cpu guest_halt_poll_ns eventually reaches zero The per-cpu guest_halt_poll_ns eventually reaches zero
in case of an idle system. This value sets the initial in case of an idle system. This value sets the initial
per-cpu guest_halt_poll_ns when growing. This can per-cpu guest_halt_poll_ns when growing. This can
@ -66,7 +72,7 @@ high once achieves global guest_halt_poll_ns value).
Default: Y Default: Y
The module parameters can be set from the sysfs files in: The module parameters can be set from the sysfs files in::
/sys/module/haltpoll/parameters/ /sys/module/haltpoll/parameters/
@ -74,5 +80,5 @@ Further Notes
============= =============
- Care should be taken when setting the guest_halt_poll_ns parameter as a - Care should be taken when setting the guest_halt_poll_ns parameter as a
large value has the potential to drive the cpu usage to 100% on a machine which large value has the potential to drive the cpu usage to 100% on a machine
would be almost entirely idle otherwise. which would be almost entirely idle otherwise.

View File

@ -8,7 +8,9 @@ Linux Virtualization Support
:maxdepth: 2 :maxdepth: 2
kvm/index kvm/index
uml/user_mode_linux
paravirt_ops paravirt_ops
guest-halt-polling
.. only:: html and subproject .. only:: html and subproject

View File

@ -1,4 +1,8 @@
* Internal ABI between the kernel and HYP .. SPDX-License-Identifier: GPL-2.0
=======================================
Internal ABI between the kernel and HYP
=======================================
This file documents the interaction between the Linux kernel and the This file documents the interaction between the Linux kernel and the
hypervisor layer when running Linux as a hypervisor (for example hypervisor layer when running Linux as a hypervisor (for example
@ -19,25 +23,31 @@ and only act on individual CPUs.
Unless specified otherwise, any built-in hypervisor must implement Unless specified otherwise, any built-in hypervisor must implement
these functions (see arch/arm{,64}/include/asm/virt.h): these functions (see arch/arm{,64}/include/asm/virt.h):
* r0/x0 = HVC_SET_VECTORS * ::
r1/x1 = vectors
r0/x0 = HVC_SET_VECTORS
r1/x1 = vectors
Set HVBAR/VBAR_EL2 to 'vectors' to enable a hypervisor. 'vectors' Set HVBAR/VBAR_EL2 to 'vectors' to enable a hypervisor. 'vectors'
must be a physical address, and respect the alignment requirements must be a physical address, and respect the alignment requirements
of the architecture. Only implemented by the initial stubs, not by of the architecture. Only implemented by the initial stubs, not by
Linux hypervisors. Linux hypervisors.
* r0/x0 = HVC_RESET_VECTORS * ::
r0/x0 = HVC_RESET_VECTORS
Turn HYP/EL2 MMU off, and reset HVBAR/VBAR_EL2 to the initial Turn HYP/EL2 MMU off, and reset HVBAR/VBAR_EL2 to the initial
stubs' exception vector value. This effectively disables an existing stubs' exception vector value. This effectively disables an existing
hypervisor. hypervisor.
* r0/x0 = HVC_SOFT_RESTART * ::
r1/x1 = restart address
x2 = x0's value when entering the next payload (arm64) r0/x0 = HVC_SOFT_RESTART
x3 = x1's value when entering the next payload (arm64) r1/x1 = restart address
x4 = x2's value when entering the next payload (arm64) x2 = x0's value when entering the next payload (arm64)
x3 = x1's value when entering the next payload (arm64)
x4 = x2's value when entering the next payload (arm64)
Mask all exceptions, disable the MMU, move the arguments into place Mask all exceptions, disable the MMU, move the arguments into place
(arm64 only), and jump to the restart address while at HYP/EL2. This (arm64 only), and jump to the restart address while at HYP/EL2. This

View File

@ -0,0 +1,12 @@
.. SPDX-License-Identifier: GPL-2.0
===
ARM
===
.. toctree::
:maxdepth: 2
hyp-abi
psci
pvtime

View File

@ -1,3 +1,9 @@
.. SPDX-License-Identifier: GPL-2.0
=========================================
Power State Coordination Interface (PSCI)
=========================================
KVM implements the PSCI (Power State Coordination Interface) KVM implements the PSCI (Power State Coordination Interface)
specification in order to provide services such as CPU on/off, reset specification in order to provide services such as CPU on/off, reset
and power-off to the guest. and power-off to the guest.
@ -30,32 +36,42 @@ The following register is defined:
- Affects the whole VM (even if the register view is per-vcpu) - Affects the whole VM (even if the register view is per-vcpu)
* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: * KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
Holds the state of the firmware support to mitigate CVE-2017-5715, as Holds the state of the firmware support to mitigate CVE-2017-5715, as
offered by KVM to the guest via a HVC call. The workaround is described offered by KVM to the guest via a HVC call. The workaround is described
under SMCCC_ARCH_WORKAROUND_1 in [1]. under SMCCC_ARCH_WORKAROUND_1 in [1]_.
Accepted values are: Accepted values are:
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL: KVM does not offer
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL:
KVM does not offer
firmware support for the workaround. The mitigation status for the firmware support for the workaround. The mitigation status for the
guest is unknown. guest is unknown.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL: The workaround HVC call is KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL:
The workaround HVC call is
available to the guest and required for the mitigation. available to the guest and required for the mitigation.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED: The workaround HVC call KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED:
The workaround HVC call
is available to the guest, but it is not needed on this VCPU. is available to the guest, but it is not needed on this VCPU.
* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: * KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
Holds the state of the firmware support to mitigate CVE-2018-3639, as Holds the state of the firmware support to mitigate CVE-2018-3639, as
offered by KVM to the guest via a HVC call. The workaround is described offered by KVM to the guest via a HVC call. The workaround is described
under SMCCC_ARCH_WORKAROUND_2 in [1]. under SMCCC_ARCH_WORKAROUND_2 in [1]_.
Accepted values are: Accepted values are:
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL: A workaround is not
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL:
A workaround is not
available. KVM does not offer firmware support for the workaround. available. KVM does not offer firmware support for the workaround.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN: The workaround state is KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN:
The workaround state is
unknown. KVM does not offer firmware support for the workaround. unknown. KVM does not offer firmware support for the workaround.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: The workaround is available, KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
The workaround is available,
and can be disabled by a vCPU. If and can be disabled by a vCPU. If
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED is set, it is active for KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED is set, it is active for
this vCPU. this vCPU.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: The workaround is KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED:
always active on this vCPU or it is not needed. The workaround is always active on this vCPU or it is not needed.
[1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf .. [1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf

View File

@ -1,3 +1,6 @@
.. SPDX-License-Identifier: GPL-2.0
===============================================
ARM Virtual Interrupt Translation Service (ITS) ARM Virtual Interrupt Translation Service (ITS)
=============================================== ===============================================
@ -12,22 +15,32 @@ There can be multiple ITS controllers per guest, each of them has to have
a separate, non-overlapping MMIO region. a separate, non-overlapping MMIO region.
Groups: Groups
KVM_DEV_ARM_VGIC_GRP_ADDR ======
KVM_DEV_ARM_VGIC_GRP_ADDR
-------------------------
Attributes: Attributes:
KVM_VGIC_ITS_ADDR_TYPE (rw, 64-bit) KVM_VGIC_ITS_ADDR_TYPE (rw, 64-bit)
Base address in the guest physical address space of the GICv3 ITS Base address in the guest physical address space of the GICv3 ITS
control register frame. control register frame.
This address needs to be 64K aligned and the region covers 128K. This address needs to be 64K aligned and the region covers 128K.
Errors: Errors:
-E2BIG: Address outside of addressable IPA range
-EINVAL: Incorrectly aligned address ======= =================================================
-EEXIST: Address already configured -E2BIG Address outside of addressable IPA range
-EFAULT: Invalid user pointer for attr->addr. -EINVAL Incorrectly aligned address
-ENODEV: Incorrect attribute or the ITS is not supported. -EEXIST Address already configured
-EFAULT Invalid user pointer for attr->addr.
-ENODEV Incorrect attribute or the ITS is not supported.
======= =================================================
KVM_DEV_ARM_VGIC_GRP_CTRL KVM_DEV_ARM_VGIC_GRP_CTRL
-------------------------
Attributes: Attributes:
KVM_DEV_ARM_VGIC_CTRL_INIT KVM_DEV_ARM_VGIC_CTRL_INIT
request the initialization of the ITS, no additional parameter in request the initialization of the ITS, no additional parameter in
@ -58,16 +71,21 @@ Groups:
"ITS Restore Sequence". "ITS Restore Sequence".
Errors: Errors:
-ENXIO: ITS not properly configured as required prior to setting
this attribute
-ENOMEM: Memory shortage when allocating ITS internal data
-EINVAL: Inconsistent restored data
-EFAULT: Invalid guest ram access
-EBUSY: One or more VCPUS are running
-EACCES: The virtual ITS is backed by a physical GICv4 ITS, and the
state is not available
KVM_DEV_ARM_VGIC_GRP_ITS_REGS ======= ==========================================================
-ENXIO ITS not properly configured as required prior to setting
this attribute
-ENOMEM Memory shortage when allocating ITS internal data
-EINVAL Inconsistent restored data
-EFAULT Invalid guest ram access
-EBUSY One or more VCPUS are running
-EACCES The virtual ITS is backed by a physical GICv4 ITS, and the
state is not available
======= ==========================================================
KVM_DEV_ARM_VGIC_GRP_ITS_REGS
-----------------------------
Attributes: Attributes:
The attr field of kvm_device_attr encodes the offset of the The attr field of kvm_device_attr encodes the offset of the
ITS register, relative to the ITS control frame base address ITS register, relative to the ITS control frame base address
@ -78,6 +96,7 @@ Groups:
be accessed with full length. be accessed with full length.
Writes to read-only registers are ignored by the kernel except for: Writes to read-only registers are ignored by the kernel except for:
- GITS_CREADR. It must be restored otherwise commands in the queue - GITS_CREADR. It must be restored otherwise commands in the queue
will be re-executed after restoring CWRITER. GITS_CREADR must be will be re-executed after restoring CWRITER. GITS_CREADR must be
restored before restoring the GITS_CTLR which is likely to enable the restored before restoring the GITS_CTLR which is likely to enable the
@ -91,30 +110,36 @@ Groups:
For other registers, getting or setting a register has the same For other registers, getting or setting a register has the same
effect as reading/writing the register on real hardware. effect as reading/writing the register on real hardware.
Errors:
-ENXIO: Offset does not correspond to any supported register
-EFAULT: Invalid user pointer for attr->addr
-EINVAL: Offset is not 64-bit aligned
-EBUSY: one or more VCPUS are running
ITS Restore Sequence: Errors:
-------------------------
======= ====================================================
-ENXIO Offset does not correspond to any supported register
-EFAULT Invalid user pointer for attr->addr
-EINVAL Offset is not 64-bit aligned
-EBUSY one or more VCPUS are running
======= ====================================================
ITS Restore Sequence:
---------------------
The following ordering must be followed when restoring the GIC and the ITS: The following ordering must be followed when restoring the GIC and the ITS:
a) restore all guest memory and create vcpus a) restore all guest memory and create vcpus
b) restore all redistributors b) restore all redistributors
c) provide the ITS base address c) provide the ITS base address
(KVM_DEV_ARM_VGIC_GRP_ADDR) (KVM_DEV_ARM_VGIC_GRP_ADDR)
d) restore the ITS in the following order: d) restore the ITS in the following order:
1. Restore GITS_CBASER
2. Restore all other GITS_ registers, except GITS_CTLR! 1. Restore GITS_CBASER
3. Load the ITS table data (KVM_DEV_ARM_ITS_RESTORE_TABLES) 2. Restore all other ``GITS_`` registers, except GITS_CTLR!
4. Restore GITS_CTLR 3. Load the ITS table data (KVM_DEV_ARM_ITS_RESTORE_TABLES)
4. Restore GITS_CTLR
Then vcpus can be started. Then vcpus can be started.
ITS Table ABI REV0: ITS Table ABI REV0:
------------------- -------------------
Revision 0 of the ABI only supports the features of a virtual GICv3, and does Revision 0 of the ABI only supports the features of a virtual GICv3, and does
not support a virtual GICv4 with support for direct injection of virtual not support a virtual GICv4 with support for direct injection of virtual
@ -125,12 +150,13 @@ Then vcpus can be started.
entries in the collection are listed in no particular order. entries in the collection are listed in no particular order.
All entries are 8 bytes. All entries are 8 bytes.
Device Table Entry (DTE): Device Table Entry (DTE)::
bits: | 63| 62 ... 49 | 48 ... 5 | 4 ... 0 | bits: | 63| 62 ... 49 | 48 ... 5 | 4 ... 0 |
values: | V | next | ITT_addr | Size | values: | V | next | ITT_addr | Size |
where:
where:
- V indicates whether the entry is valid. If not, other fields - V indicates whether the entry is valid. If not, other fields
are not meaningful. are not meaningful.
- next: equals 0 if this entry is the last one; otherwise it - next: equals 0 if this entry is the last one; otherwise it
@ -140,32 +166,34 @@ Then vcpus can be started.
- Size specifies the supported number of bits for the EventID, - Size specifies the supported number of bits for the EventID,
minus one minus one
Collection Table Entry (CTE): Collection Table Entry (CTE)::
bits: | 63| 62 .. 52 | 51 ... 16 | 15 ... 0 | bits: | 63| 62 .. 52 | 51 ... 16 | 15 ... 0 |
values: | V | RES0 | RDBase | ICID | values: | V | RES0 | RDBase | ICID |
where: where:
- V indicates whether the entry is valid. If not, other fields are - V indicates whether the entry is valid. If not, other fields are
not meaningful. not meaningful.
- RES0: reserved field with Should-Be-Zero-or-Preserved behavior. - RES0: reserved field with Should-Be-Zero-or-Preserved behavior.
- RDBase is the PE number (GICR_TYPER.Processor_Number semantic), - RDBase is the PE number (GICR_TYPER.Processor_Number semantic),
- ICID is the collection ID - ICID is the collection ID
Interrupt Translation Entry (ITE): Interrupt Translation Entry (ITE)::
bits: | 63 ... 48 | 47 ... 16 | 15 ... 0 | bits: | 63 ... 48 | 47 ... 16 | 15 ... 0 |
values: | next | pINTID | ICID | values: | next | pINTID | ICID |
where: where:
- next: equals 0 if this entry is the last one; otherwise it corresponds - next: equals 0 if this entry is the last one; otherwise it corresponds
to the EventID offset to the next ITE capped by 2^16 -1. to the EventID offset to the next ITE capped by 2^16 -1.
- pINTID is the physical LPI ID; if zero, it means the entry is not valid - pINTID is the physical LPI ID; if zero, it means the entry is not valid
and other fields are not meaningful. and other fields are not meaningful.
- ICID is the collection ID - ICID is the collection ID
ITS Reset State: ITS Reset State:
---------------- ----------------
RESET returns the ITS to the same state that it was when first created and RESET returns the ITS to the same state that it was when first created and
initialized. When the RESET command returns, the following things are initialized. When the RESET command returns, the following things are

View File

@ -1,9 +1,12 @@
.. SPDX-License-Identifier: GPL-2.0
==============================================================
ARM Virtual Generic Interrupt Controller v3 and later (VGICv3) ARM Virtual Generic Interrupt Controller v3 and later (VGICv3)
============================================================== ==============================================================
Device types supported: Device types supported:
KVM_DEV_TYPE_ARM_VGIC_V3 ARM Generic Interrupt Controller v3.0 - KVM_DEV_TYPE_ARM_VGIC_V3 ARM Generic Interrupt Controller v3.0
Only one VGIC instance may be instantiated through this API. The created VGIC Only one VGIC instance may be instantiated through this API. The created VGIC
will act as the VM interrupt controller, requiring emulated user-space devices will act as the VM interrupt controller, requiring emulated user-space devices
@ -15,7 +18,8 @@ Creating a guest GICv3 device requires a host GICv3 as well.
Groups: Groups:
KVM_DEV_ARM_VGIC_GRP_ADDR KVM_DEV_ARM_VGIC_GRP_ADDR
Attributes: Attributes:
KVM_VGIC_V3_ADDR_TYPE_DIST (rw, 64-bit) KVM_VGIC_V3_ADDR_TYPE_DIST (rw, 64-bit)
Base address in the guest physical address space of the GICv3 distributor Base address in the guest physical address space of the GICv3 distributor
register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V3. register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
@ -29,21 +33,25 @@ Groups:
This address needs to be 64K aligned. This address needs to be 64K aligned.
KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION (rw, 64-bit) KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION (rw, 64-bit)
The attribute data pointed to by kvm_device_attr.addr is a __u64 value: The attribute data pointed to by kvm_device_attr.addr is a __u64 value::
bits: | 63 .... 52 | 51 .... 16 | 15 - 12 |11 - 0
values: | count | base | flags | index bits: | 63 .... 52 | 51 .... 16 | 15 - 12 |11 - 0
values: | count | base | flags | index
- index encodes the unique redistributor region index - index encodes the unique redistributor region index
- flags: reserved for future use, currently 0 - flags: reserved for future use, currently 0
- base field encodes bits [51:16] of the guest physical base address - base field encodes bits [51:16] of the guest physical base address
of the first redistributor in the region. of the first redistributor in the region.
- count encodes the number of redistributors in the region. Must be - count encodes the number of redistributors in the region. Must be
greater than 0. greater than 0.
There are two 64K pages for each redistributor in the region and There are two 64K pages for each redistributor in the region and
redistributors are laid out contiguously within the region. Regions redistributors are laid out contiguously within the region. Regions
are filled with redistributors in the index order. The sum of all are filled with redistributors in the index order. The sum of all
region count fields must be greater than or equal to the number of region count fields must be greater than or equal to the number of
VCPUs. Redistributor regions must be registered in the incremental VCPUs. Redistributor regions must be registered in the incremental
index order, starting from index 0. index order, starting from index 0.
The characteristics of a specific redistributor region can be read The characteristics of a specific redistributor region can be read
by presetting the index field in the attr data. by presetting the index field in the attr data.
Only valid for KVM_DEV_TYPE_ARM_VGIC_V3. Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
@ -52,23 +60,27 @@ Groups:
KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attributes. KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attributes.
Errors: Errors:
-E2BIG: Address outside of addressable IPA range
-EINVAL: Incorrectly aligned address, bad redistributor region ======= =============================================================
-E2BIG Address outside of addressable IPA range
-EINVAL Incorrectly aligned address, bad redistributor region
count/index, mixed redistributor region attribute usage count/index, mixed redistributor region attribute usage
-EEXIST: Address already configured -EEXIST Address already configured
-ENOENT: Attempt to read the characteristics of a non-existent -ENOENT Attempt to read the characteristics of a non-existent
redistributor region redistributor region
-ENXIO: The group or attribute is unknown/unsupported for this device -ENXIO The group or attribute is unknown/unsupported for this device
or hardware support is missing. or hardware support is missing.
-EFAULT: Invalid user pointer for attr->addr. -EFAULT Invalid user pointer for attr->addr.
======= =============================================================
KVM_DEV_ARM_VGIC_GRP_DIST_REGS KVM_DEV_ARM_VGIC_GRP_DIST_REGS, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
KVM_DEV_ARM_VGIC_GRP_REDIST_REGS Attributes:
Attributes:
The attr field of kvm_device_attr encodes two values: The attr field of kvm_device_attr encodes two values::
bits: | 63 .... 32 | 31 .... 0 |
values: | mpidr | offset | bits: | 63 .... 32 | 31 .... 0 |
values: | mpidr | offset |
All distributor regs are (rw, 32-bit) and kvm_device_attr.addr points to a All distributor regs are (rw, 32-bit) and kvm_device_attr.addr points to a
__u32 value. 64-bit registers must be accessed by separately accessing the __u32 value. 64-bit registers must be accessed by separately accessing the
@ -93,7 +105,8 @@ Groups:
redistributor is accessed. The mpidr is ignored for the distributor. redistributor is accessed. The mpidr is ignored for the distributor.
The mpidr encoding is based on the affinity information in the The mpidr encoding is based on the affinity information in the
architecture defined MPIDR, and the field is encoded as follows: architecture defined MPIDR, and the field is encoded as follows::
| 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 | | 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
| Aff3 | Aff2 | Aff1 | Aff0 | | Aff3 | Aff2 | Aff1 | Aff0 |
@ -148,24 +161,30 @@ Groups:
ignored. ignored.
Errors: Errors:
-ENXIO: Getting or setting this register is not yet supported
-EBUSY: One or more VCPUs are running ====== =====================================================
-ENXIO Getting or setting this register is not yet supported
-EBUSY One or more VCPUs are running
====== =====================================================
KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS
Attributes: Attributes:
The attr field of kvm_device_attr encodes two values:
bits: | 63 .... 32 | 31 .... 16 | 15 .... 0 | The attr field of kvm_device_attr encodes two values::
values: | mpidr | RES | instr |
bits: | 63 .... 32 | 31 .... 16 | 15 .... 0 |
values: | mpidr | RES | instr |
The mpidr field encodes the CPU ID based on the affinity information in the The mpidr field encodes the CPU ID based on the affinity information in the
architecture defined MPIDR, and the field is encoded as follows: architecture defined MPIDR, and the field is encoded as follows::
| 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 | | 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
| Aff3 | Aff2 | Aff1 | Aff0 | | Aff3 | Aff2 | Aff1 | Aff0 |
The instr field encodes the system register to access based on the fields The instr field encodes the system register to access based on the fields
defined in the A64 instruction set encoding for system register access defined in the A64 instruction set encoding for system register access
(RES means the bits are reserved for future use and should be zero): (RES means the bits are reserved for future use and should be zero)::
| 15 ... 14 | 13 ... 11 | 10 ... 7 | 6 ... 3 | 2 ... 0 | | 15 ... 14 | 13 ... 11 | 10 ... 7 | 6 ... 3 | 2 ... 0 |
| Op 0 | Op1 | CRn | CRm | Op2 | | Op 0 | Op1 | CRn | CRm | Op2 |
@ -178,26 +197,35 @@ Groups:
CPU interface registers access is not implemented for AArch32 mode. CPU interface registers access is not implemented for AArch32 mode.
Error -ENXIO is returned when accessed in AArch32 mode. Error -ENXIO is returned when accessed in AArch32 mode.
Errors: Errors:
-ENXIO: Getting or setting this register is not yet supported
-EBUSY: VCPU is running ======= =====================================================
-EINVAL: Invalid mpidr or register value supplied -ENXIO Getting or setting this register is not yet supported
-EBUSY VCPU is running
-EINVAL Invalid mpidr or register value supplied
======= =====================================================
KVM_DEV_ARM_VGIC_GRP_NR_IRQS KVM_DEV_ARM_VGIC_GRP_NR_IRQS
Attributes: Attributes:
A value describing the number of interrupts (SGI, PPI and SPI) for A value describing the number of interrupts (SGI, PPI and SPI) for
this GIC instance, ranging from 64 to 1024, in increments of 32. this GIC instance, ranging from 64 to 1024, in increments of 32.
kvm_device_attr.addr points to a __u32 value. kvm_device_attr.addr points to a __u32 value.
Errors: Errors:
-EINVAL: Value set is out of the expected range
-EBUSY: Value has already been set. ======= ======================================
-EINVAL Value set is out of the expected range
-EBUSY Value has already been set.
======= ======================================
KVM_DEV_ARM_VGIC_GRP_CTRL KVM_DEV_ARM_VGIC_GRP_CTRL
Attributes: Attributes:
KVM_DEV_ARM_VGIC_CTRL_INIT KVM_DEV_ARM_VGIC_CTRL_INIT
request the initialization of the VGIC, no additional parameter in request the initialization of the VGIC, no additional parameter in
kvm_device_attr.addr. kvm_device_attr.addr.
@ -205,20 +233,26 @@ Groups:
save all LPI pending bits into guest RAM pending tables. save all LPI pending bits into guest RAM pending tables.
The first kB of the pending table is not altered by this operation. The first kB of the pending table is not altered by this operation.
Errors: Errors:
-ENXIO: VGIC not properly configured as required prior to calling
this attribute ======= ========================================================
-ENODEV: no online VCPU -ENXIO VGIC not properly configured as required prior to calling
-ENOMEM: memory shortage when allocating vgic internal data this attribute
-EFAULT: Invalid guest ram access -ENODEV no online VCPU
-EBUSY: One or more VCPUS are running -ENOMEM memory shortage when allocating vgic internal data
-EFAULT Invalid guest ram access
-EBUSY One or more VCPUS are running
======= ========================================================
KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO
Attributes: Attributes:
The attr field of kvm_device_attr encodes the following values:
bits: | 63 .... 32 | 31 .... 10 | 9 .... 0 | The attr field of kvm_device_attr encodes the following values::
values: | mpidr | info | vINTID |
bits: | 63 .... 32 | 31 .... 10 | 9 .... 0 |
values: | mpidr | info | vINTID |
The vINTID specifies which set of IRQs is reported on. The vINTID specifies which set of IRQs is reported on.
@ -228,6 +262,7 @@ Groups:
VGIC_LEVEL_INFO_LINE_LEVEL: VGIC_LEVEL_INFO_LINE_LEVEL:
Get/Set the input level of the IRQ line for a set of 32 contiguously Get/Set the input level of the IRQ line for a set of 32 contiguously
numbered interrupts. numbered interrupts.
vINTID must be a multiple of 32. vINTID must be a multiple of 32.
kvm_device_attr.addr points to a __u32 value which will contain a kvm_device_attr.addr points to a __u32 value which will contain a
@ -243,9 +278,14 @@ Groups:
reported with the same value regardless of the mpidr specified. reported with the same value regardless of the mpidr specified.
The mpidr field encodes the CPU ID based on the affinity information in the The mpidr field encodes the CPU ID based on the affinity information in the
architecture defined MPIDR, and the field is encoded as follows: architecture defined MPIDR, and the field is encoded as follows::
| 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 | | 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
| Aff3 | Aff2 | Aff1 | Aff0 | | Aff3 | Aff2 | Aff1 | Aff0 |
Errors: Errors:
-EINVAL: vINTID is not multiple of 32 or
info field is not VGIC_LEVEL_INFO_LINE_LEVEL ======= =============================================
-EINVAL vINTID is not multiple of 32 or info field is
not VGIC_LEVEL_INFO_LINE_LEVEL
======= =============================================

View File

@ -1,8 +1,12 @@
.. SPDX-License-Identifier: GPL-2.0
==================================================
ARM Virtual Generic Interrupt Controller v2 (VGIC) ARM Virtual Generic Interrupt Controller v2 (VGIC)
================================================== ==================================================
Device types supported: Device types supported:
KVM_DEV_TYPE_ARM_VGIC_V2 ARM Generic Interrupt Controller v2.0
- KVM_DEV_TYPE_ARM_VGIC_V2 ARM Generic Interrupt Controller v2.0
Only one VGIC instance may be instantiated through either this API or the Only one VGIC instance may be instantiated through either this API or the
legacy KVM_CREATE_IRQCHIP API. The created VGIC will act as the VM interrupt legacy KVM_CREATE_IRQCHIP API. The created VGIC will act as the VM interrupt
@ -17,7 +21,8 @@ create both a GICv3 and GICv2 device on the same VM.
Groups: Groups:
KVM_DEV_ARM_VGIC_GRP_ADDR KVM_DEV_ARM_VGIC_GRP_ADDR
Attributes: Attributes:
KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit) KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit)
Base address in the guest physical address space of the GIC distributor Base address in the guest physical address space of the GIC distributor
register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2. register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
@ -27,19 +32,25 @@ Groups:
Base address in the guest physical address space of the GIC virtual cpu Base address in the guest physical address space of the GIC virtual cpu
interface register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2. interface register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
This address needs to be 4K aligned and the region covers 4 KByte. This address needs to be 4K aligned and the region covers 4 KByte.
Errors: Errors:
-E2BIG: Address outside of addressable IPA range
-EINVAL: Incorrectly aligned address ======= =============================================================
-EEXIST: Address already configured -E2BIG Address outside of addressable IPA range
-ENXIO: The group or attribute is unknown/unsupported for this device -EINVAL Incorrectly aligned address
-EEXIST Address already configured
-ENXIO The group or attribute is unknown/unsupported for this device
or hardware support is missing. or hardware support is missing.
-EFAULT: Invalid user pointer for attr->addr. -EFAULT Invalid user pointer for attr->addr.
======= =============================================================
KVM_DEV_ARM_VGIC_GRP_DIST_REGS KVM_DEV_ARM_VGIC_GRP_DIST_REGS
Attributes: Attributes:
The attr field of kvm_device_attr encodes two values:
bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 | The attr field of kvm_device_attr encodes two values::
values: | reserved | vcpu_index | offset |
bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
values: | reserved | vcpu_index | offset |
All distributor regs are (rw, 32-bit) All distributor regs are (rw, 32-bit)
@ -58,16 +69,22 @@ Groups:
KVM_DEV_ARM_VGIC_GRP_DIST_REGS and KVM_DEV_ARM_VGIC_GRP_CPU_REGS) to ensure KVM_DEV_ARM_VGIC_GRP_DIST_REGS and KVM_DEV_ARM_VGIC_GRP_CPU_REGS) to ensure
the expected behavior. Unless GICD_IIDR has been set from userspace, writes the expected behavior. Unless GICD_IIDR has been set from userspace, writes
to the interrupt group registers (GICD_IGROUPR) are ignored. to the interrupt group registers (GICD_IGROUPR) are ignored.
Errors: Errors:
-ENXIO: Getting or setting this register is not yet supported
-EBUSY: One or more VCPUs are running ======= =====================================================
-EINVAL: Invalid vcpu_index supplied -ENXIO Getting or setting this register is not yet supported
-EBUSY One or more VCPUs are running
-EINVAL Invalid vcpu_index supplied
======= =====================================================
KVM_DEV_ARM_VGIC_GRP_CPU_REGS KVM_DEV_ARM_VGIC_GRP_CPU_REGS
Attributes: Attributes:
The attr field of kvm_device_attr encodes two values:
bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 | The attr field of kvm_device_attr encodes two values::
values: | reserved | vcpu_index | offset |
bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
values: | reserved | vcpu_index | offset |
All CPU interface regs are (rw, 32-bit) All CPU interface regs are (rw, 32-bit)
@ -101,27 +118,39 @@ Groups:
value left by 3 places to obtain the actual priority mask level. value left by 3 places to obtain the actual priority mask level.
Errors: Errors:
-ENXIO: Getting or setting this register is not yet supported
-EBUSY: One or more VCPUs are running ======= =====================================================
-EINVAL: Invalid vcpu_index supplied -ENXIO Getting or setting this register is not yet supported
-EBUSY One or more VCPUs are running
-EINVAL Invalid vcpu_index supplied
======= =====================================================
KVM_DEV_ARM_VGIC_GRP_NR_IRQS KVM_DEV_ARM_VGIC_GRP_NR_IRQS
Attributes: Attributes:
A value describing the number of interrupts (SGI, PPI and SPI) for A value describing the number of interrupts (SGI, PPI and SPI) for
this GIC instance, ranging from 64 to 1024, in increments of 32. this GIC instance, ranging from 64 to 1024, in increments of 32.
Errors: Errors:
-EINVAL: Value set is out of the expected range
-EBUSY: Value has already been set, or GIC has already been initialized ======= =============================================================
with default values. -EINVAL Value set is out of the expected range
-EBUSY Value has already been set, or GIC has already been
initialized with default values.
======= =============================================================
KVM_DEV_ARM_VGIC_GRP_CTRL KVM_DEV_ARM_VGIC_GRP_CTRL
Attributes: Attributes:
KVM_DEV_ARM_VGIC_CTRL_INIT KVM_DEV_ARM_VGIC_CTRL_INIT
request the initialization of the VGIC or ITS, no additional parameter request the initialization of the VGIC or ITS, no additional parameter
in kvm_device_attr.addr. in kvm_device_attr.addr.
Errors: Errors:
-ENXIO: VGIC not properly configured as required prior to calling
this attribute ======= =========================================================
-ENODEV: no online VCPU -ENXIO VGIC not properly configured as required prior to calling
-ENOMEM: memory shortage when allocating vgic internal data this attribute
-ENODEV no online VCPU
-ENOMEM memory shortage when allocating vgic internal data
======= =========================================================

View File

@ -0,0 +1,19 @@
.. SPDX-License-Identifier: GPL-2.0
=======
Devices
=======
.. toctree::
:maxdepth: 2
arm-vgic-its
arm-vgic
arm-vgic-v3
mpic
s390_flic
vcpu
vfio
vm
xics
xive

View File

@ -1,9 +1,13 @@
.. SPDX-License-Identifier: GPL-2.0
=========================
MPIC interrupt controller MPIC interrupt controller
========================= =========================
Device types supported: Device types supported:
KVM_DEV_TYPE_FSL_MPIC_20 Freescale MPIC v2.0
KVM_DEV_TYPE_FSL_MPIC_42 Freescale MPIC v4.2 - KVM_DEV_TYPE_FSL_MPIC_20 Freescale MPIC v2.0
- KVM_DEV_TYPE_FSL_MPIC_42 Freescale MPIC v4.2
Only one MPIC instance, of any type, may be instantiated. The created Only one MPIC instance, of any type, may be instantiated. The created
MPIC will act as the system interrupt controller, connecting to each MPIC will act as the system interrupt controller, connecting to each
@ -11,7 +15,8 @@ vcpu's interrupt inputs.
Groups: Groups:
KVM_DEV_MPIC_GRP_MISC KVM_DEV_MPIC_GRP_MISC
Attributes: Attributes:
KVM_DEV_MPIC_BASE_ADDR (rw, 64-bit) KVM_DEV_MPIC_BASE_ADDR (rw, 64-bit)
Base address of the 256 KiB MPIC register space. Must be Base address of the 256 KiB MPIC register space. Must be
naturally aligned. A value of zero disables the mapping. naturally aligned. A value of zero disables the mapping.

View File

@ -1,3 +1,6 @@
.. SPDX-License-Identifier: GPL-2.0
====================================
FLIC (floating interrupt controller) FLIC (floating interrupt controller)
==================================== ====================================
@ -31,8 +34,10 @@ Groups:
Copies all floating interrupts into a buffer provided by userspace. Copies all floating interrupts into a buffer provided by userspace.
When the buffer is too small it returns -ENOMEM, which is the indication When the buffer is too small it returns -ENOMEM, which is the indication
for userspace to try again with a bigger buffer. for userspace to try again with a bigger buffer.
-ENOBUFS is returned when the allocation of a kernelspace buffer has -ENOBUFS is returned when the allocation of a kernelspace buffer has
failed. failed.
-EFAULT is returned when copying data to userspace failed. -EFAULT is returned when copying data to userspace failed.
All interrupts remain pending, i.e. are not deleted from the list of All interrupts remain pending, i.e. are not deleted from the list of
currently pending interrupts. currently pending interrupts.
@ -60,38 +65,41 @@ Groups:
KVM_DEV_FLIC_ADAPTER_REGISTER KVM_DEV_FLIC_ADAPTER_REGISTER
Register an I/O adapter interrupt source. Takes a kvm_s390_io_adapter Register an I/O adapter interrupt source. Takes a kvm_s390_io_adapter
describing the adapter to register: describing the adapter to register::
struct kvm_s390_io_adapter { struct kvm_s390_io_adapter {
__u32 id; __u32 id;
__u8 isc; __u8 isc;
__u8 maskable; __u8 maskable;
__u8 swap; __u8 swap;
__u8 flags; __u8 flags;
}; };
id contains the unique id for the adapter, isc the I/O interruption subclass id contains the unique id for the adapter, isc the I/O interruption subclass
to use, maskable whether this adapter may be masked (interrupts turned off), to use, maskable whether this adapter may be masked (interrupts turned off),
swap whether the indicators need to be byte swapped, and flags contains swap whether the indicators need to be byte swapped, and flags contains
further characteristics of the adapter. further characteristics of the adapter.
Currently defined values for 'flags' are: Currently defined values for 'flags' are:
- KVM_S390_ADAPTER_SUPPRESSIBLE: adapter is subject to AIS - KVM_S390_ADAPTER_SUPPRESSIBLE: adapter is subject to AIS
(adapter-interrupt-suppression) facility. This flag only has an effect if (adapter-interrupt-suppression) facility. This flag only has an effect if
the AIS capability is enabled. the AIS capability is enabled.
Unknown flag values are ignored. Unknown flag values are ignored.
KVM_DEV_FLIC_ADAPTER_MODIFY KVM_DEV_FLIC_ADAPTER_MODIFY
Modifies attributes of an existing I/O adapter interrupt source. Takes Modifies attributes of an existing I/O adapter interrupt source. Takes
a kvm_s390_io_adapter_req specifying the adapter and the operation: a kvm_s390_io_adapter_req specifying the adapter and the operation::
struct kvm_s390_io_adapter_req { struct kvm_s390_io_adapter_req {
__u32 id; __u32 id;
__u8 type; __u8 type;
__u8 mask; __u8 mask;
__u16 pad0; __u16 pad0;
__u64 addr; __u64 addr;
}; };
id specifies the adapter and type the operation. The supported operations id specifies the adapter and type the operation. The supported operations
are: are:
@ -103,8 +111,9 @@ struct kvm_s390_io_adapter_req {
perform a gmap translation for the guest address provided in addr, perform a gmap translation for the guest address provided in addr,
pin a userspace page for the translated address and add it to the pin a userspace page for the translated address and add it to the
list of mappings list of mappings
Note: A new mapping will be created unconditionally; therefore,
the calling code should avoid making duplicate mappings. .. note:: A new mapping will be created unconditionally; therefore,
the calling code should avoid making duplicate mappings.
KVM_S390_IO_ADAPTER_UNMAP KVM_S390_IO_ADAPTER_UNMAP
release a userspace page for the translated address specified in addr release a userspace page for the translated address specified in addr
@ -112,16 +121,17 @@ struct kvm_s390_io_adapter_req {
KVM_DEV_FLIC_AISM KVM_DEV_FLIC_AISM
modify the adapter-interruption-suppression mode for a given isc if the modify the adapter-interruption-suppression mode for a given isc if the
AIS capability is enabled. Takes a kvm_s390_ais_req describing: AIS capability is enabled. Takes a kvm_s390_ais_req describing::
struct kvm_s390_ais_req { struct kvm_s390_ais_req {
__u8 isc; __u8 isc;
__u16 mode; __u16 mode;
}; };
isc contains the target I/O interruption subclass, mode the target isc contains the target I/O interruption subclass, mode the target
adapter-interruption-suppression mode. The following modes are adapter-interruption-suppression mode. The following modes are
currently supported: currently supported:
- KVM_S390_AIS_MODE_ALL: ALL-Interruptions Mode, i.e. airq injection - KVM_S390_AIS_MODE_ALL: ALL-Interruptions Mode, i.e. airq injection
is always allowed; is always allowed;
- KVM_S390_AIS_MODE_SINGLE: SINGLE-Interruption Mode, i.e. airq - KVM_S390_AIS_MODE_SINGLE: SINGLE-Interruption Mode, i.e. airq
@ -139,12 +149,12 @@ struct kvm_s390_ais_req {
KVM_DEV_FLIC_AISM_ALL KVM_DEV_FLIC_AISM_ALL
Gets or sets the adapter-interruption-suppression mode for all ISCs. Takes Gets or sets the adapter-interruption-suppression mode for all ISCs. Takes
a kvm_s390_ais_all describing: a kvm_s390_ais_all describing::
struct kvm_s390_ais_all { struct kvm_s390_ais_all {
__u8 simm; /* Single-Interruption-Mode mask */ __u8 simm; /* Single-Interruption-Mode mask */
__u8 nimm; /* No-Interruption-Mode mask */ __u8 nimm; /* No-Interruption-Mode mask */
}; };
simm contains Single-Interruption-Mode mask for all ISCs, nimm contains simm contains Single-Interruption-Mode mask for all ISCs, nimm contains
No-Interruption-Mode mask for all ISCs. Each bit in simm and nimm corresponds No-Interruption-Mode mask for all ISCs. Each bit in simm and nimm corresponds
@ -159,5 +169,5 @@ ENXIO, as specified in the API documentation). It is not possible to conclude
that a FLIC operation is unavailable based on the error code resulting from a that a FLIC operation is unavailable based on the error code resulting from a
usage attempt. usage attempt.
Note: The KVM_DEV_FLIC_CLEAR_IO_IRQ ioctl will return EINVAL in case a zero .. note:: The KVM_DEV_FLIC_CLEAR_IO_IRQ ioctl will return EINVAL in case a
schid is specified. zero schid is specified.

View File

@ -0,0 +1,114 @@
.. SPDX-License-Identifier: GPL-2.0
======================
Generic vcpu interface
======================
The virtual cpu "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same struct
kvm_device_attr as other devices, but targets VCPU-wide settings and controls.
The groups and attributes per virtual cpu, if any, are architecture specific.
1. GROUP: KVM_ARM_VCPU_PMU_V3_CTRL
==================================
:Architectures: ARM64
1.1. ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_IRQ
---------------------------------------
:Parameters: in kvm_device_attr.addr the address for PMU overflow interrupt is a
pointer to an int
Returns:
======= ========================================================
-EBUSY The PMU overflow interrupt is already set
-ENXIO The overflow interrupt not set when attempting to get it
-ENODEV PMUv3 not supported
-EINVAL Invalid PMU overflow interrupt number supplied or
trying to set the IRQ number without using an in-kernel
irqchip.
======= ========================================================
A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt
number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt
type must be the same for each vcpu. As a PPI, the interrupt number is the same for
all vcpus, while as an SPI it must be a separate number per vcpu.
1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT
---------------------------------------
:Parameters: no additional parameter in kvm_device_attr.addr
Returns:
======= ======================================================
-ENODEV PMUv3 not supported or GIC not initialized
-ENXIO PMUv3 not properly configured or in-kernel irqchip not
configured as required prior to calling this attribute
-EBUSY PMUv3 already initialized
======= ======================================================
Request the initialization of the PMUv3. If using the PMUv3 with an in-kernel
virtual GIC implementation, this must be done after initializing the in-kernel
irqchip.
2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
=================================
:Architectures: ARM, ARM64
2.1. ATTRIBUTES: KVM_ARM_VCPU_TIMER_IRQ_VTIMER, KVM_ARM_VCPU_TIMER_IRQ_PTIMER
-----------------------------------------------------------------------------
:Parameters: in kvm_device_attr.addr the address for the timer interrupt is a
pointer to an int
Returns:
======= ==================================
-EINVAL Invalid timer interrupt number
-EBUSY One or more VCPUs have already run
======= ==================================
A value describing the architected timer interrupt number when connected to an
in-kernel virtual GIC. These must be a PPI (16 <= intid < 32). Setting the
attribute overrides the default values (see below).
============================= ==========================================
KVM_ARM_VCPU_TIMER_IRQ_VTIMER The EL1 virtual timer intid (default: 27)
KVM_ARM_VCPU_TIMER_IRQ_PTIMER The EL1 physical timer intid (default: 30)
============================= ==========================================
Setting the same PPI for different timers will prevent the VCPUs from running.
Setting the interrupt number on a VCPU configures all VCPUs created at that
time to use the number provided for a given timer, overwriting any previously
configured values on other VCPUs. Userspace should configure the interrupt
numbers on at least one VCPU after creating all VCPUs and before running any
VCPUs.
3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL
==================================
:Architectures: ARM64
3.1 ATTRIBUTE: KVM_ARM_VCPU_PVTIME_IPA
--------------------------------------
:Parameters: 64-bit base address
Returns:
======= ======================================
-ENXIO Stolen time not implemented
-EEXIST Base address already set for this VCPU
-EINVAL Base address not 64 byte aligned
======= ======================================
Specifies the base address of the stolen time structure for this VCPU. The
base address must be 64 byte aligned and exist within a valid guest memory
region. See Documentation/virt/kvm/arm/pvtime.txt for more information
including the layout of the stolen time structure.

View File

@ -1,76 +0,0 @@
Generic vcpu interface
====================================
The virtual cpu "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same struct
kvm_device_attr as other devices, but targets VCPU-wide settings and controls.
The groups and attributes per virtual cpu, if any, are architecture specific.
1. GROUP: KVM_ARM_VCPU_PMU_V3_CTRL
Architectures: ARM64
1.1. ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_IRQ
Parameters: in kvm_device_attr.addr the address for PMU overflow interrupt is a
pointer to an int
Returns: -EBUSY: The PMU overflow interrupt is already set
-ENXIO: The overflow interrupt not set when attempting to get it
-ENODEV: PMUv3 not supported
-EINVAL: Invalid PMU overflow interrupt number supplied or
trying to set the IRQ number without using an in-kernel
irqchip.
A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt
number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt
type must be same for each vcpu. As a PPI, the interrupt number is the same for
all vcpus, while as an SPI it must be a separate number per vcpu.
1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT
Parameters: no additional parameter in kvm_device_attr.addr
Returns: -ENODEV: PMUv3 not supported or GIC not initialized
-ENXIO: PMUv3 not properly configured or in-kernel irqchip not
configured as required prior to calling this attribute
-EBUSY: PMUv3 already initialized
Request the initialization of the PMUv3. If using the PMUv3 with an in-kernel
virtual GIC implementation, this must be done after initializing the in-kernel
irqchip.
2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
Architectures: ARM,ARM64
2.1. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_VTIMER
2.2. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_PTIMER
Parameters: in kvm_device_attr.addr the address for the timer interrupt is a
pointer to an int
Returns: -EINVAL: Invalid timer interrupt number
-EBUSY: One or more VCPUs has already run
A value describing the architected timer interrupt number when connected to an
in-kernel virtual GIC. These must be a PPI (16 <= intid < 32). Setting the
attribute overrides the default values (see below).
KVM_ARM_VCPU_TIMER_IRQ_VTIMER: The EL1 virtual timer intid (default: 27)
KVM_ARM_VCPU_TIMER_IRQ_PTIMER: The EL1 physical timer intid (default: 30)
Setting the same PPI for different timers will prevent the VCPUs from running.
Setting the interrupt number on a VCPU configures all VCPUs created at that
time to use the number provided for a given timer, overwriting any previously
configured values on other VCPUs. Userspace should configure the interrupt
numbers on at least one VCPU after creating all VCPUs and before running any
VCPUs.
3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL
Architectures: ARM64
3.1 ATTRIBUTE: KVM_ARM_VCPU_PVTIME_IPA
Parameters: 64-bit base address
Returns: -ENXIO: Stolen time not implemented
-EEXIST: Base address already set for this VCPU
-EINVAL: Base address not 64 byte aligned
Specifies the base address of the stolen time structure for this VCPU. The
base address must be 64 byte aligned and exist within a valid guest memory
region. See Documentation/virt/kvm/arm/pvtime.txt for more information
including the layout of the stolen time structure.

View File

@ -1,8 +1,12 @@
.. SPDX-License-Identifier: GPL-2.0
===================
VFIO virtual device VFIO virtual device
=================== ===================
Device types supported: Device types supported:
KVM_DEV_TYPE_VFIO
- KVM_DEV_TYPE_VFIO
Only one VFIO instance may be created per VM. The created device Only one VFIO instance may be created per VM. The created device
tracks VFIO groups in use by the VM and features of those groups tracks VFIO groups in use by the VM and features of those groups
@ -23,14 +27,15 @@ KVM_DEV_VFIO_GROUP attributes:
for the VFIO group. for the VFIO group.
KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: attaches a guest visible TCE table KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: attaches a guest visible TCE table
allocated by sPAPR KVM. allocated by sPAPR KVM.
kvm_device_attr.addr points to a struct: kvm_device_attr.addr points to a struct::
struct kvm_vfio_spapr_tce { struct kvm_vfio_spapr_tce {
__s32 groupfd; __s32 groupfd;
__s32 tablefd; __s32 tablefd;
}; };
where where:
@groupfd is a file descriptor for a VFIO group;
@tablefd is a file descriptor for a TCE table allocated via - @groupfd is a file descriptor for a VFIO group;
KVM_CREATE_SPAPR_TCE. - @tablefd is a file descriptor for a TCE table allocated via
KVM_CREATE_SPAPR_TCE.

View File

@ -1,5 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0
====================
Generic vm interface Generic vm interface
==================================== ====================
The virtual machine "device" also accepts the ioctls KVM_SET_DEVICE_ATTR, The virtual machine "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same
@ -10,30 +13,38 @@ The groups and attributes per virtual machine, if any, are architecture
specific. specific.
1. GROUP: KVM_S390_VM_MEM_CTRL 1. GROUP: KVM_S390_VM_MEM_CTRL
Architectures: s390 ==============================
:Architectures: s390
1.1. ATTRIBUTE: KVM_S390_VM_MEM_ENABLE_CMMA 1.1. ATTRIBUTE: KVM_S390_VM_MEM_ENABLE_CMMA
Parameters: none -------------------------------------------
Returns: -EBUSY if a vcpu is already defined, otherwise 0
:Parameters: none
:Returns: -EBUSY if a vcpu is already defined, otherwise 0
Enables Collaborative Memory Management Assist (CMMA) for the virtual machine. Enables Collaborative Memory Management Assist (CMMA) for the virtual machine.
1.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA 1.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA
Parameters: none ----------------------------------------
Returns: -EINVAL if CMMA was not enabled
0 otherwise :Parameters: none
:Returns: -EINVAL if CMMA was not enabled;
0 otherwise
Clear the CMMA status for all guest pages, so any pages the guest marked Clear the CMMA status for all guest pages, so any pages the guest marked
as unused are again used and may not be reclaimed by the host. as unused are again used and may not be reclaimed by the host.
1.3. ATTRIBUTE KVM_S390_VM_MEM_LIMIT_SIZE 1.3. ATTRIBUTE KVM_S390_VM_MEM_LIMIT_SIZE
Parameters: in attr->addr the address for the new limit of guest memory -----------------------------------------
Returns: -EFAULT if the given address is not accessible
-EINVAL if the virtual machine is of type UCONTROL :Parameters: in attr->addr the address for the new limit of guest memory
-E2BIG if the given guest memory is too big for that machine :Returns: -EFAULT if the given address is not accessible;
-EBUSY if a vcpu is already defined -EINVAL if the virtual machine is of type UCONTROL;
-ENOMEM if not enough memory is available for a new shadow guest mapping -E2BIG if the given guest memory is too big for that machine;
0 otherwise -EBUSY if a vcpu is already defined;
-ENOMEM if not enough memory is available for a new shadow guest mapping;
0 otherwise.
Allows userspace to query the actual limit and set a new limit for Allows userspace to query the actual limit and set a new limit for
the maximum guest memory size. The limit will be rounded up to the maximum guest memory size. The limit will be rounded up to
@ -42,78 +53,92 @@ the number of page table levels. In the case that there is no limit we will set
the limit to KVM_S390_NO_MEM_LIMIT (U64_MAX). the limit to KVM_S390_NO_MEM_LIMIT (U64_MAX).
2. GROUP: KVM_S390_VM_CPU_MODEL 2. GROUP: KVM_S390_VM_CPU_MODEL
Architectures: s390 ===============================
:Architectures: s390
2.1. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE (r/o) 2.1. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE (r/o)
---------------------------------------------
Allows user space to retrieve machine and kvm specific cpu related information: Allows user space to retrieve machine and kvm specific cpu related information::
struct kvm_s390_vm_cpu_machine { struct kvm_s390_vm_cpu_machine {
__u64 cpuid; # CPUID of host __u64 cpuid; # CPUID of host
__u32 ibc; # IBC level range offered by host __u32 ibc; # IBC level range offered by host
__u8 pad[4]; __u8 pad[4];
__u64 fac_mask[256]; # set of cpu facilities enabled by KVM __u64 fac_mask[256]; # set of cpu facilities enabled by KVM
__u64 fac_list[256]; # set of cpu facilities offered by host __u64 fac_list[256]; # set of cpu facilities offered by host
} }
Parameters: address of buffer to store the machine related cpu data :Parameters: address of buffer to store the machine related cpu data
of type struct kvm_s390_vm_cpu_machine* of type struct kvm_s390_vm_cpu_machine*
Returns: -EFAULT if the given address is not accessible from kernel space :Returns: -EFAULT if the given address is not accessible from kernel space;
-ENOMEM if not enough memory is available to process the ioctl -ENOMEM if not enough memory is available to process the ioctl;
0 in case of success 0 in case of success.
2.2. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR (r/w) 2.2. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR (r/w)
-----------------------------------------------
Allows user space to retrieve or request to change cpu related information for a vcpu: Allows user space to retrieve or request to change cpu related information for a vcpu::
struct kvm_s390_vm_cpu_processor { struct kvm_s390_vm_cpu_processor {
__u64 cpuid; # CPUID currently (to be) used by this vcpu __u64 cpuid; # CPUID currently (to be) used by this vcpu
__u16 ibc; # IBC level currently (to be) used by this vcpu __u16 ibc; # IBC level currently (to be) used by this vcpu
__u8 pad[6]; __u8 pad[6];
__u64 fac_list[256]; # set of cpu facilities currently (to be) used __u64 fac_list[256]; # set of cpu facilities currently (to be) used
# by this vcpu # by this vcpu
} }
KVM does not enforce or limit the cpu model data in any form. Take the information KVM does not enforce or limit the cpu model data in any form. Take the information
retrieved by means of KVM_S390_VM_CPU_MACHINE as hint for reasonable configuration retrieved by means of KVM_S390_VM_CPU_MACHINE as hint for reasonable configuration
setups. Instruction interceptions triggered by additionally set facility bits that setups. Instruction interceptions triggered by additionally set facility bits that
are not handled by KVM need to be implemented in the VM driver code. are not handled by KVM need to be implemented in the VM driver code.
Parameters: address of buffer to store/set the processor related cpu :Parameters: address of buffer to store/set the processor related cpu
data of type struct kvm_s390_vm_cpu_processor*. data of type struct kvm_s390_vm_cpu_processor*.
Returns: -EBUSY in case 1 or more vcpus are already activated (only in write case) :Returns: -EBUSY in case 1 or more vcpus are already activated (only in write case);
-EFAULT if the given address is not accessible from kernel space -EFAULT if the given address is not accessible from kernel space;
-ENOMEM if not enough memory is available to process the ioctl -ENOMEM if not enough memory is available to process the ioctl;
0 in case of success 0 in case of success.
.. _KVM_S390_VM_CPU_MACHINE_FEAT:
2.3. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_FEAT (r/o) 2.3. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_FEAT (r/o)
--------------------------------------------------
Allows user space to retrieve available cpu features. A feature is available if Allows user space to retrieve available cpu features. A feature is available if
provided by the hardware and supported by kvm. In theory, cpu features could provided by the hardware and supported by kvm. In theory, cpu features could
even be completely emulated by kvm. even be completely emulated by kvm.
struct kvm_s390_vm_cpu_feat { ::
__u64 feat[16]; # Bitmap (1 = feature available), MSB 0 bit numbering
};
Parameters: address of a buffer to load the feature list from. struct kvm_s390_vm_cpu_feat {
Returns: -EFAULT if the given address is not accessible from kernel space. __u64 feat[16]; # Bitmap (1 = feature available), MSB 0 bit numbering
0 in case of success. };
:Parameters: address of a buffer to load the feature list from.
:Returns: -EFAULT if the given address is not accessible from kernel space;
0 in case of success.
2.4. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_FEAT (r/w) 2.4. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_FEAT (r/w)
----------------------------------------------------
Allows user space to retrieve or change enabled cpu features for all VCPUs of a Allows user space to retrieve or change enabled cpu features for all VCPUs of a
VM. Features that are not available cannot be enabled. VM. Features that are not available cannot be enabled.
See 2.3. for a description of the parameter struct. See :ref:`KVM_S390_VM_CPU_MACHINE_FEAT` for
a description of the parameter struct.
Parameters: address of a buffer to store/load the feature list from. :Parameters: address of a buffer to store/load the feature list from.
Returns: -EFAULT if the given address is not accessible from kernel space. :Returns: -EFAULT if the given address is not accessible from kernel space;
-EINVAL if a cpu feature that is not available is to be enabled. -EINVAL if a cpu feature that is not available is to be enabled;
-EBUSY if at least one VCPU has already been defined. -EBUSY if at least one VCPU has already been defined;
0 in case of success. 0 in case of success.
.. _KVM_S390_VM_CPU_MACHINE_SUBFUNC:
2.5. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_SUBFUNC (r/o) 2.5. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_SUBFUNC (r/o)
-----------------------------------------------------
Allows user space to retrieve available cpu subfunctions without any filtering Allows user space to retrieve available cpu subfunctions without any filtering
done by a set IBC. These subfunctions are indicated to the guest VCPU via done by a set IBC. These subfunctions are indicated to the guest VCPU via
@ -126,7 +151,9 @@ contained in the returned struct. If the affected instruction
indicates subfunctions via a "test bit" mechanism, the subfunction codes are indicates subfunctions via a "test bit" mechanism, the subfunction codes are
contained in the returned struct in MSB 0 bit numbering. contained in the returned struct in MSB 0 bit numbering.
struct kvm_s390_vm_cpu_subfunc { ::
struct kvm_s390_vm_cpu_subfunc {
u8 plo[32]; # always valid (ESA/390 feature) u8 plo[32]; # always valid (ESA/390 feature)
u8 ptff[16]; # valid with TOD-clock steering u8 ptff[16]; # valid with TOD-clock steering
u8 kmac[16]; # valid with Message-Security-Assist u8 kmac[16]; # valid with Message-Security-Assist
@ -143,13 +170,14 @@ struct kvm_s390_vm_cpu_subfunc {
u8 kma[16]; # valid with Message-Security-Assist-Extension 8 u8 kma[16]; # valid with Message-Security-Assist-Extension 8
u8 kdsa[16]; # valid with Message-Security-Assist-Extension 9 u8 kdsa[16]; # valid with Message-Security-Assist-Extension 9
u8 reserved[1792]; # reserved for future instructions u8 reserved[1792]; # reserved for future instructions
}; };
Parameters: address of a buffer to load the subfunction blocks from. :Parameters: address of a buffer to load the subfunction blocks from.
Returns: -EFAULT if the given address is not accessible from kernel space. :Returns: -EFAULT if the given address is not accessible from kernel space;
0 in case of success. 0 in case of success.
2.6. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_SUBFUNC (r/w) 2.6. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_SUBFUNC (r/w)
-------------------------------------------------------
Allows user space to retrieve or change cpu subfunctions to be indicated for Allows user space to retrieve or change cpu subfunctions to be indicated for
all VCPUs of a VM. This attribute will only be available if kernel and all VCPUs of a VM. This attribute will only be available if kernel and
@ -164,107 +192,125 @@ As long as no data has been written, a read will fail. The IBC will be used
to determine available subfunctions in this case, this will guarantee backward to determine available subfunctions in this case, this will guarantee backward
compatibility. compatibility.
See 2.5. for a description of the parameter struct. See :ref:`KVM_S390_VM_CPU_MACHINE_SUBFUNC` for a
description of the parameter struct.
Parameters: address of a buffer to store/load the subfunction blocks from. :Parameters: address of a buffer to store/load the subfunction blocks from.
Returns: -EFAULT if the given address is not accessible from kernel space. :Returns: -EFAULT if the given address is not accessible from kernel space;
-EINVAL when reading, if there was no write yet. -EINVAL when reading, if there was no write yet;
-EBUSY if at least one VCPU has already been defined. -EBUSY if at least one VCPU has already been defined;
0 in case of success. 0 in case of success.
3. GROUP: KVM_S390_VM_TOD 3. GROUP: KVM_S390_VM_TOD
Architectures: s390 =========================
:Architectures: s390
3.1. ATTRIBUTE: KVM_S390_VM_TOD_HIGH 3.1. ATTRIBUTE: KVM_S390_VM_TOD_HIGH
------------------------------------
Allows user space to set/get the TOD clock extension (u8) (superseded by Allows user space to set/get the TOD clock extension (u8) (superseded by
KVM_S390_VM_TOD_EXT). KVM_S390_VM_TOD_EXT).
Parameters: address of a buffer in user space to store the data (u8) to :Parameters: address of a buffer in user space to store the data (u8) to
Returns: -EFAULT if the given address is not accessible from kernel space :Returns: -EFAULT if the given address is not accessible from kernel space;
-EINVAL if setting the TOD clock extension to != 0 is not supported -EINVAL if setting the TOD clock extension to != 0 is not supported
3.2. ATTRIBUTE: KVM_S390_VM_TOD_LOW 3.2. ATTRIBUTE: KVM_S390_VM_TOD_LOW
-----------------------------------
Allows user space to set/get bits 0-63 of the TOD clock register as defined in Allows user space to set/get bits 0-63 of the TOD clock register as defined in
the POP (u64). the POP (u64).
Parameters: address of a buffer in user space to store the data (u64) to :Parameters: address of a buffer in user space to store the data (u64) to
Returns: -EFAULT if the given address is not accessible from kernel space :Returns: -EFAULT if the given address is not accessible from kernel space
3.3. ATTRIBUTE: KVM_S390_VM_TOD_EXT 3.3. ATTRIBUTE: KVM_S390_VM_TOD_EXT
-----------------------------------
Allows user space to set/get bits 0-63 of the TOD clock register as defined in Allows user space to set/get bits 0-63 of the TOD clock register as defined in
the POP (u64). If the guest CPU model supports the TOD clock extension (u8), it the POP (u64). If the guest CPU model supports the TOD clock extension (u8), it
also allows user space to get/set it. If the guest CPU model does not support also allows user space to get/set it. If the guest CPU model does not support
it, it is stored as 0 and not allowed to be set to a value != 0. it, it is stored as 0 and not allowed to be set to a value != 0.
Parameters: address of a buffer in user space to store the data :Parameters: address of a buffer in user space to store the data
(kvm_s390_vm_tod_clock) to (kvm_s390_vm_tod_clock) to
Returns: -EFAULT if the given address is not accessible from kernel space :Returns: -EFAULT if the given address is not accessible from kernel space;
-EINVAL if setting the TOD clock extension to != 0 is not supported -EINVAL if setting the TOD clock extension to != 0 is not supported
4. GROUP: KVM_S390_VM_CRYPTO 4. GROUP: KVM_S390_VM_CRYPTO
Architectures: s390 ============================
:Architectures: s390
4.1. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_AES_KW (w/o) 4.1. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_AES_KW (w/o)
------------------------------------------------------
Allows user space to enable aes key wrapping, including generating a new Allows user space to enable aes key wrapping, including generating a new
wrapping key. wrapping key.
Parameters: none :Parameters: none
Returns: 0 :Returns: 0
4.2. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_DEA_KW (w/o) 4.2. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_DEA_KW (w/o)
------------------------------------------------------
Allows user space to enable dea key wrapping, including generating a new Allows user space to enable dea key wrapping, including generating a new
wrapping key. wrapping key.
Parameters: none :Parameters: none
Returns: 0 :Returns: 0
4.3. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_AES_KW (w/o) 4.3. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_AES_KW (w/o)
-------------------------------------------------------
Allows user space to disable aes key wrapping, clearing the wrapping key. Allows user space to disable aes key wrapping, clearing the wrapping key.
Parameters: none :Parameters: none
Returns: 0 :Returns: 0
4.4. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_DEA_KW (w/o) 4.4. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_DEA_KW (w/o)
-------------------------------------------------------
Allows user space to disable dea key wrapping, clearing the wrapping key. Allows user space to disable dea key wrapping, clearing the wrapping key.
Parameters: none :Parameters: none
Returns: 0 :Returns: 0
5. GROUP: KVM_S390_VM_MIGRATION 5. GROUP: KVM_S390_VM_MIGRATION
Architectures: s390 ===============================
:Architectures: s390
5.1. ATTRIBUTE: KVM_S390_VM_MIGRATION_STOP (w/o) 5.1. ATTRIBUTE: KVM_S390_VM_MIGRATION_STOP (w/o)
------------------------------------------------
Allows userspace to stop migration mode, needed for PGSTE migration. Allows userspace to stop migration mode, needed for PGSTE migration.
Setting this attribute when migration mode is not active will have no Setting this attribute when migration mode is not active will have no
effects. effects.
Parameters: none :Parameters: none
Returns: 0 :Returns: 0
5.2. ATTRIBUTE: KVM_S390_VM_MIGRATION_START (w/o) 5.2. ATTRIBUTE: KVM_S390_VM_MIGRATION_START (w/o)
-------------------------------------------------
Allows userspace to start migration mode, needed for PGSTE migration. Allows userspace to start migration mode, needed for PGSTE migration.
Setting this attribute when migration mode is already active will have Setting this attribute when migration mode is already active will have
no effects. no effects.
Parameters: none :Parameters: none
Returns: -ENOMEM if there is not enough free memory to start migration mode :Returns: -ENOMEM if there is not enough free memory to start migration mode;
-EINVAL if the state of the VM is invalid (e.g. no memory defined) -EINVAL if the state of the VM is invalid (e.g. no memory defined);
0 in case of success. 0 in case of success.
5.3. ATTRIBUTE: KVM_S390_VM_MIGRATION_STATUS (r/o) 5.3. ATTRIBUTE: KVM_S390_VM_MIGRATION_STATUS (r/o)
--------------------------------------------------
Allows userspace to query the status of migration mode. Allows userspace to query the status of migration mode.
Parameters: address of a buffer in user space to store the data (u64) to; :Parameters: address of a buffer in user space to store the data (u64) to;
the data itself is either 0 if migration mode is disabled or 1 the data itself is either 0 if migration mode is disabled or 1
if it is enabled if it is enabled
Returns: -EFAULT if the given address is not accessible from kernel space :Returns: -EFAULT if the given address is not accessible from kernel space;
0 in case of success. 0 in case of success.

View File

@ -1,20 +1,31 @@
.. SPDX-License-Identifier: GPL-2.0
=========================
XICS interrupt controller XICS interrupt controller
=========================
Device type supported: KVM_DEV_TYPE_XICS Device type supported: KVM_DEV_TYPE_XICS
Groups: Groups:
1. KVM_DEV_XICS_GRP_SOURCES 1. KVM_DEV_XICS_GRP_SOURCES
Attributes: One per interrupt source, indexed by the source number. Attributes:
One per interrupt source, indexed by the source number.
2. KVM_DEV_XICS_GRP_CTRL 2. KVM_DEV_XICS_GRP_CTRL
Attributes: Attributes:
2.1 KVM_DEV_XICS_NR_SERVERS (write only)
2.1 KVM_DEV_XICS_NR_SERVERS (write only)
The kvm_device_attr.addr points to a __u32 value which is the number of The kvm_device_attr.addr points to a __u32 value which is the number of
interrupt server numbers (ie, highest possible vcpu id plus one). interrupt server numbers (ie, highest possible vcpu id plus one).
Errors: Errors:
-EINVAL: Value greater than KVM_MAX_VCPU_ID.
-EFAULT: Invalid user pointer for attr->addr. ======= ==========================================
-EBUSY: A vcpu is already connected to the device. -EINVAL Value greater than KVM_MAX_VCPU_ID.
-EFAULT Invalid user pointer for attr->addr.
-EBUSY A vcpu is already connected to the device.
======= ==========================================
This device emulates the XICS (eXternal Interrupt Controller This device emulates the XICS (eXternal Interrupt Controller
Specification) defined in PAPR. The XICS has a set of interrupt Specification) defined in PAPR. The XICS has a set of interrupt
@ -53,24 +64,29 @@ the interrupt source number. The 64 bit state word has the following
bitfields, starting from the least-significant end of the word: bitfields, starting from the least-significant end of the word:
* Destination (server number), 32 bits * Destination (server number), 32 bits
This specifies where the interrupt should be sent, and is the This specifies where the interrupt should be sent, and is the
interrupt server number specified for the destination vcpu. interrupt server number specified for the destination vcpu.
* Priority, 8 bits * Priority, 8 bits
This is the priority specified for this interrupt source, where 0 is This is the priority specified for this interrupt source, where 0 is
the highest priority and 255 is the lowest. An interrupt with a the highest priority and 255 is the lowest. An interrupt with a
priority of 255 will never be delivered. priority of 255 will never be delivered.
* Level sensitive flag, 1 bit * Level sensitive flag, 1 bit
This bit is 1 for a level-sensitive interrupt source, or 0 for This bit is 1 for a level-sensitive interrupt source, or 0 for
edge-sensitive (or MSI). edge-sensitive (or MSI).
* Masked flag, 1 bit * Masked flag, 1 bit
This bit is set to 1 if the interrupt is masked (cannot be delivered This bit is set to 1 if the interrupt is masked (cannot be delivered
regardless of its priority), for example by the ibm,int-off RTAS regardless of its priority), for example by the ibm,int-off RTAS
call, or 0 if it is not masked. call, or 0 if it is not masked.
* Pending flag, 1 bit * Pending flag, 1 bit
This bit is 1 if the source has a pending interrupt, otherwise 0. This bit is 1 if the source has a pending interrupt, otherwise 0.
Only one XICS instance may be created per VM. Only one XICS instance may be created per VM.

View File

@ -1,8 +1,11 @@
.. SPDX-License-Identifier: GPL-2.0
===========================================================
POWER9 eXternal Interrupt Virtualization Engine (XIVE Gen1) POWER9 eXternal Interrupt Virtualization Engine (XIVE Gen1)
========================================================== ===========================================================
Device types supported: Device types supported:
KVM_DEV_TYPE_XIVE POWER9 XIVE Interrupt Controller generation 1 - KVM_DEV_TYPE_XIVE POWER9 XIVE Interrupt Controller generation 1
This device acts as a VM interrupt controller. It provides the KVM This device acts as a VM interrupt controller. It provides the KVM
interface to configure the interrupt sources of a VM in the underlying interface to configure the interrupt sources of a VM in the underlying
@ -64,72 +67,100 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
* Groups: * Groups:
1. KVM_DEV_XIVE_GRP_CTRL 1. KVM_DEV_XIVE_GRP_CTRL
Provides global controls on the device Provides global controls on the device
Attributes: Attributes:
1.1 KVM_DEV_XIVE_RESET (write only) 1.1 KVM_DEV_XIVE_RESET (write only)
Resets the interrupt controller configuration for sources and event Resets the interrupt controller configuration for sources and event
queues. To be used by kexec and kdump. queues. To be used by kexec and kdump.
Errors: none Errors: none
1.2 KVM_DEV_XIVE_EQ_SYNC (write only) 1.2 KVM_DEV_XIVE_EQ_SYNC (write only)
Sync all the sources and queues and mark the EQ pages dirty. This is Sync all the sources and queues and mark the EQ pages dirty. This is
to make sure that a consistent memory state is captured when to make sure that a consistent memory state is captured when
migrating the VM. migrating the VM.
Errors: none Errors: none
1.3 KVM_DEV_XIVE_NR_SERVERS (write only) 1.3 KVM_DEV_XIVE_NR_SERVERS (write only)
The kvm_device_attr.addr points to a __u32 value which is the number of The kvm_device_attr.addr points to a __u32 value which is the number of
interrupt server numbers (ie, highest possible vcpu id plus one). interrupt server numbers (ie, highest possible vcpu id plus one).
Errors:
-EINVAL: Value greater than KVM_MAX_VCPU_ID.
-EFAULT: Invalid user pointer for attr->addr.
-EBUSY: A vCPU is already connected to the device.
2. KVM_DEV_XIVE_GRP_SOURCE (write only) Errors:
Initializes a new source in the XIVE device and mask it.
======= ==========================================
-EINVAL Value greater than KVM_MAX_VCPU_ID.
-EFAULT Invalid user pointer for attr->addr.
-EBUSY A vCPU is already connected to the device.
======= ==========================================
2. KVM_DEV_XIVE_GRP_SOURCE (write only)
Initializes a new source in the XIVE device and mask it.
Attributes: Attributes:
Interrupt source number (64-bit) Interrupt source number (64-bit)
The kvm_device_attr.addr points to a __u64 value:
bits: | 63 .... 2 | 1 | 0 The kvm_device_attr.addr points to a __u64 value::
values: | unused | level | type
bits: | 63 .... 2 | 1 | 0
values: | unused | level | type
- type: 0:MSI 1:LSI - type: 0:MSI 1:LSI
- level: assertion level in case of an LSI. - level: assertion level in case of an LSI.
Errors:
-E2BIG: Interrupt source number is out of range
-ENOMEM: Could not create a new source block
-EFAULT: Invalid user pointer for attr->addr.
-ENXIO: Could not allocate underlying HW interrupt
3. KVM_DEV_XIVE_GRP_SOURCE_CONFIG (write only) Errors:
Configures source targeting
======= ==========================================
-E2BIG Interrupt source number is out of range
-ENOMEM Could not create a new source block
-EFAULT Invalid user pointer for attr->addr.
-ENXIO Could not allocate underlying HW interrupt
======= ==========================================
3. KVM_DEV_XIVE_GRP_SOURCE_CONFIG (write only)
Configures source targeting
Attributes: Attributes:
Interrupt source number (64-bit) Interrupt source number (64-bit)
The kvm_device_attr.addr points to a __u64 value:
bits: | 63 .... 33 | 32 | 31 .. 3 | 2 .. 0 The kvm_device_attr.addr points to a __u64 value::
values: | eisn | mask | server | priority
bits: | 63 .... 33 | 32 | 31 .. 3 | 2 .. 0
values: | eisn | mask | server | priority
- priority: 0-7 interrupt priority level - priority: 0-7 interrupt priority level
- server: CPU number chosen to handle the interrupt - server: CPU number chosen to handle the interrupt
- mask: mask flag (unused) - mask: mask flag (unused)
- eisn: Effective Interrupt Source Number - eisn: Effective Interrupt Source Number
Errors:
-ENOENT: Unknown source number
-EINVAL: Not initialized source number
-EINVAL: Invalid priority
-EINVAL: Invalid CPU number.
-EFAULT: Invalid user pointer for attr->addr.
-ENXIO: CPU event queues not configured or configuration of the
underlying HW interrupt failed
-EBUSY: No CPU available to serve interrupt
4. KVM_DEV_XIVE_GRP_EQ_CONFIG (read-write) Errors:
Configures an event queue of a CPU
======= =======================================================
-ENOENT Unknown source number
-EINVAL Not initialized source number
-EINVAL Invalid priority
-EINVAL Invalid CPU number.
-EFAULT Invalid user pointer for attr->addr.
-ENXIO CPU event queues not configured or configuration of the
underlying HW interrupt failed
-EBUSY No CPU available to serve interrupt
======= =======================================================
4. KVM_DEV_XIVE_GRP_EQ_CONFIG (read-write)
Configures an event queue of a CPU
Attributes: Attributes:
EQ descriptor identifier (64-bit) EQ descriptor identifier (64-bit)
The EQ descriptor identifier is a tuple (server, priority) :
bits: | 63 .... 32 | 31 .. 3 | 2 .. 0 The EQ descriptor identifier is a tuple (server, priority)::
values: | unused | server | priority
The kvm_device_attr.addr points to : bits: | 63 .... 32 | 31 .. 3 | 2 .. 0
values: | unused | server | priority
The kvm_device_attr.addr points to::
struct kvm_ppc_xive_eq { struct kvm_ppc_xive_eq {
__u32 flags; __u32 flags;
__u32 qshift; __u32 qshift;
@ -138,8 +169,9 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
__u32 qindex; __u32 qindex;
__u8 pad[40]; __u8 pad[40];
}; };
- flags: queue flags - flags: queue flags
KVM_XIVE_EQ_ALWAYS_NOTIFY (required) KVM_XIVE_EQ_ALWAYS_NOTIFY (required)
forces notification without using the coalescing mechanism forces notification without using the coalescing mechanism
provided by the XIVE END ESBs. provided by the XIVE END ESBs.
- qshift: queue size (power of 2) - qshift: queue size (power of 2)
@ -147,22 +179,31 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
- qtoggle: current queue toggle bit - qtoggle: current queue toggle bit
- qindex: current queue index - qindex: current queue index
- pad: reserved for future use - pad: reserved for future use
Errors:
-ENOENT: Invalid CPU number
-EINVAL: Invalid priority
-EINVAL: Invalid flags
-EINVAL: Invalid queue size
-EINVAL: Invalid queue address
-EFAULT: Invalid user pointer for attr->addr.
-EIO: Configuration of the underlying HW failed
5. KVM_DEV_XIVE_GRP_SOURCE_SYNC (write only) Errors:
Synchronize the source to flush event notifications
======= =========================================
-ENOENT Invalid CPU number
-EINVAL Invalid priority
-EINVAL Invalid flags
-EINVAL Invalid queue size
-EINVAL Invalid queue address
-EFAULT Invalid user pointer for attr->addr.
-EIO Configuration of the underlying HW failed
======= =========================================
5. KVM_DEV_XIVE_GRP_SOURCE_SYNC (write only)
Synchronize the source to flush event notifications
Attributes: Attributes:
Interrupt source number (64-bit) Interrupt source number (64-bit)
Errors: Errors:
-ENOENT: Unknown source number
-EINVAL: Not initialized source number ======= =============================
-ENOENT Unknown source number
-EINVAL Not initialized source number
======= =============================
* VCPU state * VCPU state
@ -175,11 +216,12 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
as it synthesizes the priorities of the pending interrupts. We as it synthesizes the priorities of the pending interrupts. We
capture a bit more to report debug information. capture a bit more to report debug information.
KVM_REG_PPC_VP_STATE (2 * 64bits) KVM_REG_PPC_VP_STATE (2 * 64bits)::
bits: | 63 .... 32 | 31 .... 0 |
values: | TIMA word0 | TIMA word1 | bits: | 63 .... 32 | 31 .... 0 |
bits: | 127 .......... 64 | values: | TIMA word0 | TIMA word1 |
values: | unused | bits: | 127 .......... 64 |
values: | unused |
* Migration: * Migration:
@ -196,7 +238,7 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
3. Capture the state of the source targeting, the EQs configuration 3. Capture the state of the source targeting, the EQs configuration
and the state of thread interrupt context registers. and the state of thread interrupt context registers.
Restore is similar : Restore is similar:
1. Restore the EQ configuration, as targeting depends on it. 1. Restore the EQ configuration, as targeting depends on it.
2. Restore targeting 2. Restore targeting

View File

@ -1,3 +1,6 @@
.. SPDX-License-Identifier: GPL-2.0
===========================
The KVM halt polling system The KVM halt polling system
=========================== ===========================
@ -68,7 +71,8 @@ steady state polling interval but will only really do a good job for wakeups
which come at an approximately constant rate, otherwise there will be constant which come at an approximately constant rate, otherwise there will be constant
adjustment of the polling interval. adjustment of the polling interval.
[0] total block time: the time between when the halt polling function is [0] total block time:
the time between when the halt polling function is
invoked and a wakeup source received (irrespective of invoked and a wakeup source received (irrespective of
whether the scheduler is invoked within that function). whether the scheduler is invoked within that function).
@ -81,31 +85,32 @@ shrunk. These variables are defined in include/linux/kvm_host.h and as module
parameters in virt/kvm/kvm_main.c, or arch/powerpc/kvm/book3s_hv.c in the parameters in virt/kvm/kvm_main.c, or arch/powerpc/kvm/book3s_hv.c in the
powerpc kvm-hv case. powerpc kvm-hv case.
Module Parameter | Description | Default Value +-----------------------+---------------------------+-------------------------+
-------------------------------------------------------------------------------- |Module Parameter | Description | Default Value |
halt_poll_ns | The global max polling | KVM_HALT_POLL_NS_DEFAULT +-----------------------+---------------------------+-------------------------+
| interval which defines | |halt_poll_ns | The global max polling | KVM_HALT_POLL_NS_DEFAULT|
| the ceiling value of the | | | interval which defines | |
| polling interval for | (per arch value) | | the ceiling value of the | |
| each vcpu. | | | polling interval for | (per arch value) |
-------------------------------------------------------------------------------- | | each vcpu. | |
halt_poll_ns_grow | The value by which the | 2 +-----------------------+---------------------------+-------------------------+
| halt polling interval is | |halt_poll_ns_grow | The value by which the | 2 |
| multiplied in the | | | halt polling interval is | |
| grow_halt_poll_ns() | | | multiplied in the | |
| function. | | | grow_halt_poll_ns() | |
-------------------------------------------------------------------------------- | | function. | |
halt_poll_ns_grow_start | The initial value to grow | 10000 +-----------------------+---------------------------+-------------------------+
| to from zero in the | |halt_poll_ns_grow_start| The initial value to grow | 10000 |
| grow_halt_poll_ns() | | | to from zero in the | |
| function. | | | grow_halt_poll_ns() | |
-------------------------------------------------------------------------------- | | function. | |
halt_poll_ns_shrink | The value by which the | 0 +-----------------------+---------------------------+-------------------------+
| halt polling interval is | |halt_poll_ns_shrink | The value by which the | 0 |
| divided in the | | | halt polling interval is | |
| shrink_halt_poll_ns() | | | divided in the | |
| function. | | | shrink_halt_poll_ns() | |
-------------------------------------------------------------------------------- | | function. | |
+-----------------------+---------------------------+-------------------------+
These module parameters can be set from the debugfs files in: These module parameters can be set from the debugfs files in:
@ -117,20 +122,19 @@ Note: that these module parameters are system wide values and are not able to
Further Notes Further Notes
============= =============
- Care should be taken when setting the halt_poll_ns module parameter as a - Care should be taken when setting the halt_poll_ns module parameter as a large value
large value has the potential to drive the cpu usage to 100% on a machine which has the potential to drive the cpu usage to 100% on a machine which would be almost
would be almost entirely idle otherwise. This is because even if a guest has entirely idle otherwise. This is because even if a guest has wakeups during which very
wakeups during which very little work is done and which are quite far apart, if little work is done and which are quite far apart, if the period is shorter than the
the period is shorter than the global max polling interval (halt_poll_ns) then global max polling interval (halt_poll_ns) then the host will always poll for the
the host will always poll for the entire block time and thus cpu utilisation entire block time and thus cpu utilisation will go to 100%.
will go to 100%.
- Halt polling essentially presents a trade off between power usage and latency - Halt polling essentially presents a trade off between power usage and latency and
and the module parameters should be used to tune the affinity for this. Idle the module parameters should be used to tune the affinity for this. Idle cpu time is
cpu time is essentially converted to host kernel time with the aim of decreasing essentially converted to host kernel time with the aim of decreasing latency when
latency when entering the guest. entering the guest.
- Halt polling will only be conducted by the host when no other tasks are - Halt polling will only be conducted by the host when no other tasks are runnable on
runnable on that cpu, otherwise the polling will cease immediately and that cpu, otherwise the polling will cease immediately and schedule will be invoked to
schedule will be invoked to allow that other task to run. Thus this doesn't allow that other task to run. Thus this doesn't allow a guest to denial of service the
allow a guest to denial of service the cpu. cpu.

View File

@ -1,5 +1,9 @@
Linux KVM Hypercall: .. SPDX-License-Identifier: GPL-2.0
=================== ===================
Linux KVM Hypercall
===================
X86: X86:
KVM Hypercalls have a three-byte sequence of either the vmcall or the vmmcall KVM Hypercalls have a three-byte sequence of either the vmcall or the vmmcall
instruction. The hypervisor can replace it with instructions that are instruction. The hypervisor can replace it with instructions that are
@ -20,7 +24,7 @@ S390:
For further information on the S390 diagnose call as supported by KVM, For further information on the S390 diagnose call as supported by KVM,
refer to Documentation/virt/kvm/s390-diag.txt. refer to Documentation/virt/kvm/s390-diag.txt.
PowerPC: PowerPC:
It uses R3-R10 and hypercall number in R11. R4-R11 are used as output registers. It uses R3-R10 and hypercall number in R11. R4-R11 are used as output registers.
Return value is placed in R3. Return value is placed in R3.
@ -34,7 +38,8 @@ MIPS:
the return value is placed in $2 (v0). the return value is placed in $2 (v0).
KVM Hypercalls Documentation KVM Hypercalls Documentation
=========================== ============================
The template for each hypercall is: The template for each hypercall is:
1. Hypercall name. 1. Hypercall name.
2. Architecture(s) 2. Architecture(s)
@ -43,56 +48,64 @@ The template for each hypercall is:
1. KVM_HC_VAPIC_POLL_IRQ 1. KVM_HC_VAPIC_POLL_IRQ
------------------------ ------------------------
Architecture: x86
Status: active :Architecture: x86
Purpose: Trigger guest exit so that the host can check for pending :Status: active
interrupts on reentry. :Purpose: Trigger guest exit so that the host can check for pending
interrupts on reentry.
2. KVM_HC_MMU_OP 2. KVM_HC_MMU_OP
------------------------ ----------------
Architecture: x86
Status: deprecated. :Architecture: x86
Purpose: Support MMU operations such as writing to PTE, :Status: deprecated.
flushing TLB, release PT. :Purpose: Support MMU operations such as writing to PTE,
flushing TLB, release PT.
3. KVM_HC_FEATURES 3. KVM_HC_FEATURES
------------------------ ------------------
Architecture: PPC
Status: active :Architecture: PPC
Purpose: Expose hypercall availability to the guest. On x86 platforms, cpuid :Status: active
used to enumerate which hypercalls are available. On PPC, either device tree :Purpose: Expose hypercall availability to the guest. On x86 platforms, cpuid
based lookup ( which is also what EPAPR dictates) OR KVM specific enumeration used to enumerate which hypercalls are available. On PPC, either
mechanism (which is this hypercall) can be used. device tree based lookup ( which is also what EPAPR dictates)
OR KVM specific enumeration mechanism (which is this hypercall)
can be used.
4. KVM_HC_PPC_MAP_MAGIC_PAGE 4. KVM_HC_PPC_MAP_MAGIC_PAGE
------------------------ ----------------------------
Architecture: PPC
Status: active :Architecture: PPC
Purpose: To enable communication between the hypervisor and guest there is a :Status: active
shared page that contains parts of supervisor visible register state. :Purpose: To enable communication between the hypervisor and guest there is a
The guest can map this shared page to access its supervisor register through shared page that contains parts of supervisor visible register state.
memory using this hypercall. The guest can map this shared page to access its supervisor register
through memory using this hypercall.
5. KVM_HC_KICK_CPU 5. KVM_HC_KICK_CPU
------------------------ ------------------
Architecture: x86
Status: active :Architecture: x86
Purpose: Hypercall used to wakeup a vcpu from HLT state :Status: active
Usage example : A vcpu of a paravirtualized guest that is busywaiting in guest :Purpose: Hypercall used to wakeup a vcpu from HLT state
kernel mode for an event to occur (ex: a spinlock to become available) can :Usage example:
execute HLT instruction once it has busy-waited for more than a threshold A vcpu of a paravirtualized guest that is busywaiting in guest
time-interval. Execution of HLT instruction would cause the hypervisor to put kernel mode for an event to occur (ex: a spinlock to become available) can
the vcpu to sleep until occurrence of an appropriate event. Another vcpu of the execute HLT instruction once it has busy-waited for more than a threshold
same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall, time-interval. Execution of HLT instruction would cause the hypervisor to put
specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0) the vcpu to sleep until occurrence of an appropriate event. Another vcpu of the
is used in the hypercall for future use. same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall,
specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0)
is used in the hypercall for future use.
6. KVM_HC_CLOCK_PAIRING 6. KVM_HC_CLOCK_PAIRING
------------------------ -----------------------
Architecture: x86 :Architecture: x86
Status: active :Status: active
Purpose: Hypercall used to synchronize host and guest clocks. :Purpose: Hypercall used to synchronize host and guest clocks.
Usage: Usage:
a0: guest physical address where host copies a0: guest physical address where host copies
@ -101,6 +114,8 @@ a0: guest physical address where host copies
a1: clock_type, ATM only KVM_CLOCK_PAIRING_WALLCLOCK (0) a1: clock_type, ATM only KVM_CLOCK_PAIRING_WALLCLOCK (0)
is supported (corresponding to the host's CLOCK_REALTIME clock). is supported (corresponding to the host's CLOCK_REALTIME clock).
::
struct kvm_clock_pairing { struct kvm_clock_pairing {
__s64 sec; __s64 sec;
__s64 nsec; __s64 nsec;
@ -123,15 +138,16 @@ Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK. or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
6. KVM_HC_SEND_IPI 6. KVM_HC_SEND_IPI
------------------------ ------------------
Architecture: x86
Status: active
Purpose: Send IPIs to multiple vCPUs.
a0: lower part of the bitmap of destination APIC IDs :Architecture: x86
a1: higher part of the bitmap of destination APIC IDs :Status: active
a2: the lowest APIC ID in bitmap :Purpose: Send IPIs to multiple vCPUs.
a3: APIC ICR
- a0: lower part of the bitmap of destination APIC IDs
- a1: higher part of the bitmap of destination APIC IDs
- a2: the lowest APIC ID in bitmap
- a3: APIC ICR
The hypercall lets a guest send multicast IPIs, with at most 128 The hypercall lets a guest send multicast IPIs, with at most 128
destinations per hypercall in 64-bit mode and 64 vCPUs per destinations per hypercall in 64-bit mode and 64 vCPUs per
@ -143,12 +159,13 @@ corresponds to the APIC ID a2+1, and so on.
Returns the number of CPUs to which the IPIs were delivered successfully. Returns the number of CPUs to which the IPIs were delivered successfully.
7. KVM_HC_SCHED_YIELD 7. KVM_HC_SCHED_YIELD
------------------------ ---------------------
Architecture: x86
Status: active :Architecture: x86
Purpose: Hypercall used to yield if the IPI target vCPU is preempted :Status: active
:Purpose: Hypercall used to yield if the IPI target vCPU is preempted
a0: destination APIC ID a0: destination APIC ID
Usage example: When sending a call-function IPI-many to vCPUs, yield if :Usage example: When sending a call-function IPI-many to vCPUs, yield if
any of the IPI target vCPUs was preempted. any of the IPI target vCPUs was preempted.

View File

@ -7,6 +7,22 @@ KVM
.. toctree:: .. toctree::
:maxdepth: 2 :maxdepth: 2
api
amd-memory-encryption amd-memory-encryption
cpuid cpuid
halt-polling
hypercalls
locking
mmu
msr
nested-vmx
ppc-pv
s390-diag
timekeeping
vcpu-requests vcpu-requests
review-checklist
arm/index
devices/index

View File

@ -0,0 +1,243 @@
.. SPDX-License-Identifier: GPL-2.0
=================
KVM Lock Overview
=================
1. Acquisition Orders
---------------------
The acquisition orders for mutexes are as follows:
- kvm->lock is taken outside vcpu->mutex
- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock
- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
them together is quite rare.
On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
Everything else is a leaf: no other lock is taken inside the critical
sections.
2. Exception
------------
Fast page fault:
Fast page fault is the fast path which fixes the guest page fault out of
the mmu-lock on x86. Currently, the page fault can be fast in one of the
following two cases:
1. Access Tracking: The SPTE is not present, but it is marked for access
tracking i.e. the SPTE_SPECIAL_MASK is set. That means we need to
restore the saved R/X bits. This is described in more detail later below.
2. Write-Protection: The SPTE is present and the fault is
caused by write-protect. That means we just need to change the W bit of
the spte.
To avoid all the races, we rely on the SPTE_HOST_WRITEABLE bit and
SPTE_MMU_WRITEABLE bit on the spte:
- SPTE_HOST_WRITEABLE means the gfn is writable on host.
- SPTE_MMU_WRITEABLE means the gfn is writable on mmu. The bit is set when
the gfn is writable on guest mmu and it is not write-protected by shadow
page write-protection.
On fast page fault path, we will use cmpxchg to atomically set the spte W
bit if spte.SPTE_HOST_WRITEABLE = 1 and spte.SPTE_WRITE_PROTECT = 1, or
restore the saved R/X bits if VMX_EPT_TRACK_ACCESS mask is set, or both. This
is safe because any change to these bits can be detected by cmpxchg.
But we need to carefully check these cases:
1) The mapping from gfn to pfn
The mapping from gfn to pfn may be changed since we can only ensure the pfn
is not changed during cmpxchg. This is an ABA problem; for example, the case
below can happen:
+------------------------------------------------------------------------+
| At the beginning:: |
| |
| gpte = gfn1 |
| gfn1 is mapped to pfn1 on host |
| spte is the shadow page table entry corresponding with gpte and |
| spte = pfn1 |
+------------------------------------------------------------------------+
| On fast page fault path: |
+------------------------------------+-----------------------------------+
| CPU 0: | CPU 1: |
+------------------------------------+-----------------------------------+
| :: | |
| | |
| old_spte = *spte; | |
+------------------------------------+-----------------------------------+
| | pfn1 is swapped out:: |
| | |
| | spte = 0; |
| | |
| | pfn1 is re-alloced for gfn2. |
| | |
| | gpte is changed to point to |
| | gfn2 by the guest:: |
| | |
| | spte = pfn1; |
+------------------------------------+-----------------------------------+
| :: |
| |
| if (cmpxchg(spte, old_spte, old_spte+W) |
| mark_page_dirty(vcpu->kvm, gfn1) |
| OOPS!!! |
+------------------------------------------------------------------------+
We dirty-log for gfn1, that means gfn2 is lost in dirty-bitmap.
For direct sp, we can easily avoid it since the spte of direct sp is fixed
to gfn. For indirect sp, before we do cmpxchg, we call gfn_to_pfn_atomic()
to pin gfn to pfn, because after gfn_to_pfn_atomic():
- We hold a reference on the pfn, which means the pfn cannot be freed and
reused for another gfn.
- The pfn is writable, which means it cannot be shared between different gfns
by KSM.
Then, we can ensure the dirty bitmap is correctly set for a gfn.
Currently, to simplify things, we disable fast page fault for
indirect shadow pages.
2) Dirty bit tracking
In the original code, the spte can be fast updated (non-atomically) if the
spte is read-only and the Accessed bit has already been set since the
Accessed bit and Dirty bit can not be lost.
But this is no longer true with fast page fault, since the spte can be marked
writable between reading the spte and updating it, as in the case below:
+------------------------------------------------------------------------+
| At the beginning:: |
| |
| spte.W = 0 |
| spte.Accessed = 1 |
+------------------------------------+-----------------------------------+
| CPU 0: | CPU 1: |
+------------------------------------+-----------------------------------+
| In mmu_spte_clear_track_bits():: | |
| | |
| old_spte = *spte; | |
| | |
| | |
| /* 'if' condition is satisfied. */| |
| if (old_spte.Accessed == 1 && | |
| old_spte.W == 0) | |
| spte = 0ull; | |
+------------------------------------+-----------------------------------+
| | on fast page fault path:: |
| | |
| | spte.W = 1 |
| | |
| | memory write on the spte:: |
| | |
| | spte.Dirty = 1 |
+------------------------------------+-----------------------------------+
| :: | |
| | |
| else | |
| old_spte = xchg(spte, 0ull) | |
| if (old_spte.Accessed == 1) | |
| kvm_set_pfn_accessed(spte.pfn);| |
| if (old_spte.Dirty == 1) | |
| kvm_set_pfn_dirty(spte.pfn); | |
| OOPS!!! | |
+------------------------------------+-----------------------------------+
The Dirty bit is lost in this case.
In order to avoid this kind of issue, we always treat the spte as "volatile"
if it can be updated out of mmu-lock (see spte_has_volatile_bits()); this means
the spte is always atomically updated in this case.
3) flush tlbs due to spte updated
If the spte is updated from writable to readonly, we should flush all TLBs,
otherwise rmap_write_protect will find a read-only spte, even though the
writable spte might be cached on a CPU's TLB.
As mentioned before, the spte can be updated to writable out of mmu-lock on the
fast page fault path. To make this path easy to audit, we check whether TLBs
need to be flushed for this reason in mmu_spte_update(), since this is a common
function to update spte (present -> present).
Since the spte is "volatile" if it can be updated out of mmu-lock, we always
atomically update the spte, and the race caused by fast page fault is avoided.
See the comments in spte_has_volatile_bits() and mmu_spte_update().
Lockless Access Tracking:
This is used for Intel CPUs that are using EPT but do not support the EPT A/D
bits. In this case, when the KVM MMU notifier is called to track accesses to a
page (via kvm_mmu_notifier_clear_flush_young), it marks the PTE as not-present
by clearing the RWX bits in the PTE and storing the original R & X bits in
some unused/ignored bits. In addition, the SPTE_SPECIAL_MASK is also set on the
PTE (using the ignored bit 62). When the VM tries to access the page later on,
a fault is generated and the fast page fault mechanism described above is used
to atomically restore the PTE to a Present state. The W bit is not saved when
the PTE is marked for access tracking and during restoration to the Present
state, the W bit is set depending on whether or not it was a write access. If
it wasn't, then the W bit will remain clear until a write access happens, at
which time it will be set using the Dirty tracking mechanism described above.
3. Reference
------------
:Name: kvm_lock
:Type: mutex
:Arch: any
:Protects: - vm_list
:Name: kvm_count_lock
:Type: raw_spinlock_t
:Arch: any
:Protects: - hardware virtualization enable/disable
:Comment: 'raw' because hardware enabling/disabling must be atomic w.r.t.
migration.
:Name: kvm_arch::tsc_write_lock
:Type: raw_spinlock
:Arch: x86
:Protects: - kvm_arch::{last_tsc_write,last_tsc_nsec,last_tsc_offset}
- tsc offset in vmcb
:Comment: 'raw' because updating the tsc offsets must not be preempted.
:Name: kvm->mmu_lock
:Type: spinlock_t
:Arch: any
:Protects: - shadow page/shadow tlb entry
:Comment: it is a spinlock since it is used in mmu notifier.
:Name: kvm->srcu
:Type: srcu lock
:Arch: any
:Protects: - kvm->memslots
- kvm->buses
:Comment: The srcu read lock must be held while accessing memslots (e.g.
when using gfn_to_* functions) and while accessing in-kernel
MMIO/PIO address->device structure mapping (kvm->buses).
The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
if it is needed by multiple functions.
:Name: blocked_vcpu_on_cpu_lock
:Type: spinlock_t
:Arch: x86
:Protects: blocked_vcpu_on_cpu
:Comment: This is a per-CPU lock and it is used for VT-d posted-interrupts.
When VT-d posted-interrupts is supported and the VM has assigned
devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu
protected by blocked_vcpu_on_cpu_lock. When VT-d hardware issues a
wakeup notification event because external interrupts from the
assigned devices have arrived, we will find the vCPU on the list to
wake up.

View File

@ -1,215 +0,0 @@
KVM Lock Overview
=================
1. Acquisition Orders
---------------------
The acquisition orders for mutexes are as follows:
- kvm->lock is taken outside vcpu->mutex
- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock
- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
them together is quite rare.
On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
Everything else is a leaf: no other lock is taken inside the critical
sections.
2: Exception
------------
Fast page fault:
Fast page fault is the fast path which fixes the guest page fault out of
the mmu-lock on x86. Currently, the page fault can be fast in one of the
following two cases:
1. Access Tracking: The SPTE is not present, but it is marked for access
tracking i.e. the SPTE_SPECIAL_MASK is set. That means we need to
restore the saved R/X bits. This is described in more detail later below.
2. Write-Protection: The SPTE is present and the fault is
caused by write-protect. That means we just need to change the W bit of the
spte.
What we use to avoid all the race is the SPTE_HOST_WRITEABLE bit and
SPTE_MMU_WRITEABLE bit on the spte:
- SPTE_HOST_WRITEABLE means the gfn is writable on host.
- SPTE_MMU_WRITEABLE means the gfn is writable on mmu. The bit is set when
the gfn is writable on guest mmu and it is not write-protected by shadow
page write-protection.
On fast page fault path, we will use cmpxchg to atomically set the spte W
bit if spte.SPTE_HOST_WRITEABLE = 1 and spte.SPTE_WRITE_PROTECT = 1, or
restore the saved R/X bits if VMX_EPT_TRACK_ACCESS mask is set, or both. This
is safe because whenever changing these bits can be detected by cmpxchg.
But we need carefully check these cases:
1): The mapping from gfn to pfn
The mapping from gfn to pfn may be changed since we can only ensure the pfn
is not changed during cmpxchg. This is a ABA problem, for example, below case
will happen:
At the beginning:
gpte = gfn1
gfn1 is mapped to pfn1 on host
spte is the shadow page table entry corresponding with gpte and
spte = pfn1
VCPU 0 VCPU0
on fast page fault path:
old_spte = *spte;
pfn1 is swapped out:
spte = 0;
pfn1 is re-alloced for gfn2.
gpte is changed to point to
gfn2 by the guest:
spte = pfn1;
if (cmpxchg(spte, old_spte, old_spte+W)
mark_page_dirty(vcpu->kvm, gfn1)
OOPS!!!
We dirty-log for gfn1, that means gfn2 is lost in dirty-bitmap.
For direct sp, we can easily avoid it since the spte of direct sp is fixed
to gfn. For indirect sp, before we do cmpxchg, we call gfn_to_pfn_atomic()
to pin gfn to pfn, because after gfn_to_pfn_atomic():
- We have held the refcount of pfn that means the pfn can not be freed and
be reused for another gfn.
- The pfn is writable that means it can not be shared between different gfns
by KSM.
Then, we can ensure the dirty bitmaps is correctly set for a gfn.
Currently, to simplify the whole things, we disable fast page fault for
indirect shadow page.
2): Dirty bit tracking
In the origin code, the spte can be fast updated (non-atomically) if the
spte is read-only and the Accessed bit has already been set since the
Accessed bit and Dirty bit can not be lost.
But it is not true after fast page fault since the spte can be marked
writable between reading spte and updating spte. Like below case:
At the beginning:
spte.W = 0
spte.Accessed = 1
VCPU 0 VCPU0
In mmu_spte_clear_track_bits():
old_spte = *spte;
/* 'if' condition is satisfied. */
if (old_spte.Accessed == 1 &&
old_spte.W == 0)
spte = 0ull;
on fast page fault path:
spte.W = 1
memory write on the spte:
spte.Dirty = 1
else
old_spte = xchg(spte, 0ull)
if (old_spte.Accessed == 1)
kvm_set_pfn_accessed(spte.pfn);
if (old_spte.Dirty == 1)
kvm_set_pfn_dirty(spte.pfn);
OOPS!!!
The Dirty bit is lost in this case.
In order to avoid this kind of issue, we always treat the spte as "volatile"
if it can be updated out of mmu-lock, see spte_has_volatile_bits(), it means,
the spte is always atomically updated in this case.
3): flush tlbs due to spte updated
If the spte is updated from writable to readonly, we should flush all TLBs,
otherwise rmap_write_protect will find a read-only spte, even though the
writable spte might be cached on a CPU's TLB.
As mentioned before, the spte can be updated to writable out of mmu-lock on
fast page fault path, in order to easily audit the path, we see if TLBs need
be flushed caused by this reason in mmu_spte_update() since this is a common
function to update spte (present -> present).
Since the spte is "volatile" if it can be updated out of mmu-lock, we always
atomically update the spte, the race caused by fast page fault can be avoided,
See the comments in spte_has_volatile_bits() and mmu_spte_update().
Lockless Access Tracking:
This is used for Intel CPUs that are using EPT but do not support the EPT A/D
bits. In this case, when the KVM MMU notifier is called to track accesses to a
page (via kvm_mmu_notifier_clear_flush_young), it marks the PTE as not-present
by clearing the RWX bits in the PTE and storing the original R & X bits in
some unused/ignored bits. In addition, the SPTE_SPECIAL_MASK is also set on the
PTE (using the ignored bit 62). When the VM tries to access the page later on,
a fault is generated and the fast page fault mechanism described above is used
to atomically restore the PTE to a Present state. The W bit is not saved when
the PTE is marked for access tracking and during restoration to the Present
state, the W bit is set depending on whether or not it was a write access. If
it wasn't, then the W bit will remain clear until a write access happens, at
which time it will be set using the Dirty tracking mechanism described above.
3. Reference
------------
Name: kvm_lock
Type: mutex
Arch: any
Protects: - vm_list
Name: kvm_count_lock
Type: raw_spinlock_t
Arch: any
Protects: - hardware virtualization enable/disable
Comment: 'raw' because hardware enabling/disabling must be atomic /wrt
migration.
Name: kvm_arch::tsc_write_lock
Type: raw_spinlock
Arch: x86
Protects: - kvm_arch::{last_tsc_write,last_tsc_nsec,last_tsc_offset}
- tsc offset in vmcb
Comment: 'raw' because updating the tsc offsets must not be preempted.
Name: kvm->mmu_lock
Type: spinlock_t
Arch: any
Protects: -shadow page/shadow tlb entry
Comment: it is a spinlock since it is used in mmu notifier.
Name: kvm->srcu
Type: srcu lock
Arch: any
Protects: - kvm->memslots
- kvm->buses
Comment: The srcu read lock must be held while accessing memslots (e.g.
when using gfn_to_* functions) and while accessing in-kernel
MMIO/PIO address->device structure mapping (kvm->buses).
The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
if it is needed by multiple functions.
Name: blocked_vcpu_on_cpu_lock
Type: spinlock_t
Arch: x86
Protects: blocked_vcpu_on_cpu
Comment: This is a per-CPU lock and it is used for VT-d posted-interrupts.
When VT-d posted-interrupts is supported and the VM has assigned
devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu
protected by blocked_vcpu_on_cpu_lock. When VT-d hardware issues a
wakeup notification event because external interrupts from the
assigned devices have arrived, we find the vCPU on the list and
wake it up.

View File

@ -1,3 +1,6 @@
.. SPDX-License-Identifier: GPL-2.0
======================
The x86 kvm shadow mmu The x86 kvm shadow mmu
====================== ======================
@ -7,27 +10,37 @@ physical addresses to host physical addresses.
The mmu code attempts to satisfy the following requirements: The mmu code attempts to satisfy the following requirements:
- correctness: the guest should not be able to determine that it is running - correctness:
the guest should not be able to determine that it is running
on an emulated mmu except for timing (we attempt to comply on an emulated mmu except for timing (we attempt to comply
with the specification, not emulate the characteristics of with the specification, not emulate the characteristics of
a particular implementation such as tlb size) a particular implementation such as tlb size)
- security: the guest must not be able to touch host memory not assigned - security:
the guest must not be able to touch host memory not assigned
to it to it
- performance: minimize the performance penalty imposed by the mmu - performance:
- scaling: need to scale to large memory and large vcpu guests minimize the performance penalty imposed by the mmu
- hardware: support the full range of x86 virtualization hardware - scaling:
- integration: Linux memory management code must be in control of guest memory need to scale to large memory and large vcpu guests
- hardware:
support the full range of x86 virtualization hardware
- integration:
Linux memory management code must be in control of guest memory
so that swapping, page migration, page merging, transparent so that swapping, page migration, page merging, transparent
hugepages, and similar features work without change hugepages, and similar features work without change
- dirty tracking: report writes to guest memory to enable live migration - dirty tracking:
report writes to guest memory to enable live migration
and framebuffer-based displays and framebuffer-based displays
- footprint: keep the amount of pinned kernel memory low (most memory - footprint:
keep the amount of pinned kernel memory low (most memory
should be shrinkable) should be shrinkable)
- reliability: avoid multipage or GFP_ATOMIC allocations - reliability:
avoid multipage or GFP_ATOMIC allocations
Acronyms Acronyms
======== ========
==== ====================================================================
pfn host page frame number pfn host page frame number
hpa host physical address hpa host physical address
hva host virtual address hva host virtual address
@ -41,6 +54,7 @@ pte page table entry (used also to refer generically to paging structure
gpte guest pte (referring to gfns) gpte guest pte (referring to gfns)
spte shadow pte (referring to pfns) spte shadow pte (referring to pfns)
tdp two dimensional paging (vendor neutral term for NPT and EPT) tdp two dimensional paging (vendor neutral term for NPT and EPT)
==== ====================================================================
Virtual and real hardware supported Virtual and real hardware supported
=================================== ===================================
@ -90,11 +104,13 @@ Events
The mmu is driven by events, some from the guest, some from the host. The mmu is driven by events, some from the guest, some from the host.
Guest generated events: Guest generated events:
- writes to control registers (especially cr3) - writes to control registers (especially cr3)
- invlpg/invlpga instruction execution - invlpg/invlpga instruction execution
- access to missing or protected translations - access to missing or protected translations
Host generated events: Host generated events:
- changes in the gpa->hpa translation (either through gpa->hva changes or - changes in the gpa->hpa translation (either through gpa->hva changes or
through hva->hpa changes) through hva->hpa changes)
- memory pressure (the shrinker) - memory pressure (the shrinker)
@ -117,16 +133,19 @@ Leaf ptes point at guest pages.
The following table shows translations encoded by leaf ptes, with higher-level The following table shows translations encoded by leaf ptes, with higher-level
translations in parentheses: translations in parentheses:
Non-nested guests: Non-nested guests::
nonpaging: gpa->hpa nonpaging: gpa->hpa
paging: gva->gpa->hpa paging: gva->gpa->hpa
paging, tdp: (gva->)gpa->hpa paging, tdp: (gva->)gpa->hpa
Nested guests:
Nested guests::
non-tdp: ngva->gpa->hpa (*) non-tdp: ngva->gpa->hpa (*)
tdp: (ngva->)ngpa->gpa->hpa tdp: (ngva->)ngpa->gpa->hpa
(*) the guest hypervisor will encode the ngva->gpa translation into its page (*) the guest hypervisor will encode the ngva->gpa translation into its page
tables if npt is not present tables if npt is not present
Shadow pages contain the following information: Shadow pages contain the following information:
role.level: role.level:
@ -291,28 +310,41 @@ Handling a page fault is performed as follows:
- if the RSV bit of the error code is set, the page fault is caused by guest - if the RSV bit of the error code is set, the page fault is caused by guest
accessing MMIO and cached MMIO information is available. accessing MMIO and cached MMIO information is available.
- walk shadow page table - walk shadow page table
- check for valid generation number in the spte (see "Fast invalidation of - check for valid generation number in the spte (see "Fast invalidation of
MMIO sptes" below) MMIO sptes" below)
- cache the information to vcpu->arch.mmio_gva, vcpu->arch.mmio_access and - cache the information to vcpu->arch.mmio_gva, vcpu->arch.mmio_access and
vcpu->arch.mmio_gfn, and call the emulator vcpu->arch.mmio_gfn, and call the emulator
- If both P bit and R/W bit of error code are set, this could possibly - If both P bit and R/W bit of error code are set, this could possibly
be handled as a "fast page fault" (fixed without taking the MMU lock). See be handled as a "fast page fault" (fixed without taking the MMU lock). See
the description in Documentation/virt/kvm/locking.txt. the description in Documentation/virt/kvm/locking.txt.
- if needed, walk the guest page tables to determine the guest translation - if needed, walk the guest page tables to determine the guest translation
(gva->gpa or ngpa->gpa) (gva->gpa or ngpa->gpa)
- if permissions are insufficient, reflect the fault back to the guest - if permissions are insufficient, reflect the fault back to the guest
- determine the host page - determine the host page
- if this is an mmio request, there is no host page; cache the info to - if this is an mmio request, there is no host page; cache the info to
vcpu->arch.mmio_gva, vcpu->arch.mmio_access and vcpu->arch.mmio_gfn vcpu->arch.mmio_gva, vcpu->arch.mmio_access and vcpu->arch.mmio_gfn
- walk the shadow page table to find the spte for the translation, - walk the shadow page table to find the spte for the translation,
instantiating missing intermediate page tables as necessary instantiating missing intermediate page tables as necessary
- If this is an mmio request, cache the mmio info to the spte and set some - If this is an mmio request, cache the mmio info to the spte and set some
reserved bit on the spte (see callers of kvm_mmu_set_mmio_spte_mask) reserved bit on the spte (see callers of kvm_mmu_set_mmio_spte_mask)
- try to unsynchronize the page - try to unsynchronize the page
- if successful, we can let the guest continue and modify the gpte - if successful, we can let the guest continue and modify the gpte
- emulate the instruction - emulate the instruction
- if failed, unshadow the page and let the guest continue - if failed, unshadow the page and let the guest continue
- update any translations that were modified by the instruction - update any translations that were modified by the instruction
invlpg handling: invlpg handling:
@ -324,10 +356,12 @@ invlpg handling:
Guest control register updates: Guest control register updates:
- mov to cr3 - mov to cr3
- look up new shadow roots - look up new shadow roots
- synchronize newly reachable shadow pages - synchronize newly reachable shadow pages
- mov to cr0/cr4/efer - mov to cr0/cr4/efer
- set up mmu context for new paging mode - set up mmu context for new paging mode
- look up new shadow roots - look up new shadow roots
- synchronize newly reachable shadow pages - synchronize newly reachable shadow pages
@ -358,6 +392,7 @@ on fault type:
(user write faults generate a #PF) (user write faults generate a #PF)
In the first case there are two additional complications: In the first case there are two additional complications:
- if CR4.SMEP is enabled: since we've turned the page into a kernel page, - if CR4.SMEP is enabled: since we've turned the page into a kernel page,
the kernel may now execute it. We handle this by also setting spte.nx. the kernel may now execute it. We handle this by also setting spte.nx.
If we get a user fetch or read fault, we'll change spte.u=1 and If we get a user fetch or read fault, we'll change spte.u=1 and
@ -446,4 +481,3 @@ Further reading
- NPT presentation from KVM Forum 2008 - NPT presentation from KVM Forum 2008
http://www.linux-kvm.org/images/c/c8/KvmForum2008%24kdf2008_21.pdf http://www.linux-kvm.org/images/c/c8/KvmForum2008%24kdf2008_21.pdf

View File

@ -1,6 +1,10 @@
KVM-specific MSRs. .. SPDX-License-Identifier: GPL-2.0
Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010
===================================================== =================
KVM-specific MSRs
=================
:Author: Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010
KVM makes use of some custom MSRs to service some requests. KVM makes use of some custom MSRs to service some requests.
@ -9,34 +13,39 @@ Custom MSRs have a range reserved for them, that goes from
but they are deprecated and their use is discouraged. but they are deprecated and their use is discouraged.
Custom MSR list Custom MSR list
-------- ---------------
The currently supported Custom MSR list is: The currently supported Custom MSR list is:
MSR_KVM_WALL_CLOCK_NEW: 0x4b564d00 MSR_KVM_WALL_CLOCK_NEW:
0x4b564d00
data: 4-byte aligned physical address of a memory area which must be data:
4-byte aligned physical address of a memory area which must be
in guest RAM. This memory is expected to hold a copy of the following in guest RAM. This memory is expected to hold a copy of the following
structure: structure::
struct pvclock_wall_clock { struct pvclock_wall_clock {
u32 version; u32 version;
u32 sec; u32 sec;
u32 nsec; u32 nsec;
} __attribute__((__packed__)); } __attribute__((__packed__));
whose data will be filled in by the hypervisor. The hypervisor is only whose data will be filled in by the hypervisor. The hypervisor is only
guaranteed to update this data at the moment of MSR write. guaranteed to update this data at the moment of MSR write.
Users that want to reliably query this information more than once have Users that want to reliably query this information more than once have
to write more than once to this MSR. Fields have the following meanings: to write more than once to this MSR. Fields have the following meanings:
version: guest has to check version before and after grabbing version:
guest has to check version before and after grabbing
time information and check that they are both equal and even. time information and check that they are both equal and even.
An odd version indicates an in-progress update. An odd version indicates an in-progress update.
sec: number of seconds for wallclock at time of boot. sec:
number of seconds for wallclock at time of boot.
nsec: number of nanoseconds for wallclock at time of boot. nsec:
number of nanoseconds for wallclock at time of boot.
In order to get the current wallclock time, the system_time from In order to get the current wallclock time, the system_time from
MSR_KVM_SYSTEM_TIME_NEW needs to be added. MSR_KVM_SYSTEM_TIME_NEW needs to be added.
@ -47,13 +56,15 @@ MSR_KVM_WALL_CLOCK_NEW: 0x4b564d00
Availability of this MSR must be checked via bit 3 in 0x40000001 cpuid Availability of this MSR must be checked via bit 3 in 0x40000001 cpuid
leaf prior to usage. leaf prior to usage.
MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01 MSR_KVM_SYSTEM_TIME_NEW:
0x4b564d01
data: 4-byte aligned physical address of a memory area which must be in data:
4-byte aligned physical address of a memory area which must be in
guest RAM, plus an enable bit in bit 0. This memory is expected to hold guest RAM, plus an enable bit in bit 0. This memory is expected to hold
a copy of the following structure: a copy of the following structure::
struct pvclock_vcpu_time_info { struct pvclock_vcpu_time_info {
u32 version; u32 version;
u32 pad0; u32 pad0;
u64 tsc_timestamp; u64 tsc_timestamp;
@ -62,7 +73,7 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
s8 tsc_shift; s8 tsc_shift;
u8 flags; u8 flags;
u8 pad[2]; u8 pad[2];
} __attribute__((__packed__)); /* 32 bytes */ } __attribute__((__packed__)); /* 32 bytes */
whose data will be filled in by the hypervisor periodically. Only one whose data will be filled in by the hypervisor periodically. Only one
write, or registration, is needed for each VCPU. The interval between write, or registration, is needed for each VCPU. The interval between
@ -72,23 +83,28 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
Fields have the following meanings: Fields have the following meanings:
version: guest has to check version before and after grabbing version:
guest has to check version before and after grabbing
time information and check that they are both equal and even. time information and check that they are both equal and even.
An odd version indicates an in-progress update. An odd version indicates an in-progress update.
tsc_timestamp: the tsc value at the current VCPU at the time tsc_timestamp:
the tsc value at the current VCPU at the time
of the update of this structure. Guests can subtract this value of the update of this structure. Guests can subtract this value
from current tsc to derive a notion of elapsed time since the from current tsc to derive a notion of elapsed time since the
structure update. structure update.
system_time: a host notion of monotonic time, including sleep system_time:
a host notion of monotonic time, including sleep
time at the time this structure was last updated. Unit is time at the time this structure was last updated. Unit is
nanoseconds. nanoseconds.
tsc_to_system_mul: multiplier to be used when converting tsc_to_system_mul:
multiplier to be used when converting
tsc-related quantity to nanoseconds tsc-related quantity to nanoseconds
tsc_shift: shift to be used when converting tsc-related tsc_shift:
shift to be used when converting tsc-related
quantity to nanoseconds. This shift will ensure that quantity to nanoseconds. This shift will ensure that
multiplication with tsc_to_system_mul does not overflow. multiplication with tsc_to_system_mul does not overflow.
A positive value denotes a left shift, a negative value A positive value denotes a left shift, a negative value
@ -96,7 +112,7 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
The conversion from tsc to nanoseconds involves an additional The conversion from tsc to nanoseconds involves an additional
right shift by 32 bits. With this information, guests can right shift by 32 bits. With this information, guests can
derive per-CPU time by doing: derive per-CPU time by doing::
time = (current_tsc - tsc_timestamp) time = (current_tsc - tsc_timestamp)
if (tsc_shift >= 0) if (tsc_shift >= 0)
@ -106,29 +122,34 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
time = (time * tsc_to_system_mul) >> 32 time = (time * tsc_to_system_mul) >> 32
time = time + system_time time = time + system_time
flags: bits in this field indicate extended capabilities flags:
bits in this field indicate extended capabilities
coordinated between the guest and the hypervisor. Availability coordinated between the guest and the hypervisor. Availability
of specific flags has to be checked in 0x40000001 cpuid leaf. of specific flags has to be checked in 0x40000001 cpuid leaf.
Current flags are: Current flags are:
flag bit | cpuid bit | meaning
------------------------------------------------------------- +-----------+--------------+----------------------------------+
| | time measures taken across | flag bit | cpuid bit | meaning |
0 | 24 | multiple cpus are guaranteed to +-----------+--------------+----------------------------------+
| | be monotonic | | | time measures taken across |
------------------------------------------------------------- | 0 | 24 | multiple cpus are guaranteed to |
| | guest vcpu has been paused by | | | be monotonic |
1 | N/A | the host +-----------+--------------+----------------------------------+
| | See 4.70 in api.txt | | | guest vcpu has been paused by |
------------------------------------------------------------- | 1 | N/A | the host |
| | | See 4.70 in api.txt |
+-----------+--------------+----------------------------------+
Availability of this MSR must be checked via bit 3 in 0x40000001 cpuid Availability of this MSR must be checked via bit 3 in 0x40000001 cpuid
leaf prior to usage. leaf prior to usage.
MSR_KVM_WALL_CLOCK: 0x11 MSR_KVM_WALL_CLOCK:
0x11
data and functioning: same as MSR_KVM_WALL_CLOCK_NEW. Use that instead. data and functioning:
same as MSR_KVM_WALL_CLOCK_NEW. Use that instead.
This MSR falls outside the reserved KVM range and may be removed in the This MSR falls outside the reserved KVM range and may be removed in the
future. Its usage is deprecated. future. Its usage is deprecated.
@ -136,9 +157,11 @@ MSR_KVM_WALL_CLOCK: 0x11
Availability of this MSR must be checked via bit 0 in 0x40000001 cpuid Availability of this MSR must be checked via bit 0 in 0x40000001 cpuid
leaf prior to usage. leaf prior to usage.
MSR_KVM_SYSTEM_TIME: 0x12 MSR_KVM_SYSTEM_TIME:
0x12
data and functioning: same as MSR_KVM_SYSTEM_TIME_NEW. Use that instead. data and functioning:
same as MSR_KVM_SYSTEM_TIME_NEW. Use that instead.
This MSR falls outside the reserved KVM range and may be removed in the This MSR falls outside the reserved KVM range and may be removed in the
future. Its usage is deprecated. future. Its usage is deprecated.
@ -146,7 +169,7 @@ MSR_KVM_SYSTEM_TIME: 0x12
Availability of this MSR must be checked via bit 0 in 0x40000001 cpuid Availability of this MSR must be checked via bit 0 in 0x40000001 cpuid
leaf prior to usage. leaf prior to usage.
The suggested algorithm for detecting kvmclock presence is then: The suggested algorithm for detecting kvmclock presence is then::
if (!kvm_para_available()) /* refer to cpuid.txt */ if (!kvm_para_available()) /* refer to cpuid.txt */
return NON_PRESENT; return NON_PRESENT;
@ -163,8 +186,11 @@ MSR_KVM_SYSTEM_TIME: 0x12
} else } else
return NON_PRESENT; return NON_PRESENT;
MSR_KVM_ASYNC_PF_EN: 0x4b564d02 MSR_KVM_ASYNC_PF_EN:
data: Bits 63-6 hold 64-byte aligned physical address of a 0x4b564d02
data:
Bits 63-6 hold 64-byte aligned physical address of a
64 byte memory area which must be in guest RAM and must be 64 byte memory area which must be in guest RAM and must be
zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1 zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1
when asynchronous page faults are enabled on the vcpu, 0 when when asynchronous page faults are enabled on the vcpu, 0 when
@ -200,20 +226,22 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02
Currently type 2 APF will be always delivered on the same vcpu as Currently type 2 APF will be always delivered on the same vcpu as
type 1 was, but guest should not rely on that. type 1 was, but guest should not rely on that.
MSR_KVM_STEAL_TIME: 0x4b564d03 MSR_KVM_STEAL_TIME:
0x4b564d03
data: 64-byte aligned physical address of a memory area which must be data:
64-byte aligned physical address of a memory area which must be
in guest RAM, plus an enable bit in bit 0. This memory is expected to in guest RAM, plus an enable bit in bit 0. This memory is expected to
hold a copy of the following structure: hold a copy of the following structure::
struct kvm_steal_time { struct kvm_steal_time {
__u64 steal; __u64 steal;
__u32 version; __u32 version;
__u32 flags; __u32 flags;
__u8 preempted; __u8 preempted;
__u8 u8_pad[3]; __u8 u8_pad[3];
__u32 pad[11]; __u32 pad[11];
} }
whose data will be filled in by the hypervisor periodically. Only one whose data will be filled in by the hypervisor periodically. Only one
write, or registration, is needed for each VCPU. The interval between write, or registration, is needed for each VCPU. The interval between
@ -224,25 +252,32 @@ MSR_KVM_STEAL_TIME: 0x4b564d03
Fields have the following meanings: Fields have the following meanings:
version: a sequence counter. In other words, guest has to check version:
a sequence counter. In other words, guest has to check
this field before and after grabbing time information and make this field before and after grabbing time information and make
sure they are both equal and even. An odd version indicates an sure they are both equal and even. An odd version indicates an
in-progress update. in-progress update.
flags: At this point, always zero. May be used to indicate flags:
At this point, always zero. May be used to indicate
changes in this structure in the future. changes in this structure in the future.
steal: the amount of time in which this vCPU did not run, in steal:
the amount of time in which this vCPU did not run, in
nanoseconds. Time during which the vcpu is idle, will not be nanoseconds. Time during which the vcpu is idle, will not be
reported as steal time. reported as steal time.
preempted: indicates whether the vCPU that owns this struct is running or preempted:
indicates whether the vCPU that owns this struct is running or
not. Non-zero values mean the vCPU has been preempted. Zero not. Non-zero values mean the vCPU has been preempted. Zero
means the vCPU is not preempted. NOTE, it is always zero if the means the vCPU is not preempted. NOTE, it is always zero if the
hypervisor doesn't support this field. hypervisor doesn't support this field.
MSR_KVM_EOI_EN: 0x4b564d04 MSR_KVM_EOI_EN:
data: Bit 0 is 1 when PV end of interrupt is enabled on the vcpu; 0 0x4b564d04
data:
Bit 0 is 1 when PV end of interrupt is enabled on the vcpu; 0
when disabled. Bit 1 is reserved and must be zero. When PV end of when disabled. Bit 1 is reserved and must be zero. When PV end of
interrupt is enabled (bit 0 set), bits 63-2 hold a 4-byte aligned interrupt is enabled (bit 0 set), bits 63-2 hold a 4-byte aligned
physical address of a 4 byte memory area which must be in guest RAM and physical address of a 4 byte memory area which must be in guest RAM and
@ -274,11 +309,13 @@ MSR_KVM_EOI_EN: 0x4b564d04
clear it using a single CPU instruction, such as test and clear, or clear it using a single CPU instruction, such as test and clear, or
compare and exchange. compare and exchange.
MSR_KVM_POLL_CONTROL: 0x4b564d05 MSR_KVM_POLL_CONTROL:
0x4b564d05
Control host-side polling. Control host-side polling.
data: Bit 0 enables (1) or disables (0) host-side HLT polling logic. data:
Bit 0 enables (1) or disables (0) host-side HLT polling logic.
KVM guests can request the host not to poll on HLT, for example if KVM guests can request the host not to poll on HLT, for example if
they are performing polling themselves. they are performing polling themselves.

View File

@ -1,3 +1,6 @@
.. SPDX-License-Identifier: GPL-2.0
==========
Nested VMX Nested VMX
========== ==========
@ -41,9 +44,9 @@ No modifications are required to user space (qemu). However, qemu's default
emulated CPU type (qemu64) does not list the "VMX" CPU feature, so it must be emulated CPU type (qemu64) does not list the "VMX" CPU feature, so it must be
explicitly enabled, by giving qemu one of the following options: explicitly enabled, by giving qemu one of the following options:
-cpu host (emulated CPU has all features of the real CPU) - ``-cpu host`` (emulated CPU has all features of the real CPU)
-cpu qemu64,+vmx (add just the vmx feature to a named CPU type) - ``-cpu qemu64,+vmx`` (add just the vmx feature to a named CPU type)
ABIs ABIs
@ -75,6 +78,8 @@ of this structure changes, this can break live migration across KVM versions.
VMCS12_REVISION (from vmx.c) should be changed if struct vmcs12 or its inner VMCS12_REVISION (from vmx.c) should be changed if struct vmcs12 or its inner
struct shadow_vmcs is ever changed. struct shadow_vmcs is ever changed.
::
typedef u64 natural_width; typedef u64 natural_width;
struct __packed vmcs12 { struct __packed vmcs12 {
/* According to the Intel spec, a VMCS region must start with /* According to the Intel spec, a VMCS region must start with
@ -220,21 +225,21 @@ Authors
------- -------
These patches were written by: These patches were written by:
Abel Gordon, abelg <at> il.ibm.com - Abel Gordon, abelg <at> il.ibm.com
Nadav Har'El, nyh <at> il.ibm.com - Nadav Har'El, nyh <at> il.ibm.com
Orit Wasserman, oritw <at> il.ibm.com - Orit Wasserman, oritw <at> il.ibm.com
Ben-Ami Yassor, benami <at> il.ibm.com - Ben-Ami Yassor, benami <at> il.ibm.com
Muli Ben-Yehuda, muli <at> il.ibm.com - Muli Ben-Yehuda, muli <at> il.ibm.com
With contributions by: With contributions by:
Anthony Liguori, aliguori <at> us.ibm.com - Anthony Liguori, aliguori <at> us.ibm.com
Mike Day, mdday <at> us.ibm.com - Mike Day, mdday <at> us.ibm.com
Michael Factor, factor <at> il.ibm.com - Michael Factor, factor <at> il.ibm.com
Zvi Dubitzky, dubi <at> il.ibm.com - Zvi Dubitzky, dubi <at> il.ibm.com
And valuable reviews by: And valuable reviews by:
Avi Kivity, avi <at> redhat.com - Avi Kivity, avi <at> redhat.com
Gleb Natapov, gleb <at> redhat.com - Gleb Natapov, gleb <at> redhat.com
Marcelo Tosatti, mtosatti <at> redhat.com - Marcelo Tosatti, mtosatti <at> redhat.com
Kevin Tian, kevin.tian <at> intel.com - Kevin Tian, kevin.tian <at> intel.com
and others. - and others.

View File

@ -1,3 +1,6 @@
.. SPDX-License-Identifier: GPL-2.0
=================================
The PPC KVM paravirtual interface The PPC KVM paravirtual interface
================================= =================================
@ -34,8 +37,9 @@ up the hypercall. To call a hypercall, just call these instructions.
The parameters are as follows: The parameters are as follows:
======== ================ ================
Register IN OUT Register IN OUT
======== ================ ================
r0 - volatile r0 - volatile
r3 1st parameter Return code r3 1st parameter Return code
r4 2nd parameter 1st output value r4 2nd parameter 1st output value
@ -47,6 +51,7 @@ The parameters are as follows:
r10 8th parameter 7th output value r10 8th parameter 7th output value
r11 hypercall number 8th output value r11 hypercall number 8th output value
r12 - volatile r12 - volatile
======== ================ ================
Hypercall definitions are shared in generic code, so the same hypercall numbers Hypercall definitions are shared in generic code, so the same hypercall numbers
apply for x86 and powerpc alike with the exception that each KVM hypercall apply for x86 and powerpc alike with the exception that each KVM hypercall
@ -54,11 +59,13 @@ also needs to be ORed with the KVM vendor code which is (42 << 16).
Return codes can be as follows: Return codes can be as follows:
==== =========================
Code Meaning Code Meaning
==== =========================
0 Success 0 Success
12 Hypercall not implemented 12 Hypercall not implemented
<0 Error <0 Error
==== =========================
The magic page The magic page
============== ==============
@ -72,7 +79,7 @@ desired location. The first parameter indicates the effective address when the
MMU is enabled. The second parameter indicates the address in real mode, if MMU is enabled. The second parameter indicates the address in real mode, if
applicable to the target. For now, we always map the page to -4096. This way we applicable to the target. For now, we always map the page to -4096. This way we
can access it using absolute load and store functions. The following can access it using absolute load and store functions. The following
instruction reads the first field of the magic page: instruction reads the first field of the magic page::
ld rX, -4096(0) ld rX, -4096(0)
@ -93,8 +100,10 @@ a bitmap of available features inside the magic page.
The following enhancements to the magic page are currently available: The following enhancements to the magic page are currently available:
============================ =======================================
KVM_MAGIC_FEAT_SR Maps SR registers r/w in the magic page KVM_MAGIC_FEAT_SR Maps SR registers r/w in the magic page
KVM_MAGIC_FEAT_MAS0_TO_SPRG7 Maps MASn, ESR, PIR and high SPRGs KVM_MAGIC_FEAT_MAS0_TO_SPRG7 Maps MASn, ESR, PIR and high SPRGs
============================ =======================================
For enhanced features in the magic page, please check for the existence of the For enhanced features in the magic page, please check for the existence of the
feature before using them! feature before using them!
@ -121,8 +130,8 @@ when entering the guest or don't have any impact on the hypervisor's behavior.
The following bits are safe to be set inside the guest: The following bits are safe to be set inside the guest:
MSR_EE - MSR_EE
MSR_RI - MSR_RI
If any other bit changes in the MSR, please still use mtmsr(d). If any other bit changes in the MSR, please still use mtmsr(d).
@ -138,9 +147,9 @@ guest. Implementing any of those mappings is optional, as the instruction traps
also act on the shared page. So calling privileged instructions still works as also act on the shared page. So calling privileged instructions still works as
before. before.
======================= ================================
From To From To
==== == ======================= ================================
mfmsr rX ld rX, magic_page->msr mfmsr rX ld rX, magic_page->msr
mfsprg rX, 0 ld rX, magic_page->sprg0 mfsprg rX, 0 ld rX, magic_page->sprg0
mfsprg rX, 1 ld rX, magic_page->sprg1 mfsprg rX, 1 ld rX, magic_page->sprg1
@ -173,7 +182,7 @@ mtsrin rX, rY b <special mtsrin section>
[BookE only] [BookE only]
wrteei [0|1] b <special wrteei section> wrteei [0|1] b <special wrteei section>
======================= ================================
Some instructions require more logic to determine what's going on than a load Some instructions require more logic to determine what's going on than a load
or store instruction can deliver. To enable patching of those, we keep some or store instruction can deliver. To enable patching of those, we keep some
@ -191,6 +200,7 @@ for example.
Hypercall ABIs in KVM on PowerPC Hypercall ABIs in KVM on PowerPC
================================= =================================
1) KVM hypercalls (ePAPR) 1) KVM hypercalls (ePAPR)
These are ePAPR compliant hypercall implementations (mentioned above). Even These are ePAPR compliant hypercall implementations (mentioned above). Even

View File

@ -1,3 +1,6 @@
.. SPDX-License-Identifier: GPL-2.0
================================
Review checklist for kvm patches Review checklist for kvm patches
================================ ================================

View File

@ -1,3 +1,6 @@
.. SPDX-License-Identifier: GPL-2.0
=============================
The s390 DIAGNOSE call on KVM The s390 DIAGNOSE call on KVM
============================= =============================
@ -16,12 +19,12 @@ DIAGNOSE calls by the guest cause a mandatory intercept. This implies
all supported DIAGNOSE calls need to be handled by either KVM or its all supported DIAGNOSE calls need to be handled by either KVM or its
userspace. userspace.
All DIAGNOSE calls supported by KVM use the RS-a format: All DIAGNOSE calls supported by KVM use the RS-a format::
-------------------------------------- --------------------------------------
| '83' | R1 | R3 | B2 | D2 | | '83' | R1 | R3 | B2 | D2 |
-------------------------------------- --------------------------------------
0 8 12 16 20 31 0 8 12 16 20 31
The second-operand address (obtained by the base/displacement calculation) The second-operand address (obtained by the base/displacement calculation)
is not used to address data. Instead, bits 48-63 of this address specify is not used to address data. Instead, bits 48-63 of this address specify

View File

@ -1,17 +1,21 @@
.. SPDX-License-Identifier: GPL-2.0
Timekeeping Virtualization for X86-Based Architectures ======================================================
Timekeeping Virtualization for X86-Based Architectures
======================================================
Zachary Amsden <zamsden@redhat.com> :Author: Zachary Amsden <zamsden@redhat.com>
Copyright (c) 2010, Red Hat. All rights reserved. :Copyright: (c) 2010, Red Hat. All rights reserved.
1) Overview .. Contents
2) Timing Devices
3) TSC Hardware
4) Virtualization Problems
========================================================================= 1) Overview
2) Timing Devices
3) TSC Hardware
4) Virtualization Problems
1) Overview 1. Overview
===========
One of the most complicated parts of the X86 platform, and specifically, One of the most complicated parts of the X86 platform, and specifically,
the virtualization of this platform is the plethora of timing devices available the virtualization of this platform is the plethora of timing devices available
@ -27,15 +31,15 @@ The purpose of this document is to collect data and information relevant to
timekeeping which may be difficult to find elsewhere, specifically, timekeeping which may be difficult to find elsewhere, specifically,
information relevant to KVM and hardware-based virtualization. information relevant to KVM and hardware-based virtualization.
========================================================================= 2. Timing Devices
=================
2) Timing Devices
First we discuss the basic hardware devices available. TSC and the related First we discuss the basic hardware devices available. TSC and the related
KVM clock are special enough to warrant a full exposition and are described in KVM clock are special enough to warrant a full exposition and are described in
the following section. the following section.
2.1) i8254 - PIT 2.1. i8254 - PIT
----------------
One of the first timer devices available is the programmable interrupt timer, One of the first timer devices available is the programmable interrupt timer,
or PIT. The PIT has a fixed frequency 1.193182 MHz base clock and three or PIT. The PIT has a fixed frequency 1.193182 MHz base clock and three
@ -50,13 +54,13 @@ The PIT uses I/O ports 0x40 - 0x43. Access to the 16-bit counters is done
using single or multiple byte access to the I/O ports. There are 6 modes using single or multiple byte access to the I/O ports. There are 6 modes
available, but not all modes are available to all timers, as only timer 2 available, but not all modes are available to all timers, as only timer 2
has a connected gate input, required for modes 1 and 5. The gate line is has a connected gate input, required for modes 1 and 5. The gate line is
controlled by port 61h, bit 0, as illustrated in the following diagram. controlled by port 61h, bit 0, as illustrated in the following diagram::
-------------- ---------------- -------------- ----------------
| | | | | | | |
| 1.1932 MHz |---------->| CLOCK OUT | ---------> IRQ 0 | 1.1932 MHz|---------->| CLOCK OUT | ---------> IRQ 0
| Clock | | | | | Clock | | | |
-------------- | +->| GATE TIMER 0 | -------------- | +->| GATE TIMER 0 |
| ---------------- | ----------------
| |
| ---------------- | ----------------
@ -70,29 +74,33 @@ controlled by port 61h, bit 0, as illustrated in the following diagram.
| | | | | |
|------>| CLOCK OUT | ---------> Port 61h, bit 5 |------>| CLOCK OUT | ---------> Port 61h, bit 5
| | | | | |
Port 61h, bit 0 ---------->| GATE TIMER 2 | \_.---- ____ Port 61h, bit 0 -------->| GATE TIMER 2 | \_.---- ____
---------------- _| )--|LPF|---Speaker ---------------- _| )--|LPF|---Speaker
/ *---- \___/ / *---- \___/
Port 61h, bit 1 -----------------------------------/ Port 61h, bit 1 ---------------------------------/
The timer modes are now described. The timer modes are now described.
Mode 0: Single Timeout. This is a one-shot software timeout that counts down Mode 0: Single Timeout.
This is a one-shot software timeout that counts down
when the gate is high (always true for timers 0 and 1). When the count when the gate is high (always true for timers 0 and 1). When the count
reaches zero, the output goes high. reaches zero, the output goes high.
Mode 1: Triggered One-shot. The output is initially set high. When the gate Mode 1: Triggered One-shot.
The output is initially set high. When the gate
line is set high, a countdown is initiated (which does not stop if the gate is line is set high, a countdown is initiated (which does not stop if the gate is
lowered), during which the output is set low. When the count reaches zero, lowered), during which the output is set low. When the count reaches zero,
the output goes high. the output goes high.
Mode 2: Rate Generator. The output is initially set high. When the countdown Mode 2: Rate Generator.
The output is initially set high. When the countdown
reaches 1, the output goes low for one count and then returns high. The value reaches 1, the output goes low for one count and then returns high. The value
is reloaded and the countdown automatically resumes. If the gate line goes is reloaded and the countdown automatically resumes. If the gate line goes
low, the count is halted. If the output is low when the gate is lowered, the low, the count is halted. If the output is low when the gate is lowered, the
output automatically goes high (this only affects timer 2). output automatically goes high (this only affects timer 2).
Mode 3: Square Wave. This generates a high / low square wave. The count Mode 3: Square Wave.
This generates a high / low square wave. The count
determines the length of the pulse, which alternates between high and low determines the length of the pulse, which alternates between high and low
when zero is reached. The count only proceeds when gate is high and is when zero is reached. The count only proceeds when gate is high and is
automatically reloaded on reaching zero. The count is decremented twice at automatically reloaded on reaching zero. The count is decremented twice at
@ -103,12 +111,14 @@ Mode 3: Square Wave. This generates a high / low square wave. The count
values are not observed when reading. This is the intended mode for timer 2, values are not observed when reading. This is the intended mode for timer 2,
which generates sine-like tones by low-pass filtering the square wave output. which generates sine-like tones by low-pass filtering the square wave output.
Mode 4: Software Strobe. After programming this mode and loading the counter, Mode 4: Software Strobe.
After programming this mode and loading the counter,
the output remains high until the counter reaches zero. Then the output the output remains high until the counter reaches zero. Then the output
goes low for 1 clock cycle and returns high. The counter is not reloaded. goes low for 1 clock cycle and returns high. The counter is not reloaded.
Counting only occurs when gate is high. Counting only occurs when gate is high.
Mode 5: Hardware Strobe. After programming and loading the counter, the Mode 5: Hardware Strobe.
After programming and loading the counter, the
output remains high. When the gate is raised, a countdown is initiated output remains high. When the gate is raised, a countdown is initiated
(which does not stop if the gate is lowered). When the counter reaches zero, (which does not stop if the gate is lowered). When the counter reaches zero,
the output goes low for 1 clock cycle and then returns high. The counter is the output goes low for 1 clock cycle and then returns high. The counter is
@ -118,49 +128,49 @@ In addition to normal binary counting, the PIT supports BCD counting. The
command port, 0x43 is used to set the counter and mode for each of the three command port, 0x43 is used to set the counter and mode for each of the three
timers. timers.
PIT commands, issued to port 0x43, using the following bit encoding: PIT commands, issued to port 0x43, using the following bit encoding::
Bit 7-4: Command (See table below) Bit 7-4: Command (See table below)
Bit 3-1: Mode (000 = Mode 0, 101 = Mode 5, 11X = undefined) Bit 3-1: Mode (000 = Mode 0, 101 = Mode 5, 11X = undefined)
Bit 0 : Binary (0) / BCD (1) Bit 0 : Binary (0) / BCD (1)
Command table: Command table::
0000 - Latch Timer 0 count for port 0x40 0000 - Latch Timer 0 count for port 0x40
sample and hold the count to be read in port 0x40; sample and hold the count to be read in port 0x40;
additional commands ignored until counter is read; additional commands ignored until counter is read;
mode bits ignored. mode bits ignored.
0001 - Set Timer 0 LSB mode for port 0x40 0001 - Set Timer 0 LSB mode for port 0x40
set timer to read LSB only and force MSB to zero; set timer to read LSB only and force MSB to zero;
mode bits set timer mode mode bits set timer mode
0010 - Set Timer 0 MSB mode for port 0x40 0010 - Set Timer 0 MSB mode for port 0x40
set timer to read MSB only and force LSB to zero; set timer to read MSB only and force LSB to zero;
mode bits set timer mode mode bits set timer mode
0011 - Set Timer 0 16-bit mode for port 0x40 0011 - Set Timer 0 16-bit mode for port 0x40
set timer to read / write LSB first, then MSB; set timer to read / write LSB first, then MSB;
mode bits set timer mode mode bits set timer mode
0100 - Latch Timer 1 count for port 0x41 - as described above 0100 - Latch Timer 1 count for port 0x41 - as described above
0101 - Set Timer 1 LSB mode for port 0x41 - as described above 0101 - Set Timer 1 LSB mode for port 0x41 - as described above
0110 - Set Timer 1 MSB mode for port 0x41 - as described above 0110 - Set Timer 1 MSB mode for port 0x41 - as described above
0111 - Set Timer 1 16-bit mode for port 0x41 - as described above 0111 - Set Timer 1 16-bit mode for port 0x41 - as described above
1000 - Latch Timer 2 count for port 0x42 - as described above 1000 - Latch Timer 2 count for port 0x42 - as described above
1001 - Set Timer 2 LSB mode for port 0x42 - as described above 1001 - Set Timer 2 LSB mode for port 0x42 - as described above
1010 - Set Timer 2 MSB mode for port 0x42 - as described above 1010 - Set Timer 2 MSB mode for port 0x42 - as described above
1011 - Set Timer 2 16-bit mode for port 0x42 - as described above 1011 - Set Timer 2 16-bit mode for port 0x42 - as described above
1101 - General counter latch 1101 - General counter latch
Latch combination of counters into corresponding ports Latch combination of counters into corresponding ports
Bit 3 = Counter 2 Bit 3 = Counter 2
Bit 2 = Counter 1 Bit 2 = Counter 1
Bit 1 = Counter 0 Bit 1 = Counter 0
Bit 0 = Unused Bit 0 = Unused
1110 - Latch timer status 1110 - Latch timer status
Latch combination of counter mode into corresponding ports Latch combination of counter mode into corresponding ports
Bit 3 = Counter 2 Bit 3 = Counter 2
Bit 2 = Counter 1 Bit 2 = Counter 1
@ -177,7 +187,8 @@ Command table:
Bit 3-1 = Mode Bit 3-1 = Mode
Bit 0 = Binary (0) / BCD mode (1) Bit 0 = Binary (0) / BCD mode (1)
2.2) RTC 2.2. RTC
--------
The second device which was available in the original PC was the MC146818 real The second device which was available in the original PC was the MC146818 real
time clock. The original device is now obsolete, and usually emulated by the time clock. The original device is now obsolete, and usually emulated by the
@ -201,21 +212,21 @@ in progress, as indicated in the status register.
The clock uses a 32.768kHz crystal, so bits 6-4 of register A should be The clock uses a 32.768kHz crystal, so bits 6-4 of register A should be
programmed to a 32kHz divider if the RTC is to count seconds. programmed to a 32kHz divider if the RTC is to count seconds.
This is the RAM map originally used for the RTC/CMOS: This is the RAM map originally used for the RTC/CMOS::
Location Size Description Location Size Description
------------------------------------------ ------------------------------------------
00h byte Current second (BCD) 00h byte Current second (BCD)
01h byte Seconds alarm (BCD) 01h byte Seconds alarm (BCD)
02h byte Current minute (BCD) 02h byte Current minute (BCD)
03h byte Minutes alarm (BCD) 03h byte Minutes alarm (BCD)
04h byte Current hour (BCD) 04h byte Current hour (BCD)
05h byte Hours alarm (BCD) 05h byte Hours alarm (BCD)
06h byte Current day of week (BCD) 06h byte Current day of week (BCD)
07h byte Current day of month (BCD) 07h byte Current day of month (BCD)
08h byte Current month (BCD) 08h byte Current month (BCD)
09h byte Current year (BCD) 09h byte Current year (BCD)
0Ah byte Register A 0Ah byte Register A
bit 7 = Update in progress bit 7 = Update in progress
bit 6-4 = Divider for clock bit 6-4 = Divider for clock
000 = 4.194 MHz 000 = 4.194 MHz
@ -234,7 +245,7 @@ Location Size Description
1101 = 125 mS 1101 = 125 mS
1110 = 250 mS 1110 = 250 mS
1111 = 500 mS 1111 = 500 mS
0Bh byte Register B 0Bh byte Register B
bit 7 = Run (0) / Halt (1) bit 7 = Run (0) / Halt (1)
bit 6 = Periodic interrupt enable bit 6 = Periodic interrupt enable
bit 5 = Alarm interrupt enable bit 5 = Alarm interrupt enable
@ -243,19 +254,20 @@ Location Size Description
bit 2 = BCD calendar (0) / Binary (1) bit 2 = BCD calendar (0) / Binary (1)
bit 1 = 12-hour mode (0) / 24-hour mode (1) bit 1 = 12-hour mode (0) / 24-hour mode (1)
bit 0 = 0 (DST off) / 1 (DST enabled) bit 0 = 0 (DST off) / 1 (DST enabled)
0Ch byte Register C (read only) 0Ch byte Register C (read only)
bit 7 = interrupt request flag (IRQF) bit 7 = interrupt request flag (IRQF)
bit 6 = periodic interrupt flag (PF) bit 6 = periodic interrupt flag (PF)
bit 5 = alarm interrupt flag (AF) bit 5 = alarm interrupt flag (AF)
bit 4 = update interrupt flag (UF) bit 4 = update interrupt flag (UF)
bit 3-0 = reserved bit 3-0 = reserved
0Dh byte Register D (read only) 0Dh byte Register D (read only)
bit 7 = RTC has power bit 7 = RTC has power
bit 6-0 = reserved bit 6-0 = reserved
32h byte Current century BCD (*) 32h byte Current century BCD (*)
(*) location vendor specific and now determined from ACPI global tables (*) location vendor specific and now determined from ACPI global tables
2.3) APIC 2.3. APIC
---------
On Pentium and later processors, an on-board timer is available to each CPU On Pentium and later processors, an on-board timer is available to each CPU
as part of the Advanced Programmable Interrupt Controller. The APIC is as part of the Advanced Programmable Interrupt Controller. The APIC is
@ -276,7 +288,8 @@ timer is programmed through the LVT (local vector timer) register, is capable
of one-shot or periodic operation, and is based on the bus clock divided down of one-shot or periodic operation, and is based on the bus clock divided down
by the programmable divider register. by the programmable divider register.
2.4) HPET 2.4. HPET
---------
HPET is quite complex, and was originally intended to replace the PIT / RTC HPET is quite complex, and was originally intended to replace the PIT / RTC
support of the X86 PC. It remains to be seen whether that will be the case, as support of the X86 PC. It remains to be seen whether that will be the case, as
@ -297,7 +310,8 @@ indicated through ACPI tables by the BIOS.
Detailed specification of the HPET is beyond the current scope of this Detailed specification of the HPET is beyond the current scope of this
document, as it is also very well documented elsewhere. document, as it is also very well documented elsewhere.
2.5) Offboard Timers 2.5. Offboard Timers
--------------------
Several cards, both proprietary (watchdog boards) and commonplace (e1000) have Several cards, both proprietary (watchdog boards) and commonplace (e1000) have
timing chips built into the cards which may have registers which are accessible timing chips built into the cards which may have registers which are accessible
@ -307,9 +321,8 @@ general frowned upon as not playing by the agreed rules of the game. Such a
timer device would require additional support to be virtualized properly and is timer device would require additional support to be virtualized properly and is
not considered important at this time as no known operating system does this. not considered important at this time as no known operating system does this.
========================================================================= 3. TSC Hardware
===============
3) TSC Hardware
The TSC or time stamp counter is relatively simple in theory; it counts The TSC or time stamp counter is relatively simple in theory; it counts
instruction cycles issued by the processor, which can be used as a measure of instruction cycles issued by the processor, which can be used as a measure of
@ -340,7 +353,8 @@ allows the guest visible TSC to be offset by a constant. Newer implementations
promise to allow the TSC to additionally be scaled, but this hardware is not promise to allow the TSC to additionally be scaled, but this hardware is not
yet widely available. yet widely available.
3.1) TSC synchronization 3.1. TSC synchronization
------------------------
The TSC is a CPU-local clock in most implementations. This means, on SMP The TSC is a CPU-local clock in most implementations. This means, on SMP
platforms, the TSCs of different CPUs may start at different times depending platforms, the TSCs of different CPUs may start at different times depending
@ -357,7 +371,8 @@ practice, getting a perfectly synchronized TSC will not be possible unless all
values are read from the same clock, which generally only is possible on single values are read from the same clock, which generally only is possible on single
socket systems or those with special hardware support. socket systems or those with special hardware support.
3.2) TSC and CPU hotplug 3.2. TSC and CPU hotplug
------------------------
As touched on already, CPUs which arrive later than the boot time of the system As touched on already, CPUs which arrive later than the boot time of the system
may not have a TSC value that is synchronized with the rest of the system. may not have a TSC value that is synchronized with the rest of the system.
@ -367,7 +382,8 @@ a guarantee. This can have the effect of bringing a system from a state where
TSC is synchronized back to a state where TSC synchronization flaws, however TSC is synchronized back to a state where TSC synchronization flaws, however
small, may be exposed to the OS and any virtualization environment. small, may be exposed to the OS and any virtualization environment.
3.3) TSC and multi-socket / NUMA 3.3. TSC and multi-socket / NUMA
--------------------------------
Multi-socket systems, especially large multi-socket systems, are likely to have Multi-socket systems, especially large multi-socket systems, are likely to have
individual clocksources rather than a single, universally distributed clock. individual clocksources rather than a single, universally distributed clock.
@ -385,7 +401,8 @@ standards for telecommunications and computer equipment.
It is recommended not to trust the TSCs to remain synchronized on NUMA or It is recommended not to trust the TSCs to remain synchronized on NUMA or
multiple socket systems for these reasons. multiple socket systems for these reasons.
3.4) TSC and C-states 3.4. TSC and C-states
---------------------
C-states, or idling states of the processor, especially C1E and deeper sleep C-states, or idling states of the processor, especially C1E and deeper sleep
states may be problematic for TSC as well. The TSC may stop advancing in such states may be problematic for TSC as well. The TSC may stop advancing in such
@ -396,7 +413,8 @@ based on CPU and chipset identifications.
The TSC in such a case may be corrected by catching it up to a known external The TSC in such a case may be corrected by catching it up to a known external
clocksource. clocksource.
3.5) TSC frequency change / P-states 3.5. TSC frequency change / P-states
------------------------------------
To make things slightly more interesting, some CPUs may change frequency. They To make things slightly more interesting, some CPUs may change frequency. They
may or may not run the TSC at the same rate, and because the frequency change may or may not run the TSC at the same rate, and because the frequency change
@ -416,14 +434,16 @@ other processors. In such cases, the TSC on halted CPUs could advance faster
than that of non-halted processors. AMD Turion processors are known to have than that of non-halted processors. AMD Turion processors are known to have
this problem. this problem.
3.6) TSC and STPCLK / T-states 3.6. TSC and STPCLK / T-states
------------------------------
External signals given to the processor may also have the effect of stopping External signals given to the processor may also have the effect of stopping
the TSC. This is typically done for thermal emergency power control to prevent the TSC. This is typically done for thermal emergency power control to prevent
an overheating condition, and typically, there is no way to detect that this an overheating condition, and typically, there is no way to detect that this
condition has happened. condition has happened.
3.7) TSC virtualization - VMX 3.7. TSC virtualization - VMX
-----------------------------
VMX provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP VMX provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP
instructions, which is enough for full virtualization of TSC in any manner. In instructions, which is enough for full virtualization of TSC in any manner. In
@ -431,14 +451,16 @@ addition, VMX allows passing through the host TSC plus an additional TSC_OFFSET
field specified in the VMCS. Special instructions must be used to read and field specified in the VMCS. Special instructions must be used to read and
write the VMCS field. write the VMCS field.
3.8) TSC virtualization - SVM 3.8. TSC virtualization - SVM
-----------------------------
SVM provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP SVM provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP
instructions, which is enough for full virtualization of TSC in any manner. In instructions, which is enough for full virtualization of TSC in any manner. In
addition, SVM allows passing through the host TSC plus an additional offset addition, SVM allows passing through the host TSC plus an additional offset
field specified in the SVM control block. field specified in the SVM control block.
3.9) TSC feature bits in Linux 3.9. TSC feature bits in Linux
------------------------------
In summary, there is no way to guarantee the TSC remains in perfect In summary, there is no way to guarantee the TSC remains in perfect
synchronization unless it is explicitly guaranteed by the architecture. Even synchronization unless it is explicitly guaranteed by the architecture. Even
@ -448,13 +470,16 @@ despite being locally consistent.
The following feature bits are used by Linux to signal various TSC attributes, The following feature bits are used by Linux to signal various TSC attributes,
but they can only be taken to be meaningful for UP or single node systems. but they can only be taken to be meaningful for UP or single node systems.
X86_FEATURE_TSC : The TSC is available in hardware ========================= =======================================
X86_FEATURE_RDTSCP : The RDTSCP instruction is available X86_FEATURE_TSC The TSC is available in hardware
X86_FEATURE_CONSTANT_TSC : The TSC rate is unchanged with P-states X86_FEATURE_RDTSCP The RDTSCP instruction is available
X86_FEATURE_NONSTOP_TSC : The TSC does not stop in C-states X86_FEATURE_CONSTANT_TSC The TSC rate is unchanged with P-states
X86_FEATURE_TSC_RELIABLE : TSC sync checks are skipped (VMware) X86_FEATURE_NONSTOP_TSC The TSC does not stop in C-states
X86_FEATURE_TSC_RELIABLE TSC sync checks are skipped (VMware)
========================= =======================================
4) Virtualization Problems 4. Virtualization Problems
==========================
Timekeeping is especially problematic for virtualization because a number of Timekeeping is especially problematic for virtualization because a number of
challenges arise. The most obvious problem is that time is now shared between challenges arise. The most obvious problem is that time is now shared between
@ -473,7 +498,8 @@ BIOS, but not in such an extreme fashion. However, the fact that SMM mode may
cause similar problems to virtualization makes it a good justification for cause similar problems to virtualization makes it a good justification for
solving many of these problems on bare metal. solving many of these problems on bare metal.
4.1) Interrupt clocking 4.1. Interrupt clocking
-----------------------
One of the most immediate problems that occurs with legacy operating systems One of the most immediate problems that occurs with legacy operating systems
is that the system timekeeping routines are often designed to keep track of is that the system timekeeping routines are often designed to keep track of
@ -502,7 +528,8 @@ thus requires interrupt slewing to keep proper time. It does use a low enough
rate (ed: is it 18.2 Hz?) however that it has not yet been a problem in rate (ed: is it 18.2 Hz?) however that it has not yet been a problem in
practice. practice.
4.2) TSC sampling and serialization 4.2. TSC sampling and serialization
-----------------------------------
As the highest precision time source available, the cycle counter of the CPU As the highest precision time source available, the cycle counter of the CPU
has aroused much interest from developers. As explained above, this timer has has aroused much interest from developers. As explained above, this timer has
@ -524,7 +551,8 @@ it may be necessary for an implementation to guard against "backwards" reads of
the TSC as seen from other CPUs, even in an otherwise perfectly synchronized the TSC as seen from other CPUs, even in an otherwise perfectly synchronized
system. system.
4.3) Timespec aliasing 4.3. Timespec aliasing
----------------------
Additionally, this lack of serialization from the TSC poses another challenge Additionally, this lack of serialization from the TSC poses another challenge
when using results of the TSC when measured against another time source. As when using results of the TSC when measured against another time source. As
@ -548,7 +576,8 @@ This aliasing requires care in the computation and recalibration of kvmclock
and any other values derived from TSC computation (such as TSC virtualization and any other values derived from TSC computation (such as TSC virtualization
itself). itself).
4.4) Migration 4.4. Migration
--------------
Migration of a virtual machine raises problems for timekeeping in two ways. Migration of a virtual machine raises problems for timekeeping in two ways.
First, the migration itself may take time, during which interrupts cannot be First, the migration itself may take time, during which interrupts cannot be
@ -566,7 +595,8 @@ always be caught up to the original rate. KVM clock avoids these problems by
simply storing multipliers and offsets against the TSC for the guest to convert simply storing multipliers and offsets against the TSC for the guest to convert
back into nanosecond resolution values. back into nanosecond resolution values.
4.5) Scheduling 4.5. Scheduling
---------------
Since scheduling may be based on precise timing and firing of interrupts, the Since scheduling may be based on precise timing and firing of interrupts, the
scheduling algorithms of an operating system may be adversely affected by scheduling algorithms of an operating system may be adversely affected by
@ -579,7 +609,8 @@ In an attempt to work around this, several implementations have provided a
paravirtualized scheduler clock, which reveals the true amount of CPU time for paravirtualized scheduler clock, which reveals the true amount of CPU time for
which a virtual machine has been running. which a virtual machine has been running.
4.6) Watchdogs 4.6. Watchdogs
--------------
Watchdog timers, such as the lock detector in Linux, may fire accidentally when Watchdog timers, such as the lock detector in Linux, may fire accidentally when
running under hardware virtualization due to timer interrupts being delayed or running under hardware virtualization due to timer interrupts being delayed or
@ -587,7 +618,8 @@ misinterpretation of the passage of real time. Usually, these warnings are
spurious and can be ignored, but in some circumstances it may be necessary to spurious and can be ignored, but in some circumstances it may be necessary to
disable such detection. disable such detection.
4.7) Delays and precision timing 4.7. Delays and precision timing
--------------------------------
Precise timing and delays may not be possible in a virtualized system. This Precise timing and delays may not be possible in a virtualized system. This
can happen if the system is controlling physical hardware, or issues delays to can happen if the system is controlling physical hardware, or issues delays to
@ -600,7 +632,8 @@ The second issue may cause performance problems, but this is unlikely to be a
significant issue. In many cases these delays may be eliminated through significant issue. In many cases these delays may be eliminated through
configuration or paravirtualization. configuration or paravirtualization.
4.8) Covert channels and leaks 4.8. Covert channels and leaks
------------------------------
In addition to the above problems, time information will inevitably leak to the In addition to the above problems, time information will inevitably leak to the
guest about the host in anything but a perfect implementation of virtualized guest about the host in anything but a perfect implementation of virtualized

View File

@ -2796,11 +2796,11 @@ F: drivers/block/aoe/
ATHEROS 71XX/9XXX GPIO DRIVER ATHEROS 71XX/9XXX GPIO DRIVER
M: Alban Bedel <albeu@free.fr> M: Alban Bedel <albeu@free.fr>
S: Maintained
W: https://github.com/AlbanBedel/linux W: https://github.com/AlbanBedel/linux
T: git git://github.com/AlbanBedel/linux T: git git://github.com/AlbanBedel/linux
S: Maintained
F: drivers/gpio/gpio-ath79.c
F: Documentation/devicetree/bindings/gpio/gpio-ath79.txt F: Documentation/devicetree/bindings/gpio/gpio-ath79.txt
F: drivers/gpio/gpio-ath79.c
ATHEROS 71XX/9XXX USB PHY DRIVER ATHEROS 71XX/9XXX USB PHY DRIVER
M: Alban Bedel <albeu@free.fr> M: Alban Bedel <albeu@free.fr>
@ -3422,8 +3422,8 @@ BROADCOM BRCMSTB GPIO DRIVER
M: Gregory Fong <gregory.0xf0@gmail.com> M: Gregory Fong <gregory.0xf0@gmail.com>
L: bcm-kernel-feedback-list@broadcom.com L: bcm-kernel-feedback-list@broadcom.com
S: Supported S: Supported
F: drivers/gpio/gpio-brcmstb.c
F: Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.txt F: Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.txt
F: drivers/gpio/gpio-brcmstb.c
BROADCOM BRCMSTB I2C DRIVER BROADCOM BRCMSTB I2C DRIVER
M: Kamal Dasu <kdasu.kdev@gmail.com> M: Kamal Dasu <kdasu.kdev@gmail.com>
@ -3481,8 +3481,8 @@ BROADCOM KONA GPIO DRIVER
M: Ray Jui <rjui@broadcom.com> M: Ray Jui <rjui@broadcom.com>
L: bcm-kernel-feedback-list@broadcom.com L: bcm-kernel-feedback-list@broadcom.com
S: Supported S: Supported
F: drivers/gpio/gpio-bcm-kona.c
F: Documentation/devicetree/bindings/gpio/brcm,kona-gpio.txt F: Documentation/devicetree/bindings/gpio/brcm,kona-gpio.txt
F: drivers/gpio/gpio-bcm-kona.c
BROADCOM NETXTREME-E ROCE DRIVER BROADCOM NETXTREME-E ROCE DRIVER
M: Selvin Xavier <selvin.xavier@broadcom.com> M: Selvin Xavier <selvin.xavier@broadcom.com>
@ -3597,8 +3597,8 @@ F: sound/pci/bt87x.c
BT8XXGPIO DRIVER BT8XXGPIO DRIVER
M: Michael Buesch <m@bues.ch> M: Michael Buesch <m@bues.ch>
W: http://bu3sch.de/btgpio.php
S: Maintained S: Maintained
W: http://bu3sch.de/btgpio.php
F: drivers/gpio/gpio-bt8xx.c F: drivers/gpio/gpio-bt8xx.c
BTRFS FILE SYSTEM BTRFS FILE SYSTEM
@ -3649,6 +3649,7 @@ F: sound/pci/oxygen/
C-SKY ARCHITECTURE C-SKY ARCHITECTURE
M: Guo Ren <guoren@kernel.org> M: Guo Ren <guoren@kernel.org>
L: linux-csky@vger.kernel.org
T: git https://github.com/c-sky/csky-linux.git T: git https://github.com/c-sky/csky-linux.git
S: Supported S: Supported
F: arch/csky/ F: arch/csky/
@ -3909,7 +3910,7 @@ S: Supported
F: Documentation/filesystems/ceph.txt F: Documentation/filesystems/ceph.txt
F: fs/ceph/ F: fs/ceph/
CERTIFICATE HANDLING: CERTIFICATE HANDLING
M: David Howells <dhowells@redhat.com> M: David Howells <dhowells@redhat.com>
M: David Woodhouse <dwmw2@infradead.org> M: David Woodhouse <dwmw2@infradead.org>
L: keyrings@vger.kernel.org L: keyrings@vger.kernel.org
@ -3919,7 +3920,7 @@ F: certs/
F: scripts/sign-file.c F: scripts/sign-file.c
F: scripts/extract-cert.c F: scripts/extract-cert.c
CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM: CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM
L: devel@driverdev.osuosl.org L: devel@driverdev.osuosl.org
S: Obsolete S: Obsolete
F: drivers/staging/wusbcore/ F: drivers/staging/wusbcore/
@ -5932,12 +5933,12 @@ S: Maintained
F: drivers/media/dvb-frontends/ec100* F: drivers/media/dvb-frontends/ec100*
ECRYPT FILE SYSTEM ECRYPT FILE SYSTEM
M: Tyler Hicks <tyhicks@canonical.com> M: Tyler Hicks <code@tyhicks.com>
L: ecryptfs@vger.kernel.org L: ecryptfs@vger.kernel.org
W: http://ecryptfs.org W: http://ecryptfs.org
W: https://launchpad.net/ecryptfs W: https://launchpad.net/ecryptfs
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tyhicks/ecryptfs.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/tyhicks/ecryptfs.git
S: Supported S: Odd Fixes
F: Documentation/filesystems/ecryptfs.txt F: Documentation/filesystems/ecryptfs.txt
F: fs/ecryptfs/ F: fs/ecryptfs/
@ -7047,7 +7048,7 @@ L: kvm@vger.kernel.org
S: Supported S: Supported
F: drivers/uio/uio_pci_generic.c F: drivers/uio/uio_pci_generic.c
GENERIC VDSO LIBRARY: GENERIC VDSO LIBRARY
M: Andy Lutomirski <luto@kernel.org> M: Andy Lutomirski <luto@kernel.org>
M: Thomas Gleixner <tglx@linutronix.de> M: Thomas Gleixner <tglx@linutronix.de>
M: Vincenzo Frascino <vincenzo.frascino@arm.com> M: Vincenzo Frascino <vincenzo.frascino@arm.com>
@ -7143,18 +7144,18 @@ GPIO SUBSYSTEM
M: Linus Walleij <linus.walleij@linaro.org> M: Linus Walleij <linus.walleij@linaro.org>
M: Bartosz Golaszewski <bgolaszewski@baylibre.com> M: Bartosz Golaszewski <bgolaszewski@baylibre.com>
L: linux-gpio@vger.kernel.org L: linux-gpio@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
S: Maintained S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
F: Documentation/ABI/obsolete/sysfs-gpio
F: Documentation/ABI/testing/gpio-cdev
F: Documentation/admin-guide/gpio/
F: Documentation/devicetree/bindings/gpio/ F: Documentation/devicetree/bindings/gpio/
F: Documentation/driver-api/gpio/ F: Documentation/driver-api/gpio/
F: Documentation/admin-guide/gpio/
F: Documentation/ABI/testing/gpio-cdev
F: Documentation/ABI/obsolete/sysfs-gpio
F: drivers/gpio/ F: drivers/gpio/
F: include/asm-generic/gpio.h
F: include/linux/gpio/ F: include/linux/gpio/
F: include/linux/gpio.h F: include/linux/gpio.h
F: include/linux/of_gpio.h F: include/linux/of_gpio.h
F: include/asm-generic/gpio.h
F: include/uapi/linux/gpio.h F: include/uapi/linux/gpio.h
F: tools/gpio/ F: tools/gpio/
@ -8055,8 +8056,8 @@ F: drivers/scsi/ips.*
ICH LPC AND GPIO DRIVER ICH LPC AND GPIO DRIVER
M: Peter Tyser <ptyser@xes-inc.com> M: Peter Tyser <ptyser@xes-inc.com>
S: Maintained S: Maintained
F: drivers/mfd/lpc_ich.c
F: drivers/gpio/gpio-ich.c F: drivers/gpio/gpio-ich.c
F: drivers/mfd/lpc_ich.c
ICY I2C DRIVER ICY I2C DRIVER
M: Max Staudt <max@enpas.org> M: Max Staudt <max@enpas.org>
@ -8392,7 +8393,7 @@ M: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
M: Rodrigo Vivi <rodrigo.vivi@intel.com> M: Rodrigo Vivi <rodrigo.vivi@intel.com>
L: intel-gfx@lists.freedesktop.org L: intel-gfx@lists.freedesktop.org
W: https://01.org/linuxgraphics/ W: https://01.org/linuxgraphics/
B: https://01.org/linuxgraphics/documentation/how-report-bugs B: https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs
C: irc://chat.freenode.net/intel-gfx C: irc://chat.freenode.net/intel-gfx
Q: http://patchwork.freedesktop.org/project/intel-gfx/ Q: http://patchwork.freedesktop.org/project/intel-gfx/
T: git git://anongit.freedesktop.org/drm-intel T: git git://anongit.freedesktop.org/drm-intel
@ -9278,7 +9279,7 @@ F: include/keys/trusted-type.h
F: security/keys/trusted.c F: security/keys/trusted.c
F: include/keys/trusted.h F: include/keys/trusted.h
KEYS/KEYRINGS: KEYS/KEYRINGS
M: David Howells <dhowells@redhat.com> M: David Howells <dhowells@redhat.com>
M: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> M: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
L: keyrings@vger.kernel.org L: keyrings@vger.kernel.org
@ -11484,7 +11485,7 @@ F: drivers/scsi/mac_scsi.*
F: drivers/scsi/sun3_scsi.* F: drivers/scsi/sun3_scsi.*
F: drivers/scsi/sun3_scsi_vme.c F: drivers/scsi/sun3_scsi_vme.c
NCSI LIBRARY: NCSI LIBRARY
M: Samuel Mendoza-Jonas <sam@mendozajonas.com> M: Samuel Mendoza-Jonas <sam@mendozajonas.com>
S: Maintained S: Maintained
F: net/ncsi/ F: net/ncsi/
@ -13512,7 +13513,7 @@ L: linuxppc-dev@lists.ozlabs.org
S: Maintained S: Maintained
F: drivers/block/ps3vram.c F: drivers/block/ps3vram.c
PSAMPLE PACKET SAMPLING SUPPORT: PSAMPLE PACKET SAMPLING SUPPORT
M: Yotam Gigi <yotam.gi@gmail.com> M: Yotam Gigi <yotam.gi@gmail.com>
S: Maintained S: Maintained
F: net/psample F: net/psample
@ -14582,10 +14583,10 @@ F: drivers/media/pci/saa7146/
F: include/media/drv-intf/saa7146* F: include/media/drv-intf/saa7146*
SAFESETID SECURITY MODULE SAFESETID SECURITY MODULE
M: Micah Morton <mortonm@chromium.org> M: Micah Morton <mortonm@chromium.org>
S: Supported S: Supported
F: security/safesetid/ F: security/safesetid/
F: Documentation/admin-guide/LSM/SafeSetID.rst F: Documentation/admin-guide/LSM/SafeSetID.rst
SAMSUNG AUDIO (ASoC) DRIVERS SAMSUNG AUDIO (ASoC) DRIVERS
M: Krzysztof Kozlowski <krzk@kernel.org> M: Krzysztof Kozlowski <krzk@kernel.org>
@ -16075,8 +16076,8 @@ F: Documentation/devicetree/bindings/reset/snps,axs10x-reset.txt
SYNOPSYS CREG GPIO DRIVER SYNOPSYS CREG GPIO DRIVER
M: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com> M: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
S: Maintained S: Maintained
F: drivers/gpio/gpio-creg-snps.c
F: Documentation/devicetree/bindings/gpio/snps,creg-gpio.txt F: Documentation/devicetree/bindings/gpio/snps,creg-gpio.txt
F: drivers/gpio/gpio-creg-snps.c
SYNOPSYS DESIGNWARE 8250 UART DRIVER SYNOPSYS DESIGNWARE 8250 UART DRIVER
R: Andy Shevchenko <andriy.shevchenko@linux.intel.com> R: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
@ -16087,8 +16088,8 @@ SYNOPSYS DESIGNWARE APB GPIO DRIVER
M: Hoan Tran <hoan@os.amperecomputing.com> M: Hoan Tran <hoan@os.amperecomputing.com>
L: linux-gpio@vger.kernel.org L: linux-gpio@vger.kernel.org
S: Maintained S: Maintained
F: drivers/gpio/gpio-dwapb.c
F: Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt F: Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt
F: drivers/gpio/gpio-dwapb.c
SYNOPSYS DESIGNWARE AXI DMAC DRIVER SYNOPSYS DESIGNWARE AXI DMAC DRIVER
M: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com> M: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
@ -16552,8 +16553,8 @@ M: Michael Jamet <michael.jamet@intel.com>
M: Mika Westerberg <mika.westerberg@linux.intel.com> M: Mika Westerberg <mika.westerberg@linux.intel.com>
M: Yehezkel Bernat <YehezkelShB@gmail.com> M: Yehezkel Bernat <YehezkelShB@gmail.com>
L: linux-usb@vger.kernel.org L: linux-usb@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git
S: Maintained S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git
F: Documentation/admin-guide/thunderbolt.rst F: Documentation/admin-guide/thunderbolt.rst
F: drivers/thunderbolt/ F: drivers/thunderbolt/
F: include/linux/thunderbolt.h F: include/linux/thunderbolt.h
@ -17080,7 +17081,7 @@ S: Maintained
F: Documentation/admin-guide/ufs.rst F: Documentation/admin-guide/ufs.rst
F: fs/ufs/ F: fs/ufs/
UHID USERSPACE HID IO DRIVER: UHID USERSPACE HID IO DRIVER
M: David Herrmann <dh.herrmann@googlemail.com> M: David Herrmann <dh.herrmann@googlemail.com>
L: linux-input@vger.kernel.org L: linux-input@vger.kernel.org
S: Maintained S: Maintained
@ -17094,18 +17095,18 @@ S: Maintained
F: drivers/usb/common/ulpi.c F: drivers/usb/common/ulpi.c
F: include/linux/ulpi/ F: include/linux/ulpi/
ULTRA-WIDEBAND (UWB) SUBSYSTEM: ULTRA-WIDEBAND (UWB) SUBSYSTEM
L: devel@driverdev.osuosl.org L: devel@driverdev.osuosl.org
S: Obsolete S: Obsolete
F: drivers/staging/uwb/ F: drivers/staging/uwb/
UNICODE SUBSYSTEM: UNICODE SUBSYSTEM
M: Gabriel Krisman Bertazi <krisman@collabora.com> M: Gabriel Krisman Bertazi <krisman@collabora.com>
L: linux-fsdevel@vger.kernel.org L: linux-fsdevel@vger.kernel.org
S: Supported S: Supported
F: fs/unicode/ F: fs/unicode/
UNICORE32 ARCHITECTURE: UNICORE32 ARCHITECTURE
M: Guan Xuetao <gxt@pku.edu.cn> M: Guan Xuetao <gxt@pku.edu.cn>
W: http://mprc.pku.edu.cn/~guanxuetao/linux W: http://mprc.pku.edu.cn/~guanxuetao/linux
S: Maintained S: Maintained
@ -17398,11 +17399,14 @@ F: drivers/usb/
F: include/linux/usb.h F: include/linux/usb.h
F: include/linux/usb/ F: include/linux/usb/
USB TYPEC PI3USB30532 MUX DRIVER USB TYPEC BUS FOR ALTERNATE MODES
M: Hans de Goede <hdegoede@redhat.com> M: Heikki Krogerus <heikki.krogerus@linux.intel.com>
L: linux-usb@vger.kernel.org L: linux-usb@vger.kernel.org
S: Maintained S: Maintained
F: drivers/usb/typec/mux/pi3usb30532.c F: Documentation/ABI/testing/sysfs-bus-typec
F: Documentation/driver-api/usb/typec_bus.rst
F: drivers/usb/typec/altmodes/
F: include/linux/usb/typec_altmode.h
USB TYPEC CLASS USB TYPEC CLASS
M: Heikki Krogerus <heikki.krogerus@linux.intel.com> M: Heikki Krogerus <heikki.krogerus@linux.intel.com>
@ -17413,14 +17417,11 @@ F: Documentation/driver-api/usb/typec.rst
F: drivers/usb/typec/ F: drivers/usb/typec/
F: include/linux/usb/typec.h F: include/linux/usb/typec.h
USB TYPEC BUS FOR ALTERNATE MODES USB TYPEC PI3USB30532 MUX DRIVER
M: Heikki Krogerus <heikki.krogerus@linux.intel.com> M: Hans de Goede <hdegoede@redhat.com>
L: linux-usb@vger.kernel.org L: linux-usb@vger.kernel.org
S: Maintained S: Maintained
F: Documentation/ABI/testing/sysfs-bus-typec F: drivers/usb/typec/mux/pi3usb30532.c
F: Documentation/driver-api/usb/typec_bus.rst
F: drivers/usb/typec/altmodes/
F: include/linux/usb/typec_altmode.h
USB TYPEC PORT CONTROLLER DRIVERS USB TYPEC PORT CONTROLLER DRIVERS
M: Guenter Roeck <linux@roeck-us.net> M: Guenter Roeck <linux@roeck-us.net>
@ -17797,7 +17798,7 @@ F: include/linux/vbox_utils.h
F: include/uapi/linux/vbox*.h F: include/uapi/linux/vbox*.h
F: drivers/virt/vboxguest/ F: drivers/virt/vboxguest/
VIRTUAL BOX SHARED FOLDER VFS DRIVER: VIRTUAL BOX SHARED FOLDER VFS DRIVER
M: Hans de Goede <hdegoede@redhat.com> M: Hans de Goede <hdegoede@redhat.com>
L: linux-fsdevel@vger.kernel.org L: linux-fsdevel@vger.kernel.org
S: Maintained S: Maintained
@ -18420,8 +18421,8 @@ M: Nandor Han <nandor.han@ge.com>
M: Semi Malinen <semi.malinen@ge.com> M: Semi Malinen <semi.malinen@ge.com>
L: linux-gpio@vger.kernel.org L: linux-gpio@vger.kernel.org
S: Maintained S: Maintained
F: drivers/gpio/gpio-xra1403.c
F: Documentation/devicetree/bindings/gpio/gpio-xra1403.txt F: Documentation/devicetree/bindings/gpio/gpio-xra1403.txt
F: drivers/gpio/gpio-xra1403.c
XTENSA XTFPGA PLATFORM SUPPORT XTENSA XTFPGA PLATFORM SUPPORT
M: Max Filippov <jcmvbkbc@gmail.com> M: Max Filippov <jcmvbkbc@gmail.com>

View File

@ -2,7 +2,7 @@
VERSION = 5 VERSION = 5
PATCHLEVEL = 6 PATCHLEVEL = 6
SUBLEVEL = 0 SUBLEVEL = 0
EXTRAVERSION = -rc1 EXTRAVERSION = -rc3
NAME = Kleptomaniac Octopus NAME = Kleptomaniac Octopus
# *DOCUMENTATION* # *DOCUMENTATION*

View File

@ -178,9 +178,6 @@
phy-mode = "rgmii"; phy-mode = "rgmii";
pinctrl-0 = <&pinctrl_rgmii1 &pinctrl_rgmii1_mdio_1>; pinctrl-0 = <&pinctrl_rgmii1 &pinctrl_rgmii1_mdio_1>;
snps,phy-bus-name = "stmmac";
snps,phy-bus-id = <0>;
snps,phy-addr = <0>;
snps,reset-gpio = <&pio0 7 0>; snps,reset-gpio = <&pio0 7 0>;
snps,reset-active-low; snps,reset-active-low;
snps,reset-delays-us = <0 10000 1000000>; snps,reset-delays-us = <0 10000 1000000>;

View File

@ -46,7 +46,7 @@
/* DAC */ /* DAC */
format = "i2s"; format = "i2s";
mclk-fs = <256>; mclk-fs = <256>;
frame-inversion = <1>; frame-inversion;
cpu { cpu {
sound-dai = <&sti_uni_player2>; sound-dai = <&sti_uni_player2>;
}; };

View File

@ -11,8 +11,6 @@ CONFIG_SLAB=y
CONFIG_MODULES=y CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_PXA=y CONFIG_ARCH_PXA=y
CONFIG_ARCH_GUMSTIX=y CONFIG_ARCH_GUMSTIX=y
CONFIG_PCCARD=y CONFIG_PCCARD=y

View File

@ -25,7 +25,6 @@ CONFIG_EMBEDDED=y
CONFIG_PROFILING=y CONFIG_PROFILING=y
CONFIG_MODULES=y CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_UNLOAD=y
# CONFIG_IOSCHED_DEADLINE is not set
CONFIG_ARCH_AXXIA=y CONFIG_ARCH_AXXIA=y
CONFIG_GPIO_PCA953X=y CONFIG_GPIO_PCA953X=y
CONFIG_ARM_LPAE=y CONFIG_ARM_LPAE=y

View File

@ -7,7 +7,6 @@ CONFIG_EMBEDDED=y
CONFIG_SLOB=y CONFIG_SLOB=y
CONFIG_JUMP_LABEL=y CONFIG_JUMP_LABEL=y
CONFIG_PARTITION_ADVANCED=y CONFIG_PARTITION_ADVANCED=y
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_CLPS711X=y CONFIG_ARCH_CLPS711X=y
CONFIG_ARCH_AUTCPU12=y CONFIG_ARCH_AUTCPU12=y
CONFIG_ARCH_CDB89712=y CONFIG_ARCH_CDB89712=y

View File

@ -17,7 +17,7 @@ CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODVERSIONS=y CONFIG_MODVERSIONS=y
# CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_BSG is not set
CONFIG_IOSCHED_CFQ=m CONFIG_IOSCHED_BFQ=m
CONFIG_ARCH_MULTI_V6=y CONFIG_ARCH_MULTI_V6=y
#CONFIG_ARCH_MULTI_V7 is not set #CONFIG_ARCH_MULTI_V7 is not set
CONFIG_ARCH_CNS3XXX=y CONFIG_ARCH_CNS3XXX=y

View File

@ -43,7 +43,6 @@ CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
CONFIG_USB_MON=y CONFIG_USB_MON=y
CONFIG_USB_STORAGE=y CONFIG_USB_STORAGE=y
CONFIG_MMC=y CONFIG_MMC=y
# CONFIG_MMC_BLOCK_BOUNCE is not set
CONFIG_MMC_PXA=y CONFIG_MMC_PXA=y
CONFIG_EXT3_FS=y CONFIG_EXT3_FS=y
CONFIG_NFS_FS=y CONFIG_NFS_FS=y

View File

@ -7,8 +7,6 @@ CONFIG_EXPERT=y
# CONFIG_BASE_FULL is not set # CONFIG_BASE_FULL is not set
# CONFIG_EPOLL is not set # CONFIG_EPOLL is not set
CONFIG_SLOB=y CONFIG_SLOB=y
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_SA1100=y CONFIG_ARCH_SA1100=y
CONFIG_SA1100_COLLIE=y CONFIG_SA1100_COLLIE=y
CONFIG_PCCARD=y CONFIG_PCCARD=y

View File

@ -15,8 +15,6 @@ CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODVERSIONS=y CONFIG_MODVERSIONS=y
CONFIG_PARTITION_ADVANCED=y CONFIG_PARTITION_ADVANCED=y
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_MULTIPLATFORM=y CONFIG_ARCH_MULTIPLATFORM=y
CONFIG_ARCH_MULTI_V7=n CONFIG_ARCH_MULTI_V7=n
CONFIG_ARCH_MULTI_V5=y CONFIG_ARCH_MULTI_V5=y

View File

@ -12,8 +12,6 @@ CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_SLUB_DEBUG is not set # CONFIG_SLUB_DEBUG is not set
# CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
# CONFIG_MMU is not set # CONFIG_MMU is not set
CONFIG_ARM_SINGLE_ARMV7M=y CONFIG_ARM_SINGLE_ARMV7M=y
CONFIG_ARCH_EFM32=y CONFIG_ARCH_EFM32=y

View File

@ -11,7 +11,6 @@ CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_BSG is not set
CONFIG_PARTITION_ADVANCED=y CONFIG_PARTITION_ADVANCED=y
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_EP93XX=y CONFIG_ARCH_EP93XX=y
CONFIG_CRUNCH=y CONFIG_CRUNCH=y
CONFIG_MACH_ADSSPHERE=y CONFIG_MACH_ADSSPHERE=y

View File

@ -9,8 +9,6 @@ CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_PXA=y CONFIG_ARCH_PXA=y
CONFIG_ARCH_PXA_ESERIES=y CONFIG_ARCH_PXA_ESERIES=y
# CONFIG_ARM_THUMB is not set # CONFIG_ARM_THUMB is not set

View File

@ -14,7 +14,6 @@ CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODVERSIONS=y CONFIG_MODVERSIONS=y
# CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_PXA=y CONFIG_ARCH_PXA=y
CONFIG_PXA_EZX=y CONFIG_PXA_EZX=y
CONFIG_NO_HZ=y CONFIG_NO_HZ=y

View File

@ -5,8 +5,6 @@ CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y CONFIG_BLK_DEV_INITRD=y
CONFIG_MODULES=y CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_SA1100=y CONFIG_ARCH_SA1100=y
CONFIG_SA1100_H3600=y CONFIG_SA1100_H3600=y
CONFIG_PCCARD=y CONFIG_PCCARD=y

View File

@ -10,7 +10,6 @@ CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_PXA=y CONFIG_ARCH_PXA=y
CONFIG_MACH_H5000=y CONFIG_MACH_H5000=y
CONFIG_AEABI=y CONFIG_AEABI=y

View File

@ -13,7 +13,6 @@ CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODVERSIONS=y CONFIG_MODVERSIONS=y
# CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_PXA=y CONFIG_ARCH_PXA=y
CONFIG_MACH_INTELMOTE2=y CONFIG_MACH_INTELMOTE2=y
CONFIG_NO_HZ=y CONFIG_NO_HZ=y

View File

@ -32,8 +32,6 @@ CONFIG_KPROBES=y
CONFIG_MODULES=y CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_NET=y CONFIG_NET=y
CONFIG_PACKET=y CONFIG_PACKET=y
CONFIG_UNIX=y CONFIG_UNIX=y

View File

@ -1,4 +1,3 @@
CONFIG_CROSS_COMPILE="arm-linux-gnueabihf-"
CONFIG_HIGH_RES_TIMERS=y CONFIG_HIGH_RES_TIMERS=y
CONFIG_PREEMPT=y CONFIG_PREEMPT=y
CONFIG_BLK_DEV_INITRD=y CONFIG_BLK_DEV_INITRD=y
@ -28,10 +27,7 @@ CONFIG_FLASH_SIZE=0x00080000
CONFIG_ZBOOT_ROM_TEXT=0x0 CONFIG_ZBOOT_ROM_TEXT=0x0
CONFIG_ZBOOT_ROM_BSS=0x0 CONFIG_ZBOOT_ROM_BSS=0x0
CONFIG_ARM_APPENDED_DTB=y CONFIG_ARM_APPENDED_DTB=y
# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_BINFMT_FLAT=y CONFIG_BINFMT_FLAT=y
CONFIG_BINFMT_ZFLAT=y CONFIG_BINFMT_ZFLAT=y
CONFIG_BINFMT_SHARED_FLAT=y CONFIG_BINFMT_SHARED_FLAT=y

View File

@ -9,8 +9,6 @@ CONFIG_SLAB=y
CONFIG_MODULES=y CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_PXA=y CONFIG_ARCH_PXA=y
CONFIG_MACH_H4700=y CONFIG_MACH_H4700=y
CONFIG_MACH_MAGICIAN=y CONFIG_MACH_MAGICIAN=y

View File

@ -15,7 +15,6 @@ CONFIG_EMBEDDED=y
# CONFIG_SLUB_DEBUG is not set # CONFIG_SLUB_DEBUG is not set
# CONFIG_COMPAT_BRK is not set # CONFIG_COMPAT_BRK is not set
# CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
CONFIG_ARCH_MULTI_V4=y CONFIG_ARCH_MULTI_V4=y
# CONFIG_ARCH_MULTI_V7 is not set # CONFIG_ARCH_MULTI_V7 is not set
CONFIG_ARCH_MOXART=y CONFIG_ARCH_MOXART=y

View File

@ -25,8 +25,6 @@ CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODVERSIONS=y CONFIG_MODVERSIONS=y
CONFIG_BLK_DEV_INTEGRITY=y CONFIG_BLK_DEV_INTEGRITY=y
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_NET=y CONFIG_NET=y
CONFIG_PACKET=y CONFIG_PACKET=y
CONFIG_UNIX=y CONFIG_UNIX=y

View File

@ -18,8 +18,6 @@ CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_OMAP=y CONFIG_ARCH_OMAP=y
CONFIG_ARCH_OMAP1=y CONFIG_ARCH_OMAP1=y
CONFIG_OMAP_RESET_CLOCKS=y CONFIG_OMAP_RESET_CLOCKS=y

View File

@ -7,8 +7,6 @@ CONFIG_SLAB=y
CONFIG_MODULES=y CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_PXA=y CONFIG_ARCH_PXA=y
CONFIG_ARCH_PXA_PALM=y CONFIG_ARCH_PXA_PALM=y
# CONFIG_MACH_PALMTX is not set # CONFIG_MACH_PALMTX is not set

View File

@ -13,8 +13,6 @@ CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_PXA=y CONFIG_ARCH_PXA=y
CONFIG_MACH_PCM027=y CONFIG_MACH_PCM027=y
CONFIG_MACH_PCM990_BASEBOARD=y CONFIG_MACH_PCM990_BASEBOARD=y

View File

@ -6,8 +6,6 @@ CONFIG_EXPERT=y
# CONFIG_HOTPLUG is not set # CONFIG_HOTPLUG is not set
# CONFIG_SHMEM is not set # CONFIG_SHMEM is not set
CONFIG_MODULES=y CONFIG_MODULES=y
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_SA1100=y CONFIG_ARCH_SA1100=y
CONFIG_SA1100_PLEB=y CONFIG_SA1100_PLEB=y
CONFIG_ZBOOT_ROM_TEXT=0x0 CONFIG_ZBOOT_ROM_TEXT=0x0

View File

@ -8,7 +8,6 @@ CONFIG_SLAB=y
CONFIG_MODULES=y CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_MULTI_V6=y CONFIG_ARCH_MULTI_V6=y
CONFIG_ARCH_REALVIEW=y CONFIG_ARCH_REALVIEW=y
CONFIG_MACH_REALVIEW_EB=y CONFIG_MACH_REALVIEW_EB=y

View File

@ -14,8 +14,6 @@ CONFIG_MODULE_FORCE_LOAD=y
CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_AT91=y CONFIG_ARCH_AT91=y
CONFIG_SOC_SAMA5D2=y CONFIG_SOC_SAMA5D2=y
CONFIG_SOC_SAMA5D3=y CONFIG_SOC_SAMA5D3=y
@ -182,7 +180,6 @@ CONFIG_USB_GADGET=y
CONFIG_USB_ATMEL_USBA=y CONFIG_USB_ATMEL_USBA=y
CONFIG_USB_G_SERIAL=y CONFIG_USB_G_SERIAL=y
CONFIG_MMC=y CONFIG_MMC=y
# CONFIG_MMC_BLOCK_BOUNCE is not set
CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI=y
CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SDHCI_PLTFM=y
CONFIG_MMC_SDHCI_OF_AT91=y CONFIG_MMC_SDHCI_OF_AT91=y

View File

@ -14,8 +14,6 @@ CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_SLUB_DEBUG is not set # CONFIG_SLUB_DEBUG is not set
# CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
# CONFIG_MMU is not set # CONFIG_MMU is not set
CONFIG_ARCH_STM32=y CONFIG_ARCH_STM32=y
CONFIG_CPU_V7M_NUM_IRQ=240 CONFIG_CPU_V7M_NUM_IRQ=240

View File

@ -85,6 +85,7 @@ CONFIG_BATTERY_AXP20X=y
CONFIG_AXP20X_POWER=y CONFIG_AXP20X_POWER=y
CONFIG_THERMAL=y CONFIG_THERMAL=y
CONFIG_CPU_THERMAL=y CONFIG_CPU_THERMAL=y
CONFIG_SUN8I_THERMAL=y
CONFIG_WATCHDOG=y CONFIG_WATCHDOG=y
CONFIG_SUNXI_WATCHDOG=y CONFIG_SUNXI_WATCHDOG=y
CONFIG_MFD_AC100=y CONFIG_MFD_AC100=y

View File

@ -11,7 +11,6 @@ CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_BSG is not set
CONFIG_PARTITION_ADVANCED=y CONFIG_PARTITION_ADVANCED=y
# CONFIG_IOSCHED_CFQ is not set
# CONFIG_ARCH_MULTI_V7 is not set # CONFIG_ARCH_MULTI_V7 is not set
CONFIG_ARCH_U300=y CONFIG_ARCH_U300=y
CONFIG_MACH_U300_SPIDUMMY=y CONFIG_MACH_U300_SPIDUMMY=y
@ -46,7 +45,6 @@ CONFIG_FB=y
CONFIG_BACKLIGHT_CLASS_DEVICE=y CONFIG_BACKLIGHT_CLASS_DEVICE=y
# CONFIG_USB_SUPPORT is not set # CONFIG_USB_SUPPORT is not set
CONFIG_MMC=y CONFIG_MMC=y
# CONFIG_MMC_BLOCK_BOUNCE is not set
CONFIG_MMC_ARMMMCI=y CONFIG_MMC_ARMMMCI=y
CONFIG_RTC_CLASS=y CONFIG_RTC_CLASS=y
# CONFIG_RTC_HCTOSYS is not set # CONFIG_RTC_HCTOSYS is not set

View File

@ -15,8 +15,6 @@ CONFIG_OPROFILE=y
CONFIG_MODULES=y CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_VEXPRESS=y CONFIG_ARCH_VEXPRESS=y
CONFIG_ARCH_VEXPRESS_DCSCB=y CONFIG_ARCH_VEXPRESS_DCSCB=y
CONFIG_ARCH_VEXPRESS_TC2_PM=y CONFIG_ARCH_VEXPRESS_TC2_PM=y

View File

@ -9,7 +9,6 @@ CONFIG_SLAB=y
CONFIG_MODULES=y CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_PXA=y CONFIG_ARCH_PXA=y
CONFIG_ARCH_VIPER=y CONFIG_ARCH_VIPER=y
CONFIG_IWMMXT=y CONFIG_IWMMXT=y

View File

@ -4,7 +4,6 @@ CONFIG_LOG_BUF_SHIFT=13
CONFIG_MODULES=y CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_PXA=y CONFIG_ARCH_PXA=y
CONFIG_MACH_ARCOM_ZEUS=y CONFIG_MACH_ARCOM_ZEUS=y
CONFIG_PCCARD=m CONFIG_PCCARD=m
@ -137,7 +136,6 @@ CONFIG_USB_MASS_STORAGE=m
CONFIG_USB_G_SERIAL=m CONFIG_USB_G_SERIAL=m
CONFIG_USB_G_PRINTER=m CONFIG_USB_G_PRINTER=m
CONFIG_MMC=y CONFIG_MMC=y
# CONFIG_MMC_BLOCK_BOUNCE is not set
CONFIG_MMC_PXA=y CONFIG_MMC_PXA=y
CONFIG_NEW_LEDS=y CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=m CONFIG_LEDS_CLASS=m

View File

@ -16,7 +16,6 @@ CONFIG_EMBEDDED=y
CONFIG_PERF_EVENTS=y CONFIG_PERF_EVENTS=y
CONFIG_SLAB=y CONFIG_SLAB=y
# CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_ZX=y CONFIG_ARCH_ZX=y
CONFIG_SOC_ZX296702=y CONFIG_SOC_ZX296702=y
# CONFIG_SWP_EMULATE is not set # CONFIG_SWP_EMULATE is not set

View File

@ -78,13 +78,10 @@ static int ftrace_modify_code(unsigned long pc, unsigned long old,
{ {
unsigned long replaced; unsigned long replaced;
if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) { if (IS_ENABLED(CONFIG_THUMB2_KERNEL))
old = __opcode_to_mem_thumb32(old); old = __opcode_to_mem_thumb32(old);
new = __opcode_to_mem_thumb32(new); else
} else {
old = __opcode_to_mem_arm(old); old = __opcode_to_mem_arm(old);
new = __opcode_to_mem_arm(new);
}
if (validate) { if (validate) {
if (probe_kernel_read(&replaced, (void *)pc, MCOUNT_INSN_SIZE)) if (probe_kernel_read(&replaced, (void *)pc, MCOUNT_INSN_SIZE))

View File

@ -16,10 +16,10 @@ struct patch {
unsigned int insn; unsigned int insn;
}; };
#ifdef CONFIG_MMU
static DEFINE_RAW_SPINLOCK(patch_lock); static DEFINE_RAW_SPINLOCK(patch_lock);
static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
__acquires(&patch_lock)
{ {
unsigned int uintaddr = (uintptr_t) addr; unsigned int uintaddr = (uintptr_t) addr;
bool module = !core_kernel_text(uintaddr); bool module = !core_kernel_text(uintaddr);
@ -34,8 +34,6 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
if (flags) if (flags)
raw_spin_lock_irqsave(&patch_lock, *flags); raw_spin_lock_irqsave(&patch_lock, *flags);
else
__acquire(&patch_lock);
set_fixmap(fixmap, page_to_phys(page)); set_fixmap(fixmap, page_to_phys(page));
@ -43,15 +41,19 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
} }
static void __kprobes patch_unmap(int fixmap, unsigned long *flags) static void __kprobes patch_unmap(int fixmap, unsigned long *flags)
__releases(&patch_lock)
{ {
clear_fixmap(fixmap); clear_fixmap(fixmap);
if (flags) if (flags)
raw_spin_unlock_irqrestore(&patch_lock, *flags); raw_spin_unlock_irqrestore(&patch_lock, *flags);
else
__release(&patch_lock);
} }
#else
static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
{
return addr;
}
static void __kprobes patch_unmap(int fixmap, unsigned long *flags) { }
#endif
void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap) void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap)
{ {
@ -64,8 +66,6 @@ void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap)
if (remap) if (remap)
waddr = patch_map(addr, FIX_TEXT_POKE0, &flags); waddr = patch_map(addr, FIX_TEXT_POKE0, &flags);
else
__acquire(&patch_lock);
if (thumb2 && __opcode_is_thumb16(insn)) { if (thumb2 && __opcode_is_thumb16(insn)) {
*(u16 *)waddr = __opcode_to_mem_thumb16(insn); *(u16 *)waddr = __opcode_to_mem_thumb16(insn);
@ -102,8 +102,7 @@ void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap)
if (waddr != addr) { if (waddr != addr) {
flush_kernel_vmap_range(waddr, twopage ? size / 2 : size); flush_kernel_vmap_range(waddr, twopage ? size / 2 : size);
patch_unmap(FIX_TEXT_POKE0, &flags); patch_unmap(FIX_TEXT_POKE0, &flags);
} else }
__release(&patch_lock);
flush_icache_range((uintptr_t)(addr), flush_icache_range((uintptr_t)(addr),
(uintptr_t)(addr) + size); (uintptr_t)(addr) + size);

View File

@ -11,7 +11,7 @@ config ARCH_NPCM7XX
depends on ARCH_MULTI_V7 depends on ARCH_MULTI_V7
select PINCTRL_NPCM7XX select PINCTRL_NPCM7XX
select NPCM7XX_TIMER select NPCM7XX_TIMER
select ARCH_REQUIRE_GPIOLIB select GPIOLIB
select CACHE_L2X0 select CACHE_L2X0
select ARM_GIC select ARM_GIC
select HAVE_ARM_TWD if SMP select HAVE_ARM_TWD if SMP

View File

@ -161,10 +161,10 @@
bus-range = <0x0 0x1>; bus-range = <0x0 0x1>;
reg = <0x0 0x40000000 0x0 0x10000000>; reg = <0x0 0x40000000 0x0 0x10000000>;
ranges = <0x2000000 0x0 0x50000000 0x0 0x50000000 0x0 0x10000000>; ranges = <0x2000000 0x0 0x50000000 0x0 0x50000000 0x0 0x10000000>;
interrupt-map = <0 0 0 1 &gic GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>, interrupt-map = <0 0 0 1 &gic 0 0 GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>,
<0 0 0 2 &gic GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>, <0 0 0 2 &gic 0 0 GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>,
<0 0 0 3 &gic GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>, <0 0 0 3 &gic 0 0 GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>,
<0 0 0 4 &gic GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>; <0 0 0 4 &gic 0 0 GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>;
interrupt-map-mask = <0x0 0x0 0x0 0x7>; interrupt-map-mask = <0x0 0x0 0x0 0x7>;
msi-map = <0x0 &its 0x0 0x10000>; msi-map = <0x0 &its 0x0 0x10000>;
iommu-map = <0x0 &smmu 0x0 0x10000>; iommu-map = <0x0 &smmu 0x0 0x10000>;

View File

@ -452,6 +452,7 @@ CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y
CONFIG_CPU_THERMAL=y CONFIG_CPU_THERMAL=y
CONFIG_THERMAL_EMULATION=y CONFIG_THERMAL_EMULATION=y
CONFIG_QORIQ_THERMAL=m CONFIG_QORIQ_THERMAL=m
CONFIG_SUN8I_THERMAL=y
CONFIG_ROCKCHIP_THERMAL=m CONFIG_ROCKCHIP_THERMAL=m
CONFIG_RCAR_THERMAL=y CONFIG_RCAR_THERMAL=y
CONFIG_RCAR_GEN3_THERMAL=y CONFIG_RCAR_GEN3_THERMAL=y
@ -547,6 +548,7 @@ CONFIG_ROCKCHIP_DW_MIPI_DSI=y
CONFIG_ROCKCHIP_INNO_HDMI=y CONFIG_ROCKCHIP_INNO_HDMI=y
CONFIG_DRM_RCAR_DU=m CONFIG_DRM_RCAR_DU=m
CONFIG_DRM_SUN4I=m CONFIG_DRM_SUN4I=m
CONFIG_DRM_SUN6I_DSI=m
CONFIG_DRM_SUN8I_DW_HDMI=m CONFIG_DRM_SUN8I_DW_HDMI=m
CONFIG_DRM_SUN8I_MIXER=m CONFIG_DRM_SUN8I_MIXER=m
CONFIG_DRM_MSM=m CONFIG_DRM_MSM=m
@ -681,7 +683,7 @@ CONFIG_RTC_DRV_SNVS=m
CONFIG_RTC_DRV_IMX_SC=m CONFIG_RTC_DRV_IMX_SC=m
CONFIG_RTC_DRV_XGENE=y CONFIG_RTC_DRV_XGENE=y
CONFIG_DMADEVICES=y CONFIG_DMADEVICES=y
CONFIG_DMA_BCM2835=m CONFIG_DMA_BCM2835=y
CONFIG_DMA_SUN6I=m CONFIG_DMA_SUN6I=m
CONFIG_FSL_EDMA=y CONFIG_FSL_EDMA=y
CONFIG_IMX_SDMA=y CONFIG_IMX_SDMA=y

View File

@ -33,7 +33,6 @@ static inline u32 disr_to_esr(u64 disr)
asmlinkage void enter_from_user_mode(void); asmlinkage void enter_from_user_mode(void);
void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
void do_undefinstr(struct pt_regs *regs); void do_undefinstr(struct pt_regs *regs);
asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr); asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr);
void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
@ -47,7 +46,4 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr);
void do_cp15instr(unsigned int esr, struct pt_regs *regs); void do_cp15instr(unsigned int esr, struct pt_regs *regs);
void do_el0_svc(struct pt_regs *regs); void do_el0_svc(struct pt_regs *regs);
void do_el0_svc_compat(struct pt_regs *regs); void do_el0_svc_compat(struct pt_regs *regs);
void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr,
struct pt_regs *regs);
#endif /* __ASM_EXCEPTION_H */ #endif /* __ASM_EXCEPTION_H */

View File

@ -6,7 +6,7 @@
#ifdef CONFIG_ARM64_LSE_ATOMICS #ifdef CONFIG_ARM64_LSE_ATOMICS
#define __LSE_PREAMBLE ".arch armv8-a+lse\n" #define __LSE_PREAMBLE ".arch_extension lse\n"
#include <linux/compiler_types.h> #include <linux/compiler_types.h>
#include <linux/export.h> #include <linux/export.h>

View File

@ -213,7 +213,7 @@ static inline unsigned long kaslr_offset(void)
((__force __typeof__(addr))sign_extend64((__force u64)(addr), 55)) ((__force __typeof__(addr))sign_extend64((__force u64)(addr), 55))
#define untagged_addr(addr) ({ \ #define untagged_addr(addr) ({ \
u64 __addr = (__force u64)addr; \ u64 __addr = (__force u64)(addr); \
__addr &= __untagged_addr(__addr); \ __addr &= __untagged_addr(__addr); \
(__force __typeof__(addr))__addr; \ (__force __typeof__(addr))__addr; \
}) })

View File

@ -18,6 +18,10 @@
* See: * See:
* https://lore.kernel.org/lkml/20200110100612.GC2827@hirez.programming.kicks-ass.net * https://lore.kernel.org/lkml/20200110100612.GC2827@hirez.programming.kicks-ass.net
*/ */
#define vcpu_is_preempted(cpu) false #define vcpu_is_preempted vcpu_is_preempted
/*
 * Stub implementation: always reports the vCPU as not preempted.
 * @cpu is accepted for interface compatibility but is not inspected.
 */
static inline bool vcpu_is_preempted(int cpu)
{
return false;
}
#endif /* __ASM_SPINLOCK_H */ #endif /* __ASM_SPINLOCK_H */

View File

@ -11,6 +11,7 @@
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/types.h> #include <linux/types.h>
#include <asm/archrandom.h>
#include <asm/cacheflush.h> #include <asm/cacheflush.h>
#include <asm/fixmap.h> #include <asm/fixmap.h>
#include <asm/kernel-pgtable.h> #include <asm/kernel-pgtable.h>

View File

@ -466,6 +466,13 @@ static void ssbs_thread_switch(struct task_struct *next)
if (unlikely(next->flags & PF_KTHREAD)) if (unlikely(next->flags & PF_KTHREAD))
return; return;
/*
* If all CPUs implement the SSBS extension, then we just need to
* context-switch the PSTATE field.
*/
if (cpu_have_feature(cpu_feature(SSBS)))
return;
/* If the mitigation is enabled, then we leave SSBS clear. */ /* If the mitigation is enabled, then we leave SSBS clear. */
if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) || if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) ||
test_tsk_thread_flag(next, TIF_SSBD)) test_tsk_thread_flag(next, TIF_SSBD))
@ -608,8 +615,6 @@ long get_tagged_addr_ctrl(void)
* only prevents the tagged address ABI enabling via prctl() and does not * only prevents the tagged address ABI enabling via prctl() and does not
* disable it for tasks that already opted in to the relaxed ABI. * disable it for tasks that already opted in to the relaxed ABI.
*/ */
static int zero;
static int one = 1;
static struct ctl_table tagged_addr_sysctl_table[] = { static struct ctl_table tagged_addr_sysctl_table[] = {
{ {
@ -618,8 +623,8 @@ static struct ctl_table tagged_addr_sysctl_table[] = {
.data = &tagged_addr_disabled, .data = &tagged_addr_disabled,
.maxlen = sizeof(int), .maxlen = sizeof(int),
.proc_handler = proc_dointvec_minmax, .proc_handler = proc_dointvec_minmax,
.extra1 = &zero, .extra1 = SYSCTL_ZERO,
.extra2 = &one, .extra2 = SYSCTL_ONE,
}, },
{ } { }
}; };

View File

@ -23,7 +23,7 @@
#include <linux/irq.h> #include <linux/irq.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/clocksource.h> #include <linux/clocksource.h>
#include <linux/clk-provider.h> #include <linux/of_clk.h>
#include <linux/acpi.h> #include <linux/acpi.h>
#include <clocksource/arm_arch_timer.h> #include <clocksource/arm_arch_timer.h>

View File

@ -9,7 +9,6 @@ config CSKY
select ARCH_USE_QUEUED_RWLOCKS if NR_CPUS>2 select ARCH_USE_QUEUED_RWLOCKS if NR_CPUS>2
select COMMON_CLK select COMMON_CLK
select CLKSRC_MMIO select CLKSRC_MMIO
select CLKSRC_OF
select CSKY_MPINTC if CPU_CK860 select CSKY_MPINTC if CPU_CK860
select CSKY_MP_TIMER if CPU_CK860 select CSKY_MP_TIMER if CPU_CK860
select CSKY_APB_INTC select CSKY_APB_INTC
@ -37,6 +36,7 @@ config CSKY
select GX6605S_TIMER if CPU_CK610 select GX6605S_TIMER if CPU_CK610
select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_AUDITSYSCALL
select HAVE_COPY_THREAD_TLS
select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_GRAPH_TRACER
@ -47,8 +47,8 @@ config CSKY
select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS
select HAVE_PERF_REGS select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP select HAVE_PERF_USER_STACK_DUMP
select HAVE_DMA_API_DEBUG
select HAVE_DMA_CONTIGUOUS select HAVE_DMA_CONTIGUOUS
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS select HAVE_SYSCALL_TRACEPOINTS
select MAY_HAVE_SPARSE_IRQ select MAY_HAVE_SPARSE_IRQ
select MODULES_USE_ELF_RELA if MODULES select MODULES_USE_ELF_RELA if MODULES
@ -59,6 +59,11 @@ config CSKY
select TIMER_OF select TIMER_OF
select USB_ARCH_HAS_EHCI select USB_ARCH_HAS_EHCI
select USB_ARCH_HAS_OHCI select USB_ARCH_HAS_OHCI
select GENERIC_PCI_IOMAP
select HAVE_PCI
select PCI_DOMAINS_GENERIC if PCI
select PCI_SYSCALL if PCI
select PCI_MSI if PCI
config CPU_HAS_CACHEV2 config CPU_HAS_CACHEV2
bool bool
@ -75,7 +80,7 @@ config CPU_HAS_TLBI
config CPU_HAS_LDSTEX config CPU_HAS_LDSTEX
bool bool
help help
For SMP, CPU needs "ldex&stex" instrcutions to atomic operations. For SMP, CPU needs "ldex&stex" instructions for atomic operations.
config CPU_NEED_TLBSYNC config CPU_NEED_TLBSYNC
bool bool
@ -188,6 +193,40 @@ config CPU_PM_STOP
bool "stop" bool "stop"
endchoice endchoice
menuconfig HAVE_TCM
bool "Tightly-Coupled/Sram Memory"
select GENERIC_ALLOCATOR
help
This implementation is used not only by TCM (Tightly-Coupled Memory)
but also by SRAM on the SoC bus. It follows the existing Linux TCM
software interface, so that old TCM application code can be
re-used directly.
if HAVE_TCM
config ITCM_RAM_BASE
hex "ITCM ram base"
default 0xffffffff
config ITCM_NR_PAGES
int "Page count of ITCM size: NR*4KB"
range 1 256
default 32
config HAVE_DTCM
bool "DTCM Support"
config DTCM_RAM_BASE
hex "DTCM ram base"
depends on HAVE_DTCM
default 0xffffffff
config DTCM_NR_PAGES
int "Page count of DTCM size: NR*4KB"
depends on HAVE_DTCM
range 1 256
default 32
endif
config CPU_HAS_VDSP config CPU_HAS_VDSP
bool "CPU has VDSP coprocessor" bool "CPU has VDSP coprocessor"
depends on CPU_HAS_FPU && CPU_HAS_FPUV2 depends on CPU_HAS_FPU && CPU_HAS_FPUV2
@ -196,6 +235,10 @@ config CPU_HAS_FPU
bool "CPU has FPU coprocessor" bool "CPU has FPU coprocessor"
depends on CPU_CK807 || CPU_CK810 || CPU_CK860 depends on CPU_CK807 || CPU_CK810 || CPU_CK860
config CPU_HAS_ICACHE_INS
bool "CPU has Icache invalidate instructions"
depends on CPU_HAS_CACHEV2
config CPU_HAS_TEE config CPU_HAS_TEE
bool "CPU has Trusted Execution Environment" bool "CPU has Trusted Execution Environment"
depends on CPU_CK810 depends on CPU_CK810
@ -235,4 +278,6 @@ config HOTPLUG_CPU
Say N if you want to disable CPU hotplug. Say N if you want to disable CPU hotplug.
endmenu endmenu
source "arch/csky/Kconfig.platforms"
source "kernel/Kconfig.hz" source "kernel/Kconfig.hz"

View File

@ -0,0 +1,9 @@
menu "Platform drivers selection"
config ARCH_CSKY_DW_APB_ICTL
bool "Select dw-apb interrupt controller"
select DW_APB_ICTL
default y
help
This enables support for snps dw-apb-ictl
endmenu

View File

@ -48,9 +48,8 @@ extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, u
#define flush_icache_page(vma, page) do {} while (0); #define flush_icache_page(vma, page) do {} while (0);
#define flush_icache_range(start, end) cache_wbinv_range(start, end) #define flush_icache_range(start, end) cache_wbinv_range(start, end)
#define flush_icache_mm_range(mm, start, end) cache_wbinv_range(start, end)
#define flush_icache_user_range(vma,page,addr,len) \ #define flush_icache_deferred(mm) do {} while (0);
flush_dcache_page(page)
#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ #define copy_from_user_page(vma, page, vaddr, dst, src, len) \
do { \ do { \

View File

@ -16,14 +16,16 @@
#define LSAVE_A4 40 #define LSAVE_A4 40
#define LSAVE_A5 44 #define LSAVE_A5 44
#define usp ss1
.macro USPTOKSP .macro USPTOKSP
mtcr sp, ss1 mtcr sp, usp
mfcr sp, ss0 mfcr sp, ss0
.endm .endm
.macro KSPTOUSP .macro KSPTOUSP
mtcr sp, ss0 mtcr sp, ss0
mfcr sp, ss1 mfcr sp, usp
.endm .endm
.macro SAVE_ALL epc_inc .macro SAVE_ALL epc_inc
@ -45,7 +47,13 @@
add lr, r13 add lr, r13
stw lr, (sp, 8) stw lr, (sp, 8)
mov lr, sp
addi lr, 32
addi lr, 32
addi lr, 16
bt 2f
mfcr lr, ss1 mfcr lr, ss1
2:
stw lr, (sp, 16) stw lr, (sp, 16)
stw a0, (sp, 20) stw a0, (sp, 20)
@ -79,9 +87,10 @@
ldw a0, (sp, 12) ldw a0, (sp, 12)
mtcr a0, epsr mtcr a0, epsr
btsti a0, 31 btsti a0, 31
bt 1f
ldw a0, (sp, 16) ldw a0, (sp, 16)
mtcr a0, ss1 mtcr a0, ss1
1:
ldw a0, (sp, 24) ldw a0, (sp, 24)
ldw a1, (sp, 28) ldw a1, (sp, 28)
ldw a2, (sp, 32) ldw a2, (sp, 32)
@ -102,9 +111,9 @@
addi sp, 32 addi sp, 32
addi sp, 8 addi sp, 8
bt 1f bt 2f
KSPTOUSP KSPTOUSP
1: 2:
rte rte
.endm .endm

View File

@ -6,46 +6,80 @@
#include <linux/mm.h> #include <linux/mm.h>
#include <asm/cache.h> #include <asm/cache.h>
void flush_icache_page(struct vm_area_struct *vma, struct page *page)
{
unsigned long start;
start = (unsigned long) kmap_atomic(page);
cache_wbinv_range(start, start + PAGE_SIZE);
kunmap_atomic((void *)start);
}
void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
unsigned long vaddr, int len)
{
unsigned long kaddr;
kaddr = (unsigned long) kmap_atomic(page) + (vaddr & ~PAGE_MASK);
cache_wbinv_range(kaddr, kaddr + len);
kunmap_atomic((void *)kaddr);
}
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
pte_t *pte) pte_t *pte)
{ {
unsigned long addr, pfn; unsigned long addr;
struct page *page; struct page *page;
pfn = pte_pfn(*pte); page = pfn_to_page(pte_pfn(*pte));
if (unlikely(!pfn_valid(pfn))) if (page == ZERO_PAGE(0))
return; return;
page = pfn_to_page(pfn); if (test_and_set_bit(PG_dcache_clean, &page->flags))
if (page == ZERO_PAGE(0))
return; return;
addr = (unsigned long) kmap_atomic(page); addr = (unsigned long) kmap_atomic(page);
cache_wbinv_range(addr, addr + PAGE_SIZE); dcache_wb_range(addr, addr + PAGE_SIZE);
if (vma->vm_flags & VM_EXEC)
icache_inv_range(addr, addr + PAGE_SIZE);
kunmap_atomic((void *) addr); kunmap_atomic((void *) addr);
} }
/*
 * Carry out an instruction-cache invalidate that is pending for @mm on the
 * CPU this code is running on.
 *
 * If the current CPU's bit is set in mm->context.icache_stale_mask, the bit
 * is cleared and the whole local I-cache is invalidated through
 * local_icache_inv_all(). If the bit is not set, nothing is done.
 *
 * NOTE(review): relies on smp_processor_id(), so this is presumably called
 * with preemption disabled -- confirm against the callers.
 */
void flush_icache_deferred(struct mm_struct *mm)
{
unsigned int cpu = smp_processor_id();
cpumask_t *mask = &mm->context.icache_stale_mask;
if (cpumask_test_cpu(cpu, mask)) {
/* Clear our bit first, then invalidate, so the request is consumed once. */
cpumask_clear_cpu(cpu, mask);
/*
* Ensure the remote hart's writes are visible to this hart.
* This pairs with a barrier in flush_icache_mm.
*
* NOTE(review): the function that sets this mask in this file is
* named flush_icache_mm_range(); confirm which barrier is meant.
*/
smp_mb();
local_icache_inv_all(NULL);
}
}
/*
 * Make the instruction caches coherent for @mm after code in it was modified.
 *
 * @mm:    address space whose instruction stream changed
 * @start: start of the affected range
 * @end:   end of the affected range
 *
 * With CONFIG_CPU_HAS_ICACHE_INS and @mm being the current task's mm, only
 * [@start, @end) is invalidated via icache_inv_range(). In every other case
 * @start/@end are not used: the whole local I-cache is invalidated and the
 * other CPUs are either invalidated immediately by IPI or left marked in
 * mm->context.icache_stale_mask, the mask that flush_icache_deferred() tests.
 *
 * The whole body runs with preemption disabled, so the CPU number read by
 * smp_processor_id() stays valid for the mask updates and the local flush.
 */
void flush_icache_mm_range(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
unsigned int cpu;
cpumask_t others, *mask;
preempt_disable();
#ifdef CONFIG_CPU_HAS_ICACHE_INS
/* Fast path: ranged invalidate, no stale-mask bookkeeping and no IPIs. */
if (mm == current->mm) {
icache_inv_range(start, end);
preempt_enable();
return;
}
#endif
/* Mark every hart's icache as needing a flush for this MM. */
mask = &mm->context.icache_stale_mask;
cpumask_setall(mask);
/* Flush this hart's I$ now, and mark it as flushed. */
cpu = smp_processor_id();
cpumask_clear_cpu(cpu, mask);
local_icache_inv_all(NULL);
/*
* Flush the I$ of other harts concurrently executing, and mark them as
* flushed.
*/
/* others = CPUs in mm_cpumask(mm), excluding the current one. */
cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
/*
* When this branch is not taken, the bits set by cpumask_setall() above
* remain set for every CPU except this one.
*/
if (mm != current->active_mm || !cpumask_empty(&others)) {
/* NOTE(review): final argument 1 is presumably "wait for completion" -- confirm. */
on_each_cpu_mask(&others, local_icache_inv_all, NULL, 1);
cpumask_clear(mask);
}
preempt_enable();
}

View File

@ -13,24 +13,27 @@
#define flush_cache_all() do { } while (0) #define flush_cache_all() do { } while (0)
#define flush_cache_mm(mm) do { } while (0) #define flush_cache_mm(mm) do { } while (0)
#define flush_cache_dup_mm(mm) do { } while (0) #define flush_cache_dup_mm(mm) do { } while (0)
#define flush_cache_range(vma, start, end) do { } while (0)
#define flush_cache_range(vma, start, end) \
do { \
if (vma->vm_flags & VM_EXEC) \
icache_inv_all(); \
} while (0)
#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) #define flush_cache_page(vma, vmaddr, pfn) do { } while (0)
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
#define flush_dcache_page(page) do { } while (0) #define PG_dcache_clean PG_arch_1
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
/*
 * Mark @page as no longer known to be D-cache clean by clearing the
 * PG_dcache_clean flag in page->flags. No cache maintenance is done here.
 *
 * The flag is tested first, so pages whose flag is already clear are not
 * written to.
 */
static inline void flush_dcache_page(struct page *page)
{
if (test_bit(PG_dcache_clean, &page->flags))
clear_bit(PG_dcache_clean, &page->flags);
}
#define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_lock(mapping) do { } while (0)
#define flush_dcache_mmap_unlock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0)
#define flush_icache_page(vma, page) do { } while (0)
#define flush_icache_range(start, end) cache_wbinv_range(start, end) #define flush_icache_range(start, end) cache_wbinv_range(start, end)
void flush_icache_page(struct vm_area_struct *vma, struct page *page); void flush_icache_mm_range(struct mm_struct *mm,
void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, unsigned long start, unsigned long end);
unsigned long vaddr, int len); void flush_icache_deferred(struct mm_struct *mm);
#define flush_cache_vmap(start, end) do { } while (0) #define flush_cache_vmap(start, end) do { } while (0)
#define flush_cache_vunmap(start, end) do { } while (0) #define flush_cache_vunmap(start, end) do { } while (0)
@ -38,7 +41,13 @@ void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
#define copy_to_user_page(vma, page, vaddr, dst, src, len) \ #define copy_to_user_page(vma, page, vaddr, dst, src, len) \
do { \ do { \
memcpy(dst, src, len); \ memcpy(dst, src, len); \
cache_wbinv_range((unsigned long)dst, (unsigned long)dst + len); \ if (vma->vm_flags & VM_EXEC) { \
dcache_wb_range((unsigned long)dst, \
(unsigned long)dst + len); \
flush_icache_mm_range(current->mm, \
(unsigned long)dst, \
(unsigned long)dst + len); \
} \
} while (0) } while (0)
#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ #define copy_from_user_page(vma, page, vaddr, dst, src, len) \
memcpy(dst, src, len) memcpy(dst, src, len)

View File

@ -31,7 +31,13 @@
mfcr lr, epsr mfcr lr, epsr
stw lr, (sp, 12) stw lr, (sp, 12)
btsti lr, 31
bf 1f
addi lr, sp, 152
br 2f
1:
mfcr lr, usp mfcr lr, usp
2:
stw lr, (sp, 16) stw lr, (sp, 16)
stw a0, (sp, 20) stw a0, (sp, 20)
@ -64,8 +70,10 @@
mtcr a0, epc mtcr a0, epc
ldw a0, (sp, 12) ldw a0, (sp, 12)
mtcr a0, epsr mtcr a0, epsr
btsti a0, 31
ldw a0, (sp, 16) ldw a0, (sp, 16)
mtcr a0, usp mtcr a0, usp
mtcr a0, ss0
#ifdef CONFIG_CPU_HAS_HILO #ifdef CONFIG_CPU_HAS_HILO
ldw a0, (sp, 140) ldw a0, (sp, 140)
@ -86,6 +94,9 @@
addi sp, 40 addi sp, 40
ldm r16-r30, (sp) ldm r16-r30, (sp)
addi sp, 72 addi sp, 72
bf 1f
mfcr sp, ss0
1:
rte rte
.endm .endm

Some files were not shown because too many files have changed in this diff Show More