Merge 5.6-rc3 into usb-next
We need the USB fixes in here as well. Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
commit
24e6aea480
2
COPYING
2
COPYING
@ -16,3 +16,5 @@ In addition, other licenses may also apply. Please see:
|
|||||||
Documentation/process/license-rules.rst
|
Documentation/process/license-rules.rst
|
||||||
|
|
||||||
for more details.
|
for more details.
|
||||||
|
|
||||||
|
All contributions to the Linux Kernel are subject to this COPYING file.
|
||||||
|
@ -129,7 +129,7 @@ this logic.
|
|||||||
|
|
||||||
As a single binary will need to support both 48-bit and 52-bit VA
|
As a single binary will need to support both 48-bit and 52-bit VA
|
||||||
spaces, the VMEMMAP must be sized large enough for 52-bit VAs and
|
spaces, the VMEMMAP must be sized large enough for 52-bit VAs and
|
||||||
also must be sized large enought to accommodate a fixed PAGE_OFFSET.
|
also must be sized large enough to accommodate a fixed PAGE_OFFSET.
|
||||||
|
|
||||||
Most code in the kernel should not need to consider the VA_BITS, for
|
Most code in the kernel should not need to consider the VA_BITS, for
|
||||||
code that does need to know the VA size the variables are
|
code that does need to know the VA size the variables are
|
||||||
|
@ -44,8 +44,15 @@ The AArch64 Tagged Address ABI has two stages of relaxation depending
|
|||||||
how the user addresses are used by the kernel:
|
how the user addresses are used by the kernel:
|
||||||
|
|
||||||
1. User addresses not accessed by the kernel but used for address space
|
1. User addresses not accessed by the kernel but used for address space
|
||||||
management (e.g. ``mmap()``, ``mprotect()``, ``madvise()``). The use
|
management (e.g. ``mprotect()``, ``madvise()``). The use of valid
|
||||||
of valid tagged pointers in this context is always allowed.
|
tagged pointers in this context is allowed with the exception of
|
||||||
|
``brk()``, ``mmap()`` and the ``new_address`` argument to
|
||||||
|
``mremap()`` as these have the potential to alias with existing
|
||||||
|
user addresses.
|
||||||
|
|
||||||
|
NOTE: This behaviour changed in v5.6 and so some earlier kernels may
|
||||||
|
incorrectly accept valid tagged pointers for the ``brk()``,
|
||||||
|
``mmap()`` and ``mremap()`` system calls.
|
||||||
|
|
||||||
2. User addresses accessed by the kernel (e.g. ``write()``). This ABI
|
2. User addresses accessed by the kernel (e.g. ``write()``). This ABI
|
||||||
relaxation is disabled by default and the application thread needs to
|
relaxation is disabled by default and the application thread needs to
|
||||||
|
@ -43,9 +43,13 @@ properties:
|
|||||||
- enum:
|
- enum:
|
||||||
- allwinner,sun8i-h3-tcon-tv
|
- allwinner,sun8i-h3-tcon-tv
|
||||||
- allwinner,sun50i-a64-tcon-tv
|
- allwinner,sun50i-a64-tcon-tv
|
||||||
- allwinner,sun50i-h6-tcon-tv
|
|
||||||
- const: allwinner,sun8i-a83t-tcon-tv
|
- const: allwinner,sun8i-a83t-tcon-tv
|
||||||
|
|
||||||
|
- items:
|
||||||
|
- enum:
|
||||||
|
- allwinner,sun50i-h6-tcon-tv
|
||||||
|
- const: allwinner,sun8i-r40-tcon-tv
|
||||||
|
|
||||||
reg:
|
reg:
|
||||||
maxItems: 1
|
maxItems: 1
|
||||||
|
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
Ilitek ILI210x/ILI2117/ILI251x touchscreen controller
|
Ilitek ILI210x/ILI2117/ILI2120/ILI251x touchscreen controller
|
||||||
|
|
||||||
Required properties:
|
Required properties:
|
||||||
- compatible:
|
- compatible:
|
||||||
ilitek,ili210x for ILI210x
|
ilitek,ili210x for ILI210x
|
||||||
ilitek,ili2117 for ILI2117
|
ilitek,ili2117 for ILI2117
|
||||||
|
ilitek,ili2120 for ILI2120
|
||||||
ilitek,ili251x for ILI251x
|
ilitek,ili251x for ILI251x
|
||||||
|
|
||||||
- reg: The I2C address of the device
|
- reg: The I2C address of the device
|
||||||
|
@ -33,24 +33,40 @@ properties:
|
|||||||
maxItems: 1
|
maxItems: 1
|
||||||
|
|
||||||
clocks:
|
clocks:
|
||||||
minItems: 2
|
oneOf:
|
||||||
maxItems: 3
|
- items:
|
||||||
items:
|
- description: The CSI interface clock
|
||||||
- description: The CSI interface clock
|
- description: The CSI DRAM clock
|
||||||
- description: The CSI ISP clock
|
|
||||||
- description: The CSI DRAM clock
|
- items:
|
||||||
|
- description: The CSI interface clock
|
||||||
|
- description: The CSI ISP clock
|
||||||
|
- description: The CSI DRAM clock
|
||||||
|
|
||||||
clock-names:
|
clock-names:
|
||||||
minItems: 2
|
oneOf:
|
||||||
maxItems: 3
|
- items:
|
||||||
items:
|
- const: bus
|
||||||
- const: bus
|
- const: ram
|
||||||
- const: isp
|
|
||||||
- const: ram
|
- items:
|
||||||
|
- const: bus
|
||||||
|
- const: isp
|
||||||
|
- const: ram
|
||||||
|
|
||||||
resets:
|
resets:
|
||||||
maxItems: 1
|
maxItems: 1
|
||||||
|
|
||||||
|
# FIXME: This should be made required eventually once every SoC will
|
||||||
|
# have the MBUS declared.
|
||||||
|
interconnects:
|
||||||
|
maxItems: 1
|
||||||
|
|
||||||
|
# FIXME: This should be made required eventually once every SoC will
|
||||||
|
# have the MBUS declared.
|
||||||
|
interconnect-names:
|
||||||
|
const: dma-mem
|
||||||
|
|
||||||
# See ./video-interfaces.txt for details
|
# See ./video-interfaces.txt for details
|
||||||
port:
|
port:
|
||||||
type: object
|
type: object
|
||||||
|
@ -347,6 +347,7 @@ examples:
|
|||||||
interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
|
interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
|
||||||
|
|
||||||
#iommu-cells = <1>;
|
#iommu-cells = <1>;
|
||||||
|
#reset-cells = <1>;
|
||||||
};
|
};
|
||||||
|
|
||||||
external-memory-controller@7001b000 {
|
external-memory-controller@7001b000 {
|
||||||
@ -363,20 +364,23 @@ examples:
|
|||||||
timing-0 {
|
timing-0 {
|
||||||
clock-frequency = <12750000>;
|
clock-frequency = <12750000>;
|
||||||
|
|
||||||
nvidia,emc-zcal-cnt-long = <0x00000042>;
|
|
||||||
nvidia,emc-auto-cal-interval = <0x001fffff>;
|
|
||||||
nvidia,emc-ctt-term-ctrl = <0x00000802>;
|
|
||||||
nvidia,emc-cfg = <0x73240000>;
|
|
||||||
nvidia,emc-cfg-2 = <0x000008c5>;
|
|
||||||
nvidia,emc-sel-dpd-ctrl = <0x00040128>;
|
|
||||||
nvidia,emc-bgbias-ctl0 = <0x00000008>;
|
|
||||||
nvidia,emc-auto-cal-config = <0xa1430000>;
|
nvidia,emc-auto-cal-config = <0xa1430000>;
|
||||||
nvidia,emc-auto-cal-config2 = <0x00000000>;
|
nvidia,emc-auto-cal-config2 = <0x00000000>;
|
||||||
nvidia,emc-auto-cal-config3 = <0x00000000>;
|
nvidia,emc-auto-cal-config3 = <0x00000000>;
|
||||||
nvidia,emc-mode-reset = <0x80001221>;
|
nvidia,emc-auto-cal-interval = <0x001fffff>;
|
||||||
|
nvidia,emc-bgbias-ctl0 = <0x00000008>;
|
||||||
|
nvidia,emc-cfg = <0x73240000>;
|
||||||
|
nvidia,emc-cfg-2 = <0x000008c5>;
|
||||||
|
nvidia,emc-ctt-term-ctrl = <0x00000802>;
|
||||||
nvidia,emc-mode-1 = <0x80100003>;
|
nvidia,emc-mode-1 = <0x80100003>;
|
||||||
nvidia,emc-mode-2 = <0x80200008>;
|
nvidia,emc-mode-2 = <0x80200008>;
|
||||||
nvidia,emc-mode-4 = <0x00000000>;
|
nvidia,emc-mode-4 = <0x00000000>;
|
||||||
|
nvidia,emc-mode-reset = <0x80001221>;
|
||||||
|
nvidia,emc-mrs-wait-cnt = <0x000e000e>;
|
||||||
|
nvidia,emc-sel-dpd-ctrl = <0x00040128>;
|
||||||
|
nvidia,emc-xm2dqspadctrl2 = <0x0130b118>;
|
||||||
|
nvidia,emc-zcal-cnt-long = <0x00000042>;
|
||||||
|
nvidia,emc-zcal-interval = <0x00000000>;
|
||||||
|
|
||||||
nvidia,emc-configuration = <
|
nvidia,emc-configuration = <
|
||||||
0x00000000 /* EMC_RC */
|
0x00000000 /* EMC_RC */
|
||||||
|
@ -124,7 +124,7 @@ not every application needs SDIO irq, e.g. MMC cards.
|
|||||||
pinctrl-1 = <&mmc1_idle>;
|
pinctrl-1 = <&mmc1_idle>;
|
||||||
pinctrl-2 = <&mmc1_sleep>;
|
pinctrl-2 = <&mmc1_sleep>;
|
||||||
...
|
...
|
||||||
interrupts-extended = <&intc 64 &gpio2 28 GPIO_ACTIVE_LOW>;
|
interrupts-extended = <&intc 64 &gpio2 28 IRQ_TYPE_LEVEL_LOW>;
|
||||||
};
|
};
|
||||||
|
|
||||||
mmc1_idle : pinmux_cirq_pin {
|
mmc1_idle : pinmux_cirq_pin {
|
||||||
|
@ -56,7 +56,6 @@ patternProperties:
|
|||||||
examples:
|
examples:
|
||||||
- |
|
- |
|
||||||
davinci_mdio: mdio@5c030000 {
|
davinci_mdio: mdio@5c030000 {
|
||||||
compatible = "ti,davinci_mdio";
|
|
||||||
reg = <0x5c030000 0x1000>;
|
reg = <0x5c030000 0x1000>;
|
||||||
#address-cells = <1>;
|
#address-cells = <1>;
|
||||||
#size-cells = <0>;
|
#size-cells = <0>;
|
||||||
|
@ -71,9 +71,13 @@ b) Example for device tree::
|
|||||||
ipmb@10 {
|
ipmb@10 {
|
||||||
compatible = "ipmb-dev";
|
compatible = "ipmb-dev";
|
||||||
reg = <0x10>;
|
reg = <0x10>;
|
||||||
|
i2c-protocol;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
If data is to be transmitted using raw i2c block transfers instead of smbus,
|
||||||
|
then "i2c-protocol" needs to be defined as above.
|
||||||
|
|
||||||
2) Manually from Linux::
|
2) Manually from Linux::
|
||||||
|
|
||||||
modprobe ipmb-dev-int
|
modprobe ipmb-dev-int
|
||||||
|
@ -134,7 +134,7 @@ Sequential zone files can only be written sequentially, starting from the file
|
|||||||
end, that is, write operations can only be append writes. Zonefs makes no
|
end, that is, write operations can only be append writes. Zonefs makes no
|
||||||
attempt at accepting random writes and will fail any write request that has a
|
attempt at accepting random writes and will fail any write request that has a
|
||||||
start offset not corresponding to the end of the file, or to the end of the last
|
start offset not corresponding to the end of the file, or to the end of the last
|
||||||
write issued and still in-flight (for asynchrnous I/O operations).
|
write issued and still in-flight (for asynchronous I/O operations).
|
||||||
|
|
||||||
Since dirty page writeback by the page cache does not guarantee a sequential
|
Since dirty page writeback by the page cache does not guarantee a sequential
|
||||||
write pattern, zonefs prevents buffered writes and writeable shared mappings
|
write pattern, zonefs prevents buffered writes and writeable shared mappings
|
||||||
@ -142,7 +142,7 @@ on sequential files. Only direct I/O writes are accepted for these files.
|
|||||||
zonefs relies on the sequential delivery of write I/O requests to the device
|
zonefs relies on the sequential delivery of write I/O requests to the device
|
||||||
implemented by the block layer elevator. An elevator implementing the sequential
|
implemented by the block layer elevator. An elevator implementing the sequential
|
||||||
write feature for zoned block device (ELEVATOR_F_ZBD_SEQ_WRITE elevator feature)
|
write feature for zoned block device (ELEVATOR_F_ZBD_SEQ_WRITE elevator feature)
|
||||||
must be used. This type of elevator (e.g. mq-deadline) is the set by default
|
must be used. This type of elevator (e.g. mq-deadline) is set by default
|
||||||
for zoned block devices on device initialization.
|
for zoned block devices on device initialization.
|
||||||
|
|
||||||
There are no restrictions on the type of I/O used for read operations in
|
There are no restrictions on the type of I/O used for read operations in
|
||||||
@ -196,7 +196,7 @@ additional conditions that result in I/O errors.
|
|||||||
may still happen in the case of a partial failure of a very large direct I/O
|
may still happen in the case of a partial failure of a very large direct I/O
|
||||||
operation split into multiple BIOs/requests or asynchronous I/O operations.
|
operation split into multiple BIOs/requests or asynchronous I/O operations.
|
||||||
If one of the write requests within the set of sequential write requests
|
If one of the write requests within the set of sequential write requests
|
||||||
issued to the device fails, all write requests after queued after it will
|
issued to the device fails, all write requests queued after it will
|
||||||
become unaligned and fail.
|
become unaligned and fail.
|
||||||
|
|
||||||
* Delayed write errors: similarly to regular block devices, if the device side
|
* Delayed write errors: similarly to regular block devices, if the device side
|
||||||
@ -207,7 +207,7 @@ additional conditions that result in I/O errors.
|
|||||||
causing all data to be dropped after the sector that caused the error.
|
causing all data to be dropped after the sector that caused the error.
|
||||||
|
|
||||||
All I/O errors detected by zonefs are notified to the user with an error code
|
All I/O errors detected by zonefs are notified to the user with an error code
|
||||||
return for the system call that trigered or detected the error. The recovery
|
return for the system call that triggered or detected the error. The recovery
|
||||||
actions taken by zonefs in response to I/O errors depend on the I/O type (read
|
actions taken by zonefs in response to I/O errors depend on the I/O type (read
|
||||||
vs write) and on the reason for the error (bad sector, unaligned writes or zone
|
vs write) and on the reason for the error (bad sector, unaligned writes or zone
|
||||||
condition change).
|
condition change).
|
||||||
@ -222,7 +222,7 @@ condition change).
|
|||||||
* A zone condition change to read-only or offline also always triggers zonefs
|
* A zone condition change to read-only or offline also always triggers zonefs
|
||||||
I/O error recovery.
|
I/O error recovery.
|
||||||
|
|
||||||
Zonefs minimal I/O error recovery may change a file size and a file access
|
Zonefs minimal I/O error recovery may change a file size and file access
|
||||||
permissions.
|
permissions.
|
||||||
|
|
||||||
* File size changes:
|
* File size changes:
|
||||||
@ -237,7 +237,7 @@ permissions.
|
|||||||
A file size may also be reduced to reflect a delayed write error detected on
|
A file size may also be reduced to reflect a delayed write error detected on
|
||||||
fsync(): in this case, the amount of data effectively written in the zone may
|
fsync(): in this case, the amount of data effectively written in the zone may
|
||||||
be less than originally indicated by the file inode size. After such I/O
|
be less than originally indicated by the file inode size. After such I/O
|
||||||
error, zonefs always fixes a file inode size to reflect the amount of data
|
error, zonefs always fixes the file inode size to reflect the amount of data
|
||||||
persistently stored in the file zone.
|
persistently stored in the file zone.
|
||||||
|
|
||||||
* Access permission changes:
|
* Access permission changes:
|
||||||
@ -281,11 +281,11 @@ Further notes:
|
|||||||
permissions to read-only applies to all files. The file system is remounted
|
permissions to read-only applies to all files. The file system is remounted
|
||||||
read-only.
|
read-only.
|
||||||
* Access permission and file size changes due to the device transitioning zones
|
* Access permission and file size changes due to the device transitioning zones
|
||||||
to the offline condition are permanent. Remounting or reformating the device
|
to the offline condition are permanent. Remounting or reformatting the device
|
||||||
with mkfs.zonefs (mkzonefs) will not change back offline zone files to a good
|
with mkfs.zonefs (mkzonefs) will not change back offline zone files to a good
|
||||||
state.
|
state.
|
||||||
* File access permission changes to read-only due to the device transitioning
|
* File access permission changes to read-only due to the device transitioning
|
||||||
zones to the read-only condition are permanent. Remounting or reformating
|
zones to the read-only condition are permanent. Remounting or reformatting
|
||||||
the device will not re-enable file write access.
|
the device will not re-enable file write access.
|
||||||
* File access permission changes implied by the remount-ro, zone-ro and
|
* File access permission changes implied by the remount-ro, zone-ro and
|
||||||
zone-offline mount options are temporary for zones in a good condition.
|
zone-offline mount options are temporary for zones in a good condition.
|
||||||
@ -301,13 +301,13 @@ Mount options
|
|||||||
|
|
||||||
zonefs defines the "errors=<behavior>" mount option to allow the user to specify
|
zonefs defines the "errors=<behavior>" mount option to allow the user to specify
|
||||||
zonefs behavior in response to I/O errors, inode size inconsistencies or zone
|
zonefs behavior in response to I/O errors, inode size inconsistencies or zone
|
||||||
condition chages. The defined behaviors are as follow:
|
condition changes. The defined behaviors are as follows:
|
||||||
* remount-ro (default)
|
* remount-ro (default)
|
||||||
* zone-ro
|
* zone-ro
|
||||||
* zone-offline
|
* zone-offline
|
||||||
* repair
|
* repair
|
||||||
|
|
||||||
The I/O error actions defined for each behavior is detailed in the previous
|
The I/O error actions defined for each behavior are detailed in the previous
|
||||||
section.
|
section.
|
||||||
|
|
||||||
Zonefs User Space Tools
|
Zonefs User Space Tools
|
||||||
|
@ -24,6 +24,7 @@ This driver implements support for Infineon Multi-phase XDPE122 family
|
|||||||
dual loop voltage regulators.
|
dual loop voltage regulators.
|
||||||
The family includes XDPE12284 and XDPE12254 devices.
|
The family includes XDPE12284 and XDPE12254 devices.
|
||||||
The devices from this family are compliant with:
|
The devices from this family are compliant with:
|
||||||
|
|
||||||
- Intel VR13 and VR13HC rev 1.3, IMVP8 rev 1.2 and IMVP9 rev 1.3 DC-DC
|
- Intel VR13 and VR13HC rev 1.3, IMVP8 rev 1.2 and IMVP9 rev 1.3 DC-DC
|
||||||
converter specification.
|
converter specification.
|
||||||
- Intel SVID rev 1.9. protocol.
|
- Intel SVID rev 1.9. protocol.
|
||||||
|
@ -244,23 +244,23 @@ disclosure of a particular issue, unless requested by a response team or by
|
|||||||
an involved disclosed party. The current ambassadors list:
|
an involved disclosed party. The current ambassadors list:
|
||||||
|
|
||||||
============= ========================================================
|
============= ========================================================
|
||||||
ARM
|
ARM Grant Likely <grant.likely@arm.com>
|
||||||
AMD Tom Lendacky <tom.lendacky@amd.com>
|
AMD Tom Lendacky <tom.lendacky@amd.com>
|
||||||
IBM
|
IBM
|
||||||
Intel Tony Luck <tony.luck@intel.com>
|
Intel Tony Luck <tony.luck@intel.com>
|
||||||
Qualcomm Trilok Soni <tsoni@codeaurora.org>
|
Qualcomm Trilok Soni <tsoni@codeaurora.org>
|
||||||
|
|
||||||
Microsoft Sasha Levin <sashal@kernel.org>
|
Microsoft James Morris <jamorris@linux.microsoft.com>
|
||||||
VMware
|
VMware
|
||||||
Xen Andrew Cooper <andrew.cooper3@citrix.com>
|
Xen Andrew Cooper <andrew.cooper3@citrix.com>
|
||||||
|
|
||||||
Canonical Tyler Hicks <tyhicks@canonical.com>
|
Canonical John Johansen <john.johansen@canonical.com>
|
||||||
Debian Ben Hutchings <ben@decadent.org.uk>
|
Debian Ben Hutchings <ben@decadent.org.uk>
|
||||||
Oracle Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
Oracle Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
||||||
Red Hat Josh Poimboeuf <jpoimboe@redhat.com>
|
Red Hat Josh Poimboeuf <jpoimboe@redhat.com>
|
||||||
SUSE Jiri Kosina <jkosina@suse.cz>
|
SUSE Jiri Kosina <jkosina@suse.cz>
|
||||||
|
|
||||||
Amazon Peter Bowen <pzb@amzn.com>
|
Amazon
|
||||||
Google Kees Cook <keescook@chromium.org>
|
Google Kees Cook <keescook@chromium.org>
|
||||||
============= ========================================================
|
============= ========================================================
|
||||||
|
|
||||||
|
@ -183,7 +183,7 @@ CVE分配
|
|||||||
VMware
|
VMware
|
||||||
Xen Andrew Cooper <andrew.cooper3@citrix.com>
|
Xen Andrew Cooper <andrew.cooper3@citrix.com>
|
||||||
|
|
||||||
Canonical Tyler Hicks <tyhicks@canonical.com>
|
Canonical John Johansen <john.johansen@canonical.com>
|
||||||
Debian Ben Hutchings <ben@decadent.org.uk>
|
Debian Ben Hutchings <ben@decadent.org.uk>
|
||||||
Oracle Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
Oracle Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
||||||
Red Hat Josh Poimboeuf <jpoimboe@redhat.com>
|
Red Hat Josh Poimboeuf <jpoimboe@redhat.com>
|
||||||
|
@ -1,9 +1,11 @@
|
|||||||
|
==================
|
||||||
Guest halt polling
|
Guest halt polling
|
||||||
==================
|
==================
|
||||||
|
|
||||||
The cpuidle_haltpoll driver, with the haltpoll governor, allows
|
The cpuidle_haltpoll driver, with the haltpoll governor, allows
|
||||||
the guest vcpus to poll for a specified amount of time before
|
the guest vcpus to poll for a specified amount of time before
|
||||||
halting.
|
halting.
|
||||||
|
|
||||||
This provides the following benefits to host side polling:
|
This provides the following benefits to host side polling:
|
||||||
|
|
||||||
1) The POLL flag is set while polling is performed, which allows
|
1) The POLL flag is set while polling is performed, which allows
|
||||||
@ -29,18 +31,21 @@ Module Parameters
|
|||||||
The haltpoll governor has 5 tunable module parameters:
|
The haltpoll governor has 5 tunable module parameters:
|
||||||
|
|
||||||
1) guest_halt_poll_ns:
|
1) guest_halt_poll_ns:
|
||||||
|
|
||||||
Maximum amount of time, in nanoseconds, that polling is
|
Maximum amount of time, in nanoseconds, that polling is
|
||||||
performed before halting.
|
performed before halting.
|
||||||
|
|
||||||
Default: 200000
|
Default: 200000
|
||||||
|
|
||||||
2) guest_halt_poll_shrink:
|
2) guest_halt_poll_shrink:
|
||||||
|
|
||||||
Division factor used to shrink per-cpu guest_halt_poll_ns when
|
Division factor used to shrink per-cpu guest_halt_poll_ns when
|
||||||
wakeup event occurs after the global guest_halt_poll_ns.
|
wakeup event occurs after the global guest_halt_poll_ns.
|
||||||
|
|
||||||
Default: 2
|
Default: 2
|
||||||
|
|
||||||
3) guest_halt_poll_grow:
|
3) guest_halt_poll_grow:
|
||||||
|
|
||||||
Multiplication factor used to grow per-cpu guest_halt_poll_ns
|
Multiplication factor used to grow per-cpu guest_halt_poll_ns
|
||||||
when event occurs after per-cpu guest_halt_poll_ns
|
when event occurs after per-cpu guest_halt_poll_ns
|
||||||
but before global guest_halt_poll_ns.
|
but before global guest_halt_poll_ns.
|
||||||
@ -48,6 +53,7 @@ but before global guest_halt_poll_ns.
|
|||||||
Default: 2
|
Default: 2
|
||||||
|
|
||||||
4) guest_halt_poll_grow_start:
|
4) guest_halt_poll_grow_start:
|
||||||
|
|
||||||
The per-cpu guest_halt_poll_ns eventually reaches zero
|
The per-cpu guest_halt_poll_ns eventually reaches zero
|
||||||
in case of an idle system. This value sets the initial
|
in case of an idle system. This value sets the initial
|
||||||
per-cpu guest_halt_poll_ns when growing. This can
|
per-cpu guest_halt_poll_ns when growing. This can
|
||||||
@ -66,7 +72,7 @@ high once achieves global guest_halt_poll_ns value).
|
|||||||
|
|
||||||
Default: Y
|
Default: Y
|
||||||
|
|
||||||
The module parameters can be set from the debugfs files in:
|
The module parameters can be set from the debugfs files in::
|
||||||
|
|
||||||
/sys/module/haltpoll/parameters/
|
/sys/module/haltpoll/parameters/
|
||||||
|
|
||||||
@ -74,5 +80,5 @@ Further Notes
|
|||||||
=============
|
=============
|
||||||
|
|
||||||
- Care should be taken when setting the guest_halt_poll_ns parameter as a
|
- Care should be taken when setting the guest_halt_poll_ns parameter as a
|
||||||
large value has the potential to drive the cpu usage to 100% on a machine which
|
large value has the potential to drive the cpu usage to 100% on a machine
|
||||||
would be almost entirely idle otherwise.
|
which would be almost entirely idle otherwise.
|
@ -8,7 +8,9 @@ Linux Virtualization Support
|
|||||||
:maxdepth: 2
|
:maxdepth: 2
|
||||||
|
|
||||||
kvm/index
|
kvm/index
|
||||||
|
uml/user_mode_linux
|
||||||
paravirt_ops
|
paravirt_ops
|
||||||
|
guest-halt-polling
|
||||||
|
|
||||||
.. only:: html and subproject
|
.. only:: html and subproject
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,8 @@
|
|||||||
* Internal ABI between the kernel and HYP
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
=======================================
|
||||||
|
Internal ABI between the kernel and HYP
|
||||||
|
=======================================
|
||||||
|
|
||||||
This file documents the interaction between the Linux kernel and the
|
This file documents the interaction between the Linux kernel and the
|
||||||
hypervisor layer when running Linux as a hypervisor (for example
|
hypervisor layer when running Linux as a hypervisor (for example
|
||||||
@ -19,25 +23,31 @@ and only act on individual CPUs.
|
|||||||
Unless specified otherwise, any built-in hypervisor must implement
|
Unless specified otherwise, any built-in hypervisor must implement
|
||||||
these functions (see arch/arm{,64}/include/asm/virt.h):
|
these functions (see arch/arm{,64}/include/asm/virt.h):
|
||||||
|
|
||||||
* r0/x0 = HVC_SET_VECTORS
|
* ::
|
||||||
r1/x1 = vectors
|
|
||||||
|
r0/x0 = HVC_SET_VECTORS
|
||||||
|
r1/x1 = vectors
|
||||||
|
|
||||||
Set HVBAR/VBAR_EL2 to 'vectors' to enable a hypervisor. 'vectors'
|
Set HVBAR/VBAR_EL2 to 'vectors' to enable a hypervisor. 'vectors'
|
||||||
must be a physical address, and respect the alignment requirements
|
must be a physical address, and respect the alignment requirements
|
||||||
of the architecture. Only implemented by the initial stubs, not by
|
of the architecture. Only implemented by the initial stubs, not by
|
||||||
Linux hypervisors.
|
Linux hypervisors.
|
||||||
|
|
||||||
* r0/x0 = HVC_RESET_VECTORS
|
* ::
|
||||||
|
|
||||||
|
r0/x0 = HVC_RESET_VECTORS
|
||||||
|
|
||||||
Turn HYP/EL2 MMU off, and reset HVBAR/VBAR_EL2 to the initial
|
Turn HYP/EL2 MMU off, and reset HVBAR/VBAR_EL2 to the initial
|
||||||
stubs' exception vector value. This effectively disables an existing
|
stubs' exception vector value. This effectively disables an existing
|
||||||
hypervisor.
|
hypervisor.
|
||||||
|
|
||||||
* r0/x0 = HVC_SOFT_RESTART
|
* ::
|
||||||
r1/x1 = restart address
|
|
||||||
x2 = x0's value when entering the next payload (arm64)
|
r0/x0 = HVC_SOFT_RESTART
|
||||||
x3 = x1's value when entering the next payload (arm64)
|
r1/x1 = restart address
|
||||||
x4 = x2's value when entering the next payload (arm64)
|
x2 = x0's value when entering the next payload (arm64)
|
||||||
|
x3 = x1's value when entering the next payload (arm64)
|
||||||
|
x4 = x2's value when entering the next payload (arm64)
|
||||||
|
|
||||||
Mask all exceptions, disable the MMU, move the arguments into place
|
Mask all exceptions, disable the MMU, move the arguments into place
|
||||||
(arm64 only), and jump to the restart address while at HYP/EL2. This
|
(arm64 only), and jump to the restart address while at HYP/EL2. This
|
12
Documentation/virt/kvm/arm/index.rst
Normal file
12
Documentation/virt/kvm/arm/index.rst
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
===
|
||||||
|
ARM
|
||||||
|
===
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 2
|
||||||
|
|
||||||
|
hyp-abi
|
||||||
|
psci
|
||||||
|
pvtime
|
@ -1,3 +1,9 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
=========================================
|
||||||
|
Power State Coordination Interface (PSCI)
|
||||||
|
=========================================
|
||||||
|
|
||||||
KVM implements the PSCI (Power State Coordination Interface)
|
KVM implements the PSCI (Power State Coordination Interface)
|
||||||
specification in order to provide services such as CPU on/off, reset
|
specification in order to provide services such as CPU on/off, reset
|
||||||
and power-off to the guest.
|
and power-off to the guest.
|
||||||
@ -30,32 +36,42 @@ The following register is defined:
|
|||||||
- Affects the whole VM (even if the register view is per-vcpu)
|
- Affects the whole VM (even if the register view is per-vcpu)
|
||||||
|
|
||||||
* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
|
* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
|
||||||
Holds the state of the firmware support to mitigate CVE-2017-5715, as
|
Holds the state of the firmware support to mitigate CVE-2017-5715, as
|
||||||
offered by KVM to the guest via a HVC call. The workaround is described
|
offered by KVM to the guest via a HVC call. The workaround is described
|
||||||
under SMCCC_ARCH_WORKAROUND_1 in [1].
|
under SMCCC_ARCH_WORKAROUND_1 in [1]_.
|
||||||
|
|
||||||
Accepted values are:
|
Accepted values are:
|
||||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL: KVM does not offer
|
|
||||||
|
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL:
|
||||||
|
KVM does not offer
|
||||||
firmware support for the workaround. The mitigation status for the
|
firmware support for the workaround. The mitigation status for the
|
||||||
guest is unknown.
|
guest is unknown.
|
||||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL: The workaround HVC call is
|
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL:
|
||||||
|
The workaround HVC call is
|
||||||
available to the guest and required for the mitigation.
|
available to the guest and required for the mitigation.
|
||||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED: The workaround HVC call
|
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED:
|
||||||
|
The workaround HVC call
|
||||||
is available to the guest, but it is not needed on this VCPU.
|
is available to the guest, but it is not needed on this VCPU.
|
||||||
|
|
||||||
* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
|
* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
|
||||||
Holds the state of the firmware support to mitigate CVE-2018-3639, as
|
Holds the state of the firmware support to mitigate CVE-2018-3639, as
|
||||||
offered by KVM to the guest via a HVC call. The workaround is described
|
offered by KVM to the guest via a HVC call. The workaround is described
|
||||||
under SMCCC_ARCH_WORKAROUND_2 in [1].
|
under SMCCC_ARCH_WORKAROUND_2 in [1]_.
|
||||||
|
|
||||||
Accepted values are:
|
Accepted values are:
|
||||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL: A workaround is not
|
|
||||||
|
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL:
|
||||||
|
A workaround is not
|
||||||
available. KVM does not offer firmware support for the workaround.
|
available. KVM does not offer firmware support for the workaround.
|
||||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN: The workaround state is
|
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN:
|
||||||
|
The workaround state is
|
||||||
unknown. KVM does not offer firmware support for the workaround.
|
unknown. KVM does not offer firmware support for the workaround.
|
||||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: The workaround is available,
|
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
|
||||||
|
The workaround is available,
|
||||||
and can be disabled by a vCPU. If
|
and can be disabled by a vCPU. If
|
||||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED is set, it is active for
|
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED is set, it is active for
|
||||||
this vCPU.
|
this vCPU.
|
||||||
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: The workaround is
|
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED:
|
||||||
always active on this vCPU or it is not needed.
|
The workaround is always active on this vCPU or it is not needed.
|
||||||
|
|
||||||
[1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf
|
.. [1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf
|
@ -1,3 +1,6 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
===============================================
|
||||||
ARM Virtual Interrupt Translation Service (ITS)
|
ARM Virtual Interrupt Translation Service (ITS)
|
||||||
===============================================
|
===============================================
|
||||||
|
|
||||||
@ -12,22 +15,32 @@ There can be multiple ITS controllers per guest, each of them has to have
|
|||||||
a separate, non-overlapping MMIO region.
|
a separate, non-overlapping MMIO region.
|
||||||
|
|
||||||
|
|
||||||
Groups:
|
Groups
|
||||||
KVM_DEV_ARM_VGIC_GRP_ADDR
|
======
|
||||||
|
|
||||||
|
KVM_DEV_ARM_VGIC_GRP_ADDR
|
||||||
|
-------------------------
|
||||||
|
|
||||||
Attributes:
|
Attributes:
|
||||||
KVM_VGIC_ITS_ADDR_TYPE (rw, 64-bit)
|
KVM_VGIC_ITS_ADDR_TYPE (rw, 64-bit)
|
||||||
Base address in the guest physical address space of the GICv3 ITS
|
Base address in the guest physical address space of the GICv3 ITS
|
||||||
control register frame.
|
control register frame.
|
||||||
This address needs to be 64K aligned and the region covers 128K.
|
This address needs to be 64K aligned and the region covers 128K.
|
||||||
|
|
||||||
Errors:
|
Errors:
|
||||||
-E2BIG: Address outside of addressable IPA range
|
|
||||||
-EINVAL: Incorrectly aligned address
|
======= =================================================
|
||||||
-EEXIST: Address already configured
|
-E2BIG Address outside of addressable IPA range
|
||||||
-EFAULT: Invalid user pointer for attr->addr.
|
-EINVAL Incorrectly aligned address
|
||||||
-ENODEV: Incorrect attribute or the ITS is not supported.
|
-EEXIST Address already configured
|
||||||
|
-EFAULT Invalid user pointer for attr->addr.
|
||||||
|
-ENODEV Incorrect attribute or the ITS is not supported.
|
||||||
|
======= =================================================
|
||||||
|
|
||||||
|
|
||||||
KVM_DEV_ARM_VGIC_GRP_CTRL
|
KVM_DEV_ARM_VGIC_GRP_CTRL
|
||||||
|
-------------------------
|
||||||
|
|
||||||
Attributes:
|
Attributes:
|
||||||
KVM_DEV_ARM_VGIC_CTRL_INIT
|
KVM_DEV_ARM_VGIC_CTRL_INIT
|
||||||
request the initialization of the ITS, no additional parameter in
|
request the initialization of the ITS, no additional parameter in
|
||||||
@ -58,16 +71,21 @@ Groups:
|
|||||||
"ITS Restore Sequence".
|
"ITS Restore Sequence".
|
||||||
|
|
||||||
Errors:
|
Errors:
|
||||||
-ENXIO: ITS not properly configured as required prior to setting
|
|
||||||
this attribute
|
|
||||||
-ENOMEM: Memory shortage when allocating ITS internal data
|
|
||||||
-EINVAL: Inconsistent restored data
|
|
||||||
-EFAULT: Invalid guest ram access
|
|
||||||
-EBUSY: One or more VCPUS are running
|
|
||||||
-EACCES: The virtual ITS is backed by a physical GICv4 ITS, and the
|
|
||||||
state is not available
|
|
||||||
|
|
||||||
KVM_DEV_ARM_VGIC_GRP_ITS_REGS
|
======= ==========================================================
|
||||||
|
-ENXIO ITS not properly configured as required prior to setting
|
||||||
|
this attribute
|
||||||
|
-ENOMEM Memory shortage when allocating ITS internal data
|
||||||
|
-EINVAL Inconsistent restored data
|
||||||
|
-EFAULT Invalid guest ram access
|
||||||
|
-EBUSY One or more VCPUS are running
|
||||||
|
-EACCES The virtual ITS is backed by a physical GICv4 ITS, and the
|
||||||
|
state is not available
|
||||||
|
======= ==========================================================
|
||||||
|
|
||||||
|
KVM_DEV_ARM_VGIC_GRP_ITS_REGS
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
Attributes:
|
Attributes:
|
||||||
The attr field of kvm_device_attr encodes the offset of the
|
The attr field of kvm_device_attr encodes the offset of the
|
||||||
ITS register, relative to the ITS control frame base address
|
ITS register, relative to the ITS control frame base address
|
||||||
@ -78,6 +96,7 @@ Groups:
|
|||||||
be accessed with full length.
|
be accessed with full length.
|
||||||
|
|
||||||
Writes to read-only registers are ignored by the kernel except for:
|
Writes to read-only registers are ignored by the kernel except for:
|
||||||
|
|
||||||
- GITS_CREADR. It must be restored otherwise commands in the queue
|
- GITS_CREADR. It must be restored otherwise commands in the queue
|
||||||
will be re-executed after restoring CWRITER. GITS_CREADR must be
|
will be re-executed after restoring CWRITER. GITS_CREADR must be
|
||||||
restored before restoring the GITS_CTLR which is likely to enable the
|
restored before restoring the GITS_CTLR which is likely to enable the
|
||||||
@ -91,30 +110,36 @@ Groups:
|
|||||||
|
|
||||||
For other registers, getting or setting a register has the same
|
For other registers, getting or setting a register has the same
|
||||||
effect as reading/writing the register on real hardware.
|
effect as reading/writing the register on real hardware.
|
||||||
Errors:
|
|
||||||
-ENXIO: Offset does not correspond to any supported register
|
|
||||||
-EFAULT: Invalid user pointer for attr->addr
|
|
||||||
-EINVAL: Offset is not 64-bit aligned
|
|
||||||
-EBUSY: one or more VCPUS are running
|
|
||||||
|
|
||||||
ITS Restore Sequence:
|
Errors:
|
||||||
-------------------------
|
|
||||||
|
======= ====================================================
|
||||||
|
-ENXIO Offset does not correspond to any supported register
|
||||||
|
-EFAULT Invalid user pointer for attr->addr
|
||||||
|
-EINVAL Offset is not 64-bit aligned
|
||||||
|
-EBUSY one or more VCPUS are running
|
||||||
|
======= ====================================================
|
||||||
|
|
||||||
|
ITS Restore Sequence:
|
||||||
|
---------------------
|
||||||
|
|
||||||
The following ordering must be followed when restoring the GIC and the ITS:
|
The following ordering must be followed when restoring the GIC and the ITS:
|
||||||
|
|
||||||
a) restore all guest memory and create vcpus
|
a) restore all guest memory and create vcpus
|
||||||
b) restore all redistributors
|
b) restore all redistributors
|
||||||
c) provide the ITS base address
|
c) provide the ITS base address
|
||||||
(KVM_DEV_ARM_VGIC_GRP_ADDR)
|
(KVM_DEV_ARM_VGIC_GRP_ADDR)
|
||||||
d) restore the ITS in the following order:
|
d) restore the ITS in the following order:
|
||||||
1. Restore GITS_CBASER
|
|
||||||
2. Restore all other GITS_ registers, except GITS_CTLR!
|
1. Restore GITS_CBASER
|
||||||
3. Load the ITS table data (KVM_DEV_ARM_ITS_RESTORE_TABLES)
|
2. Restore all other ``GITS_`` registers, except GITS_CTLR!
|
||||||
4. Restore GITS_CTLR
|
3. Load the ITS table data (KVM_DEV_ARM_ITS_RESTORE_TABLES)
|
||||||
|
4. Restore GITS_CTLR
|
||||||
|
|
||||||
Then vcpus can be started.
|
Then vcpus can be started.
|
||||||
|
|
||||||
ITS Table ABI REV0:
|
ITS Table ABI REV0:
|
||||||
-------------------
|
-------------------
|
||||||
|
|
||||||
Revision 0 of the ABI only supports the features of a virtual GICv3, and does
|
Revision 0 of the ABI only supports the features of a virtual GICv3, and does
|
||||||
not support a virtual GICv4 with support for direct injection of virtual
|
not support a virtual GICv4 with support for direct injection of virtual
|
||||||
@ -125,12 +150,13 @@ Then vcpus can be started.
|
|||||||
entries in the collection are listed in no particular order.
|
entries in the collection are listed in no particular order.
|
||||||
All entries are 8 bytes.
|
All entries are 8 bytes.
|
||||||
|
|
||||||
Device Table Entry (DTE):
|
Device Table Entry (DTE)::
|
||||||
|
|
||||||
bits: | 63| 62 ... 49 | 48 ... 5 | 4 ... 0 |
|
bits: | 63| 62 ... 49 | 48 ... 5 | 4 ... 0 |
|
||||||
values: | V | next | ITT_addr | Size |
|
values: | V | next | ITT_addr | Size |
|
||||||
|
|
||||||
|
where:
|
||||||
|
|
||||||
where;
|
|
||||||
- V indicates whether the entry is valid. If not, other fields
|
- V indicates whether the entry is valid. If not, other fields
|
||||||
are not meaningful.
|
are not meaningful.
|
||||||
- next: equals to 0 if this entry is the last one; otherwise it
|
- next: equals to 0 if this entry is the last one; otherwise it
|
||||||
@ -140,32 +166,34 @@ Then vcpus can be started.
|
|||||||
- Size specifies the supported number of bits for the EventID,
|
- Size specifies the supported number of bits for the EventID,
|
||||||
minus one
|
minus one
|
||||||
|
|
||||||
Collection Table Entry (CTE):
|
Collection Table Entry (CTE)::
|
||||||
|
|
||||||
bits: | 63| 62 .. 52 | 51 ... 16 | 15 ... 0 |
|
bits: | 63| 62 .. 52 | 51 ... 16 | 15 ... 0 |
|
||||||
values: | V | RES0 | RDBase | ICID |
|
values: | V | RES0 | RDBase | ICID |
|
||||||
|
|
||||||
where:
|
where:
|
||||||
|
|
||||||
- V indicates whether the entry is valid. If not, other fields are
|
- V indicates whether the entry is valid. If not, other fields are
|
||||||
not meaningful.
|
not meaningful.
|
||||||
- RES0: reserved field with Should-Be-Zero-or-Preserved behavior.
|
- RES0: reserved field with Should-Be-Zero-or-Preserved behavior.
|
||||||
- RDBase is the PE number (GICR_TYPER.Processor_Number semantic),
|
- RDBase is the PE number (GICR_TYPER.Processor_Number semantic),
|
||||||
- ICID is the collection ID
|
- ICID is the collection ID
|
||||||
|
|
||||||
Interrupt Translation Entry (ITE):
|
Interrupt Translation Entry (ITE)::
|
||||||
|
|
||||||
bits: | 63 ... 48 | 47 ... 16 | 15 ... 0 |
|
bits: | 63 ... 48 | 47 ... 16 | 15 ... 0 |
|
||||||
values: | next | pINTID | ICID |
|
values: | next | pINTID | ICID |
|
||||||
|
|
||||||
where:
|
where:
|
||||||
|
|
||||||
- next: equals to 0 if this entry is the last one; otherwise it corresponds
|
- next: equals to 0 if this entry is the last one; otherwise it corresponds
|
||||||
to the EventID offset to the next ITE capped by 2^16 -1.
|
to the EventID offset to the next ITE capped by 2^16 -1.
|
||||||
- pINTID is the physical LPI ID; if zero, it means the entry is not valid
|
- pINTID is the physical LPI ID; if zero, it means the entry is not valid
|
||||||
and other fields are not meaningful.
|
and other fields are not meaningful.
|
||||||
- ICID is the collection ID
|
- ICID is the collection ID
|
||||||
|
|
||||||
ITS Reset State:
|
ITS Reset State:
|
||||||
----------------
|
----------------
|
||||||
|
|
||||||
RESET returns the ITS to the same state that it was when first created and
|
RESET returns the ITS to the same state that it was when first created and
|
||||||
initialized. When the RESET command returns, the following things are
|
initialized. When the RESET command returns, the following things are
|
@ -1,9 +1,12 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
==============================================================
|
||||||
ARM Virtual Generic Interrupt Controller v3 and later (VGICv3)
|
ARM Virtual Generic Interrupt Controller v3 and later (VGICv3)
|
||||||
==============================================================
|
==============================================================
|
||||||
|
|
||||||
|
|
||||||
Device types supported:
|
Device types supported:
|
||||||
KVM_DEV_TYPE_ARM_VGIC_V3 ARM Generic Interrupt Controller v3.0
|
- KVM_DEV_TYPE_ARM_VGIC_V3 ARM Generic Interrupt Controller v3.0
|
||||||
|
|
||||||
Only one VGIC instance may be instantiated through this API. The created VGIC
|
Only one VGIC instance may be instantiated through this API. The created VGIC
|
||||||
will act as the VM interrupt controller, requiring emulated user-space devices
|
will act as the VM interrupt controller, requiring emulated user-space devices
|
||||||
@ -15,7 +18,8 @@ Creating a guest GICv3 device requires a host GICv3 as well.
|
|||||||
|
|
||||||
Groups:
|
Groups:
|
||||||
KVM_DEV_ARM_VGIC_GRP_ADDR
|
KVM_DEV_ARM_VGIC_GRP_ADDR
|
||||||
Attributes:
|
Attributes:
|
||||||
|
|
||||||
KVM_VGIC_V3_ADDR_TYPE_DIST (rw, 64-bit)
|
KVM_VGIC_V3_ADDR_TYPE_DIST (rw, 64-bit)
|
||||||
Base address in the guest physical address space of the GICv3 distributor
|
Base address in the guest physical address space of the GICv3 distributor
|
||||||
register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
|
register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
|
||||||
@ -29,21 +33,25 @@ Groups:
|
|||||||
This address needs to be 64K aligned.
|
This address needs to be 64K aligned.
|
||||||
|
|
||||||
KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION (rw, 64-bit)
|
KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION (rw, 64-bit)
|
||||||
The attribute data pointed to by kvm_device_attr.addr is a __u64 value:
|
The attribute data pointed to by kvm_device_attr.addr is a __u64 value::
|
||||||
bits: | 63 .... 52 | 51 .... 16 | 15 - 12 |11 - 0
|
|
||||||
values: | count | base | flags | index
|
bits: | 63 .... 52 | 51 .... 16 | 15 - 12 |11 - 0
|
||||||
|
values: | count | base | flags | index
|
||||||
|
|
||||||
- index encodes the unique redistributor region index
|
- index encodes the unique redistributor region index
|
||||||
- flags: reserved for future use, currently 0
|
- flags: reserved for future use, currently 0
|
||||||
- base field encodes bits [51:16] of the guest physical base address
|
- base field encodes bits [51:16] of the guest physical base address
|
||||||
of the first redistributor in the region.
|
of the first redistributor in the region.
|
||||||
- count encodes the number of redistributors in the region. Must be
|
- count encodes the number of redistributors in the region. Must be
|
||||||
greater than 0.
|
greater than 0.
|
||||||
|
|
||||||
There are two 64K pages for each redistributor in the region and
|
There are two 64K pages for each redistributor in the region and
|
||||||
redistributors are laid out contiguously within the region. Regions
|
redistributors are laid out contiguously within the region. Regions
|
||||||
are filled with redistributors in the index order. The sum of all
|
are filled with redistributors in the index order. The sum of all
|
||||||
region count fields must be greater than or equal to the number of
|
region count fields must be greater than or equal to the number of
|
||||||
VCPUs. Redistributor regions must be registered in the incremental
|
VCPUs. Redistributor regions must be registered in the incremental
|
||||||
index order, starting from index 0.
|
index order, starting from index 0.
|
||||||
|
|
||||||
The characteristics of a specific redistributor region can be read
|
The characteristics of a specific redistributor region can be read
|
||||||
by presetting the index field in the attr data.
|
by presetting the index field in the attr data.
|
||||||
Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
|
Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
|
||||||
@ -52,23 +60,27 @@ Groups:
|
|||||||
KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attributes.
|
KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attributes.
|
||||||
|
|
||||||
Errors:
|
Errors:
|
||||||
-E2BIG: Address outside of addressable IPA range
|
|
||||||
-EINVAL: Incorrectly aligned address, bad redistributor region
|
======= =============================================================
|
||||||
|
-E2BIG Address outside of addressable IPA range
|
||||||
|
-EINVAL Incorrectly aligned address, bad redistributor region
|
||||||
count/index, mixed redistributor region attribute usage
|
count/index, mixed redistributor region attribute usage
|
||||||
-EEXIST: Address already configured
|
-EEXIST Address already configured
|
||||||
-ENOENT: Attempt to read the characteristics of a non existing
|
-ENOENT Attempt to read the characteristics of a non-existent
|
||||||
redistributor region
|
redistributor region
|
||||||
-ENXIO: The group or attribute is unknown/unsupported for this device
|
-ENXIO The group or attribute is unknown/unsupported for this device
|
||||||
or hardware support is missing.
|
or hardware support is missing.
|
||||||
-EFAULT: Invalid user pointer for attr->addr.
|
-EFAULT Invalid user pointer for attr->addr.
|
||||||
|
======= =============================================================
|
||||||
|
|
||||||
|
|
||||||
KVM_DEV_ARM_VGIC_GRP_DIST_REGS
|
KVM_DEV_ARM_VGIC_GRP_DIST_REGS, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
|
||||||
KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
|
Attributes:
|
||||||
Attributes:
|
|
||||||
The attr field of kvm_device_attr encodes two values:
|
The attr field of kvm_device_attr encodes two values::
|
||||||
bits: | 63 .... 32 | 31 .... 0 |
|
|
||||||
values: | mpidr | offset |
|
bits: | 63 .... 32 | 31 .... 0 |
|
||||||
|
values: | mpidr | offset |
|
||||||
|
|
||||||
All distributor regs are (rw, 32-bit) and kvm_device_attr.addr points to a
|
All distributor regs are (rw, 32-bit) and kvm_device_attr.addr points to a
|
||||||
__u32 value. 64-bit registers must be accessed by separately accessing the
|
__u32 value. 64-bit registers must be accessed by separately accessing the
|
||||||
@ -93,7 +105,8 @@ Groups:
|
|||||||
redistributor is accessed. The mpidr is ignored for the distributor.
|
redistributor is accessed. The mpidr is ignored for the distributor.
|
||||||
|
|
||||||
The mpidr encoding is based on the affinity information in the
|
The mpidr encoding is based on the affinity information in the
|
||||||
architecture defined MPIDR, and the field is encoded as follows:
|
architecture defined MPIDR, and the field is encoded as follows::
|
||||||
|
|
||||||
| 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
|
| 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
|
||||||
| Aff3 | Aff2 | Aff1 | Aff0 |
|
| Aff3 | Aff2 | Aff1 | Aff0 |
|
||||||
|
|
||||||
@ -148,24 +161,30 @@ Groups:
|
|||||||
ignored.
|
ignored.
|
||||||
|
|
||||||
Errors:
|
Errors:
|
||||||
-ENXIO: Getting or setting this register is not yet supported
|
|
||||||
-EBUSY: One or more VCPUs are running
|
====== =====================================================
|
||||||
|
-ENXIO Getting or setting this register is not yet supported
|
||||||
|
-EBUSY One or more VCPUs are running
|
||||||
|
====== =====================================================
|
||||||
|
|
||||||
|
|
||||||
KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS
|
KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS
|
||||||
Attributes:
|
Attributes:
|
||||||
The attr field of kvm_device_attr encodes two values:
|
|
||||||
bits: | 63 .... 32 | 31 .... 16 | 15 .... 0 |
|
The attr field of kvm_device_attr encodes two values::
|
||||||
values: | mpidr | RES | instr |
|
|
||||||
|
bits: | 63 .... 32 | 31 .... 16 | 15 .... 0 |
|
||||||
|
values: | mpidr | RES | instr |
|
||||||
|
|
||||||
The mpidr field encodes the CPU ID based on the affinity information in the
|
The mpidr field encodes the CPU ID based on the affinity information in the
|
||||||
architecture defined MPIDR, and the field is encoded as follows:
|
architecture defined MPIDR, and the field is encoded as follows::
|
||||||
|
|
||||||
| 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
|
| 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
|
||||||
| Aff3 | Aff2 | Aff1 | Aff0 |
|
| Aff3 | Aff2 | Aff1 | Aff0 |
|
||||||
|
|
||||||
The instr field encodes the system register to access based on the fields
|
The instr field encodes the system register to access based on the fields
|
||||||
defined in the A64 instruction set encoding for system register access
|
defined in the A64 instruction set encoding for system register access
|
||||||
(RES means the bits are reserved for future use and should be zero):
|
(RES means the bits are reserved for future use and should be zero)::
|
||||||
|
|
||||||
| 15 ... 14 | 13 ... 11 | 10 ... 7 | 6 ... 3 | 2 ... 0 |
|
| 15 ... 14 | 13 ... 11 | 10 ... 7 | 6 ... 3 | 2 ... 0 |
|
||||||
| Op 0 | Op1 | CRn | CRm | Op2 |
|
| Op 0 | Op1 | CRn | CRm | Op2 |
|
||||||
@ -178,26 +197,35 @@ Groups:
|
|||||||
|
|
||||||
CPU interface registers access is not implemented for AArch32 mode.
|
CPU interface registers access is not implemented for AArch32 mode.
|
||||||
Error -ENXIO is returned when accessed in AArch32 mode.
|
Error -ENXIO is returned when accessed in AArch32 mode.
|
||||||
|
|
||||||
Errors:
|
Errors:
|
||||||
-ENXIO: Getting or setting this register is not yet supported
|
|
||||||
-EBUSY: VCPU is running
|
======= =====================================================
|
||||||
-EINVAL: Invalid mpidr or register value supplied
|
-ENXIO Getting or setting this register is not yet supported
|
||||||
|
-EBUSY VCPU is running
|
||||||
|
-EINVAL Invalid mpidr or register value supplied
|
||||||
|
======= =====================================================
|
||||||
|
|
||||||
|
|
||||||
KVM_DEV_ARM_VGIC_GRP_NR_IRQS
|
KVM_DEV_ARM_VGIC_GRP_NR_IRQS
|
||||||
Attributes:
|
Attributes:
|
||||||
|
|
||||||
A value describing the number of interrupts (SGI, PPI and SPI) for
|
A value describing the number of interrupts (SGI, PPI and SPI) for
|
||||||
this GIC instance, ranging from 64 to 1024, in increments of 32.
|
this GIC instance, ranging from 64 to 1024, in increments of 32.
|
||||||
|
|
||||||
kvm_device_attr.addr points to a __u32 value.
|
kvm_device_attr.addr points to a __u32 value.
|
||||||
|
|
||||||
Errors:
|
Errors:
|
||||||
-EINVAL: Value set is out of the expected range
|
|
||||||
-EBUSY: Value has already be set.
|
======= ======================================
|
||||||
|
-EINVAL Value set is out of the expected range
|
||||||
|
-EBUSY Value has already been set.
|
||||||
|
======= ======================================
|
||||||
|
|
||||||
|
|
||||||
KVM_DEV_ARM_VGIC_GRP_CTRL
|
KVM_DEV_ARM_VGIC_GRP_CTRL
|
||||||
Attributes:
|
Attributes:
|
||||||
|
|
||||||
KVM_DEV_ARM_VGIC_CTRL_INIT
|
KVM_DEV_ARM_VGIC_CTRL_INIT
|
||||||
request the initialization of the VGIC, no additional parameter in
|
request the initialization of the VGIC, no additional parameter in
|
||||||
kvm_device_attr.addr.
|
kvm_device_attr.addr.
|
||||||
@ -205,20 +233,26 @@ Groups:
|
|||||||
save all LPI pending bits into guest RAM pending tables.
|
save all LPI pending bits into guest RAM pending tables.
|
||||||
|
|
||||||
The first kB of the pending table is not altered by this operation.
|
The first kB of the pending table is not altered by this operation.
|
||||||
|
|
||||||
Errors:
|
Errors:
|
||||||
-ENXIO: VGIC not properly configured as required prior to calling
|
|
||||||
this attribute
|
======= ========================================================
|
||||||
-ENODEV: no online VCPU
|
-ENXIO VGIC not properly configured as required prior to calling
|
||||||
-ENOMEM: memory shortage when allocating vgic internal data
|
this attribute
|
||||||
-EFAULT: Invalid guest ram access
|
-ENODEV no online VCPU
|
||||||
-EBUSY: One or more VCPUS are running
|
-ENOMEM memory shortage when allocating vgic internal data
|
||||||
|
-EFAULT Invalid guest ram access
|
||||||
|
-EBUSY One or more VCPUS are running
|
||||||
|
======= ========================================================
|
||||||
|
|
||||||
|
|
||||||
KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO
|
KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO
|
||||||
Attributes:
|
Attributes:
|
||||||
The attr field of kvm_device_attr encodes the following values:
|
|
||||||
bits: | 63 .... 32 | 31 .... 10 | 9 .... 0 |
|
The attr field of kvm_device_attr encodes the following values::
|
||||||
values: | mpidr | info | vINTID |
|
|
||||||
|
bits: | 63 .... 32 | 31 .... 10 | 9 .... 0 |
|
||||||
|
values: | mpidr | info | vINTID |
|
||||||
|
|
||||||
The vINTID specifies which set of IRQs is reported on.
|
The vINTID specifies which set of IRQs is reported on.
|
||||||
|
|
||||||
@ -228,6 +262,7 @@ Groups:
|
|||||||
VGIC_LEVEL_INFO_LINE_LEVEL:
|
VGIC_LEVEL_INFO_LINE_LEVEL:
|
||||||
Get/Set the input level of the IRQ line for a set of 32 contiguously
|
Get/Set the input level of the IRQ line for a set of 32 contiguously
|
||||||
numbered interrupts.
|
numbered interrupts.
|
||||||
|
|
||||||
vINTID must be a multiple of 32.
|
vINTID must be a multiple of 32.
|
||||||
|
|
||||||
kvm_device_attr.addr points to a __u32 value which will contain a
|
kvm_device_attr.addr points to a __u32 value which will contain a
|
||||||
@ -243,9 +278,14 @@ Groups:
|
|||||||
reported with the same value regardless of the mpidr specified.
|
reported with the same value regardless of the mpidr specified.
|
||||||
|
|
||||||
The mpidr field encodes the CPU ID based on the affinity information in the
|
The mpidr field encodes the CPU ID based on the affinity information in the
|
||||||
architecture defined MPIDR, and the field is encoded as follows:
|
architecture defined MPIDR, and the field is encoded as follows::
|
||||||
|
|
||||||
| 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
|
| 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
|
||||||
| Aff3 | Aff2 | Aff1 | Aff0 |
|
| Aff3 | Aff2 | Aff1 | Aff0 |
|
||||||
|
|
||||||
Errors:
|
Errors:
|
||||||
-EINVAL: vINTID is not multiple of 32 or
|
|
||||||
info field is not VGIC_LEVEL_INFO_LINE_LEVEL
|
======= =============================================
|
||||||
|
-EINVAL vINTID is not a multiple of 32 or info field is
|
||||||
|
not VGIC_LEVEL_INFO_LINE_LEVEL
|
||||||
|
======= =============================================
|
@ -1,8 +1,12 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
==================================================
|
||||||
ARM Virtual Generic Interrupt Controller v2 (VGIC)
|
ARM Virtual Generic Interrupt Controller v2 (VGIC)
|
||||||
==================================================
|
==================================================
|
||||||
|
|
||||||
Device types supported:
|
Device types supported:
|
||||||
KVM_DEV_TYPE_ARM_VGIC_V2 ARM Generic Interrupt Controller v2.0
|
|
||||||
|
- KVM_DEV_TYPE_ARM_VGIC_V2 ARM Generic Interrupt Controller v2.0
|
||||||
|
|
||||||
Only one VGIC instance may be instantiated through either this API or the
|
Only one VGIC instance may be instantiated through either this API or the
|
||||||
legacy KVM_CREATE_IRQCHIP API. The created VGIC will act as the VM interrupt
|
legacy KVM_CREATE_IRQCHIP API. The created VGIC will act as the VM interrupt
|
||||||
@ -17,7 +21,8 @@ create both a GICv3 and GICv2 device on the same VM.
|
|||||||
|
|
||||||
Groups:
|
Groups:
|
||||||
KVM_DEV_ARM_VGIC_GRP_ADDR
|
KVM_DEV_ARM_VGIC_GRP_ADDR
|
||||||
Attributes:
|
Attributes:
|
||||||
|
|
||||||
KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit)
|
KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit)
|
||||||
Base address in the guest physical address space of the GIC distributor
|
Base address in the guest physical address space of the GIC distributor
|
||||||
register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
|
register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
|
||||||
@ -27,19 +32,25 @@ Groups:
|
|||||||
Base address in the guest physical address space of the GIC virtual cpu
|
Base address in the guest physical address space of the GIC virtual cpu
|
||||||
interface register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
|
interface register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
|
||||||
This address needs to be 4K aligned and the region covers 4 KByte.
|
This address needs to be 4K aligned and the region covers 4 KByte.
|
||||||
|
|
||||||
Errors:
|
Errors:
|
||||||
-E2BIG: Address outside of addressable IPA range
|
|
||||||
-EINVAL: Incorrectly aligned address
|
======= =============================================================
|
||||||
-EEXIST: Address already configured
|
-E2BIG Address outside of addressable IPA range
|
||||||
-ENXIO: The group or attribute is unknown/unsupported for this device
|
-EINVAL Incorrectly aligned address
|
||||||
|
-EEXIST Address already configured
|
||||||
|
-ENXIO The group or attribute is unknown/unsupported for this device
|
||||||
or hardware support is missing.
|
or hardware support is missing.
|
||||||
-EFAULT: Invalid user pointer for attr->addr.
|
-EFAULT Invalid user pointer for attr->addr.
|
||||||
|
======= =============================================================
|
||||||
|
|
||||||
KVM_DEV_ARM_VGIC_GRP_DIST_REGS
|
KVM_DEV_ARM_VGIC_GRP_DIST_REGS
|
||||||
Attributes:
|
Attributes:
|
||||||
The attr field of kvm_device_attr encodes two values:
|
|
||||||
bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
|
The attr field of kvm_device_attr encodes two values::
|
||||||
values: | reserved | vcpu_index | offset |
|
|
||||||
|
bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
|
||||||
|
values: | reserved | vcpu_index | offset |
|
||||||
|
|
||||||
All distributor regs are (rw, 32-bit)
|
All distributor regs are (rw, 32-bit)
|
||||||
|
|
||||||
@ -58,16 +69,22 @@ Groups:
|
|||||||
KVM_DEV_ARM_VGIC_GRP_DIST_REGS and KVM_DEV_ARM_VGIC_GRP_CPU_REGS) to ensure
|
KVM_DEV_ARM_VGIC_GRP_DIST_REGS and KVM_DEV_ARM_VGIC_GRP_CPU_REGS) to ensure
|
||||||
the expected behavior. Unless GICD_IIDR has been set from userspace, writes
|
the expected behavior. Unless GICD_IIDR has been set from userspace, writes
|
||||||
to the interrupt group registers (GICD_IGROUPR) are ignored.
|
to the interrupt group registers (GICD_IGROUPR) are ignored.
|
||||||
|
|
||||||
Errors:
|
Errors:
|
||||||
-ENXIO: Getting or setting this register is not yet supported
|
|
||||||
-EBUSY: One or more VCPUs are running
|
======= =====================================================
|
||||||
-EINVAL: Invalid vcpu_index supplied
|
-ENXIO Getting or setting this register is not yet supported
|
||||||
|
-EBUSY One or more VCPUs are running
|
||||||
|
-EINVAL Invalid vcpu_index supplied
|
||||||
|
======= =====================================================
|
||||||
|
|
||||||
KVM_DEV_ARM_VGIC_GRP_CPU_REGS
|
KVM_DEV_ARM_VGIC_GRP_CPU_REGS
|
||||||
Attributes:
|
Attributes:
|
||||||
The attr field of kvm_device_attr encodes two values:
|
|
||||||
bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
|
The attr field of kvm_device_attr encodes two values::
|
||||||
values: | reserved | vcpu_index | offset |
|
|
||||||
|
bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
|
||||||
|
values: | reserved | vcpu_index | offset |
|
||||||
|
|
||||||
All CPU interface regs are (rw, 32-bit)
|
All CPU interface regs are (rw, 32-bit)
|
||||||
|
|
||||||
@ -101,27 +118,39 @@ Groups:
|
|||||||
value left by 3 places to obtain the actual priority mask level.
|
value left by 3 places to obtain the actual priority mask level.
|
||||||
|
|
||||||
Errors:
|
Errors:
|
||||||
-ENXIO: Getting or setting this register is not yet supported
|
|
||||||
-EBUSY: One or more VCPUs are running
|
======= =====================================================
|
||||||
-EINVAL: Invalid vcpu_index supplied
|
-ENXIO Getting or setting this register is not yet supported
|
||||||
|
-EBUSY One or more VCPUs are running
|
||||||
|
-EINVAL Invalid vcpu_index supplied
|
||||||
|
======= =====================================================
|
||||||
|
|
||||||
KVM_DEV_ARM_VGIC_GRP_NR_IRQS
|
KVM_DEV_ARM_VGIC_GRP_NR_IRQS
|
||||||
Attributes:
|
Attributes:
|
||||||
|
|
||||||
A value describing the number of interrupts (SGI, PPI and SPI) for
|
A value describing the number of interrupts (SGI, PPI and SPI) for
|
||||||
this GIC instance, ranging from 64 to 1024, in increments of 32.
|
this GIC instance, ranging from 64 to 1024, in increments of 32.
|
||||||
|
|
||||||
Errors:
|
Errors:
|
||||||
-EINVAL: Value set is out of the expected range
|
|
||||||
-EBUSY: Value has already be set, or GIC has already been initialized
|
======= =============================================================
|
||||||
with default values.
|
-EINVAL Value set is out of the expected range
|
||||||
|
-EBUSY Value has already been set, or GIC has already been initialized
|
||||||
|
with default values.
|
||||||
|
======= =============================================================
|
||||||
|
|
||||||
KVM_DEV_ARM_VGIC_GRP_CTRL
|
KVM_DEV_ARM_VGIC_GRP_CTRL
|
||||||
Attributes:
|
Attributes:
|
||||||
|
|
||||||
KVM_DEV_ARM_VGIC_CTRL_INIT
|
KVM_DEV_ARM_VGIC_CTRL_INIT
|
||||||
request the initialization of the VGIC or ITS, no additional parameter
|
request the initialization of the VGIC or ITS, no additional parameter
|
||||||
in kvm_device_attr.addr.
|
in kvm_device_attr.addr.
|
||||||
|
|
||||||
Errors:
|
Errors:
|
||||||
-ENXIO: VGIC not properly configured as required prior to calling
|
|
||||||
this attribute
|
======= =========================================================
|
||||||
-ENODEV: no online VCPU
|
-ENXIO VGIC not properly configured as required prior to calling
|
||||||
-ENOMEM: memory shortage when allocating vgic internal data
|
this attribute
|
||||||
|
-ENODEV no online VCPU
|
||||||
|
-ENOMEM memory shortage when allocating vgic internal data
|
||||||
|
======= =========================================================
|
19
Documentation/virt/kvm/devices/index.rst
Normal file
19
Documentation/virt/kvm/devices/index.rst
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
=======
|
||||||
|
Devices
|
||||||
|
=======
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 2
|
||||||
|
|
||||||
|
arm-vgic-its
|
||||||
|
arm-vgic
|
||||||
|
arm-vgic-v3
|
||||||
|
mpic
|
||||||
|
s390_flic
|
||||||
|
vcpu
|
||||||
|
vfio
|
||||||
|
vm
|
||||||
|
xics
|
||||||
|
xive
|
@ -1,9 +1,13 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
=========================
|
||||||
MPIC interrupt controller
|
MPIC interrupt controller
|
||||||
=========================
|
=========================
|
||||||
|
|
||||||
Device types supported:
|
Device types supported:
|
||||||
KVM_DEV_TYPE_FSL_MPIC_20 Freescale MPIC v2.0
|
|
||||||
KVM_DEV_TYPE_FSL_MPIC_42 Freescale MPIC v4.2
|
- KVM_DEV_TYPE_FSL_MPIC_20 Freescale MPIC v2.0
|
||||||
|
- KVM_DEV_TYPE_FSL_MPIC_42 Freescale MPIC v4.2
|
||||||
|
|
||||||
Only one MPIC instance, of any type, may be instantiated. The created
|
Only one MPIC instance, of any type, may be instantiated. The created
|
||||||
MPIC will act as the system interrupt controller, connecting to each
|
MPIC will act as the system interrupt controller, connecting to each
|
||||||
@ -11,7 +15,8 @@ vcpu's interrupt inputs.
|
|||||||
|
|
||||||
Groups:
|
Groups:
|
||||||
KVM_DEV_MPIC_GRP_MISC
|
KVM_DEV_MPIC_GRP_MISC
|
||||||
Attributes:
|
Attributes:
|
||||||
|
|
||||||
KVM_DEV_MPIC_BASE_ADDR (rw, 64-bit)
|
KVM_DEV_MPIC_BASE_ADDR (rw, 64-bit)
|
||||||
Base address of the 256 KiB MPIC register space. Must be
|
Base address of the 256 KiB MPIC register space. Must be
|
||||||
naturally aligned. A value of zero disables the mapping.
|
naturally aligned. A value of zero disables the mapping.
|
@ -1,3 +1,6 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
====================================
|
||||||
FLIC (floating interrupt controller)
|
FLIC (floating interrupt controller)
|
||||||
====================================
|
====================================
|
||||||
|
|
||||||
@ -31,8 +34,10 @@ Groups:
|
|||||||
Copies all floating interrupts into a buffer provided by userspace.
|
Copies all floating interrupts into a buffer provided by userspace.
|
||||||
When the buffer is too small it returns -ENOMEM, which is the indication
|
When the buffer is too small it returns -ENOMEM, which is the indication
|
||||||
for userspace to try again with a bigger buffer.
|
for userspace to try again with a bigger buffer.
|
||||||
|
|
||||||
-ENOBUFS is returned when the allocation of a kernelspace buffer has
|
-ENOBUFS is returned when the allocation of a kernelspace buffer has
|
||||||
failed.
|
failed.
|
||||||
|
|
||||||
-EFAULT is returned when copying data to userspace failed.
|
-EFAULT is returned when copying data to userspace failed.
|
||||||
All interrupts remain pending, i.e. are not deleted from the list of
|
All interrupts remain pending, i.e. are not deleted from the list of
|
||||||
currently pending interrupts.
|
currently pending interrupts.
|
||||||
@ -60,38 +65,41 @@ Groups:
|
|||||||
|
|
||||||
KVM_DEV_FLIC_ADAPTER_REGISTER
|
KVM_DEV_FLIC_ADAPTER_REGISTER
|
||||||
Register an I/O adapter interrupt source. Takes a kvm_s390_io_adapter
|
Register an I/O adapter interrupt source. Takes a kvm_s390_io_adapter
|
||||||
describing the adapter to register:
|
describing the adapter to register::
|
||||||
|
|
||||||
struct kvm_s390_io_adapter {
|
struct kvm_s390_io_adapter {
|
||||||
__u32 id;
|
__u32 id;
|
||||||
__u8 isc;
|
__u8 isc;
|
||||||
__u8 maskable;
|
__u8 maskable;
|
||||||
__u8 swap;
|
__u8 swap;
|
||||||
__u8 flags;
|
__u8 flags;
|
||||||
};
|
};
|
||||||
|
|
||||||
id contains the unique id for the adapter, isc the I/O interruption subclass
|
id contains the unique id for the adapter, isc the I/O interruption subclass
|
||||||
to use, maskable whether this adapter may be masked (interrupts turned off),
|
to use, maskable whether this adapter may be masked (interrupts turned off),
|
||||||
swap whether the indicators need to be byte swapped, and flags contains
|
swap whether the indicators need to be byte swapped, and flags contains
|
||||||
further characteristics of the adapter.
|
further characteristics of the adapter.
|
||||||
|
|
||||||
Currently defined values for 'flags' are:
|
Currently defined values for 'flags' are:
|
||||||
|
|
||||||
- KVM_S390_ADAPTER_SUPPRESSIBLE: adapter is subject to AIS
|
- KVM_S390_ADAPTER_SUPPRESSIBLE: adapter is subject to AIS
|
||||||
(adapter-interrupt-suppression) facility. This flag only has an effect if
|
(adapter-interrupt-suppression) facility. This flag only has an effect if
|
||||||
the AIS capability is enabled.
|
the AIS capability is enabled.
|
||||||
|
|
||||||
Unknown flag values are ignored.
|
Unknown flag values are ignored.
|
||||||
|
|
||||||
|
|
||||||
KVM_DEV_FLIC_ADAPTER_MODIFY
|
KVM_DEV_FLIC_ADAPTER_MODIFY
|
||||||
Modifies attributes of an existing I/O adapter interrupt source. Takes
|
Modifies attributes of an existing I/O adapter interrupt source. Takes
|
||||||
a kvm_s390_io_adapter_req specifying the adapter and the operation:
|
a kvm_s390_io_adapter_req specifying the adapter and the operation::
|
||||||
|
|
||||||
struct kvm_s390_io_adapter_req {
|
struct kvm_s390_io_adapter_req {
|
||||||
__u32 id;
|
__u32 id;
|
||||||
__u8 type;
|
__u8 type;
|
||||||
__u8 mask;
|
__u8 mask;
|
||||||
__u16 pad0;
|
__u16 pad0;
|
||||||
__u64 addr;
|
__u64 addr;
|
||||||
};
|
};
|
||||||
|
|
||||||
id specifies the adapter and type the operation. The supported operations
|
id specifies the adapter and type the operation. The supported operations
|
||||||
are:
|
are:
|
||||||
@ -103,8 +111,9 @@ struct kvm_s390_io_adapter_req {
|
|||||||
perform a gmap translation for the guest address provided in addr,
|
perform a gmap translation for the guest address provided in addr,
|
||||||
pin a userspace page for the translated address and add it to the
|
pin a userspace page for the translated address and add it to the
|
||||||
list of mappings
|
list of mappings
|
||||||
Note: A new mapping will be created unconditionally; therefore,
|
|
||||||
the calling code should avoid making duplicate mappings.
|
.. note:: A new mapping will be created unconditionally; therefore,
|
||||||
|
the calling code should avoid making duplicate mappings.
|
||||||
|
|
||||||
KVM_S390_IO_ADAPTER_UNMAP
|
KVM_S390_IO_ADAPTER_UNMAP
|
||||||
release a userspace page for the translated address specified in addr
|
release a userspace page for the translated address specified in addr
|
||||||
@ -112,16 +121,17 @@ struct kvm_s390_io_adapter_req {
|
|||||||
|
|
||||||
KVM_DEV_FLIC_AISM
|
KVM_DEV_FLIC_AISM
|
||||||
modify the adapter-interruption-suppression mode for a given isc if the
|
modify the adapter-interruption-suppression mode for a given isc if the
|
||||||
AIS capability is enabled. Takes a kvm_s390_ais_req describing:
|
AIS capability is enabled. Takes a kvm_s390_ais_req describing::
|
||||||
|
|
||||||
struct kvm_s390_ais_req {
|
struct kvm_s390_ais_req {
|
||||||
__u8 isc;
|
__u8 isc;
|
||||||
__u16 mode;
|
__u16 mode;
|
||||||
};
|
};
|
||||||
|
|
||||||
isc contains the target I/O interruption subclass, mode the target
|
isc contains the target I/O interruption subclass, mode the target
|
||||||
adapter-interruption-suppression mode. The following modes are
|
adapter-interruption-suppression mode. The following modes are
|
||||||
currently supported:
|
currently supported:
|
||||||
|
|
||||||
- KVM_S390_AIS_MODE_ALL: ALL-Interruptions Mode, i.e. airq injection
|
- KVM_S390_AIS_MODE_ALL: ALL-Interruptions Mode, i.e. airq injection
|
||||||
is always allowed;
|
is always allowed;
|
||||||
- KVM_S390_AIS_MODE_SINGLE: SINGLE-Interruption Mode, i.e. airq
|
- KVM_S390_AIS_MODE_SINGLE: SINGLE-Interruption Mode, i.e. airq
|
||||||
@ -139,12 +149,12 @@ struct kvm_s390_ais_req {
|
|||||||
|
|
||||||
KVM_DEV_FLIC_AISM_ALL
|
KVM_DEV_FLIC_AISM_ALL
|
||||||
Gets or sets the adapter-interruption-suppression mode for all ISCs. Takes
|
Gets or sets the adapter-interruption-suppression mode for all ISCs. Takes
|
||||||
a kvm_s390_ais_all describing:
|
a kvm_s390_ais_all describing::
|
||||||
|
|
||||||
struct kvm_s390_ais_all {
|
struct kvm_s390_ais_all {
|
||||||
__u8 simm; /* Single-Interruption-Mode mask */
|
__u8 simm; /* Single-Interruption-Mode mask */
|
||||||
__u8 nimm; /* No-Interruption-Mode mask */
|
__u8 nimm; /* No-Interruption-Mode mask */
|
||||||
};
|
};
|
||||||
|
|
||||||
simm contains Single-Interruption-Mode mask for all ISCs, nimm contains
|
simm contains Single-Interruption-Mode mask for all ISCs, nimm contains
|
||||||
No-Interruption-Mode mask for all ISCs. Each bit in simm and nimm corresponds
|
No-Interruption-Mode mask for all ISCs. Each bit in simm and nimm corresponds
|
||||||
@ -159,5 +169,5 @@ ENXIO, as specified in the API documentation). It is not possible to conclude
|
|||||||
that a FLIC operation is unavailable based on the error code resulting from a
|
that a FLIC operation is unavailable based on the error code resulting from a
|
||||||
usage attempt.
|
usage attempt.
|
||||||
|
|
||||||
Note: The KVM_DEV_FLIC_CLEAR_IO_IRQ ioctl will return EINVAL in case a zero
|
.. note:: The KVM_DEV_FLIC_CLEAR_IO_IRQ ioctl will return EINVAL in case a
|
||||||
schid is specified.
|
zero schid is specified.
|
114
Documentation/virt/kvm/devices/vcpu.rst
Normal file
114
Documentation/virt/kvm/devices/vcpu.rst
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
======================
|
||||||
|
Generic vcpu interface
|
||||||
|
======================
|
||||||
|
|
||||||
|
The virtual cpu "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
|
||||||
|
KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same struct
|
||||||
|
kvm_device_attr as other devices, but targets VCPU-wide settings and controls.
|
||||||
|
|
||||||
|
The groups and attributes per virtual cpu, if any, are architecture specific.
|
||||||
|
|
||||||
|
1. GROUP: KVM_ARM_VCPU_PMU_V3_CTRL
|
||||||
|
==================================
|
||||||
|
|
||||||
|
:Architectures: ARM64
|
||||||
|
|
||||||
|
1.1. ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_IRQ
|
||||||
|
---------------------------------------
|
||||||
|
|
||||||
|
:Parameters: in kvm_device_attr.addr the address for PMU overflow interrupt is a
|
||||||
|
pointer to an int
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
======= ========================================================
|
||||||
|
-EBUSY The PMU overflow interrupt is already set
|
||||||
|
-ENXIO The overflow interrupt not set when attempting to get it
|
||||||
|
-ENODEV PMUv3 not supported
|
||||||
|
-EINVAL Invalid PMU overflow interrupt number supplied or
|
||||||
|
trying to set the IRQ number without using an in-kernel
|
||||||
|
irqchip.
|
||||||
|
======= ========================================================
|
||||||
|
|
||||||
|
A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt
|
||||||
|
number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt
|
||||||
|
type must be same for each vcpu. As a PPI, the interrupt number is the same for
|
||||||
|
all vcpus, while as an SPI it must be a separate number per vcpu.
|
||||||
|
|
||||||
|
1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT
|
||||||
|
---------------------------------------
|
||||||
|
|
||||||
|
:Parameters: no additional parameter in kvm_device_attr.addr
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
======= ======================================================
|
||||||
|
-ENODEV PMUv3 not supported or GIC not initialized
|
||||||
|
-ENXIO PMUv3 not properly configured or in-kernel irqchip not
|
||||||
|
configured as required prior to calling this attribute
|
||||||
|
-EBUSY PMUv3 already initialized
|
||||||
|
======= ======================================================
|
||||||
|
|
||||||
|
Request the initialization of the PMUv3. If using the PMUv3 with an in-kernel
|
||||||
|
virtual GIC implementation, this must be done after initializing the in-kernel
|
||||||
|
irqchip.
|
||||||
|
|
||||||
|
|
||||||
|
2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
|
||||||
|
=================================
|
||||||
|
|
||||||
|
:Architectures: ARM, ARM64
|
||||||
|
|
||||||
|
2.1. ATTRIBUTES: KVM_ARM_VCPU_TIMER_IRQ_VTIMER, KVM_ARM_VCPU_TIMER_IRQ_PTIMER
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
:Parameters: in kvm_device_attr.addr the address for the timer interrupt is a
|
||||||
|
pointer to an int
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
======= =================================
|
||||||
|
-EINVAL Invalid timer interrupt number
|
||||||
|
-EBUSY One or more VCPUs has already run
|
||||||
|
======= =================================
|
||||||
|
|
||||||
|
A value describing the architected timer interrupt number when connected to an
|
||||||
|
in-kernel virtual GIC. These must be a PPI (16 <= intid < 32). Setting the
|
||||||
|
attribute overrides the default values (see below).
|
||||||
|
|
||||||
|
============================= ==========================================
|
||||||
|
KVM_ARM_VCPU_TIMER_IRQ_VTIMER The EL1 virtual timer intid (default: 27)
|
||||||
|
KVM_ARM_VCPU_TIMER_IRQ_PTIMER The EL1 physical timer intid (default: 30)
|
||||||
|
============================= ==========================================
|
||||||
|
|
||||||
|
Setting the same PPI for different timers will prevent the VCPUs from running.
|
||||||
|
Setting the interrupt number on a VCPU configures all VCPUs created at that
|
||||||
|
time to use the number provided for a given timer, overwriting any previously
|
||||||
|
configured values on other VCPUs. Userspace should configure the interrupt
|
||||||
|
numbers on at least one VCPU after creating all VCPUs and before running any
|
||||||
|
VCPUs.
|
||||||
|
|
||||||
|
3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL
|
||||||
|
==================================
|
||||||
|
|
||||||
|
:Architectures: ARM64
|
||||||
|
|
||||||
|
3.1 ATTRIBUTE: KVM_ARM_VCPU_PVTIME_IPA
|
||||||
|
--------------------------------------
|
||||||
|
|
||||||
|
:Parameters: 64-bit base address
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
======= ======================================
|
||||||
|
-ENXIO Stolen time not implemented
|
||||||
|
-EEXIST Base address already set for this VCPU
|
||||||
|
-EINVAL Base address not 64 byte aligned
|
||||||
|
======= ======================================
|
||||||
|
|
||||||
|
Specifies the base address of the stolen time structure for this VCPU. The
|
||||||
|
base address must be 64 byte aligned and exist within a valid guest memory
|
||||||
|
region. See Documentation/virt/kvm/arm/pvtime.txt for more information
|
||||||
|
including the layout of the stolen time structure.
|
@ -1,76 +0,0 @@
|
|||||||
Generic vcpu interface
|
|
||||||
====================================
|
|
||||||
|
|
||||||
The virtual cpu "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
|
|
||||||
KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same struct
|
|
||||||
kvm_device_attr as other devices, but targets VCPU-wide settings and controls.
|
|
||||||
|
|
||||||
The groups and attributes per virtual cpu, if any, are architecture specific.
|
|
||||||
|
|
||||||
1. GROUP: KVM_ARM_VCPU_PMU_V3_CTRL
|
|
||||||
Architectures: ARM64
|
|
||||||
|
|
||||||
1.1. ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_IRQ
|
|
||||||
Parameters: in kvm_device_attr.addr the address for PMU overflow interrupt is a
|
|
||||||
pointer to an int
|
|
||||||
Returns: -EBUSY: The PMU overflow interrupt is already set
|
|
||||||
-ENXIO: The overflow interrupt not set when attempting to get it
|
|
||||||
-ENODEV: PMUv3 not supported
|
|
||||||
-EINVAL: Invalid PMU overflow interrupt number supplied or
|
|
||||||
trying to set the IRQ number without using an in-kernel
|
|
||||||
irqchip.
|
|
||||||
|
|
||||||
A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt
|
|
||||||
number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt
|
|
||||||
type must be same for each vcpu. As a PPI, the interrupt number is the same for
|
|
||||||
all vcpus, while as an SPI it must be a separate number per vcpu.
|
|
||||||
|
|
||||||
1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT
|
|
||||||
Parameters: no additional parameter in kvm_device_attr.addr
|
|
||||||
Returns: -ENODEV: PMUv3 not supported or GIC not initialized
|
|
||||||
-ENXIO: PMUv3 not properly configured or in-kernel irqchip not
|
|
||||||
configured as required prior to calling this attribute
|
|
||||||
-EBUSY: PMUv3 already initialized
|
|
||||||
|
|
||||||
Request the initialization of the PMUv3. If using the PMUv3 with an in-kernel
|
|
||||||
virtual GIC implementation, this must be done after initializing the in-kernel
|
|
||||||
irqchip.
|
|
||||||
|
|
||||||
|
|
||||||
2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
|
|
||||||
Architectures: ARM,ARM64
|
|
||||||
|
|
||||||
2.1. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_VTIMER
|
|
||||||
2.2. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_PTIMER
|
|
||||||
Parameters: in kvm_device_attr.addr the address for the timer interrupt is a
|
|
||||||
pointer to an int
|
|
||||||
Returns: -EINVAL: Invalid timer interrupt number
|
|
||||||
-EBUSY: One or more VCPUs has already run
|
|
||||||
|
|
||||||
A value describing the architected timer interrupt number when connected to an
|
|
||||||
in-kernel virtual GIC. These must be a PPI (16 <= intid < 32). Setting the
|
|
||||||
attribute overrides the default values (see below).
|
|
||||||
|
|
||||||
KVM_ARM_VCPU_TIMER_IRQ_VTIMER: The EL1 virtual timer intid (default: 27)
|
|
||||||
KVM_ARM_VCPU_TIMER_IRQ_PTIMER: The EL1 physical timer intid (default: 30)
|
|
||||||
|
|
||||||
Setting the same PPI for different timers will prevent the VCPUs from running.
|
|
||||||
Setting the interrupt number on a VCPU configures all VCPUs created at that
|
|
||||||
time to use the number provided for a given timer, overwriting any previously
|
|
||||||
configured values on other VCPUs. Userspace should configure the interrupt
|
|
||||||
numbers on at least one VCPU after creating all VCPUs and before running any
|
|
||||||
VCPUs.
|
|
||||||
|
|
||||||
3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL
|
|
||||||
Architectures: ARM64
|
|
||||||
|
|
||||||
3.1 ATTRIBUTE: KVM_ARM_VCPU_PVTIME_IPA
|
|
||||||
Parameters: 64-bit base address
|
|
||||||
Returns: -ENXIO: Stolen time not implemented
|
|
||||||
-EEXIST: Base address already set for this VCPU
|
|
||||||
-EINVAL: Base address not 64 byte aligned
|
|
||||||
|
|
||||||
Specifies the base address of the stolen time structure for this VCPU. The
|
|
||||||
base address must be 64 byte aligned and exist within a valid guest memory
|
|
||||||
region. See Documentation/virt/kvm/arm/pvtime.txt for more information
|
|
||||||
including the layout of the stolen time structure.
|
|
@ -1,8 +1,12 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
===================
|
||||||
VFIO virtual device
|
VFIO virtual device
|
||||||
===================
|
===================
|
||||||
|
|
||||||
Device types supported:
|
Device types supported:
|
||||||
KVM_DEV_TYPE_VFIO
|
|
||||||
|
- KVM_DEV_TYPE_VFIO
|
||||||
|
|
||||||
Only one VFIO instance may be created per VM. The created device
|
Only one VFIO instance may be created per VM. The created device
|
||||||
tracks VFIO groups in use by the VM and features of those groups
|
tracks VFIO groups in use by the VM and features of those groups
|
||||||
@ -23,14 +27,15 @@ KVM_DEV_VFIO_GROUP attributes:
|
|||||||
for the VFIO group.
|
for the VFIO group.
|
||||||
KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: attaches a guest visible TCE table
|
KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: attaches a guest visible TCE table
|
||||||
allocated by sPAPR KVM.
|
allocated by sPAPR KVM.
|
||||||
kvm_device_attr.addr points to a struct:
|
kvm_device_attr.addr points to a struct::
|
||||||
|
|
||||||
struct kvm_vfio_spapr_tce {
|
struct kvm_vfio_spapr_tce {
|
||||||
__s32 groupfd;
|
__s32 groupfd;
|
||||||
__s32 tablefd;
|
__s32 tablefd;
|
||||||
};
|
};
|
||||||
|
|
||||||
where
|
where:
|
||||||
@groupfd is a file descriptor for a VFIO group;
|
|
||||||
@tablefd is a file descriptor for a TCE table allocated via
|
- @groupfd is a file descriptor for a VFIO group;
|
||||||
KVM_CREATE_SPAPR_TCE.
|
- @tablefd is a file descriptor for a TCE table allocated via
|
||||||
|
KVM_CREATE_SPAPR_TCE.
|
@ -1,5 +1,8 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
====================
|
||||||
Generic vm interface
|
Generic vm interface
|
||||||
====================================
|
====================
|
||||||
|
|
||||||
The virtual machine "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
|
The virtual machine "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
|
||||||
KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same
|
KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same
|
||||||
@ -10,30 +13,38 @@ The groups and attributes per virtual machine, if any, are architecture
|
|||||||
specific.
|
specific.
|
||||||
|
|
||||||
1. GROUP: KVM_S390_VM_MEM_CTRL
|
1. GROUP: KVM_S390_VM_MEM_CTRL
|
||||||
Architectures: s390
|
==============================
|
||||||
|
|
||||||
|
:Architectures: s390
|
||||||
|
|
||||||
1.1. ATTRIBUTE: KVM_S390_VM_MEM_ENABLE_CMMA
|
1.1. ATTRIBUTE: KVM_S390_VM_MEM_ENABLE_CMMA
|
||||||
Parameters: none
|
-------------------------------------------
|
||||||
Returns: -EBUSY if a vcpu is already defined, otherwise 0
|
|
||||||
|
:Parameters: none
|
||||||
|
:Returns: -EBUSY if a vcpu is already defined, otherwise 0
|
||||||
|
|
||||||
Enables Collaborative Memory Management Assist (CMMA) for the virtual machine.
|
Enables Collaborative Memory Management Assist (CMMA) for the virtual machine.
|
||||||
|
|
||||||
1.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA
|
1.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA
|
||||||
Parameters: none
|
----------------------------------------
|
||||||
Returns: -EINVAL if CMMA was not enabled
|
|
||||||
0 otherwise
|
:Parameters: none
|
||||||
|
:Returns: -EINVAL if CMMA was not enabled;
|
||||||
|
0 otherwise
|
||||||
|
|
||||||
Clear the CMMA status for all guest pages, so any pages the guest marked
|
Clear the CMMA status for all guest pages, so any pages the guest marked
|
||||||
as unused are again used and may not be reclaimed by the host.
|
as unused are again used and may not be reclaimed by the host.
|
||||||
|
|
||||||
1.3. ATTRIBUTE KVM_S390_VM_MEM_LIMIT_SIZE
|
1.3. ATTRIBUTE KVM_S390_VM_MEM_LIMIT_SIZE
|
||||||
Parameters: in attr->addr the address for the new limit of guest memory
|
-----------------------------------------
|
||||||
Returns: -EFAULT if the given address is not accessible
|
|
||||||
-EINVAL if the virtual machine is of type UCONTROL
|
:Parameters: in attr->addr the address for the new limit of guest memory
|
||||||
-E2BIG if the given guest memory is to big for that machine
|
:Returns: -EFAULT if the given address is not accessible;
|
||||||
-EBUSY if a vcpu is already defined
|
-EINVAL if the virtual machine is of type UCONTROL;
|
||||||
-ENOMEM if not enough memory is available for a new shadow guest mapping
|
-E2BIG if the given guest memory is too big for that machine;
|
||||||
0 otherwise
|
-EBUSY if a vcpu is already defined;
|
||||||
|
-ENOMEM if not enough memory is available for a new shadow guest mapping;
|
||||||
|
0 otherwise.
|
||||||
|
|
||||||
Allows userspace to query the actual limit and set a new limit for
|
Allows userspace to query the actual limit and set a new limit for
|
||||||
the maximum guest memory size. The limit will be rounded up to
|
the maximum guest memory size. The limit will be rounded up to
|
||||||
@ -42,78 +53,92 @@ the number of page table levels. In the case that there is no limit we will set
|
|||||||
the limit to KVM_S390_NO_MEM_LIMIT (U64_MAX).
|
the limit to KVM_S390_NO_MEM_LIMIT (U64_MAX).
|
||||||
|
|
||||||
2. GROUP: KVM_S390_VM_CPU_MODEL
|
2. GROUP: KVM_S390_VM_CPU_MODEL
|
||||||
Architectures: s390
|
===============================
|
||||||
|
|
||||||
|
:Architectures: s390
|
||||||
|
|
||||||
2.1. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE (r/o)
|
2.1. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE (r/o)
|
||||||
|
---------------------------------------------
|
||||||
|
|
||||||
Allows user space to retrieve machine and kvm specific cpu related information:
|
Allows user space to retrieve machine and kvm specific cpu related information::
|
||||||
|
|
||||||
struct kvm_s390_vm_cpu_machine {
|
struct kvm_s390_vm_cpu_machine {
|
||||||
__u64 cpuid; # CPUID of host
|
__u64 cpuid; # CPUID of host
|
||||||
__u32 ibc; # IBC level range offered by host
|
__u32 ibc; # IBC level range offered by host
|
||||||
__u8 pad[4];
|
__u8 pad[4];
|
||||||
__u64 fac_mask[256]; # set of cpu facilities enabled by KVM
|
__u64 fac_mask[256]; # set of cpu facilities enabled by KVM
|
||||||
__u64 fac_list[256]; # set of cpu facilities offered by host
|
__u64 fac_list[256]; # set of cpu facilities offered by host
|
||||||
}
|
}
|
||||||
|
|
||||||
Parameters: address of buffer to store the machine related cpu data
|
:Parameters: address of buffer to store the machine related cpu data
|
||||||
of type struct kvm_s390_vm_cpu_machine*
|
of type struct kvm_s390_vm_cpu_machine*
|
||||||
Returns: -EFAULT if the given address is not accessible from kernel space
|
:Returns: -EFAULT if the given address is not accessible from kernel space;
|
||||||
-ENOMEM if not enough memory is available to process the ioctl
|
-ENOMEM if not enough memory is available to process the ioctl;
|
||||||
0 in case of success
|
0 in case of success.
|
||||||
|
|
||||||
2.2. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR (r/w)
|
2.2. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR (r/w)
|
||||||
|
-----------------------------------------------
|
||||||
|
|
||||||
Allows user space to retrieve or request to change cpu related information for a vcpu:
|
Allows user space to retrieve or request to change cpu related information for a vcpu::
|
||||||
|
|
||||||
struct kvm_s390_vm_cpu_processor {
|
struct kvm_s390_vm_cpu_processor {
|
||||||
__u64 cpuid; # CPUID currently (to be) used by this vcpu
|
__u64 cpuid; # CPUID currently (to be) used by this vcpu
|
||||||
__u16 ibc; # IBC level currently (to be) used by this vcpu
|
__u16 ibc; # IBC level currently (to be) used by this vcpu
|
||||||
__u8 pad[6];
|
__u8 pad[6];
|
||||||
__u64 fac_list[256]; # set of cpu facilities currently (to be) used
|
__u64 fac_list[256]; # set of cpu facilities currently (to be) used
|
||||||
# by this vcpu
|
# by this vcpu
|
||||||
}
|
}
|
||||||
|
|
||||||
KVM does not enforce or limit the cpu model data in any form. Take the information
|
KVM does not enforce or limit the cpu model data in any form. Take the information
|
||||||
retrieved by means of KVM_S390_VM_CPU_MACHINE as hint for reasonable configuration
|
retrieved by means of KVM_S390_VM_CPU_MACHINE as hint for reasonable configuration
|
||||||
setups. Instruction interceptions triggered by additionally set facility bits that
|
setups. Instruction interceptions triggered by additionally set facility bits that
|
||||||
are not handled by KVM need to be implemented in the VM driver code.
|
are not handled by KVM need to be implemented in the VM driver code.
|
||||||
|
|
||||||
Parameters: address of buffer to store/set the processor related cpu
|
:Parameters: address of buffer to store/set the processor related cpu
|
||||||
data of type struct kvm_s390_vm_cpu_processor*.
|
data of type struct kvm_s390_vm_cpu_processor*.
|
||||||
Returns: -EBUSY in case 1 or more vcpus are already activated (only in write case)
|
:Returns: -EBUSY in case 1 or more vcpus are already activated (only in write case);
|
||||||
-EFAULT if the given address is not accessible from kernel space
|
-EFAULT if the given address is not accessible from kernel space;
|
||||||
-ENOMEM if not enough memory is available to process the ioctl
|
-ENOMEM if not enough memory is available to process the ioctl;
|
||||||
0 in case of success
|
0 in case of success.
|
||||||
|
|
||||||
|
.. _KVM_S390_VM_CPU_MACHINE_FEAT:
|
||||||
|
|
||||||
2.3. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_FEAT (r/o)
|
2.3. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_FEAT (r/o)
|
||||||
|
--------------------------------------------------
|
||||||
|
|
||||||
Allows user space to retrieve available cpu features. A feature is available if
|
Allows user space to retrieve available cpu features. A feature is available if
|
||||||
provided by the hardware and supported by kvm. In theory, cpu features could
|
provided by the hardware and supported by kvm. In theory, cpu features could
|
||||||
even be completely emulated by kvm.
|
even be completely emulated by kvm.
|
||||||
|
|
||||||
struct kvm_s390_vm_cpu_feat {
|
::
|
||||||
__u64 feat[16]; # Bitmap (1 = feature available), MSB 0 bit numbering
|
|
||||||
};
|
|
||||||
|
|
||||||
Parameters: address of a buffer to load the feature list from.
|
struct kvm_s390_vm_cpu_feat {
|
||||||
Returns: -EFAULT if the given address is not accessible from kernel space.
|
__u64 feat[16]; # Bitmap (1 = feature available), MSB 0 bit numbering
|
||||||
0 in case of success.
|
};
|
||||||
|
|
||||||
|
:Parameters: address of a buffer to load the feature list from.
|
||||||
|
:Returns: -EFAULT if the given address is not accessible from kernel space;
|
||||||
|
0 in case of success.
|
||||||
|
|
||||||
2.4. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_FEAT (r/w)
|
2.4. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_FEAT (r/w)
|
||||||
|
----------------------------------------------------
|
||||||
|
|
||||||
Allows user space to retrieve or change enabled cpu features for all VCPUs of a
|
Allows user space to retrieve or change enabled cpu features for all VCPUs of a
|
||||||
VM. Features that are not available cannot be enabled.
|
VM. Features that are not available cannot be enabled.
|
||||||
|
|
||||||
See 2.3. for a description of the parameter struct.
|
See :ref:`KVM_S390_VM_CPU_MACHINE_FEAT` for
|
||||||
|
a description of the parameter struct.
|
||||||
|
|
||||||
Parameters: address of a buffer to store/load the feature list from.
|
:Parameters: address of a buffer to store/load the feature list from.
|
||||||
Returns: -EFAULT if the given address is not accessible from kernel space.
|
:Returns: -EFAULT if the given address is not accessible from kernel space;
|
||||||
-EINVAL if a cpu feature that is not available is to be enabled.
|
-EINVAL if a cpu feature that is not available is to be enabled;
|
||||||
-EBUSY if at least one VCPU has already been defined.
|
-EBUSY if at least one VCPU has already been defined;
|
||||||
0 in case of success.
|
0 in case of success.
|
||||||
|
|
||||||
|
.. _KVM_S390_VM_CPU_MACHINE_SUBFUNC:
|
||||||
|
|
||||||
2.5. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_SUBFUNC (r/o)
|
2.5. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_SUBFUNC (r/o)
|
||||||
|
-----------------------------------------------------
|
||||||
|
|
||||||
Allows user space to retrieve available cpu subfunctions without any filtering
|
Allows user space to retrieve available cpu subfunctions without any filtering
|
||||||
done by a set IBC. These subfunctions are indicated to the guest VCPU via
|
done by a set IBC. These subfunctions are indicated to the guest VCPU via
|
||||||
@ -126,7 +151,9 @@ contained in the returned struct. If the affected instruction
|
|||||||
indicates subfunctions via a "test bit" mechanism, the subfunction codes are
|
indicates subfunctions via a "test bit" mechanism, the subfunction codes are
|
||||||
contained in the returned struct in MSB 0 bit numbering.
|
contained in the returned struct in MSB 0 bit numbering.
|
||||||
|
|
||||||
struct kvm_s390_vm_cpu_subfunc {
|
::
|
||||||
|
|
||||||
|
struct kvm_s390_vm_cpu_subfunc {
|
||||||
u8 plo[32]; # always valid (ESA/390 feature)
|
u8 plo[32]; # always valid (ESA/390 feature)
|
||||||
u8 ptff[16]; # valid with TOD-clock steering
|
u8 ptff[16]; # valid with TOD-clock steering
|
||||||
u8 kmac[16]; # valid with Message-Security-Assist
|
u8 kmac[16]; # valid with Message-Security-Assist
|
||||||
@ -143,13 +170,14 @@ struct kvm_s390_vm_cpu_subfunc {
|
|||||||
u8 kma[16]; # valid with Message-Security-Assist-Extension 8
|
u8 kma[16]; # valid with Message-Security-Assist-Extension 8
|
||||||
u8 kdsa[16]; # valid with Message-Security-Assist-Extension 9
|
u8 kdsa[16]; # valid with Message-Security-Assist-Extension 9
|
||||||
u8 reserved[1792]; # reserved for future instructions
|
u8 reserved[1792]; # reserved for future instructions
|
||||||
};
|
};
|
||||||
|
|
||||||
Parameters: address of a buffer to load the subfunction blocks from.
|
:Parameters: address of a buffer to load the subfunction blocks from.
|
||||||
Returns: -EFAULT if the given address is not accessible from kernel space.
|
:Returns: -EFAULT if the given address is not accessible from kernel space;
|
||||||
0 in case of success.
|
0 in case of success.
|
||||||
|
|
||||||
2.6. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_SUBFUNC (r/w)
|
2.6. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_SUBFUNC (r/w)
|
||||||
|
-------------------------------------------------------
|
||||||
|
|
||||||
Allows user space to retrieve or change cpu subfunctions to be indicated for
|
Allows user space to retrieve or change cpu subfunctions to be indicated for
|
||||||
all VCPUs of a VM. This attribute will only be available if kernel and
|
all VCPUs of a VM. This attribute will only be available if kernel and
|
||||||
@ -164,107 +192,125 @@ As long as no data has been written, a read will fail. The IBC will be used
|
|||||||
to determine available subfunctions in this case, this will guarantee backward
|
to determine available subfunctions in this case, this will guarantee backward
|
||||||
compatibility.
|
compatibility.
|
||||||
|
|
||||||
See 2.5. for a description of the parameter struct.
|
See :ref:`KVM_S390_VM_CPU_MACHINE_SUBFUNC` for a
|
||||||
|
description of the parameter struct.
|
||||||
|
|
||||||
Parameters: address of a buffer to store/load the subfunction blocks from.
|
:Parameters: address of a buffer to store/load the subfunction blocks from.
|
||||||
Returns: -EFAULT if the given address is not accessible from kernel space.
|
:Returns: -EFAULT if the given address is not accessible from kernel space;
|
||||||
-EINVAL when reading, if there was no write yet.
|
-EINVAL when reading, if there was no write yet;
|
||||||
-EBUSY if at least one VCPU has already been defined.
|
-EBUSY if at least one VCPU has already been defined;
|
||||||
0 in case of success.
|
0 in case of success.
|
||||||
|
|
||||||
3. GROUP: KVM_S390_VM_TOD
|
3. GROUP: KVM_S390_VM_TOD
|
||||||
Architectures: s390
|
=========================
|
||||||
|
|
||||||
|
:Architectures: s390
|
||||||
|
|
||||||
3.1. ATTRIBUTE: KVM_S390_VM_TOD_HIGH
|
3.1. ATTRIBUTE: KVM_S390_VM_TOD_HIGH
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
Allows user space to set/get the TOD clock extension (u8) (superseded by
|
Allows user space to set/get the TOD clock extension (u8) (superseded by
|
||||||
KVM_S390_VM_TOD_EXT).
|
KVM_S390_VM_TOD_EXT).
|
||||||
|
|
||||||
Parameters: address of a buffer in user space to store the data (u8) to
|
:Parameters: address of a buffer in user space to store the data (u8) to
|
||||||
Returns: -EFAULT if the given address is not accessible from kernel space
|
:Returns: -EFAULT if the given address is not accessible from kernel space;
|
||||||
-EINVAL if setting the TOD clock extension to != 0 is not supported
|
-EINVAL if setting the TOD clock extension to != 0 is not supported
|
||||||
|
|
||||||
3.2. ATTRIBUTE: KVM_S390_VM_TOD_LOW
|
3.2. ATTRIBUTE: KVM_S390_VM_TOD_LOW
|
||||||
|
-----------------------------------
|
||||||
|
|
||||||
Allows user space to set/get bits 0-63 of the TOD clock register as defined in
|
Allows user space to set/get bits 0-63 of the TOD clock register as defined in
|
||||||
the POP (u64).
|
the POP (u64).
|
||||||
|
|
||||||
Parameters: address of a buffer in user space to store the data (u64) to
|
:Parameters: address of a buffer in user space to store the data (u64) to
|
||||||
Returns: -EFAULT if the given address is not accessible from kernel space
|
:Returns: -EFAULT if the given address is not accessible from kernel space
|
||||||
|
|
||||||
3.3. ATTRIBUTE: KVM_S390_VM_TOD_EXT
|
3.3. ATTRIBUTE: KVM_S390_VM_TOD_EXT
|
||||||
|
-----------------------------------
|
||||||
|
|
||||||
Allows user space to set/get bits 0-63 of the TOD clock register as defined in
|
Allows user space to set/get bits 0-63 of the TOD clock register as defined in
|
||||||
the POP (u64). If the guest CPU model supports the TOD clock extension (u8), it
|
the POP (u64). If the guest CPU model supports the TOD clock extension (u8), it
|
||||||
also allows user space to get/set it. If the guest CPU model does not support
|
also allows user space to get/set it. If the guest CPU model does not support
|
||||||
it, it is stored as 0 and not allowed to be set to a value != 0.
|
it, it is stored as 0 and not allowed to be set to a value != 0.
|
||||||
|
|
||||||
Parameters: address of a buffer in user space to store the data
|
:Parameters: address of a buffer in user space to store the data
|
||||||
(kvm_s390_vm_tod_clock) to
|
(kvm_s390_vm_tod_clock) to
|
||||||
Returns: -EFAULT if the given address is not accessible from kernel space
|
:Returns: -EFAULT if the given address is not accessible from kernel space;
|
||||||
-EINVAL if setting the TOD clock extension to != 0 is not supported
|
-EINVAL if setting the TOD clock extension to != 0 is not supported
|
||||||
|
|
||||||
4. GROUP: KVM_S390_VM_CRYPTO
|
4. GROUP: KVM_S390_VM_CRYPTO
|
||||||
Architectures: s390
|
============================
|
||||||
|
|
||||||
|
:Architectures: s390
|
||||||
|
|
||||||
4.1. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_AES_KW (w/o)
|
4.1. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_AES_KW (w/o)
|
||||||
|
------------------------------------------------------
|
||||||
|
|
||||||
Allows user space to enable aes key wrapping, including generating a new
|
Allows user space to enable aes key wrapping, including generating a new
|
||||||
wrapping key.
|
wrapping key.
|
||||||
|
|
||||||
Parameters: none
|
:Parameters: none
|
||||||
Returns: 0
|
:Returns: 0
|
||||||
|
|
||||||
4.2. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_DEA_KW (w/o)
|
4.2. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_DEA_KW (w/o)
|
||||||
|
------------------------------------------------------
|
||||||
|
|
||||||
Allows user space to enable dea key wrapping, including generating a new
|
Allows user space to enable dea key wrapping, including generating a new
|
||||||
wrapping key.
|
wrapping key.
|
||||||
|
|
||||||
Parameters: none
|
:Parameters: none
|
||||||
Returns: 0
|
:Returns: 0
|
||||||
|
|
||||||
4.3. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_AES_KW (w/o)
|
4.3. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_AES_KW (w/o)
|
||||||
|
-------------------------------------------------------
|
||||||
|
|
||||||
Allows user space to disable aes key wrapping, clearing the wrapping key.
|
Allows user space to disable aes key wrapping, clearing the wrapping key.
|
||||||
|
|
||||||
Parameters: none
|
:Parameters: none
|
||||||
Returns: 0
|
:Returns: 0
|
||||||
|
|
||||||
4.4. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_DEA_KW (w/o)
|
4.4. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_DEA_KW (w/o)
|
||||||
|
-------------------------------------------------------
|
||||||
|
|
||||||
Allows user space to disable dea key wrapping, clearing the wrapping key.
|
Allows user space to disable dea key wrapping, clearing the wrapping key.
|
||||||
|
|
||||||
Parameters: none
|
:Parameters: none
|
||||||
Returns: 0
|
:Returns: 0
|
||||||
|
|
||||||
5. GROUP: KVM_S390_VM_MIGRATION
|
5. GROUP: KVM_S390_VM_MIGRATION
|
||||||
Architectures: s390
|
===============================
|
||||||
|
|
||||||
|
:Architectures: s390
|
||||||
|
|
||||||
5.1. ATTRIBUTE: KVM_S390_VM_MIGRATION_STOP (w/o)
|
5.1. ATTRIBUTE: KVM_S390_VM_MIGRATION_STOP (w/o)
|
||||||
|
------------------------------------------------
|
||||||
|
|
||||||
Allows userspace to stop migration mode, needed for PGSTE migration.
|
Allows userspace to stop migration mode, needed for PGSTE migration.
|
||||||
Setting this attribute when migration mode is not active will have no
|
Setting this attribute when migration mode is not active will have no
|
||||||
effects.
|
effects.
|
||||||
|
|
||||||
Parameters: none
|
:Parameters: none
|
||||||
Returns: 0
|
:Returns: 0
|
||||||
|
|
||||||
5.2. ATTRIBUTE: KVM_S390_VM_MIGRATION_START (w/o)
|
5.2. ATTRIBUTE: KVM_S390_VM_MIGRATION_START (w/o)
|
||||||
|
-------------------------------------------------
|
||||||
|
|
||||||
Allows userspace to start migration mode, needed for PGSTE migration.
|
Allows userspace to start migration mode, needed for PGSTE migration.
|
||||||
Setting this attribute when migration mode is already active will have
|
Setting this attribute when migration mode is already active will have
|
||||||
no effects.
|
no effects.
|
||||||
|
|
||||||
Parameters: none
|
:Parameters: none
|
||||||
Returns: -ENOMEM if there is not enough free memory to start migration mode
|
:Returns: -ENOMEM if there is not enough free memory to start migration mode;
|
||||||
-EINVAL if the state of the VM is invalid (e.g. no memory defined)
|
-EINVAL if the state of the VM is invalid (e.g. no memory defined);
|
||||||
0 in case of success.
|
0 in case of success.
|
||||||
|
|
||||||
5.3. ATTRIBUTE: KVM_S390_VM_MIGRATION_STATUS (r/o)
|
5.3. ATTRIBUTE: KVM_S390_VM_MIGRATION_STATUS (r/o)
|
||||||
|
--------------------------------------------------
|
||||||
|
|
||||||
Allows userspace to query the status of migration mode.
|
Allows userspace to query the status of migration mode.
|
||||||
|
|
||||||
Parameters: address of a buffer in user space to store the data (u64) to;
|
:Parameters: address of a buffer in user space to store the data (u64) to;
|
||||||
the data itself is either 0 if migration mode is disabled or 1
|
the data itself is either 0 if migration mode is disabled or 1
|
||||||
if it is enabled
|
if it is enabled
|
||||||
Returns: -EFAULT if the given address is not accessible from kernel space
|
:Returns: -EFAULT if the given address is not accessible from kernel space;
|
||||||
0 in case of success.
|
0 in case of success.
|
@ -1,20 +1,31 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
=========================
|
||||||
XICS interrupt controller
|
XICS interrupt controller
|
||||||
|
=========================
|
||||||
|
|
||||||
Device type supported: KVM_DEV_TYPE_XICS
|
Device type supported: KVM_DEV_TYPE_XICS
|
||||||
|
|
||||||
Groups:
|
Groups:
|
||||||
1. KVM_DEV_XICS_GRP_SOURCES
|
1. KVM_DEV_XICS_GRP_SOURCES
|
||||||
Attributes: One per interrupt source, indexed by the source number.
|
Attributes:
|
||||||
|
|
||||||
|
One per interrupt source, indexed by the source number.
|
||||||
2. KVM_DEV_XICS_GRP_CTRL
|
2. KVM_DEV_XICS_GRP_CTRL
|
||||||
Attributes:
|
Attributes:
|
||||||
2.1 KVM_DEV_XICS_NR_SERVERS (write only)
|
|
||||||
|
2.1 KVM_DEV_XICS_NR_SERVERS (write only)
|
||||||
|
|
||||||
The kvm_device_attr.addr points to a __u32 value which is the number of
|
The kvm_device_attr.addr points to a __u32 value which is the number of
|
||||||
interrupt server numbers (i.e., highest possible vcpu id plus one).
|
interrupt server numbers (i.e., highest possible vcpu id plus one).
|
||||||
|
|
||||||
Errors:
|
Errors:
|
||||||
-EINVAL: Value greater than KVM_MAX_VCPU_ID.
|
|
||||||
-EFAULT: Invalid user pointer for attr->addr.
|
======= ==========================================
|
||||||
-EBUSY: A vcpu is already connected to the device.
|
-EINVAL Value greater than KVM_MAX_VCPU_ID.
|
||||||
|
-EFAULT Invalid user pointer for attr->addr.
|
||||||
|
-EBUSY A vcpu is already connected to the device.
|
||||||
|
======= ==========================================
|
||||||
|
|
||||||
This device emulates the XICS (eXternal Interrupt Controller
|
This device emulates the XICS (eXternal Interrupt Controller
|
||||||
Specification) defined in PAPR. The XICS has a set of interrupt
|
Specification) defined in PAPR. The XICS has a set of interrupt
|
||||||
@ -53,24 +64,29 @@ the interrupt source number. The 64 bit state word has the following
|
|||||||
bitfields, starting from the least-significant end of the word:
|
bitfields, starting from the least-significant end of the word:
|
||||||
|
|
||||||
* Destination (server number), 32 bits
|
* Destination (server number), 32 bits
|
||||||
|
|
||||||
This specifies where the interrupt should be sent, and is the
|
This specifies where the interrupt should be sent, and is the
|
||||||
interrupt server number specified for the destination vcpu.
|
interrupt server number specified for the destination vcpu.
|
||||||
|
|
||||||
* Priority, 8 bits
|
* Priority, 8 bits
|
||||||
|
|
||||||
This is the priority specified for this interrupt source, where 0 is
|
This is the priority specified for this interrupt source, where 0 is
|
||||||
the highest priority and 255 is the lowest. An interrupt with a
|
the highest priority and 255 is the lowest. An interrupt with a
|
||||||
priority of 255 will never be delivered.
|
priority of 255 will never be delivered.
|
||||||
|
|
||||||
* Level sensitive flag, 1 bit
|
* Level sensitive flag, 1 bit
|
||||||
|
|
||||||
This bit is 1 for a level-sensitive interrupt source, or 0 for
|
This bit is 1 for a level-sensitive interrupt source, or 0 for
|
||||||
edge-sensitive (or MSI).
|
edge-sensitive (or MSI).
|
||||||
|
|
||||||
* Masked flag, 1 bit
|
* Masked flag, 1 bit
|
||||||
|
|
||||||
This bit is set to 1 if the interrupt is masked (cannot be delivered
|
This bit is set to 1 if the interrupt is masked (cannot be delivered
|
||||||
regardless of its priority), for example by the ibm,int-off RTAS
|
regardless of its priority), for example by the ibm,int-off RTAS
|
||||||
call, or 0 if it is not masked.
|
call, or 0 if it is not masked.
|
||||||
|
|
||||||
* Pending flag, 1 bit
|
* Pending flag, 1 bit
|
||||||
|
|
||||||
This bit is 1 if the source has a pending interrupt, otherwise 0.
|
This bit is 1 if the source has a pending interrupt, otherwise 0.
|
||||||
|
|
||||||
Only one XICS instance may be created per VM.
|
Only one XICS instance may be created per VM.
|
@ -1,8 +1,11 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
===========================================================
|
||||||
POWER9 eXternal Interrupt Virtualization Engine (XIVE Gen1)
|
POWER9 eXternal Interrupt Virtualization Engine (XIVE Gen1)
|
||||||
==========================================================
|
===========================================================
|
||||||
|
|
||||||
Device types supported:
|
Device types supported:
|
||||||
KVM_DEV_TYPE_XIVE POWER9 XIVE Interrupt Controller generation 1
|
- KVM_DEV_TYPE_XIVE POWER9 XIVE Interrupt Controller generation 1
|
||||||
|
|
||||||
This device acts as a VM interrupt controller. It provides the KVM
|
This device acts as a VM interrupt controller. It provides the KVM
|
||||||
interface to configure the interrupt sources of a VM in the underlying
|
interface to configure the interrupt sources of a VM in the underlying
|
||||||
@ -64,72 +67,100 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
|
|||||||
|
|
||||||
* Groups:
|
* Groups:
|
||||||
|
|
||||||
1. KVM_DEV_XIVE_GRP_CTRL
|
1. KVM_DEV_XIVE_GRP_CTRL
|
||||||
Provides global controls on the device
|
Provides global controls on the device
|
||||||
|
|
||||||
Attributes:
|
Attributes:
|
||||||
1.1 KVM_DEV_XIVE_RESET (write only)
|
1.1 KVM_DEV_XIVE_RESET (write only)
|
||||||
Resets the interrupt controller configuration for sources and event
|
Resets the interrupt controller configuration for sources and event
|
||||||
queues. To be used by kexec and kdump.
|
queues. To be used by kexec and kdump.
|
||||||
|
|
||||||
Errors: none
|
Errors: none
|
||||||
|
|
||||||
1.2 KVM_DEV_XIVE_EQ_SYNC (write only)
|
1.2 KVM_DEV_XIVE_EQ_SYNC (write only)
|
||||||
Sync all the sources and queues and mark the EQ pages dirty. This
|
Sync all the sources and queues and mark the EQ pages dirty. This
|
||||||
is to make sure that a consistent memory state is captured when
|
is to make sure that a consistent memory state is captured when
|
||||||
migrating the VM.
|
migrating the VM.
|
||||||
|
|
||||||
Errors: none
|
Errors: none
|
||||||
|
|
||||||
1.3 KVM_DEV_XIVE_NR_SERVERS (write only)
|
1.3 KVM_DEV_XIVE_NR_SERVERS (write only)
|
||||||
The kvm_device_attr.addr points to a __u32 value which is the number of
|
The kvm_device_attr.addr points to a __u32 value which is the number of
|
||||||
interrupt server numbers (i.e., highest possible vcpu id plus one).
|
interrupt server numbers (i.e., highest possible vcpu id plus one).
|
||||||
Errors:
|
|
||||||
-EINVAL: Value greater than KVM_MAX_VCPU_ID.
|
|
||||||
-EFAULT: Invalid user pointer for attr->addr.
|
|
||||||
-EBUSY: A vCPU is already connected to the device.
|
|
||||||
|
|
||||||
2. KVM_DEV_XIVE_GRP_SOURCE (write only)
|
Errors:
|
||||||
Initializes a new source in the XIVE device and masks it.
|
|
||||||
|
======= ==========================================
|
||||||
|
-EINVAL Value greater than KVM_MAX_VCPU_ID.
|
||||||
|
-EFAULT Invalid user pointer for attr->addr.
|
||||||
|
-EBUSY A vCPU is already connected to the device.
|
||||||
|
======= ==========================================
|
||||||
|
|
||||||
|
2. KVM_DEV_XIVE_GRP_SOURCE (write only)
|
||||||
|
Initializes a new source in the XIVE device and masks it.
|
||||||
|
|
||||||
Attributes:
|
Attributes:
|
||||||
Interrupt source number (64-bit)
|
Interrupt source number (64-bit)
|
||||||
The kvm_device_attr.addr points to a __u64 value:
|
|
||||||
bits: | 63 .... 2 | 1 | 0
|
The kvm_device_attr.addr points to a __u64 value::
|
||||||
values: | unused | level | type
|
|
||||||
|
bits: | 63 .... 2 | 1 | 0
|
||||||
|
values: | unused | level | type
|
||||||
|
|
||||||
- type: 0:MSI 1:LSI
|
- type: 0:MSI 1:LSI
|
||||||
- level: assertion level in case of an LSI.
|
- level: assertion level in case of an LSI.
|
||||||
Errors:
|
|
||||||
-E2BIG: Interrupt source number is out of range
|
|
||||||
-ENOMEM: Could not create a new source block
|
|
||||||
-EFAULT: Invalid user pointer for attr->addr.
|
|
||||||
-ENXIO: Could not allocate underlying HW interrupt
|
|
||||||
|
|
||||||
3. KVM_DEV_XIVE_GRP_SOURCE_CONFIG (write only)
|
Errors:
|
||||||
Configures source targeting
|
|
||||||
|
======= ==========================================
|
||||||
|
-E2BIG Interrupt source number is out of range
|
||||||
|
-ENOMEM Could not create a new source block
|
||||||
|
-EFAULT Invalid user pointer for attr->addr.
|
||||||
|
-ENXIO Could not allocate underlying HW interrupt
|
||||||
|
======= ==========================================
|
||||||
|
|
||||||
|
3. KVM_DEV_XIVE_GRP_SOURCE_CONFIG (write only)
|
||||||
|
Configures source targeting
|
||||||
|
|
||||||
Attributes:
|
Attributes:
|
||||||
Interrupt source number (64-bit)
|
Interrupt source number (64-bit)
|
||||||
The kvm_device_attr.addr points to a __u64 value:
|
|
||||||
bits: | 63 .... 33 | 32 | 31 .. 3 | 2 .. 0
|
The kvm_device_attr.addr points to a __u64 value::
|
||||||
values: | eisn | mask | server | priority
|
|
||||||
|
bits: | 63 .... 33 | 32 | 31 .. 3 | 2 .. 0
|
||||||
|
values: | eisn | mask | server | priority
|
||||||
|
|
||||||
- priority: 0-7 interrupt priority level
|
- priority: 0-7 interrupt priority level
|
||||||
- server: CPU number chosen to handle the interrupt
|
- server: CPU number chosen to handle the interrupt
|
||||||
- mask: mask flag (unused)
|
- mask: mask flag (unused)
|
||||||
- eisn: Effective Interrupt Source Number
|
- eisn: Effective Interrupt Source Number
|
||||||
Errors:
|
|
||||||
-ENOENT: Unknown source number
|
|
||||||
-EINVAL: Not initialized source number
|
|
||||||
-EINVAL: Invalid priority
|
|
||||||
-EINVAL: Invalid CPU number.
|
|
||||||
-EFAULT: Invalid user pointer for attr->addr.
|
|
||||||
-ENXIO: CPU event queues not configured or configuration of the
|
|
||||||
underlying HW interrupt failed
|
|
||||||
-EBUSY: No CPU available to serve interrupt
|
|
||||||
|
|
||||||
4. KVM_DEV_XIVE_GRP_EQ_CONFIG (read-write)
|
Errors:
|
||||||
Configures an event queue of a CPU
|
|
||||||
|
======= =======================================================
|
||||||
|
-ENOENT Unknown source number
|
||||||
|
-EINVAL Not initialized source number
|
||||||
|
-EINVAL Invalid priority
|
||||||
|
-EINVAL Invalid CPU number.
|
||||||
|
-EFAULT Invalid user pointer for attr->addr.
|
||||||
|
-ENXIO CPU event queues not configured or configuration of the
|
||||||
|
underlying HW interrupt failed
|
||||||
|
-EBUSY No CPU available to serve interrupt
|
||||||
|
======= =======================================================
|
||||||
|
|
||||||
|
4. KVM_DEV_XIVE_GRP_EQ_CONFIG (read-write)
|
||||||
|
Configures an event queue of a CPU
|
||||||
|
|
||||||
Attributes:
|
Attributes:
|
||||||
EQ descriptor identifier (64-bit)
|
EQ descriptor identifier (64-bit)
|
||||||
The EQ descriptor identifier is a tuple (server, priority) :
|
|
||||||
bits: | 63 .... 32 | 31 .. 3 | 2 .. 0
|
The EQ descriptor identifier is a tuple (server, priority)::
|
||||||
values: | unused | server | priority
|
|
||||||
The kvm_device_attr.addr points to :
|
bits: | 63 .... 32 | 31 .. 3 | 2 .. 0
|
||||||
|
values: | unused | server | priority
|
||||||
|
|
||||||
|
The kvm_device_attr.addr points to::
|
||||||
|
|
||||||
struct kvm_ppc_xive_eq {
|
struct kvm_ppc_xive_eq {
|
||||||
__u32 flags;
|
__u32 flags;
|
||||||
__u32 qshift;
|
__u32 qshift;
|
||||||
@ -138,8 +169,9 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
|
|||||||
__u32 qindex;
|
__u32 qindex;
|
||||||
__u8 pad[40];
|
__u8 pad[40];
|
||||||
};
|
};
|
||||||
|
|
||||||
- flags: queue flags
|
- flags: queue flags
|
||||||
KVM_XIVE_EQ_ALWAYS_NOTIFY (required)
|
KVM_XIVE_EQ_ALWAYS_NOTIFY (required)
|
||||||
forces notification without using the coalescing mechanism
|
forces notification without using the coalescing mechanism
|
||||||
provided by the XIVE END ESBs.
|
provided by the XIVE END ESBs.
|
||||||
- qshift: queue size (power of 2)
|
- qshift: queue size (power of 2)
|
||||||
@ -147,22 +179,31 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
|
|||||||
- qtoggle: current queue toggle bit
|
- qtoggle: current queue toggle bit
|
||||||
- qindex: current queue index
|
- qindex: current queue index
|
||||||
- pad: reserved for future use
|
- pad: reserved for future use
|
||||||
Errors:
|
|
||||||
-ENOENT: Invalid CPU number
|
|
||||||
-EINVAL: Invalid priority
|
|
||||||
-EINVAL: Invalid flags
|
|
||||||
-EINVAL: Invalid queue size
|
|
||||||
-EINVAL: Invalid queue address
|
|
||||||
-EFAULT: Invalid user pointer for attr->addr.
|
|
||||||
-EIO: Configuration of the underlying HW failed
|
|
||||||
|
|
||||||
5. KVM_DEV_XIVE_GRP_SOURCE_SYNC (write only)
|
Errors:
|
||||||
Synchronize the source to flush event notifications
|
|
||||||
|
======= =========================================
|
||||||
|
-ENOENT Invalid CPU number
|
||||||
|
-EINVAL Invalid priority
|
||||||
|
-EINVAL Invalid flags
|
||||||
|
-EINVAL Invalid queue size
|
||||||
|
-EINVAL Invalid queue address
|
||||||
|
-EFAULT Invalid user pointer for attr->addr.
|
||||||
|
-EIO Configuration of the underlying HW failed
|
||||||
|
======= =========================================
|
||||||
|
|
||||||
|
5. KVM_DEV_XIVE_GRP_SOURCE_SYNC (write only)
|
||||||
|
Synchronize the source to flush event notifications
|
||||||
|
|
||||||
Attributes:
|
Attributes:
|
||||||
Interrupt source number (64-bit)
|
Interrupt source number (64-bit)
|
||||||
|
|
||||||
Errors:
|
Errors:
|
||||||
-ENOENT: Unknown source number
|
|
||||||
-EINVAL: Not initialized source number
|
======= =============================
|
||||||
|
-ENOENT Unknown source number
|
||||||
|
-EINVAL Not initialized source number
|
||||||
|
======= =============================
|
||||||
|
|
||||||
* VCPU state
|
* VCPU state
|
||||||
|
|
||||||
@ -175,11 +216,12 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
|
|||||||
as it synthesizes the priorities of the pending interrupts. We
|
as it synthesizes the priorities of the pending interrupts. We
|
||||||
capture a bit more to report debug information.
|
capture a bit more to report debug information.
|
||||||
|
|
||||||
KVM_REG_PPC_VP_STATE (2 * 64bits)
|
KVM_REG_PPC_VP_STATE (2 * 64bits)::
|
||||||
bits: | 63 .... 32 | 31 .... 0 |
|
|
||||||
values: | TIMA word0 | TIMA word1 |
|
bits: | 63 .... 32 | 31 .... 0 |
|
||||||
bits: | 127 .......... 64 |
|
values: | TIMA word0 | TIMA word1 |
|
||||||
values: | unused |
|
bits: | 127 .......... 64 |
|
||||||
|
values: | unused |
|
||||||
|
|
||||||
* Migration:
|
* Migration:
|
||||||
|
|
||||||
@ -196,7 +238,7 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
|
|||||||
3. Capture the state of the source targeting, the EQs configuration
|
3. Capture the state of the source targeting, the EQs configuration
|
||||||
and the state of thread interrupt context registers.
|
and the state of thread interrupt context registers.
|
||||||
|
|
||||||
Restore is similar :
|
Restore is similar:
|
||||||
|
|
||||||
1. Restore the EQ configuration, as targeting depends on it.
|
1. Restore the EQ configuration, as targeting depends on it.
|
||||||
2. Restore targeting
|
2. Restore targeting
|
@ -1,3 +1,6 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
===========================
|
||||||
The KVM halt polling system
|
The KVM halt polling system
|
||||||
===========================
|
===========================
|
||||||
|
|
||||||
@ -68,7 +71,8 @@ steady state polling interval but will only really do a good job for wakeups
|
|||||||
which come at an approximately constant rate, otherwise there will be constant
|
which come at an approximately constant rate, otherwise there will be constant
|
||||||
adjustment of the polling interval.
|
adjustment of the polling interval.
|
||||||
|
|
||||||
[0] total block time: the time between when the halt polling function is
|
[0] total block time:
|
||||||
|
the time between when the halt polling function is
|
||||||
invoked and a wakeup source received (irrespective of
|
invoked and a wakeup source received (irrespective of
|
||||||
whether the scheduler is invoked within that function).
|
whether the scheduler is invoked within that function).
|
||||||
|
|
||||||
@ -81,31 +85,32 @@ shrunk. These variables are defined in include/linux/kvm_host.h and as module
|
|||||||
parameters in virt/kvm/kvm_main.c, or arch/powerpc/kvm/book3s_hv.c in the
|
parameters in virt/kvm/kvm_main.c, or arch/powerpc/kvm/book3s_hv.c in the
|
||||||
powerpc kvm-hv case.
|
powerpc kvm-hv case.
|
||||||
|
|
||||||
Module Parameter | Description | Default Value
|
+-----------------------+---------------------------+-------------------------+
|
||||||
--------------------------------------------------------------------------------
|
|Module Parameter | Description | Default Value |
|
||||||
halt_poll_ns | The global max polling | KVM_HALT_POLL_NS_DEFAULT
|
+-----------------------+---------------------------+-------------------------+
|
||||||
| interval which defines |
|
|halt_poll_ns | The global max polling | KVM_HALT_POLL_NS_DEFAULT|
|
||||||
| the ceiling value of the |
|
| | interval which defines | |
|
||||||
| polling interval for | (per arch value)
|
| | the ceiling value of the | |
|
||||||
| each vcpu. |
|
| | polling interval for | (per arch value) |
|
||||||
--------------------------------------------------------------------------------
|
| | each vcpu. | |
|
||||||
halt_poll_ns_grow | The value by which the | 2
|
+-----------------------+---------------------------+-------------------------+
|
||||||
| halt polling interval is |
|
|halt_poll_ns_grow | The value by which the | 2 |
|
||||||
| multiplied in the |
|
| | halt polling interval is | |
|
||||||
| grow_halt_poll_ns() |
|
| | multiplied in the | |
|
||||||
| function. |
|
| | grow_halt_poll_ns() | |
|
||||||
--------------------------------------------------------------------------------
|
| | function. | |
|
||||||
halt_poll_ns_grow_start | The initial value to grow | 10000
|
+-----------------------+---------------------------+-------------------------+
|
||||||
| to from zero in the |
|
|halt_poll_ns_grow_start| The initial value to grow | 10000 |
|
||||||
| grow_halt_poll_ns() |
|
| | to from zero in the | |
|
||||||
| function. |
|
| | grow_halt_poll_ns() | |
|
||||||
--------------------------------------------------------------------------------
|
| | function. | |
|
||||||
halt_poll_ns_shrink | The value by which the | 0
|
+-----------------------+---------------------------+-------------------------+
|
||||||
| halt polling interval is |
|
|halt_poll_ns_shrink | The value by which the | 0 |
|
||||||
| divided in the |
|
| | halt polling interval is | |
|
||||||
| shrink_halt_poll_ns() |
|
| | divided in the | |
|
||||||
| function. |
|
| | shrink_halt_poll_ns() | |
|
||||||
--------------------------------------------------------------------------------
|
| | function. | |
|
||||||
|
+-----------------------+---------------------------+-------------------------+
|
||||||
|
|
||||||
These module parameters can be set from the debugfs files in:
|
These module parameters can be set from the debugfs files in:
|
||||||
|
|
||||||
@ -117,20 +122,19 @@ Note: that these module parameters are system wide values and are not able to
|
|||||||
Further Notes
|
Further Notes
|
||||||
=============
|
=============
|
||||||
|
|
||||||
- Care should be taken when setting the halt_poll_ns module parameter as a
|
- Care should be taken when setting the halt_poll_ns module parameter as a large value
|
||||||
large value has the potential to drive the cpu usage to 100% on a machine which
|
has the potential to drive the cpu usage to 100% on a machine which would be almost
|
||||||
would be almost entirely idle otherwise. This is because even if a guest has
|
entirely idle otherwise. This is because even if a guest has wakeups during which very
|
||||||
wakeups during which very little work is done and which are quite far apart, if
|
little work is done and which are quite far apart, if the period is shorter than the
|
||||||
the period is shorter than the global max polling interval (halt_poll_ns) then
|
global max polling interval (halt_poll_ns) then the host will always poll for the
|
||||||
the host will always poll for the entire block time and thus cpu utilisation
|
entire block time and thus cpu utilisation will go to 100%.
|
||||||
will go to 100%.
|
|
||||||
|
|
||||||
- Halt polling essentially presents a trade off between power usage and latency
|
- Halt polling essentially presents a trade off between power usage and latency and
|
||||||
and the module parameters should be used to tune the affinity for this. Idle
|
the module parameters should be used to tune the affinity for this. Idle cpu time is
|
||||||
cpu time is essentially converted to host kernel time with the aim of decreasing
|
essentially converted to host kernel time with the aim of decreasing latency when
|
||||||
latency when entering the guest.
|
entering the guest.
|
||||||
|
|
||||||
- Halt polling will only be conducted by the host when no other tasks are
|
- Halt polling will only be conducted by the host when no other tasks are runnable on
|
||||||
runnable on that cpu, otherwise the polling will cease immediately and
|
that cpu, otherwise the polling will cease immediately and schedule will be invoked to
|
||||||
schedule will be invoked to allow that other task to run. Thus this doesn't
|
allow that other task to run. Thus this doesn't allow a guest to denial of service the
|
||||||
allow a guest to denial of service the cpu.
|
cpu.
|
@ -1,5 +1,9 @@
|
|||||||
Linux KVM Hypercall:
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
===================
|
===================
|
||||||
|
Linux KVM Hypercall
|
||||||
|
===================
|
||||||
|
|
||||||
X86:
|
X86:
|
||||||
KVM Hypercalls have a three-byte sequence of either the vmcall or the vmmcall
|
KVM Hypercalls have a three-byte sequence of either the vmcall or the vmmcall
|
||||||
instruction. The hypervisor can replace it with instructions that are
|
instruction. The hypervisor can replace it with instructions that are
|
||||||
@ -20,7 +24,7 @@ S390:
|
|||||||
For further information on the S390 diagnose call as supported by KVM,
|
For further information on the S390 diagnose call as supported by KVM,
|
||||||
refer to Documentation/virt/kvm/s390-diag.txt.
|
refer to Documentation/virt/kvm/s390-diag.txt.
|
||||||
|
|
||||||
PowerPC:
|
PowerPC:
|
||||||
It uses R3-R10 and hypercall number in R11. R4-R11 are used as output registers.
|
It uses R3-R10 and hypercall number in R11. R4-R11 are used as output registers.
|
||||||
Return value is placed in R3.
|
Return value is placed in R3.
|
||||||
|
|
||||||
@ -34,7 +38,8 @@ MIPS:
|
|||||||
the return value is placed in $2 (v0).
|
the return value is placed in $2 (v0).
|
||||||
|
|
||||||
KVM Hypercalls Documentation
|
KVM Hypercalls Documentation
|
||||||
===========================
|
============================
|
||||||
|
|
||||||
The template for each hypercall is:
|
The template for each hypercall is:
|
||||||
1. Hypercall name.
|
1. Hypercall name.
|
||||||
2. Architecture(s)
|
2. Architecture(s)
|
||||||
@ -43,56 +48,64 @@ The template for each hypercall is:
|
|||||||
|
|
||||||
1. KVM_HC_VAPIC_POLL_IRQ
|
1. KVM_HC_VAPIC_POLL_IRQ
|
||||||
------------------------
|
------------------------
|
||||||
Architecture: x86
|
|
||||||
Status: active
|
:Architecture: x86
|
||||||
Purpose: Trigger guest exit so that the host can check for pending
|
:Status: active
|
||||||
interrupts on reentry.
|
:Purpose: Trigger guest exit so that the host can check for pending
|
||||||
|
interrupts on reentry.
|
||||||
|
|
||||||
2. KVM_HC_MMU_OP
|
2. KVM_HC_MMU_OP
|
||||||
------------------------
|
----------------
|
||||||
Architecture: x86
|
|
||||||
Status: deprecated.
|
:Architecture: x86
|
||||||
Purpose: Support MMU operations such as writing to PTE,
|
:Status: deprecated.
|
||||||
flushing TLB, release PT.
|
:Purpose: Support MMU operations such as writing to PTE,
|
||||||
|
flushing TLB, release PT.
|
||||||
|
|
||||||
3. KVM_HC_FEATURES
|
3. KVM_HC_FEATURES
|
||||||
------------------------
|
------------------
|
||||||
Architecture: PPC
|
|
||||||
Status: active
|
:Architecture: PPC
|
||||||
Purpose: Expose hypercall availability to the guest. On x86 platforms, cpuid
|
:Status: active
|
||||||
used to enumerate which hypercalls are available. On PPC, either device tree
|
:Purpose: Expose hypercall availability to the guest. On x86 platforms, cpuid
|
||||||
based lookup ( which is also what EPAPR dictates) OR KVM specific enumeration
|
is used to enumerate which hypercalls are available. On PPC, either
|
||||||
mechanism (which is this hypercall) can be used.
|
device tree based lookup (which is also what EPAPR dictates)
|
||||||
|
OR KVM specific enumeration mechanism (which is this hypercall)
|
||||||
|
can be used.
|
||||||
|
|
||||||
4. KVM_HC_PPC_MAP_MAGIC_PAGE
|
4. KVM_HC_PPC_MAP_MAGIC_PAGE
|
||||||
------------------------
|
----------------------------
|
||||||
Architecture: PPC
|
|
||||||
Status: active
|
:Architecture: PPC
|
||||||
Purpose: To enable communication between the hypervisor and guest there is a
|
:Status: active
|
||||||
shared page that contains parts of supervisor visible register state.
|
:Purpose: To enable communication between the hypervisor and guest there is a
|
||||||
The guest can map this shared page to access its supervisor register through
|
shared page that contains parts of supervisor visible register state.
|
||||||
memory using this hypercall.
|
The guest can map this shared page to access its supervisor register
|
||||||
|
through memory using this hypercall.
|
||||||
|
|
||||||
5. KVM_HC_KICK_CPU
|
5. KVM_HC_KICK_CPU
|
||||||
------------------------
|
------------------
|
||||||
Architecture: x86
|
|
||||||
Status: active
|
:Architecture: x86
|
||||||
Purpose: Hypercall used to wakeup a vcpu from HLT state
|
:Status: active
|
||||||
Usage example : A vcpu of a paravirtualized guest that is busywaiting in guest
|
:Purpose: Hypercall used to wakeup a vcpu from HLT state
|
||||||
kernel mode for an event to occur (ex: a spinlock to become available) can
|
:Usage example:
|
||||||
execute HLT instruction once it has busy-waited for more than a threshold
|
A vcpu of a paravirtualized guest that is busywaiting in guest
|
||||||
time-interval. Execution of HLT instruction would cause the hypervisor to put
|
kernel mode for an event to occur (ex: a spinlock to become available) can
|
||||||
the vcpu to sleep until occurrence of an appropriate event. Another vcpu of the
|
execute HLT instruction once it has busy-waited for more than a threshold
|
||||||
same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall,
|
time-interval. Execution of HLT instruction would cause the hypervisor to put
|
||||||
specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0)
|
the vcpu to sleep until occurrence of an appropriate event. Another vcpu of the
|
||||||
is used in the hypercall for future use.
|
same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall,
|
||||||
|
specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0)
|
||||||
|
is used in the hypercall for future use.
|
||||||
|
|
||||||
|
|
||||||
6. KVM_HC_CLOCK_PAIRING
|
6. KVM_HC_CLOCK_PAIRING
|
||||||
------------------------
|
-----------------------
|
||||||
Architecture: x86
|
:Architecture: x86
|
||||||
Status: active
|
:Status: active
|
||||||
Purpose: Hypercall used to synchronize host and guest clocks.
|
:Purpose: Hypercall used to synchronize host and guest clocks.
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
|
|
||||||
a0: guest physical address where host copies
|
a0: guest physical address where host copies
|
||||||
@ -101,6 +114,8 @@ a0: guest physical address where host copies
|
|||||||
a1: clock_type, ATM only KVM_CLOCK_PAIRING_WALLCLOCK (0)
|
a1: clock_type, ATM only KVM_CLOCK_PAIRING_WALLCLOCK (0)
|
||||||
is supported (corresponding to the host's CLOCK_REALTIME clock).
|
is supported (corresponding to the host's CLOCK_REALTIME clock).
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
struct kvm_clock_pairing {
|
struct kvm_clock_pairing {
|
||||||
__s64 sec;
|
__s64 sec;
|
||||||
__s64 nsec;
|
__s64 nsec;
|
||||||
@ -123,15 +138,16 @@ Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
|
|||||||
or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
|
or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
|
||||||
|
|
||||||
6. KVM_HC_SEND_IPI
|
6. KVM_HC_SEND_IPI
|
||||||
------------------------
|
------------------
|
||||||
Architecture: x86
|
|
||||||
Status: active
|
|
||||||
Purpose: Send IPIs to multiple vCPUs.
|
|
||||||
|
|
||||||
a0: lower part of the bitmap of destination APIC IDs
|
:Architecture: x86
|
||||||
a1: higher part of the bitmap of destination APIC IDs
|
:Status: active
|
||||||
a2: the lowest APIC ID in bitmap
|
:Purpose: Send IPIs to multiple vCPUs.
|
||||||
a3: APIC ICR
|
|
||||||
|
- a0: lower part of the bitmap of destination APIC IDs
|
||||||
|
- a1: higher part of the bitmap of destination APIC IDs
|
||||||
|
- a2: the lowest APIC ID in bitmap
|
||||||
|
- a3: APIC ICR
|
||||||
|
|
||||||
The hypercall lets a guest send multicast IPIs, with at most 128
|
The hypercall lets a guest send multicast IPIs, with at most 128
|
||||||
destinations per hypercall in 64-bit mode and 64 vCPUs per
|
destinations per hypercall in 64-bit mode and 64 vCPUs per
|
||||||
@ -143,12 +159,13 @@ corresponds to the APIC ID a2+1, and so on.
|
|||||||
Returns the number of CPUs to which the IPIs were delivered successfully.
|
Returns the number of CPUs to which the IPIs were delivered successfully.
|
||||||
|
|
||||||
7. KVM_HC_SCHED_YIELD
|
7. KVM_HC_SCHED_YIELD
|
||||||
------------------------
|
---------------------
|
||||||
Architecture: x86
|
|
||||||
Status: active
|
:Architecture: x86
|
||||||
Purpose: Hypercall used to yield if the IPI target vCPU is preempted
|
:Status: active
|
||||||
|
:Purpose: Hypercall used to yield if the IPI target vCPU is preempted
|
||||||
|
|
||||||
a0: destination APIC ID
|
a0: destination APIC ID
|
||||||
|
|
||||||
Usage example: When sending a call-function IPI-many to vCPUs, yield if
|
:Usage example: When sending a call-function IPI-many to vCPUs, yield if
|
||||||
any of the IPI target vCPUs was preempted.
|
any of the IPI target vCPUs was preempted.
|
@ -7,6 +7,22 @@ KVM
|
|||||||
.. toctree::
|
.. toctree::
|
||||||
:maxdepth: 2
|
:maxdepth: 2
|
||||||
|
|
||||||
|
api
|
||||||
amd-memory-encryption
|
amd-memory-encryption
|
||||||
cpuid
|
cpuid
|
||||||
|
halt-polling
|
||||||
|
hypercalls
|
||||||
|
locking
|
||||||
|
mmu
|
||||||
|
msr
|
||||||
|
nested-vmx
|
||||||
|
ppc-pv
|
||||||
|
s390-diag
|
||||||
|
timekeeping
|
||||||
vcpu-requests
|
vcpu-requests
|
||||||
|
|
||||||
|
review-checklist
|
||||||
|
|
||||||
|
arm/index
|
||||||
|
|
||||||
|
devices/index
|
||||||
|
243
Documentation/virt/kvm/locking.rst
Normal file
243
Documentation/virt/kvm/locking.rst
Normal file
@ -0,0 +1,243 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
=================
|
||||||
|
KVM Lock Overview
|
||||||
|
=================
|
||||||
|
|
||||||
|
1. Acquisition Orders
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
The acquisition orders for mutexes are as follows:
|
||||||
|
|
||||||
|
- kvm->lock is taken outside vcpu->mutex
|
||||||
|
|
||||||
|
- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock
|
||||||
|
|
||||||
|
- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
|
||||||
|
them together is quite rare.
|
||||||
|
|
||||||
|
On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
|
||||||
|
|
||||||
|
Everything else is a leaf: no other lock is taken inside the critical
|
||||||
|
sections.
|
||||||
|
|
||||||
|
2. Exception
|
||||||
|
------------
|
||||||
|
|
||||||
|
Fast page fault:
|
||||||
|
|
||||||
|
Fast page fault is the fast path which fixes the guest page fault out of
|
||||||
|
the mmu-lock on x86. Currently, the page fault can be fast in one of the
|
||||||
|
following two cases:
|
||||||
|
|
||||||
|
1. Access Tracking: The SPTE is not present, but it is marked for access
|
||||||
|
tracking i.e. the SPTE_SPECIAL_MASK is set. That means we need to
|
||||||
|
restore the saved R/X bits. This is described in more detail later below.
|
||||||
|
|
||||||
|
2. Write-Protection: The SPTE is present and the fault is
|
||||||
|
caused by write-protect. That means we just need to change the W bit of
|
||||||
|
the spte.
|
||||||
|
|
||||||
|
What we use to avoid all the race is the SPTE_HOST_WRITEABLE bit and
|
||||||
|
SPTE_MMU_WRITEABLE bit on the spte:
|
||||||
|
|
||||||
|
- SPTE_HOST_WRITEABLE means the gfn is writable on host.
|
||||||
|
- SPTE_MMU_WRITEABLE means the gfn is writable on mmu. The bit is set when
|
||||||
|
the gfn is writable on guest mmu and it is not write-protected by shadow
|
||||||
|
page write-protection.
|
||||||
|
|
||||||
|
On fast page fault path, we will use cmpxchg to atomically set the spte W
|
||||||
|
bit if spte.SPTE_HOST_WRITEABLE = 1 and spte.SPTE_WRITE_PROTECT = 1, or
|
||||||
|
restore the saved R/X bits if VMX_EPT_TRACK_ACCESS mask is set, or both. This
|
||||||
|
is safe because any change to these bits can be detected by cmpxchg.
|
||||||
|
|
||||||
|
But we need to carefully check these cases:
|
||||||
|
|
||||||
|
1) The mapping from gfn to pfn
|
||||||
|
|
||||||
|
The mapping from gfn to pfn may be changed since we can only ensure the pfn
|
||||||
|
is not changed during cmpxchg. This is an ABA problem, for example, below case
|
||||||
|
will happen:
|
||||||
|
|
||||||
|
+------------------------------------------------------------------------+
|
||||||
|
| At the beginning:: |
|
||||||
|
| |
|
||||||
|
| gpte = gfn1 |
|
||||||
|
| gfn1 is mapped to pfn1 on host |
|
||||||
|
| spte is the shadow page table entry corresponding with gpte and |
|
||||||
|
| spte = pfn1 |
|
||||||
|
+------------------------------------------------------------------------+
|
||||||
|
| On fast page fault path: |
|
||||||
|
+------------------------------------+-----------------------------------+
|
||||||
|
| CPU 0: | CPU 1: |
|
||||||
|
+------------------------------------+-----------------------------------+
|
||||||
|
| :: | |
|
||||||
|
| | |
|
||||||
|
| old_spte = *spte; | |
|
||||||
|
+------------------------------------+-----------------------------------+
|
||||||
|
| | pfn1 is swapped out:: |
|
||||||
|
| | |
|
||||||
|
| | spte = 0; |
|
||||||
|
| | |
|
||||||
|
| | pfn1 is re-alloced for gfn2. |
|
||||||
|
| | |
|
||||||
|
| | gpte is changed to point to |
|
||||||
|
| | gfn2 by the guest:: |
|
||||||
|
| | |
|
||||||
|
| | spte = pfn1; |
|
||||||
|
+------------------------------------+-----------------------------------+
|
||||||
|
| :: |
|
||||||
|
| |
|
||||||
|
| if (cmpxchg(spte, old_spte, old_spte+W) |
|
||||||
|
| mark_page_dirty(vcpu->kvm, gfn1) |
|
||||||
|
| OOPS!!! |
|
||||||
|
+------------------------------------------------------------------------+
|
||||||
|
|
||||||
|
We dirty-log for gfn1, that means gfn2 is lost in dirty-bitmap.
|
||||||
|
|
||||||
|
For direct sp, we can easily avoid it since the spte of direct sp is fixed
|
||||||
|
to gfn. For indirect sp, before we do cmpxchg, we call gfn_to_pfn_atomic()
|
||||||
|
to pin gfn to pfn, because after gfn_to_pfn_atomic():
|
||||||
|
|
||||||
|
- We have held the refcount of pfn that means the pfn can not be freed and
|
||||||
|
be reused for another gfn.
|
||||||
|
- The pfn is writable that means it can not be shared between different gfns
|
||||||
|
by KSM.
|
||||||
|
|
||||||
|
Then, we can ensure the dirty bitmap is correctly set for a gfn.
|
||||||
|
|
||||||
|
Currently, to simplify things, we disable fast page fault for
|
||||||
|
indirect shadow page.
|
||||||
|
|
||||||
|
2) Dirty bit tracking
|
||||||
|
|
||||||
|
In the original code, the spte can be fast updated (non-atomically) if the
|
||||||
|
spte is read-only and the Accessed bit has already been set since the
|
||||||
|
Accessed bit and Dirty bit can not be lost.
|
||||||
|
|
||||||
|
But it is not true after fast page fault since the spte can be marked
|
||||||
|
writable between reading spte and updating spte. Like below case:
|
||||||
|
|
||||||
|
+------------------------------------------------------------------------+
|
||||||
|
| At the beginning:: |
|
||||||
|
| |
|
||||||
|
| spte.W = 0 |
|
||||||
|
| spte.Accessed = 1 |
|
||||||
|
+------------------------------------+-----------------------------------+
|
||||||
|
| CPU 0: | CPU 1: |
|
||||||
|
+------------------------------------+-----------------------------------+
|
||||||
|
| In mmu_spte_clear_track_bits():: | |
|
||||||
|
| | |
|
||||||
|
| old_spte = *spte; | |
|
||||||
|
| | |
|
||||||
|
| | |
|
||||||
|
| /* 'if' condition is satisfied. */| |
|
||||||
|
| if (old_spte.Accessed == 1 && | |
|
||||||
|
| old_spte.W == 0) | |
|
||||||
|
| spte = 0ull; | |
|
||||||
|
+------------------------------------+-----------------------------------+
|
||||||
|
| | on fast page fault path:: |
|
||||||
|
| | |
|
||||||
|
| | spte.W = 1 |
|
||||||
|
| | |
|
||||||
|
| | memory write on the spte:: |
|
||||||
|
| | |
|
||||||
|
| | spte.Dirty = 1 |
|
||||||
|
+------------------------------------+-----------------------------------+
|
||||||
|
| :: | |
|
||||||
|
| | |
|
||||||
|
| else | |
|
||||||
|
| old_spte = xchg(spte, 0ull) | |
|
||||||
|
| if (old_spte.Accessed == 1) | |
|
||||||
|
| kvm_set_pfn_accessed(spte.pfn);| |
|
||||||
|
| if (old_spte.Dirty == 1) | |
|
||||||
|
| kvm_set_pfn_dirty(spte.pfn); | |
|
||||||
|
| OOPS!!! | |
|
||||||
|
+------------------------------------+-----------------------------------+
|
||||||
|
|
||||||
|
The Dirty bit is lost in this case.
|
||||||
|
|
||||||
|
In order to avoid this kind of issue, we always treat the spte as "volatile"
|
||||||
|
if it can be updated out of mmu-lock, see spte_has_volatile_bits(), it means,
|
||||||
|
the spte is always atomically updated in this case.
|
||||||
|
|
||||||
|
3) flush tlbs due to spte updated
|
||||||
|
|
||||||
|
If the spte is updated from writable to readonly, we should flush all TLBs,
|
||||||
|
otherwise rmap_write_protect will find a read-only spte, even though the
|
||||||
|
writable spte might be cached on a CPU's TLB.
|
||||||
|
|
||||||
|
As mentioned before, the spte can be updated to writable out of mmu-lock on
|
||||||
|
fast page fault path, in order to easily audit the path, we see if TLBs need
|
||||||
|
to be flushed for this reason in mmu_spte_update() since this is a common
|
||||||
|
function to update spte (present -> present).
|
||||||
|
|
||||||
|
Since the spte is "volatile" if it can be updated out of mmu-lock, we always
|
||||||
|
atomically update the spte, the race caused by fast page fault can be avoided,
|
||||||
|
See the comments in spte_has_volatile_bits() and mmu_spte_update().
|
||||||
|
|
||||||
|
Lockless Access Tracking:
|
||||||
|
|
||||||
|
This is used for Intel CPUs that are using EPT but do not support the EPT A/D
|
||||||
|
bits. In this case, when the KVM MMU notifier is called to track accesses to a
|
||||||
|
page (via kvm_mmu_notifier_clear_flush_young), it marks the PTE as not-present
|
||||||
|
by clearing the RWX bits in the PTE and storing the original R & X bits in
|
||||||
|
some unused/ignored bits. In addition, the SPTE_SPECIAL_MASK is also set on the
|
||||||
|
PTE (using the ignored bit 62). When the VM tries to access the page later on,
|
||||||
|
a fault is generated and the fast page fault mechanism described above is used
|
||||||
|
to atomically restore the PTE to a Present state. The W bit is not saved when
|
||||||
|
the PTE is marked for access tracking and during restoration to the Present
|
||||||
|
state, the W bit is set depending on whether or not it was a write access. If
|
||||||
|
it wasn't, then the W bit will remain clear until a write access happens, at
|
||||||
|
which time it will be set using the Dirty tracking mechanism described above.
|
||||||
|
|
||||||
|
3. Reference
|
||||||
|
------------
|
||||||
|
|
||||||
|
:Name: kvm_lock
|
||||||
|
:Type: mutex
|
||||||
|
:Arch: any
|
||||||
|
:Protects: - vm_list
|
||||||
|
|
||||||
|
:Name: kvm_count_lock
|
||||||
|
:Type: raw_spinlock_t
|
||||||
|
:Arch: any
|
||||||
|
:Protects: - hardware virtualization enable/disable
|
||||||
|
:Comment: 'raw' because hardware enabling/disabling must be atomic /wrt
|
||||||
|
migration.
|
||||||
|
|
||||||
|
:Name: kvm_arch::tsc_write_lock
|
||||||
|
:Type: raw_spinlock
|
||||||
|
:Arch: x86
|
||||||
|
:Protects: - kvm_arch::{last_tsc_write,last_tsc_nsec,last_tsc_offset}
|
||||||
|
- tsc offset in vmcb
|
||||||
|
:Comment: 'raw' because updating the tsc offsets must not be preempted.
|
||||||
|
|
||||||
|
:Name: kvm->mmu_lock
|
||||||
|
:Type: spinlock_t
|
||||||
|
:Arch: any
|
||||||
|
:Protects: - shadow page/shadow tlb entry
|
||||||
|
:Comment: it is a spinlock since it is used in mmu notifier.
|
||||||
|
|
||||||
|
:Name: kvm->srcu
|
||||||
|
:Type: srcu lock
|
||||||
|
:Arch: any
|
||||||
|
:Protects: - kvm->memslots
|
||||||
|
- kvm->buses
|
||||||
|
:Comment: The srcu read lock must be held while accessing memslots (e.g.
|
||||||
|
when using gfn_to_* functions) and while accessing in-kernel
|
||||||
|
MMIO/PIO address->device structure mapping (kvm->buses).
|
||||||
|
The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
|
||||||
|
if it is needed by multiple functions.
|
||||||
|
|
||||||
|
:Name: blocked_vcpu_on_cpu_lock
|
||||||
|
:Type: spinlock_t
|
||||||
|
:Arch: x86
|
||||||
|
:Protects: blocked_vcpu_on_cpu
|
||||||
|
:Comment: This is a per-CPU lock and it is used for VT-d posted-interrupts.
|
||||||
|
When VT-d posted-interrupts is supported and the VM has assigned
|
||||||
|
devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu
|
||||||
|
protected by blocked_vcpu_on_cpu_lock, when VT-d hardware issues
|
||||||
|
wakeup notification event since external interrupts from the
|
||||||
|
assigned devices happen, we will find the vCPU on the list to
|
||||||
|
wakeup.
|
@ -1,215 +0,0 @@
|
|||||||
KVM Lock Overview
|
|
||||||
=================
|
|
||||||
|
|
||||||
1. Acquisition Orders
|
|
||||||
---------------------
|
|
||||||
|
|
||||||
The acquisition orders for mutexes are as follows:
|
|
||||||
|
|
||||||
- kvm->lock is taken outside vcpu->mutex
|
|
||||||
|
|
||||||
- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock
|
|
||||||
|
|
||||||
- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
|
|
||||||
them together is quite rare.
|
|
||||||
|
|
||||||
On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
|
|
||||||
|
|
||||||
Everything else is a leaf: no other lock is taken inside the critical
|
|
||||||
sections.
|
|
||||||
|
|
||||||
2: Exception
|
|
||||||
------------
|
|
||||||
|
|
||||||
Fast page fault:
|
|
||||||
|
|
||||||
Fast page fault is the fast path which fixes the guest page fault out of
|
|
||||||
the mmu-lock on x86. Currently, the page fault can be fast in one of the
|
|
||||||
following two cases:
|
|
||||||
|
|
||||||
1. Access Tracking: The SPTE is not present, but it is marked for access
|
|
||||||
tracking i.e. the SPTE_SPECIAL_MASK is set. That means we need to
|
|
||||||
restore the saved R/X bits. This is described in more detail later below.
|
|
||||||
|
|
||||||
2. Write-Protection: The SPTE is present and the fault is
|
|
||||||
caused by write-protect. That means we just need to change the W bit of the
|
|
||||||
spte.
|
|
||||||
|
|
||||||
What we use to avoid all the race is the SPTE_HOST_WRITEABLE bit and
|
|
||||||
SPTE_MMU_WRITEABLE bit on the spte:
|
|
||||||
- SPTE_HOST_WRITEABLE means the gfn is writable on host.
|
|
||||||
- SPTE_MMU_WRITEABLE means the gfn is writable on mmu. The bit is set when
|
|
||||||
the gfn is writable on guest mmu and it is not write-protected by shadow
|
|
||||||
page write-protection.
|
|
||||||
|
|
||||||
On fast page fault path, we will use cmpxchg to atomically set the spte W
|
|
||||||
bit if spte.SPTE_HOST_WRITEABLE = 1 and spte.SPTE_WRITE_PROTECT = 1, or
|
|
||||||
restore the saved R/X bits if VMX_EPT_TRACK_ACCESS mask is set, or both. This
|
|
||||||
is safe because whenever changing these bits can be detected by cmpxchg.
|
|
||||||
|
|
||||||
But we need carefully check these cases:
|
|
||||||
1): The mapping from gfn to pfn
|
|
||||||
The mapping from gfn to pfn may be changed since we can only ensure the pfn
|
|
||||||
is not changed during cmpxchg. This is a ABA problem, for example, below case
|
|
||||||
will happen:
|
|
||||||
|
|
||||||
At the beginning:
|
|
||||||
gpte = gfn1
|
|
||||||
gfn1 is mapped to pfn1 on host
|
|
||||||
spte is the shadow page table entry corresponding with gpte and
|
|
||||||
spte = pfn1
|
|
||||||
|
|
||||||
VCPU 0 VCPU0
|
|
||||||
on fast page fault path:
|
|
||||||
|
|
||||||
old_spte = *spte;
|
|
||||||
pfn1 is swapped out:
|
|
||||||
spte = 0;
|
|
||||||
|
|
||||||
pfn1 is re-alloced for gfn2.
|
|
||||||
|
|
||||||
gpte is changed to point to
|
|
||||||
gfn2 by the guest:
|
|
||||||
spte = pfn1;
|
|
||||||
|
|
||||||
if (cmpxchg(spte, old_spte, old_spte+W)
|
|
||||||
mark_page_dirty(vcpu->kvm, gfn1)
|
|
||||||
OOPS!!!
|
|
||||||
|
|
||||||
We dirty-log for gfn1, that means gfn2 is lost in dirty-bitmap.
|
|
||||||
|
|
||||||
For direct sp, we can easily avoid it since the spte of direct sp is fixed
|
|
||||||
to gfn. For indirect sp, before we do cmpxchg, we call gfn_to_pfn_atomic()
|
|
||||||
to pin gfn to pfn, because after gfn_to_pfn_atomic():
|
|
||||||
- We have held the refcount of pfn that means the pfn can not be freed and
|
|
||||||
be reused for another gfn.
|
|
||||||
- The pfn is writable that means it can not be shared between different gfns
|
|
||||||
by KSM.
|
|
||||||
|
|
||||||
Then, we can ensure the dirty bitmaps is correctly set for a gfn.
|
|
||||||
|
|
||||||
Currently, to simplify the whole things, we disable fast page fault for
|
|
||||||
indirect shadow page.
|
|
||||||
|
|
||||||
2): Dirty bit tracking
|
|
||||||
In the origin code, the spte can be fast updated (non-atomically) if the
|
|
||||||
spte is read-only and the Accessed bit has already been set since the
|
|
||||||
Accessed bit and Dirty bit can not be lost.
|
|
||||||
|
|
||||||
But it is not true after fast page fault since the spte can be marked
|
|
||||||
writable between reading spte and updating spte. Like below case:
|
|
||||||
|
|
||||||
At the beginning:
|
|
||||||
spte.W = 0
|
|
||||||
spte.Accessed = 1
|
|
||||||
|
|
||||||
VCPU 0 VCPU0
|
|
||||||
In mmu_spte_clear_track_bits():
|
|
||||||
|
|
||||||
old_spte = *spte;
|
|
||||||
|
|
||||||
/* 'if' condition is satisfied. */
|
|
||||||
if (old_spte.Accessed == 1 &&
|
|
||||||
old_spte.W == 0)
|
|
||||||
spte = 0ull;
|
|
||||||
on fast page fault path:
|
|
||||||
spte.W = 1
|
|
||||||
memory write on the spte:
|
|
||||||
spte.Dirty = 1
|
|
||||||
|
|
||||||
|
|
||||||
else
|
|
||||||
old_spte = xchg(spte, 0ull)
|
|
||||||
|
|
||||||
|
|
||||||
if (old_spte.Accessed == 1)
|
|
||||||
kvm_set_pfn_accessed(spte.pfn);
|
|
||||||
if (old_spte.Dirty == 1)
|
|
||||||
kvm_set_pfn_dirty(spte.pfn);
|
|
||||||
OOPS!!!
|
|
||||||
|
|
||||||
The Dirty bit is lost in this case.
|
|
||||||
|
|
||||||
In order to avoid this kind of issue, we always treat the spte as "volatile"
|
|
||||||
if it can be updated out of mmu-lock, see spte_has_volatile_bits(), it means,
|
|
||||||
the spte is always atomically updated in this case.
|
|
||||||
|
|
||||||
3): flush tlbs due to spte updated
|
|
||||||
If the spte is updated from writable to readonly, we should flush all TLBs,
|
|
||||||
otherwise rmap_write_protect will find a read-only spte, even though the
|
|
||||||
writable spte might be cached on a CPU's TLB.
|
|
||||||
|
|
||||||
As mentioned before, the spte can be updated to writable out of mmu-lock on
|
|
||||||
fast page fault path, in order to easily audit the path, we see if TLBs need
|
|
||||||
be flushed caused by this reason in mmu_spte_update() since this is a common
|
|
||||||
function to update spte (present -> present).
|
|
||||||
|
|
||||||
Since the spte is "volatile" if it can be updated out of mmu-lock, we always
|
|
||||||
atomically update the spte, the race caused by fast page fault can be avoided,
|
|
||||||
See the comments in spte_has_volatile_bits() and mmu_spte_update().
|
|
||||||
|
|
||||||
Lockless Access Tracking:
|
|
||||||
|
|
||||||
This is used for Intel CPUs that are using EPT but do not support the EPT A/D
|
|
||||||
bits. In this case, when the KVM MMU notifier is called to track accesses to a
|
|
||||||
page (via kvm_mmu_notifier_clear_flush_young), it marks the PTE as not-present
|
|
||||||
by clearing the RWX bits in the PTE and storing the original R & X bits in
|
|
||||||
some unused/ignored bits. In addition, the SPTE_SPECIAL_MASK is also set on the
|
|
||||||
PTE (using the ignored bit 62). When the VM tries to access the page later on,
|
|
||||||
a fault is generated and the fast page fault mechanism described above is used
|
|
||||||
to atomically restore the PTE to a Present state. The W bit is not saved when
|
|
||||||
the PTE is marked for access tracking and during restoration to the Present
|
|
||||||
state, the W bit is set depending on whether or not it was a write access. If
|
|
||||||
it wasn't, then the W bit will remain clear until a write access happens, at
|
|
||||||
which time it will be set using the Dirty tracking mechanism described above.
|
|
||||||
|
|
||||||
3. Reference
|
|
||||||
------------
|
|
||||||
|
|
||||||
Name: kvm_lock
|
|
||||||
Type: mutex
|
|
||||||
Arch: any
|
|
||||||
Protects: - vm_list
|
|
||||||
|
|
||||||
Name: kvm_count_lock
|
|
||||||
Type: raw_spinlock_t
|
|
||||||
Arch: any
|
|
||||||
Protects: - hardware virtualization enable/disable
|
|
||||||
Comment: 'raw' because hardware enabling/disabling must be atomic /wrt
|
|
||||||
migration.
|
|
||||||
|
|
||||||
Name: kvm_arch::tsc_write_lock
|
|
||||||
Type: raw_spinlock
|
|
||||||
Arch: x86
|
|
||||||
Protects: - kvm_arch::{last_tsc_write,last_tsc_nsec,last_tsc_offset}
|
|
||||||
- tsc offset in vmcb
|
|
||||||
Comment: 'raw' because updating the tsc offsets must not be preempted.
|
|
||||||
|
|
||||||
Name: kvm->mmu_lock
|
|
||||||
Type: spinlock_t
|
|
||||||
Arch: any
|
|
||||||
Protects: -shadow page/shadow tlb entry
|
|
||||||
Comment: it is a spinlock since it is used in mmu notifier.
|
|
||||||
|
|
||||||
Name: kvm->srcu
|
|
||||||
Type: srcu lock
|
|
||||||
Arch: any
|
|
||||||
Protects: - kvm->memslots
|
|
||||||
- kvm->buses
|
|
||||||
Comment: The srcu read lock must be held while accessing memslots (e.g.
|
|
||||||
when using gfn_to_* functions) and while accessing in-kernel
|
|
||||||
MMIO/PIO address->device structure mapping (kvm->buses).
|
|
||||||
The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
|
|
||||||
if it is needed by multiple functions.
|
|
||||||
|
|
||||||
Name: blocked_vcpu_on_cpu_lock
|
|
||||||
Type: spinlock_t
|
|
||||||
Arch: x86
|
|
||||||
Protects: blocked_vcpu_on_cpu
|
|
||||||
Comment: This is a per-CPU lock and it is used for VT-d posted-interrupts.
|
|
||||||
When VT-d posted-interrupts is supported and the VM has assigned
|
|
||||||
devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu
|
|
||||||
protected by blocked_vcpu_on_cpu_lock, when VT-d hardware issues
|
|
||||||
wakeup notification event since external interrupts from the
|
|
||||||
assigned devices happens, we will find the vCPU on the list to
|
|
||||||
wakeup.
|
|
@ -1,3 +1,6 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
======================
|
||||||
The x86 kvm shadow mmu
|
The x86 kvm shadow mmu
|
||||||
======================
|
======================
|
||||||
|
|
||||||
@ -7,27 +10,37 @@ physical addresses to host physical addresses.
|
|||||||
|
|
||||||
The mmu code attempts to satisfy the following requirements:
|
The mmu code attempts to satisfy the following requirements:
|
||||||
|
|
||||||
- correctness: the guest should not be able to determine that it is running
|
- correctness:
|
||||||
|
the guest should not be able to determine that it is running
|
||||||
on an emulated mmu except for timing (we attempt to comply
|
on an emulated mmu except for timing (we attempt to comply
|
||||||
with the specification, not emulate the characteristics of
|
with the specification, not emulate the characteristics of
|
||||||
a particular implementation such as tlb size)
|
a particular implementation such as tlb size)
|
||||||
- security: the guest must not be able to touch host memory not assigned
|
- security:
|
||||||
|
the guest must not be able to touch host memory not assigned
|
||||||
to it
|
to it
|
||||||
- performance: minimize the performance penalty imposed by the mmu
|
- performance:
|
||||||
- scaling: need to scale to large memory and large vcpu guests
|
minimize the performance penalty imposed by the mmu
|
||||||
- hardware: support the full range of x86 virtualization hardware
|
- scaling:
|
||||||
- integration: Linux memory management code must be in control of guest memory
|
need to scale to large memory and large vcpu guests
|
||||||
|
- hardware:
|
||||||
|
support the full range of x86 virtualization hardware
|
||||||
|
- integration:
|
||||||
|
Linux memory management code must be in control of guest memory
|
||||||
so that swapping, page migration, page merging, transparent
|
so that swapping, page migration, page merging, transparent
|
||||||
hugepages, and similar features work without change
|
hugepages, and similar features work without change
|
||||||
- dirty tracking: report writes to guest memory to enable live migration
|
- dirty tracking:
|
||||||
|
report writes to guest memory to enable live migration
|
||||||
and framebuffer-based displays
|
and framebuffer-based displays
|
||||||
- footprint: keep the amount of pinned kernel memory low (most memory
|
- footprint:
|
||||||
|
keep the amount of pinned kernel memory low (most memory
|
||||||
should be shrinkable)
|
should be shrinkable)
|
||||||
- reliability: avoid multipage or GFP_ATOMIC allocations
|
- reliability:
|
||||||
|
avoid multipage or GFP_ATOMIC allocations
|
||||||
|
|
||||||
Acronyms
|
Acronyms
|
||||||
========
|
========
|
||||||
|
|
||||||
|
==== ====================================================================
|
||||||
pfn host page frame number
|
pfn host page frame number
|
||||||
hpa host physical address
|
hpa host physical address
|
||||||
hva host virtual address
|
hva host virtual address
|
||||||
@ -41,6 +54,7 @@ pte page table entry (used also to refer generically to paging structure
|
|||||||
gpte guest pte (referring to gfns)
|
gpte guest pte (referring to gfns)
|
||||||
spte shadow pte (referring to pfns)
|
spte shadow pte (referring to pfns)
|
||||||
tdp two dimensional paging (vendor neutral term for NPT and EPT)
|
tdp two dimensional paging (vendor neutral term for NPT and EPT)
|
||||||
|
==== ====================================================================
|
||||||
|
|
||||||
Virtual and real hardware supported
|
Virtual and real hardware supported
|
||||||
===================================
|
===================================
|
||||||
@ -90,11 +104,13 @@ Events
|
|||||||
The mmu is driven by events, some from the guest, some from the host.
|
The mmu is driven by events, some from the guest, some from the host.
|
||||||
|
|
||||||
Guest generated events:
|
Guest generated events:
|
||||||
|
|
||||||
- writes to control registers (especially cr3)
|
- writes to control registers (especially cr3)
|
||||||
- invlpg/invlpga instruction execution
|
- invlpg/invlpga instruction execution
|
||||||
- access to missing or protected translations
|
- access to missing or protected translations
|
||||||
|
|
||||||
Host generated events:
|
Host generated events:
|
||||||
|
|
||||||
- changes in the gpa->hpa translation (either through gpa->hva changes or
|
- changes in the gpa->hpa translation (either through gpa->hva changes or
|
||||||
through hva->hpa changes)
|
through hva->hpa changes)
|
||||||
- memory pressure (the shrinker)
|
- memory pressure (the shrinker)
|
||||||
@ -117,16 +133,19 @@ Leaf ptes point at guest pages.
|
|||||||
The following table shows translations encoded by leaf ptes, with higher-level
|
The following table shows translations encoded by leaf ptes, with higher-level
|
||||||
translations in parentheses:
|
translations in parentheses:
|
||||||
|
|
||||||
Non-nested guests:
|
Non-nested guests::
|
||||||
|
|
||||||
nonpaging: gpa->hpa
|
nonpaging: gpa->hpa
|
||||||
paging: gva->gpa->hpa
|
paging: gva->gpa->hpa
|
||||||
paging, tdp: (gva->)gpa->hpa
|
paging, tdp: (gva->)gpa->hpa
|
||||||
Nested guests:
|
|
||||||
|
Nested guests::
|
||||||
|
|
||||||
non-tdp: ngva->gpa->hpa (*)
|
non-tdp: ngva->gpa->hpa (*)
|
||||||
tdp: (ngva->)ngpa->gpa->hpa
|
tdp: (ngva->)ngpa->gpa->hpa
|
||||||
|
|
||||||
(*) the guest hypervisor will encode the ngva->gpa translation into its page
|
(*) the guest hypervisor will encode the ngva->gpa translation into its page
|
||||||
tables if npt is not present
|
tables if npt is not present
|
||||||
|
|
||||||
Shadow pages contain the following information:
|
Shadow pages contain the following information:
|
||||||
role.level:
|
role.level:
|
||||||
@ -291,28 +310,41 @@ Handling a page fault is performed as follows:
|
|||||||
|
|
||||||
- if the RSV bit of the error code is set, the page fault is caused by guest
|
- if the RSV bit of the error code is set, the page fault is caused by guest
|
||||||
accessing MMIO and cached MMIO information is available.
|
accessing MMIO and cached MMIO information is available.
|
||||||
|
|
||||||
- walk shadow page table
|
- walk shadow page table
|
||||||
- check for valid generation number in the spte (see "Fast invalidation of
|
- check for valid generation number in the spte (see "Fast invalidation of
|
||||||
MMIO sptes" below)
|
MMIO sptes" below)
|
||||||
- cache the information to vcpu->arch.mmio_gva, vcpu->arch.mmio_access and
|
- cache the information to vcpu->arch.mmio_gva, vcpu->arch.mmio_access and
|
||||||
vcpu->arch.mmio_gfn, and call the emulator
|
vcpu->arch.mmio_gfn, and call the emulator
|
||||||
|
|
||||||
- If both P bit and R/W bit of error code are set, this could possibly
|
- If both P bit and R/W bit of error code are set, this could possibly
|
||||||
be handled as a "fast page fault" (fixed without taking the MMU lock). See
|
be handled as a "fast page fault" (fixed without taking the MMU lock). See
|
||||||
the description in Documentation/virt/kvm/locking.txt.
|
the description in Documentation/virt/kvm/locking.txt.
|
||||||
|
|
||||||
- if needed, walk the guest page tables to determine the guest translation
|
- if needed, walk the guest page tables to determine the guest translation
|
||||||
(gva->gpa or ngpa->gpa)
|
(gva->gpa or ngpa->gpa)
|
||||||
|
|
||||||
- if permissions are insufficient, reflect the fault back to the guest
|
- if permissions are insufficient, reflect the fault back to the guest
|
||||||
|
|
||||||
- determine the host page
|
- determine the host page
|
||||||
|
|
||||||
- if this is an mmio request, there is no host page; cache the info to
|
- if this is an mmio request, there is no host page; cache the info to
|
||||||
vcpu->arch.mmio_gva, vcpu->arch.mmio_access and vcpu->arch.mmio_gfn
|
vcpu->arch.mmio_gva, vcpu->arch.mmio_access and vcpu->arch.mmio_gfn
|
||||||
|
|
||||||
- walk the shadow page table to find the spte for the translation,
|
- walk the shadow page table to find the spte for the translation,
|
||||||
instantiating missing intermediate page tables as necessary
|
instantiating missing intermediate page tables as necessary
|
||||||
|
|
||||||
- If this is an mmio request, cache the mmio info to the spte and set some
|
- If this is an mmio request, cache the mmio info to the spte and set some
|
||||||
reserved bit on the spte (see callers of kvm_mmu_set_mmio_spte_mask)
|
reserved bit on the spte (see callers of kvm_mmu_set_mmio_spte_mask)
|
||||||
|
|
||||||
- try to unsynchronize the page
|
- try to unsynchronize the page
|
||||||
|
|
||||||
- if successful, we can let the guest continue and modify the gpte
|
- if successful, we can let the guest continue and modify the gpte
|
||||||
|
|
||||||
- emulate the instruction
|
- emulate the instruction
|
||||||
|
|
||||||
- if failed, unshadow the page and let the guest continue
|
- if failed, unshadow the page and let the guest continue
|
||||||
|
|
||||||
- update any translations that were modified by the instruction
|
- update any translations that were modified by the instruction
|
||||||
|
|
||||||
invlpg handling:
|
invlpg handling:
|
||||||
@ -324,10 +356,12 @@ invlpg handling:
|
|||||||
Guest control register updates:
|
Guest control register updates:
|
||||||
|
|
||||||
- mov to cr3
|
- mov to cr3
|
||||||
|
|
||||||
- look up new shadow roots
|
- look up new shadow roots
|
||||||
- synchronize newly reachable shadow pages
|
- synchronize newly reachable shadow pages
|
||||||
|
|
||||||
- mov to cr0/cr4/efer
|
- mov to cr0/cr4/efer
|
||||||
|
|
||||||
- set up mmu context for new paging mode
|
- set up mmu context for new paging mode
|
||||||
- look up new shadow roots
|
- look up new shadow roots
|
||||||
- synchronize newly reachable shadow pages
|
- synchronize newly reachable shadow pages
|
||||||
@ -358,6 +392,7 @@ on fault type:
|
|||||||
(user write faults generate a #PF)
|
(user write faults generate a #PF)
|
||||||
|
|
||||||
In the first case there are two additional complications:
|
In the first case there are two additional complications:
|
||||||
|
|
||||||
- if CR4.SMEP is enabled: since we've turned the page into a kernel page,
|
- if CR4.SMEP is enabled: since we've turned the page into a kernel page,
|
||||||
the kernel may now execute it. We handle this by also setting spte.nx.
|
the kernel may now execute it. We handle this by also setting spte.nx.
|
||||||
If we get a user fetch or read fault, we'll change spte.u=1 and
|
If we get a user fetch or read fault, we'll change spte.u=1 and
|
||||||
@ -446,4 +481,3 @@ Further reading
|
|||||||
|
|
||||||
- NPT presentation from KVM Forum 2008
|
- NPT presentation from KVM Forum 2008
|
||||||
http://www.linux-kvm.org/images/c/c8/KvmForum2008%24kdf2008_21.pdf
|
http://www.linux-kvm.org/images/c/c8/KvmForum2008%24kdf2008_21.pdf
|
||||||
|
|
@ -1,6 +1,10 @@
|
|||||||
KVM-specific MSRs.
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010
|
|
||||||
=====================================================
|
=================
|
||||||
|
KVM-specific MSRs
|
||||||
|
=================
|
||||||
|
|
||||||
|
:Author: Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010
|
||||||
|
|
||||||
KVM makes use of some custom MSRs to service some requests.
|
KVM makes use of some custom MSRs to service some requests.
|
||||||
|
|
||||||
@ -9,34 +13,39 @@ Custom MSRs have a range reserved for them, that goes from
|
|||||||
but they are deprecated and their use is discouraged.
|
but they are deprecated and their use is discouraged.
|
||||||
|
|
||||||
Custom MSR list
|
Custom MSR list
|
||||||
--------
|
---------------
|
||||||
|
|
||||||
The current supported Custom MSR list is:
|
The current supported Custom MSR list is:
|
||||||
|
|
||||||
MSR_KVM_WALL_CLOCK_NEW: 0x4b564d00
|
MSR_KVM_WALL_CLOCK_NEW:
|
||||||
|
0x4b564d00
|
||||||
|
|
||||||
data: 4-byte aligned physical address of a memory area which must be
|
data:
|
||||||
|
4-byte aligned physical address of a memory area which must be
|
||||||
in guest RAM. This memory is expected to hold a copy of the following
|
in guest RAM. This memory is expected to hold a copy of the following
|
||||||
structure:
|
structure::
|
||||||
|
|
||||||
struct pvclock_wall_clock {
|
struct pvclock_wall_clock {
|
||||||
u32 version;
|
u32 version;
|
||||||
u32 sec;
|
u32 sec;
|
||||||
u32 nsec;
|
u32 nsec;
|
||||||
} __attribute__((__packed__));
|
} __attribute__((__packed__));
|
||||||
|
|
||||||
whose data will be filled in by the hypervisor. The hypervisor is only
|
whose data will be filled in by the hypervisor. The hypervisor is only
|
||||||
guaranteed to update this data at the moment of MSR write.
|
guaranteed to update this data at the moment of MSR write.
|
||||||
Users that want to reliably query this information more than once have
|
Users that want to reliably query this information more than once have
|
||||||
to write more than once to this MSR. Fields have the following meanings:
|
to write more than once to this MSR. Fields have the following meanings:
|
||||||
|
|
||||||
version: guest has to check version before and after grabbing
|
version:
|
||||||
|
guest has to check version before and after grabbing
|
||||||
time information and check that they are both equal and even.
|
time information and check that they are both equal and even.
|
||||||
An odd version indicates an in-progress update.
|
An odd version indicates an in-progress update.
|
||||||
|
|
||||||
sec: number of seconds for wallclock at time of boot.
|
sec:
|
||||||
|
number of seconds for wallclock at time of boot.
|
||||||
|
|
||||||
nsec: number of nanoseconds for wallclock at time of boot.
|
nsec:
|
||||||
|
number of nanoseconds for wallclock at time of boot.
|
||||||
|
|
||||||
In order to get the current wallclock time, the system_time from
|
In order to get the current wallclock time, the system_time from
|
||||||
MSR_KVM_SYSTEM_TIME_NEW needs to be added.
|
MSR_KVM_SYSTEM_TIME_NEW needs to be added.
|
||||||
@ -47,13 +56,15 @@ MSR_KVM_WALL_CLOCK_NEW: 0x4b564d00
|
|||||||
Availability of this MSR must be checked via bit 3 in 0x40000001 cpuid
|
Availability of this MSR must be checked via bit 3 in 0x40000001 cpuid
|
||||||
leaf prior to usage.
|
leaf prior to usage.
|
||||||
|
|
||||||
MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
|
MSR_KVM_SYSTEM_TIME_NEW:
|
||||||
|
0x4b564d01
|
||||||
|
|
||||||
data: 4-byte aligned physical address of a memory area which must be in
|
data:
|
||||||
|
4-byte aligned physical address of a memory area which must be in
|
||||||
guest RAM, plus an enable bit in bit 0. This memory is expected to hold
|
guest RAM, plus an enable bit in bit 0. This memory is expected to hold
|
||||||
a copy of the following structure:
|
a copy of the following structure::
|
||||||
|
|
||||||
struct pvclock_vcpu_time_info {
|
struct pvclock_vcpu_time_info {
|
||||||
u32 version;
|
u32 version;
|
||||||
u32 pad0;
|
u32 pad0;
|
||||||
u64 tsc_timestamp;
|
u64 tsc_timestamp;
|
||||||
@ -62,7 +73,7 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
|
|||||||
s8 tsc_shift;
|
s8 tsc_shift;
|
||||||
u8 flags;
|
u8 flags;
|
||||||
u8 pad[2];
|
u8 pad[2];
|
||||||
} __attribute__((__packed__)); /* 32 bytes */
|
} __attribute__((__packed__)); /* 32 bytes */
|
||||||
|
|
||||||
whose data will be filled in by the hypervisor periodically. Only one
|
whose data will be filled in by the hypervisor periodically. Only one
|
||||||
write, or registration, is needed for each VCPU. The interval between
|
write, or registration, is needed for each VCPU. The interval between
|
||||||
@ -72,23 +83,28 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
|
|||||||
|
|
||||||
Fields have the following meanings:
|
Fields have the following meanings:
|
||||||
|
|
||||||
version: guest has to check version before and after grabbing
|
version:
|
||||||
|
guest has to check version before and after grabbing
|
||||||
time information and check that they are both equal and even.
|
time information and check that they are both equal and even.
|
||||||
An odd version indicates an in-progress update.
|
An odd version indicates an in-progress update.
|
||||||
|
|
||||||
tsc_timestamp: the tsc value at the current VCPU at the time
|
tsc_timestamp:
|
||||||
|
the tsc value at the current VCPU at the time
|
||||||
of the update of this structure. Guests can subtract this value
|
of the update of this structure. Guests can subtract this value
|
||||||
from current tsc to derive a notion of elapsed time since the
|
from current tsc to derive a notion of elapsed time since the
|
||||||
structure update.
|
structure update.
|
||||||
|
|
||||||
system_time: a host notion of monotonic time, including sleep
|
system_time:
|
||||||
|
a host notion of monotonic time, including sleep
|
||||||
time at the time this structure was last updated. Unit is
|
time at the time this structure was last updated. Unit is
|
||||||
nanoseconds.
|
nanoseconds.
|
||||||
|
|
||||||
tsc_to_system_mul: multiplier to be used when converting
|
tsc_to_system_mul:
|
||||||
|
multiplier to be used when converting
|
||||||
tsc-related quantity to nanoseconds
|
tsc-related quantity to nanoseconds
|
||||||
|
|
||||||
tsc_shift: shift to be used when converting tsc-related
|
tsc_shift:
|
||||||
|
shift to be used when converting tsc-related
|
||||||
quantity to nanoseconds. This shift will ensure that
|
quantity to nanoseconds. This shift will ensure that
|
||||||
multiplication with tsc_to_system_mul does not overflow.
|
multiplication with tsc_to_system_mul does not overflow.
|
||||||
A positive value denotes a left shift, a negative value
|
A positive value denotes a left shift, a negative value
|
||||||
@ -96,7 +112,7 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
|
|||||||
|
|
||||||
The conversion from tsc to nanoseconds involves an additional
|
The conversion from tsc to nanoseconds involves an additional
|
||||||
right shift by 32 bits. With this information, guests can
|
right shift by 32 bits. With this information, guests can
|
||||||
derive per-CPU time by doing:
|
derive per-CPU time by doing::
|
||||||
|
|
||||||
time = (current_tsc - tsc_timestamp)
|
time = (current_tsc - tsc_timestamp)
|
||||||
if (tsc_shift >= 0)
|
if (tsc_shift >= 0)
|
||||||
@ -106,29 +122,34 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
|
|||||||
time = (time * tsc_to_system_mul) >> 32
|
time = (time * tsc_to_system_mul) >> 32
|
||||||
time = time + system_time
|
time = time + system_time
|
||||||
|
|
||||||
flags: bits in this field indicate extended capabilities
|
flags:
|
||||||
|
bits in this field indicate extended capabilities
|
||||||
coordinated between the guest and the hypervisor. Availability
|
coordinated between the guest and the hypervisor. Availability
|
||||||
of specific flags has to be checked in 0x40000001 cpuid leaf.
|
of specific flags has to be checked in 0x40000001 cpuid leaf.
|
||||||
Current flags are:
|
Current flags are:
|
||||||
|
|
||||||
flag bit | cpuid bit | meaning
|
|
||||||
-------------------------------------------------------------
|
+-----------+--------------+----------------------------------+
|
||||||
| | time measures taken across
|
| flag bit | cpuid bit | meaning |
|
||||||
0 | 24 | multiple cpus are guaranteed to
|
+-----------+--------------+----------------------------------+
|
||||||
| | be monotonic
|
| | | time measures taken across |
|
||||||
-------------------------------------------------------------
|
| 0 | 24 | multiple cpus are guaranteed to |
|
||||||
| | guest vcpu has been paused by
|
| | | be monotonic |
|
||||||
1 | N/A | the host
|
+-----------+--------------+----------------------------------+
|
||||||
| | See 4.70 in api.txt
|
| | | guest vcpu has been paused by |
|
||||||
-------------------------------------------------------------
|
| 1 | N/A | the host |
|
||||||
|
| | | See 4.70 in api.txt |
|
||||||
|
+-----------+--------------+----------------------------------+
|
||||||
|
|
||||||
Availability of this MSR must be checked via bit 3 in 0x40000001 cpuid
|
Availability of this MSR must be checked via bit 3 in 0x40000001 cpuid
|
||||||
leaf prior to usage.
|
leaf prior to usage.
|
||||||
|
|
||||||
|
|
||||||
MSR_KVM_WALL_CLOCK: 0x11
|
MSR_KVM_WALL_CLOCK:
|
||||||
|
0x11
|
||||||
|
|
||||||
data and functioning: same as MSR_KVM_WALL_CLOCK_NEW. Use that instead.
|
data and functioning:
|
||||||
|
same as MSR_KVM_WALL_CLOCK_NEW. Use that instead.
|
||||||
|
|
||||||
This MSR falls outside the reserved KVM range and may be removed in the
|
This MSR falls outside the reserved KVM range and may be removed in the
|
||||||
future. Its usage is deprecated.
|
future. Its usage is deprecated.
|
||||||
@ -136,9 +157,11 @@ MSR_KVM_WALL_CLOCK: 0x11
|
|||||||
Availability of this MSR must be checked via bit 0 in 0x40000001 cpuid
|
Availability of this MSR must be checked via bit 0 in 0x40000001 cpuid
|
||||||
leaf prior to usage.
|
leaf prior to usage.
|
||||||
|
|
||||||
MSR_KVM_SYSTEM_TIME: 0x12
|
MSR_KVM_SYSTEM_TIME:
|
||||||
|
0x12
|
||||||
|
|
||||||
data and functioning: same as MSR_KVM_SYSTEM_TIME_NEW. Use that instead.
|
data and functioning:
|
||||||
|
same as MSR_KVM_SYSTEM_TIME_NEW. Use that instead.
|
||||||
|
|
||||||
This MSR falls outside the reserved KVM range and may be removed in the
|
This MSR falls outside the reserved KVM range and may be removed in the
|
||||||
future. Its usage is deprecated.
|
future. Its usage is deprecated.
|
||||||
@ -146,7 +169,7 @@ MSR_KVM_SYSTEM_TIME: 0x12
|
|||||||
Availability of this MSR must be checked via bit 0 in 0x40000001 cpuid
|
Availability of this MSR must be checked via bit 0 in 0x40000001 cpuid
|
||||||
leaf prior to usage.
|
leaf prior to usage.
|
||||||
|
|
||||||
The suggested algorithm for detecting kvmclock presence is then:
|
The suggested algorithm for detecting kvmclock presence is then::
|
||||||
|
|
||||||
if (!kvm_para_available()) /* refer to cpuid.txt */
|
if (!kvm_para_available()) /* refer to cpuid.txt */
|
||||||
return NON_PRESENT;
|
return NON_PRESENT;
|
||||||
@ -163,8 +186,11 @@ MSR_KVM_SYSTEM_TIME: 0x12
|
|||||||
} else
|
} else
|
||||||
return NON_PRESENT;
|
return NON_PRESENT;
|
||||||
|
|
||||||
MSR_KVM_ASYNC_PF_EN: 0x4b564d02
|
MSR_KVM_ASYNC_PF_EN:
|
||||||
data: Bits 63-6 hold 64-byte aligned physical address of a
|
0x4b564d02
|
||||||
|
|
||||||
|
data:
|
||||||
|
Bits 63-6 hold 64-byte aligned physical address of a
|
||||||
64 byte memory area which must be in guest RAM and must be
|
64 byte memory area which must be in guest RAM and must be
|
||||||
zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1
|
zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1
|
||||||
when asynchronous page faults are enabled on the vcpu; 0 when
|
when asynchronous page faults are enabled on the vcpu; 0 when
|
||||||
@ -200,20 +226,22 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02
|
|||||||
Currently type 2 APF will be always delivered on the same vcpu as
|
Currently type 2 APF will be always delivered on the same vcpu as
|
||||||
type 1 was, but guest should not rely on that.
|
type 1 was, but guest should not rely on that.
|
||||||
|
|
||||||
MSR_KVM_STEAL_TIME: 0x4b564d03
|
MSR_KVM_STEAL_TIME:
|
||||||
|
0x4b564d03
|
||||||
|
|
||||||
data: 64-byte aligned physical address of a memory area which must be
|
data:
|
||||||
|
64-byte aligned physical address of a memory area which must be
|
||||||
in guest RAM, plus an enable bit in bit 0. This memory is expected to
|
in guest RAM, plus an enable bit in bit 0. This memory is expected to
|
||||||
hold a copy of the following structure:
|
hold a copy of the following structure::
|
||||||
|
|
||||||
struct kvm_steal_time {
|
struct kvm_steal_time {
|
||||||
__u64 steal;
|
__u64 steal;
|
||||||
__u32 version;
|
__u32 version;
|
||||||
__u32 flags;
|
__u32 flags;
|
||||||
__u8 preempted;
|
__u8 preempted;
|
||||||
__u8 u8_pad[3];
|
__u8 u8_pad[3];
|
||||||
__u32 pad[11];
|
__u32 pad[11];
|
||||||
}
|
}
|
||||||
|
|
||||||
whose data will be filled in by the hypervisor periodically. Only one
|
whose data will be filled in by the hypervisor periodically. Only one
|
||||||
write, or registration, is needed for each VCPU. The interval between
|
write, or registration, is needed for each VCPU. The interval between
|
||||||
@ -224,25 +252,32 @@ MSR_KVM_STEAL_TIME: 0x4b564d03
|
|||||||
|
|
||||||
Fields have the following meanings:
|
Fields have the following meanings:
|
||||||
|
|
||||||
version: a sequence counter. In other words, guest has to check
|
version:
|
||||||
|
a sequence counter. In other words, guest has to check
|
||||||
this field before and after grabbing time information and make
|
this field before and after grabbing time information and make
|
||||||
sure they are both equal and even. An odd version indicates an
|
sure they are both equal and even. An odd version indicates an
|
||||||
in-progress update.
|
in-progress update.
|
||||||
|
|
||||||
flags: At this point, always zero. May be used to indicate
|
flags:
|
||||||
|
At this point, always zero. May be used to indicate
|
||||||
changes in this structure in the future.
|
changes in this structure in the future.
|
||||||
|
|
||||||
steal: the amount of time in which this vCPU did not run, in
|
steal:
|
||||||
|
the amount of time in which this vCPU did not run, in
|
||||||
nanoseconds. Time during which the vcpu is idle, will not be
|
nanoseconds. Time during which the vcpu is idle, will not be
|
||||||
reported as steal time.
|
reported as steal time.
|
||||||
|
|
||||||
preempted: indicate the vCPU who owns this struct is running or
|
preempted:
|
||||||
|
indicate the vCPU who owns this struct is running or
|
||||||
not. Non-zero values mean the vCPU has been preempted. Zero
|
not. Non-zero values mean the vCPU has been preempted. Zero
|
||||||
means the vCPU is not preempted. NOTE, it is always zero if the
|
means the vCPU is not preempted. NOTE, it is always zero if the
|
||||||
hypervisor doesn't support this field.
|
hypervisor doesn't support this field.
|
||||||
|
|
||||||
MSR_KVM_EOI_EN: 0x4b564d04
|
MSR_KVM_EOI_EN:
|
||||||
data: Bit 0 is 1 when PV end of interrupt is enabled on the vcpu; 0
|
0x4b564d04
|
||||||
|
|
||||||
|
data:
|
||||||
|
Bit 0 is 1 when PV end of interrupt is enabled on the vcpu; 0
|
||||||
when disabled. Bit 1 is reserved and must be zero. When PV end of
|
when disabled. Bit 1 is reserved and must be zero. When PV end of
|
||||||
interrupt is enabled (bit 0 set), bits 63-2 hold a 4-byte aligned
|
interrupt is enabled (bit 0 set), bits 63-2 hold a 4-byte aligned
|
||||||
physical address of a 4 byte memory area which must be in guest RAM and
|
physical address of a 4 byte memory area which must be in guest RAM and
|
||||||
@ -274,11 +309,13 @@ MSR_KVM_EOI_EN: 0x4b564d04
|
|||||||
clear it using a single CPU instruction, such as test and clear, or
|
clear it using a single CPU instruction, such as test and clear, or
|
||||||
compare and exchange.
|
compare and exchange.
|
||||||
|
|
||||||
MSR_KVM_POLL_CONTROL: 0x4b564d05
|
MSR_KVM_POLL_CONTROL:
|
||||||
|
0x4b564d05
|
||||||
|
|
||||||
Control host-side polling.
|
Control host-side polling.
|
||||||
|
|
||||||
data: Bit 0 enables (1) or disables (0) host-side HLT polling logic.
|
data:
|
||||||
|
Bit 0 enables (1) or disables (0) host-side HLT polling logic.
|
||||||
|
|
||||||
KVM guests can request the host not to poll on HLT, for example if
|
KVM guests can request the host not to poll on HLT, for example if
|
||||||
they are performing polling themselves.
|
they are performing polling themselves.
|
||||||
|
|
@ -1,3 +1,6 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
==========
|
||||||
Nested VMX
|
Nested VMX
|
||||||
==========
|
==========
|
||||||
|
|
||||||
@ -41,9 +44,9 @@ No modifications are required to user space (qemu). However, qemu's default
|
|||||||
emulated CPU type (qemu64) does not list the "VMX" CPU feature, so it must be
|
emulated CPU type (qemu64) does not list the "VMX" CPU feature, so it must be
|
||||||
explicitly enabled, by giving qemu one of the following options:
|
explicitly enabled, by giving qemu one of the following options:
|
||||||
|
|
||||||
-cpu host (emulated CPU has all features of the real CPU)
|
- ``-cpu host`` (emulated CPU has all features of the real CPU)
|
||||||
|
|
||||||
-cpu qemu64,+vmx (add just the vmx feature to a named CPU type)
|
- ``-cpu qemu64,+vmx`` (add just the vmx feature to a named CPU type)
|
||||||
|
|
||||||
|
|
||||||
ABIs
|
ABIs
|
||||||
@ -75,6 +78,8 @@ of this structure changes, this can break live migration across KVM versions.
|
|||||||
VMCS12_REVISION (from vmx.c) should be changed if struct vmcs12 or its inner
|
VMCS12_REVISION (from vmx.c) should be changed if struct vmcs12 or its inner
|
||||||
struct shadow_vmcs is ever changed.
|
struct shadow_vmcs is ever changed.
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
typedef u64 natural_width;
|
typedef u64 natural_width;
|
||||||
struct __packed vmcs12 {
|
struct __packed vmcs12 {
|
||||||
/* According to the Intel spec, a VMCS region must start with
|
/* According to the Intel spec, a VMCS region must start with
|
||||||
@ -220,21 +225,21 @@ Authors
|
|||||||
-------
|
-------
|
||||||
|
|
||||||
These patches were written by:
|
These patches were written by:
|
||||||
Abel Gordon, abelg <at> il.ibm.com
|
- Abel Gordon, abelg <at> il.ibm.com
|
||||||
Nadav Har'El, nyh <at> il.ibm.com
|
- Nadav Har'El, nyh <at> il.ibm.com
|
||||||
Orit Wasserman, oritw <at> il.ibm.com
|
- Orit Wasserman, oritw <at> il.ibm.com
|
||||||
Ben-Ami Yassor, benami <at> il.ibm.com
|
- Ben-Ami Yassor, benami <at> il.ibm.com
|
||||||
Muli Ben-Yehuda, muli <at> il.ibm.com
|
- Muli Ben-Yehuda, muli <at> il.ibm.com
|
||||||
|
|
||||||
With contributions by:
|
With contributions by:
|
||||||
Anthony Liguori, aliguori <at> us.ibm.com
|
- Anthony Liguori, aliguori <at> us.ibm.com
|
||||||
Mike Day, mdday <at> us.ibm.com
|
- Mike Day, mdday <at> us.ibm.com
|
||||||
Michael Factor, factor <at> il.ibm.com
|
- Michael Factor, factor <at> il.ibm.com
|
||||||
Zvi Dubitzky, dubi <at> il.ibm.com
|
- Zvi Dubitzky, dubi <at> il.ibm.com
|
||||||
|
|
||||||
And valuable reviews by:
|
And valuable reviews by:
|
||||||
Avi Kivity, avi <at> redhat.com
|
- Avi Kivity, avi <at> redhat.com
|
||||||
Gleb Natapov, gleb <at> redhat.com
|
- Gleb Natapov, gleb <at> redhat.com
|
||||||
Marcelo Tosatti, mtosatti <at> redhat.com
|
- Marcelo Tosatti, mtosatti <at> redhat.com
|
||||||
Kevin Tian, kevin.tian <at> intel.com
|
- Kevin Tian, kevin.tian <at> intel.com
|
||||||
and others.
|
- and others.
|
@ -1,3 +1,6 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
=================================
|
||||||
The PPC KVM paravirtual interface
|
The PPC KVM paravirtual interface
|
||||||
=================================
|
=================================
|
||||||
|
|
||||||
@ -34,8 +37,9 @@ up the hypercall. To call a hypercall, just call these instructions.
|
|||||||
|
|
||||||
The parameters are as follows:
|
The parameters are as follows:
|
||||||
|
|
||||||
|
======== ================ ================
|
||||||
Register IN OUT
|
Register IN OUT
|
||||||
|
======== ================ ================
|
||||||
r0 - volatile
|
r0 - volatile
|
||||||
r3 1st parameter Return code
|
r3 1st parameter Return code
|
||||||
r4 2nd parameter 1st output value
|
r4 2nd parameter 1st output value
|
||||||
@ -47,6 +51,7 @@ The parameters are as follows:
|
|||||||
r10 8th parameter 7th output value
|
r10 8th parameter 7th output value
|
||||||
r11 hypercall number 8th output value
|
r11 hypercall number 8th output value
|
||||||
r12 - volatile
|
r12 - volatile
|
||||||
|
======== ================ ================
|
||||||
|
|
||||||
Hypercall definitions are shared in generic code, so the same hypercall numbers
|
Hypercall definitions are shared in generic code, so the same hypercall numbers
|
||||||
apply for x86 and powerpc alike with the exception that each KVM hypercall
|
apply for x86 and powerpc alike with the exception that each KVM hypercall
|
||||||
@ -54,11 +59,13 @@ also needs to be ORed with the KVM vendor code which is (42 << 16).
|
|||||||
|
|
||||||
Return codes can be as follows:
|
Return codes can be as follows:
|
||||||
|
|
||||||
|
==== =========================
|
||||||
Code Meaning
|
Code Meaning
|
||||||
|
==== =========================
|
||||||
0 Success
|
0 Success
|
||||||
12 Hypercall not implemented
|
12 Hypercall not implemented
|
||||||
<0 Error
|
<0 Error
|
||||||
|
==== =========================
|
||||||
|
|
||||||
The magic page
|
The magic page
|
||||||
==============
|
==============
|
||||||
@ -72,7 +79,7 @@ desired location. The first parameter indicates the effective address when the
|
|||||||
MMU is enabled. The second parameter indicates the address in real mode, if
|
MMU is enabled. The second parameter indicates the address in real mode, if
|
||||||
applicable to the target. For now, we always map the page to -4096. This way we
|
applicable to the target. For now, we always map the page to -4096. This way we
|
||||||
can access it using absolute load and store functions. The following
|
can access it using absolute load and store functions. The following
|
||||||
instruction reads the first field of the magic page:
|
instruction reads the first field of the magic page::
|
||||||
|
|
||||||
ld rX, -4096(0)
|
ld rX, -4096(0)
|
||||||
|
|
||||||
@ -93,8 +100,10 @@ a bitmap of available features inside the magic page.
|
|||||||
|
|
||||||
The following enhancements to the magic page are currently available:
|
The following enhancements to the magic page are currently available:
|
||||||
|
|
||||||
|
============================ =======================================
|
||||||
KVM_MAGIC_FEAT_SR Maps SR registers r/w in the magic page
|
KVM_MAGIC_FEAT_SR Maps SR registers r/w in the magic page
|
||||||
KVM_MAGIC_FEAT_MAS0_TO_SPRG7 Maps MASn, ESR, PIR and high SPRGs
|
KVM_MAGIC_FEAT_MAS0_TO_SPRG7 Maps MASn, ESR, PIR and high SPRGs
|
||||||
|
============================ =======================================
|
||||||
|
|
||||||
For enhanced features in the magic page, please check for the existence of the
|
For enhanced features in the magic page, please check for the existence of the
|
||||||
feature before using them!
|
feature before using them!
|
||||||
@ -121,8 +130,8 @@ when entering the guest or don't have any impact on the hypervisor's behavior.
|
|||||||
|
|
||||||
The following bits are safe to be set inside the guest:
|
The following bits are safe to be set inside the guest:
|
||||||
|
|
||||||
MSR_EE
|
- MSR_EE
|
||||||
MSR_RI
|
- MSR_RI
|
||||||
|
|
||||||
If any other bit changes in the MSR, please still use mtmsr(d).
|
If any other bit changes in the MSR, please still use mtmsr(d).
|
||||||
|
|
||||||
@ -138,9 +147,9 @@ guest. Implementing any of those mappings is optional, as the instruction traps
|
|||||||
also act on the shared page. So calling privileged instructions still works as
|
also act on the shared page. So calling privileged instructions still works as
|
||||||
before.
|
before.
|
||||||
|
|
||||||
|
======================= ================================
|
||||||
From To
|
From To
|
||||||
==== ==
|
======================= ================================
|
||||||
|
|
||||||
mfmsr rX ld rX, magic_page->msr
|
mfmsr rX ld rX, magic_page->msr
|
||||||
mfsprg rX, 0 ld rX, magic_page->sprg0
|
mfsprg rX, 0 ld rX, magic_page->sprg0
|
||||||
mfsprg rX, 1 ld rX, magic_page->sprg1
|
mfsprg rX, 1 ld rX, magic_page->sprg1
|
||||||
@ -173,7 +182,7 @@ mtsrin rX, rY b <special mtsrin section>
|
|||||||
|
|
||||||
[BookE only]
|
[BookE only]
|
||||||
wrteei [0|1] b <special wrteei section>
|
wrteei [0|1] b <special wrteei section>
|
||||||
|
======================= ================================
|
||||||
|
|
||||||
Some instructions require more logic to determine what's going on than a load
|
Some instructions require more logic to determine what's going on than a load
|
||||||
or store instruction can deliver. To enable patching of those, we keep some
|
or store instruction can deliver. To enable patching of those, we keep some
|
||||||
@ -191,6 +200,7 @@ for example.
|
|||||||
|
|
||||||
Hypercall ABIs in KVM on PowerPC
|
Hypercall ABIs in KVM on PowerPC
|
||||||
=================================
|
=================================
|
||||||
|
|
||||||
1) KVM hypercalls (ePAPR)
|
1) KVM hypercalls (ePAPR)
|
||||||
|
|
||||||
These are ePAPR compliant hypercall implementations (mentioned above). Even
|
These are ePAPR compliant hypercall implementations (mentioned above). Even
|
@ -1,3 +1,6 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
================================
|
||||||
Review checklist for kvm patches
|
Review checklist for kvm patches
|
||||||
================================
|
================================
|
||||||
|
|
@ -1,3 +1,6 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
=============================
|
||||||
The s390 DIAGNOSE call on KVM
|
The s390 DIAGNOSE call on KVM
|
||||||
=============================
|
=============================
|
||||||
|
|
||||||
@ -16,12 +19,12 @@ DIAGNOSE calls by the guest cause a mandatory intercept. This implies
|
|||||||
all supported DIAGNOSE calls need to be handled by either KVM or its
|
all supported DIAGNOSE calls need to be handled by either KVM or its
|
||||||
userspace.
|
userspace.
|
||||||
|
|
||||||
All DIAGNOSE calls supported by KVM use the RS-a format:
|
All DIAGNOSE calls supported by KVM use the RS-a format::
|
||||||
|
|
||||||
--------------------------------------
|
--------------------------------------
|
||||||
| '83' | R1 | R3 | B2 | D2 |
|
| '83' | R1 | R3 | B2 | D2 |
|
||||||
--------------------------------------
|
--------------------------------------
|
||||||
0 8 12 16 20 31
|
0 8 12 16 20 31
|
||||||
|
|
||||||
The second-operand address (obtained by the base/displacement calculation)
|
The second-operand address (obtained by the base/displacement calculation)
|
||||||
is not used to address data. Instead, bits 48-63 of this address specify
|
is not used to address data. Instead, bits 48-63 of this address specify
|
@ -1,17 +1,21 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
Timekeeping Virtualization for X86-Based Architectures
|
======================================================
|
||||||
|
Timekeeping Virtualization for X86-Based Architectures
|
||||||
|
======================================================
|
||||||
|
|
||||||
Zachary Amsden <zamsden@redhat.com>
|
:Author: Zachary Amsden <zamsden@redhat.com>
|
||||||
Copyright (c) 2010, Red Hat. All rights reserved.
|
:Copyright: (c) 2010, Red Hat. All rights reserved.
|
||||||
|
|
||||||
1) Overview
|
.. Contents
|
||||||
2) Timing Devices
|
|
||||||
3) TSC Hardware
|
|
||||||
4) Virtualization Problems
|
|
||||||
|
|
||||||
=========================================================================
|
1) Overview
|
||||||
|
2) Timing Devices
|
||||||
|
3) TSC Hardware
|
||||||
|
4) Virtualization Problems
|
||||||
|
|
||||||
1) Overview
|
1. Overview
|
||||||
|
===========
|
||||||
|
|
||||||
One of the most complicated parts of the X86 platform, and specifically,
|
One of the most complicated parts of the X86 platform, and specifically,
|
||||||
the virtualization of this platform is the plethora of timing devices available
|
the virtualization of this platform is the plethora of timing devices available
|
||||||
@ -27,15 +31,15 @@ The purpose of this document is to collect data and information relevant to
|
|||||||
timekeeping which may be difficult to find elsewhere, specifically,
|
timekeeping which may be difficult to find elsewhere, specifically,
|
||||||
information relevant to KVM and hardware-based virtualization.
|
information relevant to KVM and hardware-based virtualization.
|
||||||
|
|
||||||
=========================================================================
|
2. Timing Devices
|
||||||
|
=================
|
||||||
2) Timing Devices
|
|
||||||
|
|
||||||
First we discuss the basic hardware devices available. TSC and the related
|
First we discuss the basic hardware devices available. TSC and the related
|
||||||
KVM clock are special enough to warrant a full exposition and are described in
|
KVM clock are special enough to warrant a full exposition and are described in
|
||||||
the following section.
|
the following section.
|
||||||
|
|
||||||
2.1) i8254 - PIT
|
2.1. i8254 - PIT
|
||||||
|
----------------
|
||||||
|
|
||||||
One of the first timer devices available is the programmable interrupt timer,
|
One of the first timer devices available is the programmable interrupt timer,
|
||||||
or PIT. The PIT has a fixed frequency 1.193182 MHz base clock and three
|
or PIT. The PIT has a fixed frequency 1.193182 MHz base clock and three
|
||||||
@ -50,13 +54,13 @@ The PIT uses I/O ports 0x40 - 0x43. Access to the 16-bit counters is done
|
|||||||
using single or multiple byte access to the I/O ports. There are 6 modes
|
using single or multiple byte access to the I/O ports. There are 6 modes
|
||||||
available, but not all modes are available to all timers, as only timer 2
|
available, but not all modes are available to all timers, as only timer 2
|
||||||
has a connected gate input, required for modes 1 and 5. The gate line is
|
has a connected gate input, required for modes 1 and 5. The gate line is
|
||||||
controlled by port 61h, bit 0, as illustrated in the following diagram.
|
controlled by port 61h, bit 0, as illustrated in the following diagram::
|
||||||
|
|
||||||
-------------- ----------------
|
-------------- ----------------
|
||||||
| | | |
|
| | | |
|
||||||
| 1.1932 MHz |---------->| CLOCK OUT | ---------> IRQ 0
|
| 1.1932 MHz|---------->| CLOCK OUT | ---------> IRQ 0
|
||||||
| Clock | | | |
|
| Clock | | | |
|
||||||
-------------- | +->| GATE TIMER 0 |
|
-------------- | +->| GATE TIMER 0 |
|
||||||
| ----------------
|
| ----------------
|
||||||
|
|
|
|
||||||
| ----------------
|
| ----------------
|
||||||
@ -70,29 +74,33 @@ controlled by port 61h, bit 0, as illustrated in the following diagram.
|
|||||||
| | |
|
| | |
|
||||||
|------>| CLOCK OUT | ---------> Port 61h, bit 5
|
|------>| CLOCK OUT | ---------> Port 61h, bit 5
|
||||||
| | |
|
| | |
|
||||||
Port 61h, bit 0 ---------->| GATE TIMER 2 | \_.---- ____
|
Port 61h, bit 0 -------->| GATE TIMER 2 | \_.---- ____
|
||||||
---------------- _| )--|LPF|---Speaker
|
---------------- _| )--|LPF|---Speaker
|
||||||
/ *---- \___/
|
/ *---- \___/
|
||||||
Port 61h, bit 1 -----------------------------------/
|
Port 61h, bit 1 ---------------------------------/
|
||||||
|
|
||||||
The timer modes are now described.
|
The timer modes are now described.
|
||||||
|
|
||||||
Mode 0: Single Timeout. This is a one-shot software timeout that counts down
|
Mode 0: Single Timeout.
|
||||||
|
This is a one-shot software timeout that counts down
|
||||||
when the gate is high (always true for timers 0 and 1). When the count
|
when the gate is high (always true for timers 0 and 1). When the count
|
||||||
reaches zero, the output goes high.
|
reaches zero, the output goes high.
|
||||||
|
|
||||||
Mode 1: Triggered One-shot. The output is initially set high. When the gate
|
Mode 1: Triggered One-shot.
|
||||||
|
The output is initially set high. When the gate
|
||||||
line is set high, a countdown is initiated (which does not stop if the gate is
|
line is set high, a countdown is initiated (which does not stop if the gate is
|
||||||
lowered), during which the output is set low. When the count reaches zero,
|
lowered), during which the output is set low. When the count reaches zero,
|
||||||
the output goes high.
|
the output goes high.
|
||||||
|
|
||||||
Mode 2: Rate Generator. The output is initially set high. When the countdown
|
Mode 2: Rate Generator.
|
||||||
|
The output is initially set high. When the countdown
|
||||||
reaches 1, the output goes low for one count and then returns high. The value
|
reaches 1, the output goes low for one count and then returns high. The value
|
||||||
is reloaded and the countdown automatically resumes. If the gate line goes
|
is reloaded and the countdown automatically resumes. If the gate line goes
|
||||||
low, the count is halted. If the output is low when the gate is lowered, the
|
low, the count is halted. If the output is low when the gate is lowered, the
|
||||||
output automatically goes high (this only affects timer 2).
|
output automatically goes high (this only affects timer 2).
|
||||||
|
|
||||||
Mode 3: Square Wave. This generates a high / low square wave. The count
|
Mode 3: Square Wave.
|
||||||
|
This generates a high / low square wave. The count
|
||||||
determines the length of the pulse, which alternates between high and low
|
determines the length of the pulse, which alternates between high and low
|
||||||
when zero is reached. The count only proceeds when gate is high and is
|
when zero is reached. The count only proceeds when gate is high and is
|
||||||
automatically reloaded on reaching zero. The count is decremented twice at
|
automatically reloaded on reaching zero. The count is decremented twice at
|
||||||
@ -103,12 +111,14 @@ Mode 3: Square Wave. This generates a high / low square wave. The count
|
|||||||
values are not observed when reading. This is the intended mode for timer 2,
|
values are not observed when reading. This is the intended mode for timer 2,
|
||||||
which generates sine-like tones by low-pass filtering the square wave output.
|
which generates sine-like tones by low-pass filtering the square wave output.
|
||||||
|
|
||||||
Mode 4: Software Strobe. After programming this mode and loading the counter,
|
Mode 4: Software Strobe.
|
||||||
|
After programming this mode and loading the counter,
|
||||||
the output remains high until the counter reaches zero. Then the output
|
the output remains high until the counter reaches zero. Then the output
|
||||||
goes low for 1 clock cycle and returns high. The counter is not reloaded.
|
goes low for 1 clock cycle and returns high. The counter is not reloaded.
|
||||||
Counting only occurs when gate is high.
|
Counting only occurs when gate is high.
|
||||||
|
|
||||||
Mode 5: Hardware Strobe. After programming and loading the counter, the
|
Mode 5: Hardware Strobe.
|
||||||
|
After programming and loading the counter, the
|
||||||
output remains high. When the gate is raised, a countdown is initiated
|
output remains high. When the gate is raised, a countdown is initiated
|
||||||
(which does not stop if the gate is lowered). When the counter reaches zero,
|
(which does not stop if the gate is lowered). When the counter reaches zero,
|
||||||
the output goes low for 1 clock cycle and then returns high. The counter is
|
the output goes low for 1 clock cycle and then returns high. The counter is
|
||||||
@ -118,49 +128,49 @@ In addition to normal binary counting, the PIT supports BCD counting. The
|
|||||||
command port, 0x43 is used to set the counter and mode for each of the three
|
command port, 0x43 is used to set the counter and mode for each of the three
|
||||||
timers.
|
timers.
|
||||||
|
|
||||||
PIT commands, issued to port 0x43, using the following bit encoding:
|
PIT commands, issued to port 0x43, using the following bit encoding::
|
||||||
|
|
||||||
Bit 7-4: Command (See table below)
|
Bit 7-4: Command (See table below)
|
||||||
Bit 3-1: Mode (000 = Mode 0, 101 = Mode 5, 11X = undefined)
|
Bit 3-1: Mode (000 = Mode 0, 101 = Mode 5, 11X = undefined)
|
||||||
Bit 0 : Binary (0) / BCD (1)
|
Bit 0 : Binary (0) / BCD (1)
|
||||||
|
|
||||||
Command table:
|
Command table::
|
||||||
|
|
||||||
0000 - Latch Timer 0 count for port 0x40
|
0000 - Latch Timer 0 count for port 0x40
|
||||||
sample and hold the count to be read in port 0x40;
|
sample and hold the count to be read in port 0x40;
|
||||||
additional commands ignored until counter is read;
|
additional commands ignored until counter is read;
|
||||||
mode bits ignored.
|
mode bits ignored.
|
||||||
|
|
||||||
0001 - Set Timer 0 LSB mode for port 0x40
|
0001 - Set Timer 0 LSB mode for port 0x40
|
||||||
set timer to read LSB only and force MSB to zero;
|
set timer to read LSB only and force MSB to zero;
|
||||||
mode bits set timer mode
|
mode bits set timer mode
|
||||||
|
|
||||||
0010 - Set Timer 0 MSB mode for port 0x40
|
0010 - Set Timer 0 MSB mode for port 0x40
|
||||||
set timer to read MSB only and force LSB to zero;
|
set timer to read MSB only and force LSB to zero;
|
||||||
mode bits set timer mode
|
mode bits set timer mode
|
||||||
|
|
||||||
0011 - Set Timer 0 16-bit mode for port 0x40
|
0011 - Set Timer 0 16-bit mode for port 0x40
|
||||||
set timer to read / write LSB first, then MSB;
|
set timer to read / write LSB first, then MSB;
|
||||||
mode bits set timer mode
|
mode bits set timer mode
|
||||||
|
|
||||||
0100 - Latch Timer 1 count for port 0x41 - as described above
|
0100 - Latch Timer 1 count for port 0x41 - as described above
|
||||||
0101 - Set Timer 1 LSB mode for port 0x41 - as described above
|
0101 - Set Timer 1 LSB mode for port 0x41 - as described above
|
||||||
0110 - Set Timer 1 MSB mode for port 0x41 - as described above
|
0110 - Set Timer 1 MSB mode for port 0x41 - as described above
|
||||||
0111 - Set Timer 1 16-bit mode for port 0x41 - as described above
|
0111 - Set Timer 1 16-bit mode for port 0x41 - as described above
|
||||||
|
|
||||||
1000 - Latch Timer 2 count for port 0x42 - as described above
|
1000 - Latch Timer 2 count for port 0x42 - as described above
|
||||||
1001 - Set Timer 2 LSB mode for port 0x42 - as described above
|
1001 - Set Timer 2 LSB mode for port 0x42 - as described above
|
||||||
1010 - Set Timer 2 MSB mode for port 0x42 - as described above
|
1010 - Set Timer 2 MSB mode for port 0x42 - as described above
|
||||||
1011 - Set Timer 2 16-bit mode for port 0x42 - as described above
|
1011 - Set Timer 2 16-bit mode for port 0x42 - as described above
|
||||||
|
|
||||||
1101 - General counter latch
|
1101 - General counter latch
|
||||||
Latch combination of counters into corresponding ports
|
Latch combination of counters into corresponding ports
|
||||||
Bit 3 = Counter 2
|
Bit 3 = Counter 2
|
||||||
Bit 2 = Counter 1
|
Bit 2 = Counter 1
|
||||||
Bit 1 = Counter 0
|
Bit 1 = Counter 0
|
||||||
Bit 0 = Unused
|
Bit 0 = Unused
|
||||||
|
|
||||||
1110 - Latch timer status
|
1110 - Latch timer status
|
||||||
Latch combination of counter mode into corresponding ports
|
Latch combination of counter mode into corresponding ports
|
||||||
Bit 3 = Counter 2
|
Bit 3 = Counter 2
|
||||||
Bit 2 = Counter 1
|
Bit 2 = Counter 1
|
||||||
@ -177,7 +187,8 @@ Command table:
|
|||||||
Bit 3-1 = Mode
|
Bit 3-1 = Mode
|
||||||
Bit 0 = Binary (0) / BCD mode (1)
|
Bit 0 = Binary (0) / BCD mode (1)
|
||||||
|
|
||||||
2.2) RTC
|
2.2. RTC
|
||||||
|
--------
|
||||||
|
|
||||||
The second device which was available in the original PC was the MC146818 real
|
The second device which was available in the original PC was the MC146818 real
|
||||||
time clock. The original device is now obsolete, and usually emulated by the
|
time clock. The original device is now obsolete, and usually emulated by the
|
||||||
@ -201,21 +212,21 @@ in progress, as indicated in the status register.
|
|||||||
The clock uses a 32.768kHz crystal, so bits 6-4 of register A should be
|
The clock uses a 32.768kHz crystal, so bits 6-4 of register A should be
|
||||||
programmed to a 32kHz divider if the RTC is to count seconds.
|
programmed to a 32kHz divider if the RTC is to count seconds.
|
||||||
|
|
||||||
This is the RAM map originally used for the RTC/CMOS:
|
This is the RAM map originally used for the RTC/CMOS::
|
||||||
|
|
||||||
Location Size Description
|
Location Size Description
|
||||||
------------------------------------------
|
------------------------------------------
|
||||||
00h byte Current second (BCD)
|
00h byte Current second (BCD)
|
||||||
01h byte Seconds alarm (BCD)
|
01h byte Seconds alarm (BCD)
|
||||||
02h byte Current minute (BCD)
|
02h byte Current minute (BCD)
|
||||||
03h byte Minutes alarm (BCD)
|
03h byte Minutes alarm (BCD)
|
||||||
04h byte Current hour (BCD)
|
04h byte Current hour (BCD)
|
||||||
05h byte Hours alarm (BCD)
|
05h byte Hours alarm (BCD)
|
||||||
06h byte Current day of week (BCD)
|
06h byte Current day of week (BCD)
|
||||||
07h byte Current day of month (BCD)
|
07h byte Current day of month (BCD)
|
||||||
08h byte Current month (BCD)
|
08h byte Current month (BCD)
|
||||||
09h byte Current year (BCD)
|
09h byte Current year (BCD)
|
||||||
0Ah byte Register A
|
0Ah byte Register A
|
||||||
bit 7 = Update in progress
|
bit 7 = Update in progress
|
||||||
bit 6-4 = Divider for clock
|
bit 6-4 = Divider for clock
|
||||||
000 = 4.194 MHz
|
000 = 4.194 MHz
|
||||||
@ -234,7 +245,7 @@ Location Size Description
|
|||||||
1101 = 125 mS
|
1101 = 125 mS
|
||||||
1110 = 250 mS
|
1110 = 250 mS
|
||||||
1111 = 500 mS
|
1111 = 500 mS
|
||||||
0Bh byte Register B
|
0Bh byte Register B
|
||||||
bit 7 = Run (0) / Halt (1)
|
bit 7 = Run (0) / Halt (1)
|
||||||
bit 6 = Periodic interrupt enable
|
bit 6 = Periodic interrupt enable
|
||||||
bit 5 = Alarm interrupt enable
|
bit 5 = Alarm interrupt enable
|
||||||
@ -243,19 +254,20 @@ Location Size Description
|
|||||||
bit 2 = BCD calendar (0) / Binary (1)
|
bit 2 = BCD calendar (0) / Binary (1)
|
||||||
bit 1 = 12-hour mode (0) / 24-hour mode (1)
|
bit 1 = 12-hour mode (0) / 24-hour mode (1)
|
||||||
bit 0 = 0 (DST off) / 1 (DST enabled)
|
bit 0 = 0 (DST off) / 1 (DST enabled)
|
||||||
0Ch byte Register C (read only)
|
0Ch byte Register C (read only)
|
||||||
bit 7 = interrupt request flag (IRQF)
|
bit 7 = interrupt request flag (IRQF)
|
||||||
bit 6 = periodic interrupt flag (PF)
|
bit 6 = periodic interrupt flag (PF)
|
||||||
bit 5 = alarm interrupt flag (AF)
|
bit 5 = alarm interrupt flag (AF)
|
||||||
bit 4 = update interrupt flag (UF)
|
bit 4 = update interrupt flag (UF)
|
||||||
bit 3-0 = reserved
|
bit 3-0 = reserved
|
||||||
0Dh byte Register D (read only)
|
0Dh byte Register D (read only)
|
||||||
bit 7 = RTC has power
|
bit 7 = RTC has power
|
||||||
bit 6-0 = reserved
|
bit 6-0 = reserved
|
||||||
32h byte Current century BCD (*)
|
32h byte Current century BCD (*)
|
||||||
(*) location vendor specific and now determined from ACPI global tables
|
(*) location vendor specific and now determined from ACPI global tables
|
||||||
|
|
||||||
2.3) APIC
|
2.3. APIC
|
||||||
|
---------
|
||||||
|
|
||||||
On Pentium and later processors, an on-board timer is available to each CPU
|
On Pentium and later processors, an on-board timer is available to each CPU
|
||||||
as part of the Advanced Programmable Interrupt Controller. The APIC is
|
as part of the Advanced Programmable Interrupt Controller. The APIC is
|
||||||
@ -276,7 +288,8 @@ timer is programmed through the LVT (local vector timer) register, is capable
|
|||||||
of one-shot or periodic operation, and is based on the bus clock divided down
|
of one-shot or periodic operation, and is based on the bus clock divided down
|
||||||
by the programmable divider register.
|
by the programmable divider register.
|
||||||
|
|
||||||
2.4) HPET
|
2.4. HPET
|
||||||
|
---------
|
||||||
|
|
||||||
HPET is quite complex, and was originally intended to replace the PIT / RTC
|
HPET is quite complex, and was originally intended to replace the PIT / RTC
|
||||||
support of the X86 PC. It remains to be seen whether that will be the case, as
|
support of the X86 PC. It remains to be seen whether that will be the case, as
|
||||||
@ -297,7 +310,8 @@ indicated through ACPI tables by the BIOS.
|
|||||||
Detailed specification of the HPET is beyond the current scope of this
|
Detailed specification of the HPET is beyond the current scope of this
|
||||||
document, as it is also very well documented elsewhere.
|
document, as it is also very well documented elsewhere.
|
||||||
|
|
||||||
2.5) Offboard Timers
|
2.5. Offboard Timers
|
||||||
|
--------------------
|
||||||
|
|
||||||
Several cards, both proprietary (watchdog boards) and commonplace (e1000) have
|
Several cards, both proprietary (watchdog boards) and commonplace (e1000) have
|
||||||
timing chips built into the cards which may have registers which are accessible
|
timing chips built into the cards which may have registers which are accessible
|
||||||
@ -307,9 +321,8 @@ general frowned upon as not playing by the agreed rules of the game. Such a
|
|||||||
timer device would require additional support to be virtualized properly and is
|
timer device would require additional support to be virtualized properly and is
|
||||||
not considered important at this time as no known operating system does this.
|
not considered important at this time as no known operating system does this.
|
||||||
|
|
||||||
=========================================================================
|
3. TSC Hardware
|
||||||
|
===============
|
||||||
3) TSC Hardware
|
|
||||||
|
|
||||||
The TSC or time stamp counter is relatively simple in theory; it counts
|
The TSC or time stamp counter is relatively simple in theory; it counts
|
||||||
instruction cycles issued by the processor, which can be used as a measure of
|
instruction cycles issued by the processor, which can be used as a measure of
|
||||||
@ -340,7 +353,8 @@ allows the guest visible TSC to be offset by a constant. Newer implementations
|
|||||||
promise to allow the TSC to additionally be scaled, but this hardware is not
|
promise to allow the TSC to additionally be scaled, but this hardware is not
|
||||||
yet widely available.
|
yet widely available.
|
||||||
|
|
||||||
3.1) TSC synchronization
|
3.1. TSC synchronization
|
||||||
|
------------------------
|
||||||
|
|
||||||
The TSC is a CPU-local clock in most implementations. This means, on SMP
|
The TSC is a CPU-local clock in most implementations. This means, on SMP
|
||||||
platforms, the TSCs of different CPUs may start at different times depending
|
platforms, the TSCs of different CPUs may start at different times depending
|
||||||
@ -357,7 +371,8 @@ practice, getting a perfectly synchronized TSC will not be possible unless all
|
|||||||
values are read from the same clock, which generally only is possible on single
|
values are read from the same clock, which generally only is possible on single
|
||||||
socket systems or those with special hardware support.
|
socket systems or those with special hardware support.
|
||||||
|
|
||||||
3.2) TSC and CPU hotplug
|
3.2. TSC and CPU hotplug
|
||||||
|
------------------------
|
||||||
|
|
||||||
As touched on already, CPUs which arrive later than the boot time of the system
|
As touched on already, CPUs which arrive later than the boot time of the system
|
||||||
may not have a TSC value that is synchronized with the rest of the system.
|
may not have a TSC value that is synchronized with the rest of the system.
|
||||||
@ -367,7 +382,8 @@ a guarantee. This can have the effect of bringing a system from a state where
|
|||||||
TSC is synchronized back to a state where TSC synchronization flaws, however
|
TSC is synchronized back to a state where TSC synchronization flaws, however
|
||||||
small, may be exposed to the OS and any virtualization environment.
|
small, may be exposed to the OS and any virtualization environment.
|
||||||
|
|
||||||
3.3) TSC and multi-socket / NUMA
|
3.3. TSC and multi-socket / NUMA
|
||||||
|
--------------------------------
|
||||||
|
|
||||||
Multi-socket systems, especially large multi-socket systems are likely to have
|
Multi-socket systems, especially large multi-socket systems are likely to have
|
||||||
individual clocksources rather than a single, universally distributed clock.
|
individual clocksources rather than a single, universally distributed clock.
|
||||||
@ -385,7 +401,8 @@ standards for telecommunications and computer equipment.
|
|||||||
It is recommended not to trust the TSCs to remain synchronized on NUMA or
|
It is recommended not to trust the TSCs to remain synchronized on NUMA or
|
||||||
multiple socket systems for these reasons.
|
multiple socket systems for these reasons.
|
||||||
|
|
||||||
3.4) TSC and C-states
|
3.4. TSC and C-states
|
||||||
|
---------------------
|
||||||
|
|
||||||
C-states, or idling states of the processor, especially C1E and deeper sleep
|
C-states, or idling states of the processor, especially C1E and deeper sleep
|
||||||
states may be problematic for TSC as well. The TSC may stop advancing in such
|
states may be problematic for TSC as well. The TSC may stop advancing in such
|
||||||
@ -396,7 +413,8 @@ based on CPU and chipset identifications.
|
|||||||
The TSC in such a case may be corrected by catching it up to a known external
|
The TSC in such a case may be corrected by catching it up to a known external
|
||||||
clocksource.
|
clocksource.
|
||||||
|
|
||||||
3.5) TSC frequency change / P-states
|
3.5. TSC frequency change / P-states
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
To make things slightly more interesting, some CPUs may change frequency. They
|
To make things slightly more interesting, some CPUs may change frequency. They
|
||||||
may or may not run the TSC at the same rate, and because the frequency change
|
may or may not run the TSC at the same rate, and because the frequency change
|
||||||
@ -416,14 +434,16 @@ other processors. In such cases, the TSC on halted CPUs could advance faster
|
|||||||
than that of non-halted processors. AMD Turion processors are known to have
|
than that of non-halted processors. AMD Turion processors are known to have
|
||||||
this problem.
|
this problem.
|
||||||
|
|
||||||
3.6) TSC and STPCLK / T-states
|
3.6. TSC and STPCLK / T-states
|
||||||
|
------------------------------
|
||||||
|
|
||||||
External signals given to the processor may also have the effect of stopping
|
External signals given to the processor may also have the effect of stopping
|
||||||
the TSC. This is typically done for thermal emergency power control to prevent
|
the TSC. This is typically done for thermal emergency power control to prevent
|
||||||
an overheating condition, and typically, there is no way to detect that this
|
an overheating condition, and typically, there is no way to detect that this
|
||||||
condition has happened.
|
condition has happened.
|
||||||
|
|
||||||
3.7) TSC virtualization - VMX
|
3.7. TSC virtualization - VMX
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
VMX provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP
|
VMX provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP
|
||||||
instructions, which is enough for full virtualization of TSC in any manner. In
|
instructions, which is enough for full virtualization of TSC in any manner. In
|
||||||
@ -431,14 +451,16 @@ addition, VMX allows passing through the host TSC plus an additional TSC_OFFSET
|
|||||||
field specified in the VMCS. Special instructions must be used to read and
|
field specified in the VMCS. Special instructions must be used to read and
|
||||||
write the VMCS field.
|
write the VMCS field.
|
||||||
|
|
||||||
3.8) TSC virtualization - SVM
|
3.8. TSC virtualization - SVM
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
SVM provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP
|
SVM provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP
|
||||||
instructions, which is enough for full virtualization of TSC in any manner. In
|
instructions, which is enough for full virtualization of TSC in any manner. In
|
||||||
addition, SVM allows passing through the host TSC plus an additional offset
|
addition, SVM allows passing through the host TSC plus an additional offset
|
||||||
field specified in the SVM control block.
|
field specified in the SVM control block.
|
||||||
|
|
||||||
3.9) TSC feature bits in Linux
|
3.9. TSC feature bits in Linux
|
||||||
|
------------------------------
|
||||||
|
|
||||||
In summary, there is no way to guarantee the TSC remains in perfect
|
In summary, there is no way to guarantee the TSC remains in perfect
|
||||||
synchronization unless it is explicitly guaranteed by the architecture. Even
|
synchronization unless it is explicitly guaranteed by the architecture. Even
|
||||||
@ -448,13 +470,16 @@ despite being locally consistent.
|
|||||||
The following feature bits are used by Linux to signal various TSC attributes,
|
The following feature bits are used by Linux to signal various TSC attributes,
|
||||||
but they can only be taken to be meaningful for UP or single node systems.
|
but they can only be taken to be meaningful for UP or single node systems.
|
||||||
|
|
||||||
X86_FEATURE_TSC : The TSC is available in hardware
|
========================= =======================================
|
||||||
X86_FEATURE_RDTSCP : The RDTSCP instruction is available
|
X86_FEATURE_TSC The TSC is available in hardware
|
||||||
X86_FEATURE_CONSTANT_TSC : The TSC rate is unchanged with P-states
|
X86_FEATURE_RDTSCP The RDTSCP instruction is available
|
||||||
X86_FEATURE_NONSTOP_TSC : The TSC does not stop in C-states
|
X86_FEATURE_CONSTANT_TSC The TSC rate is unchanged with P-states
|
||||||
X86_FEATURE_TSC_RELIABLE : TSC sync checks are skipped (VMware)
|
X86_FEATURE_NONSTOP_TSC The TSC does not stop in C-states
|
||||||
|
X86_FEATURE_TSC_RELIABLE TSC sync checks are skipped (VMware)
|
||||||
|
========================= =======================================
|
||||||
|
|
||||||
4) Virtualization Problems
|
4. Virtualization Problems
|
||||||
|
==========================
|
||||||
|
|
||||||
Timekeeping is especially problematic for virtualization because a number of
|
Timekeeping is especially problematic for virtualization because a number of
|
||||||
challenges arise. The most obvious problem is that time is now shared between
|
challenges arise. The most obvious problem is that time is now shared between
|
||||||
@ -473,7 +498,8 @@ BIOS, but not in such an extreme fashion. However, the fact that SMM mode may
|
|||||||
cause similar problems to virtualization makes it a good justification for
|
cause similar problems to virtualization makes it a good justification for
|
||||||
solving many of these problems on bare metal.
|
solving many of these problems on bare metal.
|
||||||
|
|
||||||
4.1) Interrupt clocking
|
4.1. Interrupt clocking
|
||||||
|
-----------------------
|
||||||
|
|
||||||
One of the most immediate problems that occurs with legacy operating systems
|
One of the most immediate problems that occurs with legacy operating systems
|
||||||
is that the system timekeeping routines are often designed to keep track of
|
is that the system timekeeping routines are often designed to keep track of
|
||||||
@ -502,7 +528,8 @@ thus requires interrupt slewing to keep proper time. It does use a low enough
|
|||||||
rate (ed: is it 18.2 Hz?) however that it has not yet been a problem in
|
rate (ed: is it 18.2 Hz?) however that it has not yet been a problem in
|
||||||
practice.
|
practice.
|
||||||
|
|
||||||
4.2) TSC sampling and serialization
|
4.2. TSC sampling and serialization
|
||||||
|
-----------------------------------
|
||||||
|
|
||||||
As the highest precision time source available, the cycle counter of the CPU
|
As the highest precision time source available, the cycle counter of the CPU
|
||||||
has aroused much interest from developers. As explained above, this timer has
|
has aroused much interest from developers. As explained above, this timer has
|
||||||
@ -524,7 +551,8 @@ it may be necessary for an implementation to guard against "backwards" reads of
|
|||||||
the TSC as seen from other CPUs, even in an otherwise perfectly synchronized
|
the TSC as seen from other CPUs, even in an otherwise perfectly synchronized
|
||||||
system.
|
system.
|
||||||
|
|
||||||
4.3) Timespec aliasing
|
4.3. Timespec aliasing
|
||||||
|
----------------------
|
||||||
|
|
||||||
Additionally, this lack of serialization from the TSC poses another challenge
|
Additionally, this lack of serialization from the TSC poses another challenge
|
||||||
when using results of the TSC when measured against another time source. As
|
when using results of the TSC when measured against another time source. As
|
||||||
@ -548,7 +576,8 @@ This aliasing requires care in the computation and recalibration of kvmclock
|
|||||||
and any other values derived from TSC computation (such as TSC virtualization
|
and any other values derived from TSC computation (such as TSC virtualization
|
||||||
itself).
|
itself).
|
||||||
|
|
||||||
4.4) Migration
|
4.4. Migration
|
||||||
|
--------------
|
||||||
|
|
||||||
Migration of a virtual machine raises problems for timekeeping in two ways.
|
Migration of a virtual machine raises problems for timekeeping in two ways.
|
||||||
First, the migration itself may take time, during which interrupts cannot be
|
First, the migration itself may take time, during which interrupts cannot be
|
||||||
@ -566,7 +595,8 @@ always be caught up to the original rate. KVM clock avoids these problems by
|
|||||||
simply storing multipliers and offsets against the TSC for the guest to convert
|
simply storing multipliers and offsets against the TSC for the guest to convert
|
||||||
back into nanosecond resolution values.
|
back into nanosecond resolution values.
|
||||||
|
|
||||||
4.5) Scheduling
|
4.5. Scheduling
|
||||||
|
---------------
|
||||||
|
|
||||||
Since scheduling may be based on precise timing and firing of interrupts, the
|
Since scheduling may be based on precise timing and firing of interrupts, the
|
||||||
scheduling algorithms of an operating system may be adversely affected by
|
scheduling algorithms of an operating system may be adversely affected by
|
||||||
@ -579,7 +609,8 @@ In an attempt to work around this, several implementations have provided a
|
|||||||
paravirtualized scheduler clock, which reveals the true amount of CPU time for
|
paravirtualized scheduler clock, which reveals the true amount of CPU time for
|
||||||
which a virtual machine has been running.
|
which a virtual machine has been running.
|
||||||
|
|
||||||
4.6) Watchdogs
|
4.6. Watchdogs
|
||||||
|
--------------
|
||||||
|
|
||||||
Watchdog timers, such as the lock detector in Linux may fire accidentally when
|
Watchdog timers, such as the lock detector in Linux may fire accidentally when
|
||||||
running under hardware virtualization due to timer interrupts being delayed or
|
running under hardware virtualization due to timer interrupts being delayed or
|
||||||
@ -587,7 +618,8 @@ misinterpretation of the passage of real time. Usually, these warnings are
|
|||||||
spurious and can be ignored, but in some circumstances it may be necessary to
|
spurious and can be ignored, but in some circumstances it may be necessary to
|
||||||
disable such detection.
|
disable such detection.
|
||||||
|
|
||||||
4.7) Delays and precision timing
|
4.7. Delays and precision timing
|
||||||
|
--------------------------------
|
||||||
|
|
||||||
Precise timing and delays may not be possible in a virtualized system. This
|
Precise timing and delays may not be possible in a virtualized system. This
|
||||||
can happen if the system is controlling physical hardware, or issues delays to
|
can happen if the system is controlling physical hardware, or issues delays to
|
||||||
@ -600,7 +632,8 @@ The second issue may cause performance problems, but this is unlikely to be a
|
|||||||
significant issue. In many cases these delays may be eliminated through
|
significant issue. In many cases these delays may be eliminated through
|
||||||
configuration or paravirtualization.
|
configuration or paravirtualization.
|
||||||
|
|
||||||
4.8) Covert channels and leaks
|
4.8. Covert channels and leaks
|
||||||
|
------------------------------
|
||||||
|
|
||||||
In addition to the above problems, time information will inevitably leak to the
|
In addition to the above problems, time information will inevitably leak to the
|
||||||
guest about the host in anything but a perfect implementation of virtualized
|
guest about the host in anything but a perfect implementation of virtualized
|
File diff suppressed because it is too large
Load Diff
85
MAINTAINERS
85
MAINTAINERS
@ -2796,11 +2796,11 @@ F: drivers/block/aoe/
|
|||||||
|
|
||||||
ATHEROS 71XX/9XXX GPIO DRIVER
|
ATHEROS 71XX/9XXX GPIO DRIVER
|
||||||
M: Alban Bedel <albeu@free.fr>
|
M: Alban Bedel <albeu@free.fr>
|
||||||
|
S: Maintained
|
||||||
W: https://github.com/AlbanBedel/linux
|
W: https://github.com/AlbanBedel/linux
|
||||||
T: git git://github.com/AlbanBedel/linux
|
T: git git://github.com/AlbanBedel/linux
|
||||||
S: Maintained
|
|
||||||
F: drivers/gpio/gpio-ath79.c
|
|
||||||
F: Documentation/devicetree/bindings/gpio/gpio-ath79.txt
|
F: Documentation/devicetree/bindings/gpio/gpio-ath79.txt
|
||||||
|
F: drivers/gpio/gpio-ath79.c
|
||||||
|
|
||||||
ATHEROS 71XX/9XXX USB PHY DRIVER
|
ATHEROS 71XX/9XXX USB PHY DRIVER
|
||||||
M: Alban Bedel <albeu@free.fr>
|
M: Alban Bedel <albeu@free.fr>
|
||||||
@ -3422,8 +3422,8 @@ BROADCOM BRCMSTB GPIO DRIVER
|
|||||||
M: Gregory Fong <gregory.0xf0@gmail.com>
|
M: Gregory Fong <gregory.0xf0@gmail.com>
|
||||||
L: bcm-kernel-feedback-list@broadcom.com
|
L: bcm-kernel-feedback-list@broadcom.com
|
||||||
S: Supported
|
S: Supported
|
||||||
F: drivers/gpio/gpio-brcmstb.c
|
|
||||||
F: Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.txt
|
F: Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.txt
|
||||||
|
F: drivers/gpio/gpio-brcmstb.c
|
||||||
|
|
||||||
BROADCOM BRCMSTB I2C DRIVER
|
BROADCOM BRCMSTB I2C DRIVER
|
||||||
M: Kamal Dasu <kdasu.kdev@gmail.com>
|
M: Kamal Dasu <kdasu.kdev@gmail.com>
|
||||||
@ -3481,8 +3481,8 @@ BROADCOM KONA GPIO DRIVER
|
|||||||
M: Ray Jui <rjui@broadcom.com>
|
M: Ray Jui <rjui@broadcom.com>
|
||||||
L: bcm-kernel-feedback-list@broadcom.com
|
L: bcm-kernel-feedback-list@broadcom.com
|
||||||
S: Supported
|
S: Supported
|
||||||
F: drivers/gpio/gpio-bcm-kona.c
|
|
||||||
F: Documentation/devicetree/bindings/gpio/brcm,kona-gpio.txt
|
F: Documentation/devicetree/bindings/gpio/brcm,kona-gpio.txt
|
||||||
|
F: drivers/gpio/gpio-bcm-kona.c
|
||||||
|
|
||||||
BROADCOM NETXTREME-E ROCE DRIVER
|
BROADCOM NETXTREME-E ROCE DRIVER
|
||||||
M: Selvin Xavier <selvin.xavier@broadcom.com>
|
M: Selvin Xavier <selvin.xavier@broadcom.com>
|
||||||
@ -3597,8 +3597,8 @@ F: sound/pci/bt87x.c
|
|||||||
|
|
||||||
BT8XXGPIO DRIVER
|
BT8XXGPIO DRIVER
|
||||||
M: Michael Buesch <m@bues.ch>
|
M: Michael Buesch <m@bues.ch>
|
||||||
W: http://bu3sch.de/btgpio.php
|
|
||||||
S: Maintained
|
S: Maintained
|
||||||
|
W: http://bu3sch.de/btgpio.php
|
||||||
F: drivers/gpio/gpio-bt8xx.c
|
F: drivers/gpio/gpio-bt8xx.c
|
||||||
|
|
||||||
BTRFS FILE SYSTEM
|
BTRFS FILE SYSTEM
|
||||||
@ -3649,6 +3649,7 @@ F: sound/pci/oxygen/
|
|||||||
|
|
||||||
C-SKY ARCHITECTURE
|
C-SKY ARCHITECTURE
|
||||||
M: Guo Ren <guoren@kernel.org>
|
M: Guo Ren <guoren@kernel.org>
|
||||||
|
L: linux-csky@vger.kernel.org
|
||||||
T: git https://github.com/c-sky/csky-linux.git
|
T: git https://github.com/c-sky/csky-linux.git
|
||||||
S: Supported
|
S: Supported
|
||||||
F: arch/csky/
|
F: arch/csky/
|
||||||
@ -3909,7 +3910,7 @@ S: Supported
|
|||||||
F: Documentation/filesystems/ceph.txt
|
F: Documentation/filesystems/ceph.txt
|
||||||
F: fs/ceph/
|
F: fs/ceph/
|
||||||
|
|
||||||
CERTIFICATE HANDLING:
|
CERTIFICATE HANDLING
|
||||||
M: David Howells <dhowells@redhat.com>
|
M: David Howells <dhowells@redhat.com>
|
||||||
M: David Woodhouse <dwmw2@infradead.org>
|
M: David Woodhouse <dwmw2@infradead.org>
|
||||||
L: keyrings@vger.kernel.org
|
L: keyrings@vger.kernel.org
|
||||||
@ -3919,7 +3920,7 @@ F: certs/
|
|||||||
F: scripts/sign-file.c
|
F: scripts/sign-file.c
|
||||||
F: scripts/extract-cert.c
|
F: scripts/extract-cert.c
|
||||||
|
|
||||||
CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM:
|
CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM
|
||||||
L: devel@driverdev.osuosl.org
|
L: devel@driverdev.osuosl.org
|
||||||
S: Obsolete
|
S: Obsolete
|
||||||
F: drivers/staging/wusbcore/
|
F: drivers/staging/wusbcore/
|
||||||
@ -5932,12 +5933,12 @@ S: Maintained
|
|||||||
F: drivers/media/dvb-frontends/ec100*
|
F: drivers/media/dvb-frontends/ec100*
|
||||||
|
|
||||||
ECRYPT FILE SYSTEM
|
ECRYPT FILE SYSTEM
|
||||||
M: Tyler Hicks <tyhicks@canonical.com>
|
M: Tyler Hicks <code@tyhicks.com>
|
||||||
L: ecryptfs@vger.kernel.org
|
L: ecryptfs@vger.kernel.org
|
||||||
W: http://ecryptfs.org
|
W: http://ecryptfs.org
|
||||||
W: https://launchpad.net/ecryptfs
|
W: https://launchpad.net/ecryptfs
|
||||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tyhicks/ecryptfs.git
|
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tyhicks/ecryptfs.git
|
||||||
S: Supported
|
S: Odd Fixes
|
||||||
F: Documentation/filesystems/ecryptfs.txt
|
F: Documentation/filesystems/ecryptfs.txt
|
||||||
F: fs/ecryptfs/
|
F: fs/ecryptfs/
|
||||||
|
|
||||||
@ -7047,7 +7048,7 @@ L: kvm@vger.kernel.org
|
|||||||
S: Supported
|
S: Supported
|
||||||
F: drivers/uio/uio_pci_generic.c
|
F: drivers/uio/uio_pci_generic.c
|
||||||
|
|
||||||
GENERIC VDSO LIBRARY:
|
GENERIC VDSO LIBRARY
|
||||||
M: Andy Lutomirski <luto@kernel.org>
|
M: Andy Lutomirski <luto@kernel.org>
|
||||||
M: Thomas Gleixner <tglx@linutronix.de>
|
M: Thomas Gleixner <tglx@linutronix.de>
|
||||||
M: Vincenzo Frascino <vincenzo.frascino@arm.com>
|
M: Vincenzo Frascino <vincenzo.frascino@arm.com>
|
||||||
@ -7143,18 +7144,18 @@ GPIO SUBSYSTEM
|
|||||||
M: Linus Walleij <linus.walleij@linaro.org>
|
M: Linus Walleij <linus.walleij@linaro.org>
|
||||||
M: Bartosz Golaszewski <bgolaszewski@baylibre.com>
|
M: Bartosz Golaszewski <bgolaszewski@baylibre.com>
|
||||||
L: linux-gpio@vger.kernel.org
|
L: linux-gpio@vger.kernel.org
|
||||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
|
|
||||||
S: Maintained
|
S: Maintained
|
||||||
|
T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
|
||||||
|
F: Documentation/ABI/obsolete/sysfs-gpio
|
||||||
|
F: Documentation/ABI/testing/gpio-cdev
|
||||||
|
F: Documentation/admin-guide/gpio/
|
||||||
F: Documentation/devicetree/bindings/gpio/
|
F: Documentation/devicetree/bindings/gpio/
|
||||||
F: Documentation/driver-api/gpio/
|
F: Documentation/driver-api/gpio/
|
||||||
F: Documentation/admin-guide/gpio/
|
|
||||||
F: Documentation/ABI/testing/gpio-cdev
|
|
||||||
F: Documentation/ABI/obsolete/sysfs-gpio
|
|
||||||
F: drivers/gpio/
|
F: drivers/gpio/
|
||||||
|
F: include/asm-generic/gpio.h
|
||||||
F: include/linux/gpio/
|
F: include/linux/gpio/
|
||||||
F: include/linux/gpio.h
|
F: include/linux/gpio.h
|
||||||
F: include/linux/of_gpio.h
|
F: include/linux/of_gpio.h
|
||||||
F: include/asm-generic/gpio.h
|
|
||||||
F: include/uapi/linux/gpio.h
|
F: include/uapi/linux/gpio.h
|
||||||
F: tools/gpio/
|
F: tools/gpio/
|
||||||
|
|
||||||
@ -8055,8 +8056,8 @@ F: drivers/scsi/ips.*
|
|||||||
ICH LPC AND GPIO DRIVER
|
ICH LPC AND GPIO DRIVER
|
||||||
M: Peter Tyser <ptyser@xes-inc.com>
|
M: Peter Tyser <ptyser@xes-inc.com>
|
||||||
S: Maintained
|
S: Maintained
|
||||||
F: drivers/mfd/lpc_ich.c
|
|
||||||
F: drivers/gpio/gpio-ich.c
|
F: drivers/gpio/gpio-ich.c
|
||||||
|
F: drivers/mfd/lpc_ich.c
|
||||||
|
|
||||||
ICY I2C DRIVER
|
ICY I2C DRIVER
|
||||||
M: Max Staudt <max@enpas.org>
|
M: Max Staudt <max@enpas.org>
|
||||||
@ -8392,7 +8393,7 @@ M: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
|
|||||||
M: Rodrigo Vivi <rodrigo.vivi@intel.com>
|
M: Rodrigo Vivi <rodrigo.vivi@intel.com>
|
||||||
L: intel-gfx@lists.freedesktop.org
|
L: intel-gfx@lists.freedesktop.org
|
||||||
W: https://01.org/linuxgraphics/
|
W: https://01.org/linuxgraphics/
|
||||||
B: https://01.org/linuxgraphics/documentation/how-report-bugs
|
B: https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs
|
||||||
C: irc://chat.freenode.net/intel-gfx
|
C: irc://chat.freenode.net/intel-gfx
|
||||||
Q: http://patchwork.freedesktop.org/project/intel-gfx/
|
Q: http://patchwork.freedesktop.org/project/intel-gfx/
|
||||||
T: git git://anongit.freedesktop.org/drm-intel
|
T: git git://anongit.freedesktop.org/drm-intel
|
||||||
@ -9278,7 +9279,7 @@ F: include/keys/trusted-type.h
|
|||||||
F: security/keys/trusted.c
|
F: security/keys/trusted.c
|
||||||
F: include/keys/trusted.h
|
F: include/keys/trusted.h
|
||||||
|
|
||||||
KEYS/KEYRINGS:
|
KEYS/KEYRINGS
|
||||||
M: David Howells <dhowells@redhat.com>
|
M: David Howells <dhowells@redhat.com>
|
||||||
M: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
|
M: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
|
||||||
L: keyrings@vger.kernel.org
|
L: keyrings@vger.kernel.org
|
||||||
@ -11484,7 +11485,7 @@ F: drivers/scsi/mac_scsi.*
|
|||||||
F: drivers/scsi/sun3_scsi.*
|
F: drivers/scsi/sun3_scsi.*
|
||||||
F: drivers/scsi/sun3_scsi_vme.c
|
F: drivers/scsi/sun3_scsi_vme.c
|
||||||
|
|
||||||
NCSI LIBRARY:
|
NCSI LIBRARY
|
||||||
M: Samuel Mendoza-Jonas <sam@mendozajonas.com>
|
M: Samuel Mendoza-Jonas <sam@mendozajonas.com>
|
||||||
S: Maintained
|
S: Maintained
|
||||||
F: net/ncsi/
|
F: net/ncsi/
|
||||||
@ -13512,7 +13513,7 @@ L: linuxppc-dev@lists.ozlabs.org
|
|||||||
S: Maintained
|
S: Maintained
|
||||||
F: drivers/block/ps3vram.c
|
F: drivers/block/ps3vram.c
|
||||||
|
|
||||||
PSAMPLE PACKET SAMPLING SUPPORT:
|
PSAMPLE PACKET SAMPLING SUPPORT
|
||||||
M: Yotam Gigi <yotam.gi@gmail.com>
|
M: Yotam Gigi <yotam.gi@gmail.com>
|
||||||
S: Maintained
|
S: Maintained
|
||||||
F: net/psample
|
F: net/psample
|
||||||
@ -14582,10 +14583,10 @@ F: drivers/media/pci/saa7146/
|
|||||||
F: include/media/drv-intf/saa7146*
|
F: include/media/drv-intf/saa7146*
|
||||||
|
|
||||||
SAFESETID SECURITY MODULE
|
SAFESETID SECURITY MODULE
|
||||||
M: Micah Morton <mortonm@chromium.org>
|
M: Micah Morton <mortonm@chromium.org>
|
||||||
S: Supported
|
S: Supported
|
||||||
F: security/safesetid/
|
F: security/safesetid/
|
||||||
F: Documentation/admin-guide/LSM/SafeSetID.rst
|
F: Documentation/admin-guide/LSM/SafeSetID.rst
|
||||||
|
|
||||||
SAMSUNG AUDIO (ASoC) DRIVERS
|
SAMSUNG AUDIO (ASoC) DRIVERS
|
||||||
M: Krzysztof Kozlowski <krzk@kernel.org>
|
M: Krzysztof Kozlowski <krzk@kernel.org>
|
||||||
@ -16075,8 +16076,8 @@ F: Documentation/devicetree/bindings/reset/snps,axs10x-reset.txt
|
|||||||
SYNOPSYS CREG GPIO DRIVER
|
SYNOPSYS CREG GPIO DRIVER
|
||||||
M: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
|
M: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
|
||||||
S: Maintained
|
S: Maintained
|
||||||
F: drivers/gpio/gpio-creg-snps.c
|
|
||||||
F: Documentation/devicetree/bindings/gpio/snps,creg-gpio.txt
|
F: Documentation/devicetree/bindings/gpio/snps,creg-gpio.txt
|
||||||
|
F: drivers/gpio/gpio-creg-snps.c
|
||||||
|
|
||||||
SYNOPSYS DESIGNWARE 8250 UART DRIVER
|
SYNOPSYS DESIGNWARE 8250 UART DRIVER
|
||||||
R: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
|
R: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
|
||||||
@ -16087,8 +16088,8 @@ SYNOPSYS DESIGNWARE APB GPIO DRIVER
|
|||||||
M: Hoan Tran <hoan@os.amperecomputing.com>
|
M: Hoan Tran <hoan@os.amperecomputing.com>
|
||||||
L: linux-gpio@vger.kernel.org
|
L: linux-gpio@vger.kernel.org
|
||||||
S: Maintained
|
S: Maintained
|
||||||
F: drivers/gpio/gpio-dwapb.c
|
|
||||||
F: Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt
|
F: Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt
|
||||||
|
F: drivers/gpio/gpio-dwapb.c
|
||||||
|
|
||||||
SYNOPSYS DESIGNWARE AXI DMAC DRIVER
|
SYNOPSYS DESIGNWARE AXI DMAC DRIVER
|
||||||
M: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
|
M: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
|
||||||
@ -16552,8 +16553,8 @@ M: Michael Jamet <michael.jamet@intel.com>
|
|||||||
M: Mika Westerberg <mika.westerberg@linux.intel.com>
|
M: Mika Westerberg <mika.westerberg@linux.intel.com>
|
||||||
M: Yehezkel Bernat <YehezkelShB@gmail.com>
|
M: Yehezkel Bernat <YehezkelShB@gmail.com>
|
||||||
L: linux-usb@vger.kernel.org
|
L: linux-usb@vger.kernel.org
|
||||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git
|
|
||||||
S: Maintained
|
S: Maintained
|
||||||
|
T: git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git
|
||||||
F: Documentation/admin-guide/thunderbolt.rst
|
F: Documentation/admin-guide/thunderbolt.rst
|
||||||
F: drivers/thunderbolt/
|
F: drivers/thunderbolt/
|
||||||
F: include/linux/thunderbolt.h
|
F: include/linux/thunderbolt.h
|
||||||
@ -17080,7 +17081,7 @@ S: Maintained
|
|||||||
F: Documentation/admin-guide/ufs.rst
|
F: Documentation/admin-guide/ufs.rst
|
||||||
F: fs/ufs/
|
F: fs/ufs/
|
||||||
|
|
||||||
UHID USERSPACE HID IO DRIVER:
|
UHID USERSPACE HID IO DRIVER
|
||||||
M: David Herrmann <dh.herrmann@googlemail.com>
|
M: David Herrmann <dh.herrmann@googlemail.com>
|
||||||
L: linux-input@vger.kernel.org
|
L: linux-input@vger.kernel.org
|
||||||
S: Maintained
|
S: Maintained
|
||||||
@ -17094,18 +17095,18 @@ S: Maintained
|
|||||||
F: drivers/usb/common/ulpi.c
|
F: drivers/usb/common/ulpi.c
|
||||||
F: include/linux/ulpi/
|
F: include/linux/ulpi/
|
||||||
|
|
||||||
ULTRA-WIDEBAND (UWB) SUBSYSTEM:
|
ULTRA-WIDEBAND (UWB) SUBSYSTEM
|
||||||
L: devel@driverdev.osuosl.org
|
L: devel@driverdev.osuosl.org
|
||||||
S: Obsolete
|
S: Obsolete
|
||||||
F: drivers/staging/uwb/
|
F: drivers/staging/uwb/
|
||||||
|
|
||||||
UNICODE SUBSYSTEM:
|
UNICODE SUBSYSTEM
|
||||||
M: Gabriel Krisman Bertazi <krisman@collabora.com>
|
M: Gabriel Krisman Bertazi <krisman@collabora.com>
|
||||||
L: linux-fsdevel@vger.kernel.org
|
L: linux-fsdevel@vger.kernel.org
|
||||||
S: Supported
|
S: Supported
|
||||||
F: fs/unicode/
|
F: fs/unicode/
|
||||||
|
|
||||||
UNICORE32 ARCHITECTURE:
|
UNICORE32 ARCHITECTURE
|
||||||
M: Guan Xuetao <gxt@pku.edu.cn>
|
M: Guan Xuetao <gxt@pku.edu.cn>
|
||||||
W: http://mprc.pku.edu.cn/~guanxuetao/linux
|
W: http://mprc.pku.edu.cn/~guanxuetao/linux
|
||||||
S: Maintained
|
S: Maintained
|
||||||
@ -17398,11 +17399,14 @@ F: drivers/usb/
|
|||||||
F: include/linux/usb.h
|
F: include/linux/usb.h
|
||||||
F: include/linux/usb/
|
F: include/linux/usb/
|
||||||
|
|
||||||
USB TYPEC PI3USB30532 MUX DRIVER
|
USB TYPEC BUS FOR ALTERNATE MODES
|
||||||
M: Hans de Goede <hdegoede@redhat.com>
|
M: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
L: linux-usb@vger.kernel.org
|
L: linux-usb@vger.kernel.org
|
||||||
S: Maintained
|
S: Maintained
|
||||||
F: drivers/usb/typec/mux/pi3usb30532.c
|
F: Documentation/ABI/testing/sysfs-bus-typec
|
||||||
|
F: Documentation/driver-api/usb/typec_bus.rst
|
||||||
|
F: drivers/usb/typec/altmodes/
|
||||||
|
F: include/linux/usb/typec_altmode.h
|
||||||
|
|
||||||
USB TYPEC CLASS
|
USB TYPEC CLASS
|
||||||
M: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
M: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
@ -17413,14 +17417,11 @@ F: Documentation/driver-api/usb/typec.rst
|
|||||||
F: drivers/usb/typec/
|
F: drivers/usb/typec/
|
||||||
F: include/linux/usb/typec.h
|
F: include/linux/usb/typec.h
|
||||||
|
|
||||||
USB TYPEC BUS FOR ALTERNATE MODES
|
USB TYPEC PI3USB30532 MUX DRIVER
|
||||||
M: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
M: Hans de Goede <hdegoede@redhat.com>
|
||||||
L: linux-usb@vger.kernel.org
|
L: linux-usb@vger.kernel.org
|
||||||
S: Maintained
|
S: Maintained
|
||||||
F: Documentation/ABI/testing/sysfs-bus-typec
|
F: drivers/usb/typec/mux/pi3usb30532.c
|
||||||
F: Documentation/driver-api/usb/typec_bus.rst
|
|
||||||
F: drivers/usb/typec/altmodes/
|
|
||||||
F: include/linux/usb/typec_altmode.h
|
|
||||||
|
|
||||||
USB TYPEC PORT CONTROLLER DRIVERS
|
USB TYPEC PORT CONTROLLER DRIVERS
|
||||||
M: Guenter Roeck <linux@roeck-us.net>
|
M: Guenter Roeck <linux@roeck-us.net>
|
||||||
@ -17797,7 +17798,7 @@ F: include/linux/vbox_utils.h
|
|||||||
F: include/uapi/linux/vbox*.h
|
F: include/uapi/linux/vbox*.h
|
||||||
F: drivers/virt/vboxguest/
|
F: drivers/virt/vboxguest/
|
||||||
|
|
||||||
VIRTUAL BOX SHARED FOLDER VFS DRIVER:
|
VIRTUAL BOX SHARED FOLDER VFS DRIVER
|
||||||
M: Hans de Goede <hdegoede@redhat.com>
|
M: Hans de Goede <hdegoede@redhat.com>
|
||||||
L: linux-fsdevel@vger.kernel.org
|
L: linux-fsdevel@vger.kernel.org
|
||||||
S: Maintained
|
S: Maintained
|
||||||
@ -18420,8 +18421,8 @@ M: Nandor Han <nandor.han@ge.com>
|
|||||||
M: Semi Malinen <semi.malinen@ge.com>
|
M: Semi Malinen <semi.malinen@ge.com>
|
||||||
L: linux-gpio@vger.kernel.org
|
L: linux-gpio@vger.kernel.org
|
||||||
S: Maintained
|
S: Maintained
|
||||||
F: drivers/gpio/gpio-xra1403.c
|
|
||||||
F: Documentation/devicetree/bindings/gpio/gpio-xra1403.txt
|
F: Documentation/devicetree/bindings/gpio/gpio-xra1403.txt
|
||||||
|
F: drivers/gpio/gpio-xra1403.c
|
||||||
|
|
||||||
XTENSA XTFPGA PLATFORM SUPPORT
|
XTENSA XTFPGA PLATFORM SUPPORT
|
||||||
M: Max Filippov <jcmvbkbc@gmail.com>
|
M: Max Filippov <jcmvbkbc@gmail.com>
|
||||||
|
2
Makefile
2
Makefile
@ -2,7 +2,7 @@
|
|||||||
VERSION = 5
|
VERSION = 5
|
||||||
PATCHLEVEL = 6
|
PATCHLEVEL = 6
|
||||||
SUBLEVEL = 0
|
SUBLEVEL = 0
|
||||||
EXTRAVERSION = -rc1
|
EXTRAVERSION = -rc3
|
||||||
NAME = Kleptomaniac Octopus
|
NAME = Kleptomaniac Octopus
|
||||||
|
|
||||||
# *DOCUMENTATION*
|
# *DOCUMENTATION*
|
||||||
|
@ -178,9 +178,6 @@
|
|||||||
phy-mode = "rgmii";
|
phy-mode = "rgmii";
|
||||||
pinctrl-0 = <&pinctrl_rgmii1 &pinctrl_rgmii1_mdio_1>;
|
pinctrl-0 = <&pinctrl_rgmii1 &pinctrl_rgmii1_mdio_1>;
|
||||||
|
|
||||||
snps,phy-bus-name = "stmmac";
|
|
||||||
snps,phy-bus-id = <0>;
|
|
||||||
snps,phy-addr = <0>;
|
|
||||||
snps,reset-gpio = <&pio0 7 0>;
|
snps,reset-gpio = <&pio0 7 0>;
|
||||||
snps,reset-active-low;
|
snps,reset-active-low;
|
||||||
snps,reset-delays-us = <0 10000 1000000>;
|
snps,reset-delays-us = <0 10000 1000000>;
|
||||||
|
@ -46,7 +46,7 @@
|
|||||||
/* DAC */
|
/* DAC */
|
||||||
format = "i2s";
|
format = "i2s";
|
||||||
mclk-fs = <256>;
|
mclk-fs = <256>;
|
||||||
frame-inversion = <1>;
|
frame-inversion;
|
||||||
cpu {
|
cpu {
|
||||||
sound-dai = <&sti_uni_player2>;
|
sound-dai = <&sti_uni_player2>;
|
||||||
};
|
};
|
||||||
|
@ -11,8 +11,6 @@ CONFIG_SLAB=y
|
|||||||
CONFIG_MODULES=y
|
CONFIG_MODULES=y
|
||||||
CONFIG_MODULE_UNLOAD=y
|
CONFIG_MODULE_UNLOAD=y
|
||||||
# CONFIG_BLK_DEV_BSG is not set
|
# CONFIG_BLK_DEV_BSG is not set
|
||||||
# CONFIG_IOSCHED_DEADLINE is not set
|
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
CONFIG_ARCH_PXA=y
|
CONFIG_ARCH_PXA=y
|
||||||
CONFIG_ARCH_GUMSTIX=y
|
CONFIG_ARCH_GUMSTIX=y
|
||||||
CONFIG_PCCARD=y
|
CONFIG_PCCARD=y
|
||||||
|
@ -25,7 +25,6 @@ CONFIG_EMBEDDED=y
|
|||||||
CONFIG_PROFILING=y
|
CONFIG_PROFILING=y
|
||||||
CONFIG_MODULES=y
|
CONFIG_MODULES=y
|
||||||
CONFIG_MODULE_UNLOAD=y
|
CONFIG_MODULE_UNLOAD=y
|
||||||
# CONFIG_IOSCHED_DEADLINE is not set
|
|
||||||
CONFIG_ARCH_AXXIA=y
|
CONFIG_ARCH_AXXIA=y
|
||||||
CONFIG_GPIO_PCA953X=y
|
CONFIG_GPIO_PCA953X=y
|
||||||
CONFIG_ARM_LPAE=y
|
CONFIG_ARM_LPAE=y
|
||||||
|
@ -7,7 +7,6 @@ CONFIG_EMBEDDED=y
|
|||||||
CONFIG_SLOB=y
|
CONFIG_SLOB=y
|
||||||
CONFIG_JUMP_LABEL=y
|
CONFIG_JUMP_LABEL=y
|
||||||
CONFIG_PARTITION_ADVANCED=y
|
CONFIG_PARTITION_ADVANCED=y
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
CONFIG_ARCH_CLPS711X=y
|
CONFIG_ARCH_CLPS711X=y
|
||||||
CONFIG_ARCH_AUTCPU12=y
|
CONFIG_ARCH_AUTCPU12=y
|
||||||
CONFIG_ARCH_CDB89712=y
|
CONFIG_ARCH_CDB89712=y
|
||||||
|
@ -17,7 +17,7 @@ CONFIG_MODULE_UNLOAD=y
|
|||||||
CONFIG_MODULE_FORCE_UNLOAD=y
|
CONFIG_MODULE_FORCE_UNLOAD=y
|
||||||
CONFIG_MODVERSIONS=y
|
CONFIG_MODVERSIONS=y
|
||||||
# CONFIG_BLK_DEV_BSG is not set
|
# CONFIG_BLK_DEV_BSG is not set
|
||||||
CONFIG_IOSCHED_CFQ=m
|
CONFIG_IOSCHED_BFQ=m
|
||||||
CONFIG_ARCH_MULTI_V6=y
|
CONFIG_ARCH_MULTI_V6=y
|
||||||
#CONFIG_ARCH_MULTI_V7 is not set
|
#CONFIG_ARCH_MULTI_V7 is not set
|
||||||
CONFIG_ARCH_CNS3XXX=y
|
CONFIG_ARCH_CNS3XXX=y
|
||||||
|
@ -43,7 +43,6 @@ CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
|
|||||||
CONFIG_USB_MON=y
|
CONFIG_USB_MON=y
|
||||||
CONFIG_USB_STORAGE=y
|
CONFIG_USB_STORAGE=y
|
||||||
CONFIG_MMC=y
|
CONFIG_MMC=y
|
||||||
# CONFIG_MMC_BLOCK_BOUNCE is not set
|
|
||||||
CONFIG_MMC_PXA=y
|
CONFIG_MMC_PXA=y
|
||||||
CONFIG_EXT3_FS=y
|
CONFIG_EXT3_FS=y
|
||||||
CONFIG_NFS_FS=y
|
CONFIG_NFS_FS=y
|
||||||
|
@ -7,8 +7,6 @@ CONFIG_EXPERT=y
|
|||||||
# CONFIG_BASE_FULL is not set
|
# CONFIG_BASE_FULL is not set
|
||||||
# CONFIG_EPOLL is not set
|
# CONFIG_EPOLL is not set
|
||||||
CONFIG_SLOB=y
|
CONFIG_SLOB=y
|
||||||
# CONFIG_IOSCHED_DEADLINE is not set
|
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
CONFIG_ARCH_SA1100=y
|
CONFIG_ARCH_SA1100=y
|
||||||
CONFIG_SA1100_COLLIE=y
|
CONFIG_SA1100_COLLIE=y
|
||||||
CONFIG_PCCARD=y
|
CONFIG_PCCARD=y
|
||||||
|
@ -15,8 +15,6 @@ CONFIG_MODULE_UNLOAD=y
|
|||||||
CONFIG_MODULE_FORCE_UNLOAD=y
|
CONFIG_MODULE_FORCE_UNLOAD=y
|
||||||
CONFIG_MODVERSIONS=y
|
CONFIG_MODVERSIONS=y
|
||||||
CONFIG_PARTITION_ADVANCED=y
|
CONFIG_PARTITION_ADVANCED=y
|
||||||
# CONFIG_IOSCHED_DEADLINE is not set
|
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
CONFIG_ARCH_MULTIPLATFORM=y
|
CONFIG_ARCH_MULTIPLATFORM=y
|
||||||
CONFIG_ARCH_MULTI_V7=n
|
CONFIG_ARCH_MULTI_V7=n
|
||||||
CONFIG_ARCH_MULTI_V5=y
|
CONFIG_ARCH_MULTI_V5=y
|
||||||
|
@ -12,8 +12,6 @@ CONFIG_EMBEDDED=y
|
|||||||
# CONFIG_VM_EVENT_COUNTERS is not set
|
# CONFIG_VM_EVENT_COUNTERS is not set
|
||||||
# CONFIG_SLUB_DEBUG is not set
|
# CONFIG_SLUB_DEBUG is not set
|
||||||
# CONFIG_BLK_DEV_BSG is not set
|
# CONFIG_BLK_DEV_BSG is not set
|
||||||
# CONFIG_IOSCHED_DEADLINE is not set
|
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
# CONFIG_MMU is not set
|
# CONFIG_MMU is not set
|
||||||
CONFIG_ARM_SINGLE_ARMV7M=y
|
CONFIG_ARM_SINGLE_ARMV7M=y
|
||||||
CONFIG_ARCH_EFM32=y
|
CONFIG_ARCH_EFM32=y
|
||||||
|
@ -11,7 +11,6 @@ CONFIG_MODULE_UNLOAD=y
|
|||||||
CONFIG_MODULE_FORCE_UNLOAD=y
|
CONFIG_MODULE_FORCE_UNLOAD=y
|
||||||
# CONFIG_BLK_DEV_BSG is not set
|
# CONFIG_BLK_DEV_BSG is not set
|
||||||
CONFIG_PARTITION_ADVANCED=y
|
CONFIG_PARTITION_ADVANCED=y
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
CONFIG_ARCH_EP93XX=y
|
CONFIG_ARCH_EP93XX=y
|
||||||
CONFIG_CRUNCH=y
|
CONFIG_CRUNCH=y
|
||||||
CONFIG_MACH_ADSSPHERE=y
|
CONFIG_MACH_ADSSPHERE=y
|
||||||
|
@ -9,8 +9,6 @@ CONFIG_MODULES=y
|
|||||||
CONFIG_MODULE_UNLOAD=y
|
CONFIG_MODULE_UNLOAD=y
|
||||||
CONFIG_MODULE_FORCE_UNLOAD=y
|
CONFIG_MODULE_FORCE_UNLOAD=y
|
||||||
# CONFIG_BLK_DEV_BSG is not set
|
# CONFIG_BLK_DEV_BSG is not set
|
||||||
# CONFIG_IOSCHED_DEADLINE is not set
|
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
CONFIG_ARCH_PXA=y
|
CONFIG_ARCH_PXA=y
|
||||||
CONFIG_ARCH_PXA_ESERIES=y
|
CONFIG_ARCH_PXA_ESERIES=y
|
||||||
# CONFIG_ARM_THUMB is not set
|
# CONFIG_ARM_THUMB is not set
|
||||||
|
@ -14,7 +14,6 @@ CONFIG_MODULE_UNLOAD=y
|
|||||||
CONFIG_MODULE_FORCE_UNLOAD=y
|
CONFIG_MODULE_FORCE_UNLOAD=y
|
||||||
CONFIG_MODVERSIONS=y
|
CONFIG_MODVERSIONS=y
|
||||||
# CONFIG_BLK_DEV_BSG is not set
|
# CONFIG_BLK_DEV_BSG is not set
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
CONFIG_ARCH_PXA=y
|
CONFIG_ARCH_PXA=y
|
||||||
CONFIG_PXA_EZX=y
|
CONFIG_PXA_EZX=y
|
||||||
CONFIG_NO_HZ=y
|
CONFIG_NO_HZ=y
|
||||||
|
@ -5,8 +5,6 @@ CONFIG_LOG_BUF_SHIFT=14
|
|||||||
CONFIG_BLK_DEV_INITRD=y
|
CONFIG_BLK_DEV_INITRD=y
|
||||||
CONFIG_MODULES=y
|
CONFIG_MODULES=y
|
||||||
# CONFIG_BLK_DEV_BSG is not set
|
# CONFIG_BLK_DEV_BSG is not set
|
||||||
# CONFIG_IOSCHED_DEADLINE is not set
|
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
CONFIG_ARCH_SA1100=y
|
CONFIG_ARCH_SA1100=y
|
||||||
CONFIG_SA1100_H3600=y
|
CONFIG_SA1100_H3600=y
|
||||||
CONFIG_PCCARD=y
|
CONFIG_PCCARD=y
|
||||||
|
@ -10,7 +10,6 @@ CONFIG_MODULES=y
|
|||||||
CONFIG_MODULE_UNLOAD=y
|
CONFIG_MODULE_UNLOAD=y
|
||||||
CONFIG_MODULE_FORCE_UNLOAD=y
|
CONFIG_MODULE_FORCE_UNLOAD=y
|
||||||
# CONFIG_BLK_DEV_BSG is not set
|
# CONFIG_BLK_DEV_BSG is not set
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
CONFIG_ARCH_PXA=y
|
CONFIG_ARCH_PXA=y
|
||||||
CONFIG_MACH_H5000=y
|
CONFIG_MACH_H5000=y
|
||||||
CONFIG_AEABI=y
|
CONFIG_AEABI=y
|
||||||
|
@ -13,7 +13,6 @@ CONFIG_MODULE_UNLOAD=y
|
|||||||
CONFIG_MODULE_FORCE_UNLOAD=y
|
CONFIG_MODULE_FORCE_UNLOAD=y
|
||||||
CONFIG_MODVERSIONS=y
|
CONFIG_MODVERSIONS=y
|
||||||
# CONFIG_BLK_DEV_BSG is not set
|
# CONFIG_BLK_DEV_BSG is not set
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
CONFIG_ARCH_PXA=y
|
CONFIG_ARCH_PXA=y
|
||||||
CONFIG_MACH_INTELMOTE2=y
|
CONFIG_MACH_INTELMOTE2=y
|
||||||
CONFIG_NO_HZ=y
|
CONFIG_NO_HZ=y
|
||||||
|
@ -32,8 +32,6 @@ CONFIG_KPROBES=y
|
|||||||
CONFIG_MODULES=y
|
CONFIG_MODULES=y
|
||||||
CONFIG_MODULE_UNLOAD=y
|
CONFIG_MODULE_UNLOAD=y
|
||||||
# CONFIG_BLK_DEV_BSG is not set
|
# CONFIG_BLK_DEV_BSG is not set
|
||||||
# CONFIG_IOSCHED_DEADLINE is not set
|
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
CONFIG_NET=y
|
CONFIG_NET=y
|
||||||
CONFIG_PACKET=y
|
CONFIG_PACKET=y
|
||||||
CONFIG_UNIX=y
|
CONFIG_UNIX=y
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
CONFIG_CROSS_COMPILE="arm-linux-gnueabihf-"
|
|
||||||
CONFIG_HIGH_RES_TIMERS=y
|
CONFIG_HIGH_RES_TIMERS=y
|
||||||
CONFIG_PREEMPT=y
|
CONFIG_PREEMPT=y
|
||||||
CONFIG_BLK_DEV_INITRD=y
|
CONFIG_BLK_DEV_INITRD=y
|
||||||
@ -28,10 +27,7 @@ CONFIG_FLASH_SIZE=0x00080000
|
|||||||
CONFIG_ZBOOT_ROM_TEXT=0x0
|
CONFIG_ZBOOT_ROM_TEXT=0x0
|
||||||
CONFIG_ZBOOT_ROM_BSS=0x0
|
CONFIG_ZBOOT_ROM_BSS=0x0
|
||||||
CONFIG_ARM_APPENDED_DTB=y
|
CONFIG_ARM_APPENDED_DTB=y
|
||||||
# CONFIG_LBDAF is not set
|
|
||||||
# CONFIG_BLK_DEV_BSG is not set
|
# CONFIG_BLK_DEV_BSG is not set
|
||||||
# CONFIG_IOSCHED_DEADLINE is not set
|
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
CONFIG_BINFMT_FLAT=y
|
CONFIG_BINFMT_FLAT=y
|
||||||
CONFIG_BINFMT_ZFLAT=y
|
CONFIG_BINFMT_ZFLAT=y
|
||||||
CONFIG_BINFMT_SHARED_FLAT=y
|
CONFIG_BINFMT_SHARED_FLAT=y
|
||||||
|
@ -9,8 +9,6 @@ CONFIG_SLAB=y
|
|||||||
CONFIG_MODULES=y
|
CONFIG_MODULES=y
|
||||||
CONFIG_MODULE_UNLOAD=y
|
CONFIG_MODULE_UNLOAD=y
|
||||||
# CONFIG_BLK_DEV_BSG is not set
|
# CONFIG_BLK_DEV_BSG is not set
|
||||||
# CONFIG_IOSCHED_DEADLINE is not set
|
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
CONFIG_ARCH_PXA=y
|
CONFIG_ARCH_PXA=y
|
||||||
CONFIG_MACH_H4700=y
|
CONFIG_MACH_H4700=y
|
||||||
CONFIG_MACH_MAGICIAN=y
|
CONFIG_MACH_MAGICIAN=y
|
||||||
|
@ -15,7 +15,6 @@ CONFIG_EMBEDDED=y
|
|||||||
# CONFIG_SLUB_DEBUG is not set
|
# CONFIG_SLUB_DEBUG is not set
|
||||||
# CONFIG_COMPAT_BRK is not set
|
# CONFIG_COMPAT_BRK is not set
|
||||||
# CONFIG_BLK_DEV_BSG is not set
|
# CONFIG_BLK_DEV_BSG is not set
|
||||||
# CONFIG_IOSCHED_DEADLINE is not set
|
|
||||||
CONFIG_ARCH_MULTI_V4=y
|
CONFIG_ARCH_MULTI_V4=y
|
||||||
# CONFIG_ARCH_MULTI_V7 is not set
|
# CONFIG_ARCH_MULTI_V7 is not set
|
||||||
CONFIG_ARCH_MOXART=y
|
CONFIG_ARCH_MOXART=y
|
||||||
|
@ -25,8 +25,6 @@ CONFIG_MODULE_UNLOAD=y
|
|||||||
CONFIG_MODULE_FORCE_UNLOAD=y
|
CONFIG_MODULE_FORCE_UNLOAD=y
|
||||||
CONFIG_MODVERSIONS=y
|
CONFIG_MODVERSIONS=y
|
||||||
CONFIG_BLK_DEV_INTEGRITY=y
|
CONFIG_BLK_DEV_INTEGRITY=y
|
||||||
# CONFIG_IOSCHED_DEADLINE is not set
|
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
CONFIG_NET=y
|
CONFIG_NET=y
|
||||||
CONFIG_PACKET=y
|
CONFIG_PACKET=y
|
||||||
CONFIG_UNIX=y
|
CONFIG_UNIX=y
|
||||||
|
@ -18,8 +18,6 @@ CONFIG_MODULES=y
|
|||||||
CONFIG_MODULE_UNLOAD=y
|
CONFIG_MODULE_UNLOAD=y
|
||||||
CONFIG_MODULE_FORCE_UNLOAD=y
|
CONFIG_MODULE_FORCE_UNLOAD=y
|
||||||
# CONFIG_BLK_DEV_BSG is not set
|
# CONFIG_BLK_DEV_BSG is not set
|
||||||
# CONFIG_IOSCHED_DEADLINE is not set
|
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
CONFIG_ARCH_OMAP=y
|
CONFIG_ARCH_OMAP=y
|
||||||
CONFIG_ARCH_OMAP1=y
|
CONFIG_ARCH_OMAP1=y
|
||||||
CONFIG_OMAP_RESET_CLOCKS=y
|
CONFIG_OMAP_RESET_CLOCKS=y
|
||||||
|
@ -7,8 +7,6 @@ CONFIG_SLAB=y
|
|||||||
CONFIG_MODULES=y
|
CONFIG_MODULES=y
|
||||||
CONFIG_MODULE_UNLOAD=y
|
CONFIG_MODULE_UNLOAD=y
|
||||||
# CONFIG_BLK_DEV_BSG is not set
|
# CONFIG_BLK_DEV_BSG is not set
|
||||||
# CONFIG_IOSCHED_DEADLINE is not set
|
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
CONFIG_ARCH_PXA=y
|
CONFIG_ARCH_PXA=y
|
||||||
CONFIG_ARCH_PXA_PALM=y
|
CONFIG_ARCH_PXA_PALM=y
|
||||||
# CONFIG_MACH_PALMTX is not set
|
# CONFIG_MACH_PALMTX is not set
|
||||||
|
@ -13,8 +13,6 @@ CONFIG_MODULES=y
|
|||||||
CONFIG_MODULE_UNLOAD=y
|
CONFIG_MODULE_UNLOAD=y
|
||||||
CONFIG_MODULE_FORCE_UNLOAD=y
|
CONFIG_MODULE_FORCE_UNLOAD=y
|
||||||
# CONFIG_BLK_DEV_BSG is not set
|
# CONFIG_BLK_DEV_BSG is not set
|
||||||
# CONFIG_IOSCHED_DEADLINE is not set
|
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
CONFIG_ARCH_PXA=y
|
CONFIG_ARCH_PXA=y
|
||||||
CONFIG_MACH_PCM027=y
|
CONFIG_MACH_PCM027=y
|
||||||
CONFIG_MACH_PCM990_BASEBOARD=y
|
CONFIG_MACH_PCM990_BASEBOARD=y
|
||||||
|
@ -6,8 +6,6 @@ CONFIG_EXPERT=y
|
|||||||
# CONFIG_HOTPLUG is not set
|
# CONFIG_HOTPLUG is not set
|
||||||
# CONFIG_SHMEM is not set
|
# CONFIG_SHMEM is not set
|
||||||
CONFIG_MODULES=y
|
CONFIG_MODULES=y
|
||||||
# CONFIG_IOSCHED_DEADLINE is not set
|
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
CONFIG_ARCH_SA1100=y
|
CONFIG_ARCH_SA1100=y
|
||||||
CONFIG_SA1100_PLEB=y
|
CONFIG_SA1100_PLEB=y
|
||||||
CONFIG_ZBOOT_ROM_TEXT=0x0
|
CONFIG_ZBOOT_ROM_TEXT=0x0
|
||||||
|
@ -8,7 +8,6 @@ CONFIG_SLAB=y
|
|||||||
CONFIG_MODULES=y
|
CONFIG_MODULES=y
|
||||||
CONFIG_MODULE_UNLOAD=y
|
CONFIG_MODULE_UNLOAD=y
|
||||||
# CONFIG_BLK_DEV_BSG is not set
|
# CONFIG_BLK_DEV_BSG is not set
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
CONFIG_ARCH_MULTI_V6=y
|
CONFIG_ARCH_MULTI_V6=y
|
||||||
CONFIG_ARCH_REALVIEW=y
|
CONFIG_ARCH_REALVIEW=y
|
||||||
CONFIG_MACH_REALVIEW_EB=y
|
CONFIG_MACH_REALVIEW_EB=y
|
||||||
|
@ -14,8 +14,6 @@ CONFIG_MODULE_FORCE_LOAD=y
|
|||||||
CONFIG_MODULE_UNLOAD=y
|
CONFIG_MODULE_UNLOAD=y
|
||||||
CONFIG_MODULE_FORCE_UNLOAD=y
|
CONFIG_MODULE_FORCE_UNLOAD=y
|
||||||
# CONFIG_BLK_DEV_BSG is not set
|
# CONFIG_BLK_DEV_BSG is not set
|
||||||
# CONFIG_IOSCHED_DEADLINE is not set
|
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
CONFIG_ARCH_AT91=y
|
CONFIG_ARCH_AT91=y
|
||||||
CONFIG_SOC_SAMA5D2=y
|
CONFIG_SOC_SAMA5D2=y
|
||||||
CONFIG_SOC_SAMA5D3=y
|
CONFIG_SOC_SAMA5D3=y
|
||||||
@ -182,7 +180,6 @@ CONFIG_USB_GADGET=y
|
|||||||
CONFIG_USB_ATMEL_USBA=y
|
CONFIG_USB_ATMEL_USBA=y
|
||||||
CONFIG_USB_G_SERIAL=y
|
CONFIG_USB_G_SERIAL=y
|
||||||
CONFIG_MMC=y
|
CONFIG_MMC=y
|
||||||
# CONFIG_MMC_BLOCK_BOUNCE is not set
|
|
||||||
CONFIG_MMC_SDHCI=y
|
CONFIG_MMC_SDHCI=y
|
||||||
CONFIG_MMC_SDHCI_PLTFM=y
|
CONFIG_MMC_SDHCI_PLTFM=y
|
||||||
CONFIG_MMC_SDHCI_OF_AT91=y
|
CONFIG_MMC_SDHCI_OF_AT91=y
|
||||||
|
@ -14,8 +14,6 @@ CONFIG_EMBEDDED=y
|
|||||||
# CONFIG_VM_EVENT_COUNTERS is not set
|
# CONFIG_VM_EVENT_COUNTERS is not set
|
||||||
# CONFIG_SLUB_DEBUG is not set
|
# CONFIG_SLUB_DEBUG is not set
|
||||||
# CONFIG_BLK_DEV_BSG is not set
|
# CONFIG_BLK_DEV_BSG is not set
|
||||||
# CONFIG_IOSCHED_DEADLINE is not set
|
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
# CONFIG_MMU is not set
|
# CONFIG_MMU is not set
|
||||||
CONFIG_ARCH_STM32=y
|
CONFIG_ARCH_STM32=y
|
||||||
CONFIG_CPU_V7M_NUM_IRQ=240
|
CONFIG_CPU_V7M_NUM_IRQ=240
|
||||||
|
@ -85,6 +85,7 @@ CONFIG_BATTERY_AXP20X=y
|
|||||||
CONFIG_AXP20X_POWER=y
|
CONFIG_AXP20X_POWER=y
|
||||||
CONFIG_THERMAL=y
|
CONFIG_THERMAL=y
|
||||||
CONFIG_CPU_THERMAL=y
|
CONFIG_CPU_THERMAL=y
|
||||||
|
CONFIG_SUN8I_THERMAL=y
|
||||||
CONFIG_WATCHDOG=y
|
CONFIG_WATCHDOG=y
|
||||||
CONFIG_SUNXI_WATCHDOG=y
|
CONFIG_SUNXI_WATCHDOG=y
|
||||||
CONFIG_MFD_AC100=y
|
CONFIG_MFD_AC100=y
|
||||||
|
@ -11,7 +11,6 @@ CONFIG_MODULES=y
|
|||||||
CONFIG_MODULE_UNLOAD=y
|
CONFIG_MODULE_UNLOAD=y
|
||||||
# CONFIG_BLK_DEV_BSG is not set
|
# CONFIG_BLK_DEV_BSG is not set
|
||||||
CONFIG_PARTITION_ADVANCED=y
|
CONFIG_PARTITION_ADVANCED=y
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
# CONFIG_ARCH_MULTI_V7 is not set
|
# CONFIG_ARCH_MULTI_V7 is not set
|
||||||
CONFIG_ARCH_U300=y
|
CONFIG_ARCH_U300=y
|
||||||
CONFIG_MACH_U300_SPIDUMMY=y
|
CONFIG_MACH_U300_SPIDUMMY=y
|
||||||
@ -46,7 +45,6 @@ CONFIG_FB=y
|
|||||||
CONFIG_BACKLIGHT_CLASS_DEVICE=y
|
CONFIG_BACKLIGHT_CLASS_DEVICE=y
|
||||||
# CONFIG_USB_SUPPORT is not set
|
# CONFIG_USB_SUPPORT is not set
|
||||||
CONFIG_MMC=y
|
CONFIG_MMC=y
|
||||||
# CONFIG_MMC_BLOCK_BOUNCE is not set
|
|
||||||
CONFIG_MMC_ARMMMCI=y
|
CONFIG_MMC_ARMMMCI=y
|
||||||
CONFIG_RTC_CLASS=y
|
CONFIG_RTC_CLASS=y
|
||||||
# CONFIG_RTC_HCTOSYS is not set
|
# CONFIG_RTC_HCTOSYS is not set
|
||||||
|
@ -15,8 +15,6 @@ CONFIG_OPROFILE=y
|
|||||||
CONFIG_MODULES=y
|
CONFIG_MODULES=y
|
||||||
CONFIG_MODULE_UNLOAD=y
|
CONFIG_MODULE_UNLOAD=y
|
||||||
# CONFIG_BLK_DEV_BSG is not set
|
# CONFIG_BLK_DEV_BSG is not set
|
||||||
# CONFIG_IOSCHED_DEADLINE is not set
|
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
CONFIG_ARCH_VEXPRESS=y
|
CONFIG_ARCH_VEXPRESS=y
|
||||||
CONFIG_ARCH_VEXPRESS_DCSCB=y
|
CONFIG_ARCH_VEXPRESS_DCSCB=y
|
||||||
CONFIG_ARCH_VEXPRESS_TC2_PM=y
|
CONFIG_ARCH_VEXPRESS_TC2_PM=y
|
||||||
|
@ -9,7 +9,6 @@ CONFIG_SLAB=y
|
|||||||
CONFIG_MODULES=y
|
CONFIG_MODULES=y
|
||||||
CONFIG_MODULE_UNLOAD=y
|
CONFIG_MODULE_UNLOAD=y
|
||||||
# CONFIG_BLK_DEV_BSG is not set
|
# CONFIG_BLK_DEV_BSG is not set
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
CONFIG_ARCH_PXA=y
|
CONFIG_ARCH_PXA=y
|
||||||
CONFIG_ARCH_VIPER=y
|
CONFIG_ARCH_VIPER=y
|
||||||
CONFIG_IWMMXT=y
|
CONFIG_IWMMXT=y
|
||||||
|
@ -4,7 +4,6 @@ CONFIG_LOG_BUF_SHIFT=13
|
|||||||
CONFIG_MODULES=y
|
CONFIG_MODULES=y
|
||||||
CONFIG_MODULE_UNLOAD=y
|
CONFIG_MODULE_UNLOAD=y
|
||||||
# CONFIG_BLK_DEV_BSG is not set
|
# CONFIG_BLK_DEV_BSG is not set
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
CONFIG_ARCH_PXA=y
|
CONFIG_ARCH_PXA=y
|
||||||
CONFIG_MACH_ARCOM_ZEUS=y
|
CONFIG_MACH_ARCOM_ZEUS=y
|
||||||
CONFIG_PCCARD=m
|
CONFIG_PCCARD=m
|
||||||
@ -137,7 +136,6 @@ CONFIG_USB_MASS_STORAGE=m
|
|||||||
CONFIG_USB_G_SERIAL=m
|
CONFIG_USB_G_SERIAL=m
|
||||||
CONFIG_USB_G_PRINTER=m
|
CONFIG_USB_G_PRINTER=m
|
||||||
CONFIG_MMC=y
|
CONFIG_MMC=y
|
||||||
# CONFIG_MMC_BLOCK_BOUNCE is not set
|
|
||||||
CONFIG_MMC_PXA=y
|
CONFIG_MMC_PXA=y
|
||||||
CONFIG_NEW_LEDS=y
|
CONFIG_NEW_LEDS=y
|
||||||
CONFIG_LEDS_CLASS=m
|
CONFIG_LEDS_CLASS=m
|
||||||
|
@ -16,7 +16,6 @@ CONFIG_EMBEDDED=y
|
|||||||
CONFIG_PERF_EVENTS=y
|
CONFIG_PERF_EVENTS=y
|
||||||
CONFIG_SLAB=y
|
CONFIG_SLAB=y
|
||||||
# CONFIG_BLK_DEV_BSG is not set
|
# CONFIG_BLK_DEV_BSG is not set
|
||||||
# CONFIG_IOSCHED_CFQ is not set
|
|
||||||
CONFIG_ARCH_ZX=y
|
CONFIG_ARCH_ZX=y
|
||||||
CONFIG_SOC_ZX296702=y
|
CONFIG_SOC_ZX296702=y
|
||||||
# CONFIG_SWP_EMULATE is not set
|
# CONFIG_SWP_EMULATE is not set
|
||||||
|
@ -78,13 +78,10 @@ static int ftrace_modify_code(unsigned long pc, unsigned long old,
|
|||||||
{
|
{
|
||||||
unsigned long replaced;
|
unsigned long replaced;
|
||||||
|
|
||||||
if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) {
|
if (IS_ENABLED(CONFIG_THUMB2_KERNEL))
|
||||||
old = __opcode_to_mem_thumb32(old);
|
old = __opcode_to_mem_thumb32(old);
|
||||||
new = __opcode_to_mem_thumb32(new);
|
else
|
||||||
} else {
|
|
||||||
old = __opcode_to_mem_arm(old);
|
old = __opcode_to_mem_arm(old);
|
||||||
new = __opcode_to_mem_arm(new);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (validate) {
|
if (validate) {
|
||||||
if (probe_kernel_read(&replaced, (void *)pc, MCOUNT_INSN_SIZE))
|
if (probe_kernel_read(&replaced, (void *)pc, MCOUNT_INSN_SIZE))
|
||||||
|
@ -16,10 +16,10 @@ struct patch {
|
|||||||
unsigned int insn;
|
unsigned int insn;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef CONFIG_MMU
|
||||||
static DEFINE_RAW_SPINLOCK(patch_lock);
|
static DEFINE_RAW_SPINLOCK(patch_lock);
|
||||||
|
|
||||||
static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
|
static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
|
||||||
__acquires(&patch_lock)
|
|
||||||
{
|
{
|
||||||
unsigned int uintaddr = (uintptr_t) addr;
|
unsigned int uintaddr = (uintptr_t) addr;
|
||||||
bool module = !core_kernel_text(uintaddr);
|
bool module = !core_kernel_text(uintaddr);
|
||||||
@ -34,8 +34,6 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
|
|||||||
|
|
||||||
if (flags)
|
if (flags)
|
||||||
raw_spin_lock_irqsave(&patch_lock, *flags);
|
raw_spin_lock_irqsave(&patch_lock, *flags);
|
||||||
else
|
|
||||||
__acquire(&patch_lock);
|
|
||||||
|
|
||||||
set_fixmap(fixmap, page_to_phys(page));
|
set_fixmap(fixmap, page_to_phys(page));
|
||||||
|
|
||||||
@ -43,15 +41,19 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void __kprobes patch_unmap(int fixmap, unsigned long *flags)
|
static void __kprobes patch_unmap(int fixmap, unsigned long *flags)
|
||||||
__releases(&patch_lock)
|
|
||||||
{
|
{
|
||||||
clear_fixmap(fixmap);
|
clear_fixmap(fixmap);
|
||||||
|
|
||||||
if (flags)
|
if (flags)
|
||||||
raw_spin_unlock_irqrestore(&patch_lock, *flags);
|
raw_spin_unlock_irqrestore(&patch_lock, *flags);
|
||||||
else
|
|
||||||
__release(&patch_lock);
|
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
|
||||||
|
{
|
||||||
|
return addr;
|
||||||
|
}
|
||||||
|
static void __kprobes patch_unmap(int fixmap, unsigned long *flags) { }
|
||||||
|
#endif
|
||||||
|
|
||||||
void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap)
|
void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap)
|
||||||
{
|
{
|
||||||
@ -64,8 +66,6 @@ void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap)
|
|||||||
|
|
||||||
if (remap)
|
if (remap)
|
||||||
waddr = patch_map(addr, FIX_TEXT_POKE0, &flags);
|
waddr = patch_map(addr, FIX_TEXT_POKE0, &flags);
|
||||||
else
|
|
||||||
__acquire(&patch_lock);
|
|
||||||
|
|
||||||
if (thumb2 && __opcode_is_thumb16(insn)) {
|
if (thumb2 && __opcode_is_thumb16(insn)) {
|
||||||
*(u16 *)waddr = __opcode_to_mem_thumb16(insn);
|
*(u16 *)waddr = __opcode_to_mem_thumb16(insn);
|
||||||
@ -102,8 +102,7 @@ void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap)
|
|||||||
if (waddr != addr) {
|
if (waddr != addr) {
|
||||||
flush_kernel_vmap_range(waddr, twopage ? size / 2 : size);
|
flush_kernel_vmap_range(waddr, twopage ? size / 2 : size);
|
||||||
patch_unmap(FIX_TEXT_POKE0, &flags);
|
patch_unmap(FIX_TEXT_POKE0, &flags);
|
||||||
} else
|
}
|
||||||
__release(&patch_lock);
|
|
||||||
|
|
||||||
flush_icache_range((uintptr_t)(addr),
|
flush_icache_range((uintptr_t)(addr),
|
||||||
(uintptr_t)(addr) + size);
|
(uintptr_t)(addr) + size);
|
||||||
|
@ -11,7 +11,7 @@ config ARCH_NPCM7XX
|
|||||||
depends on ARCH_MULTI_V7
|
depends on ARCH_MULTI_V7
|
||||||
select PINCTRL_NPCM7XX
|
select PINCTRL_NPCM7XX
|
||||||
select NPCM7XX_TIMER
|
select NPCM7XX_TIMER
|
||||||
select ARCH_REQUIRE_GPIOLIB
|
select GPIOLIB
|
||||||
select CACHE_L2X0
|
select CACHE_L2X0
|
||||||
select ARM_GIC
|
select ARM_GIC
|
||||||
select HAVE_ARM_TWD if SMP
|
select HAVE_ARM_TWD if SMP
|
||||||
|
@ -161,10 +161,10 @@
|
|||||||
bus-range = <0x0 0x1>;
|
bus-range = <0x0 0x1>;
|
||||||
reg = <0x0 0x40000000 0x0 0x10000000>;
|
reg = <0x0 0x40000000 0x0 0x10000000>;
|
||||||
ranges = <0x2000000 0x0 0x50000000 0x0 0x50000000 0x0 0x10000000>;
|
ranges = <0x2000000 0x0 0x50000000 0x0 0x50000000 0x0 0x10000000>;
|
||||||
interrupt-map = <0 0 0 1 &gic GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>,
|
interrupt-map = <0 0 0 1 &gic 0 0 GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>,
|
||||||
<0 0 0 2 &gic GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>,
|
<0 0 0 2 &gic 0 0 GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>,
|
||||||
<0 0 0 3 &gic GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>,
|
<0 0 0 3 &gic 0 0 GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>,
|
||||||
<0 0 0 4 &gic GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>;
|
<0 0 0 4 &gic 0 0 GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>;
|
||||||
interrupt-map-mask = <0x0 0x0 0x0 0x7>;
|
interrupt-map-mask = <0x0 0x0 0x0 0x7>;
|
||||||
msi-map = <0x0 &its 0x0 0x10000>;
|
msi-map = <0x0 &its 0x0 0x10000>;
|
||||||
iommu-map = <0x0 &smmu 0x0 0x10000>;
|
iommu-map = <0x0 &smmu 0x0 0x10000>;
|
||||||
|
@ -452,6 +452,7 @@ CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y
|
|||||||
CONFIG_CPU_THERMAL=y
|
CONFIG_CPU_THERMAL=y
|
||||||
CONFIG_THERMAL_EMULATION=y
|
CONFIG_THERMAL_EMULATION=y
|
||||||
CONFIG_QORIQ_THERMAL=m
|
CONFIG_QORIQ_THERMAL=m
|
||||||
|
CONFIG_SUN8I_THERMAL=y
|
||||||
CONFIG_ROCKCHIP_THERMAL=m
|
CONFIG_ROCKCHIP_THERMAL=m
|
||||||
CONFIG_RCAR_THERMAL=y
|
CONFIG_RCAR_THERMAL=y
|
||||||
CONFIG_RCAR_GEN3_THERMAL=y
|
CONFIG_RCAR_GEN3_THERMAL=y
|
||||||
@ -547,6 +548,7 @@ CONFIG_ROCKCHIP_DW_MIPI_DSI=y
|
|||||||
CONFIG_ROCKCHIP_INNO_HDMI=y
|
CONFIG_ROCKCHIP_INNO_HDMI=y
|
||||||
CONFIG_DRM_RCAR_DU=m
|
CONFIG_DRM_RCAR_DU=m
|
||||||
CONFIG_DRM_SUN4I=m
|
CONFIG_DRM_SUN4I=m
|
||||||
|
CONFIG_DRM_SUN6I_DSI=m
|
||||||
CONFIG_DRM_SUN8I_DW_HDMI=m
|
CONFIG_DRM_SUN8I_DW_HDMI=m
|
||||||
CONFIG_DRM_SUN8I_MIXER=m
|
CONFIG_DRM_SUN8I_MIXER=m
|
||||||
CONFIG_DRM_MSM=m
|
CONFIG_DRM_MSM=m
|
||||||
@ -681,7 +683,7 @@ CONFIG_RTC_DRV_SNVS=m
|
|||||||
CONFIG_RTC_DRV_IMX_SC=m
|
CONFIG_RTC_DRV_IMX_SC=m
|
||||||
CONFIG_RTC_DRV_XGENE=y
|
CONFIG_RTC_DRV_XGENE=y
|
||||||
CONFIG_DMADEVICES=y
|
CONFIG_DMADEVICES=y
|
||||||
CONFIG_DMA_BCM2835=m
|
CONFIG_DMA_BCM2835=y
|
||||||
CONFIG_DMA_SUN6I=m
|
CONFIG_DMA_SUN6I=m
|
||||||
CONFIG_FSL_EDMA=y
|
CONFIG_FSL_EDMA=y
|
||||||
CONFIG_IMX_SDMA=y
|
CONFIG_IMX_SDMA=y
|
||||||
|
@ -33,7 +33,6 @@ static inline u32 disr_to_esr(u64 disr)
|
|||||||
|
|
||||||
asmlinkage void enter_from_user_mode(void);
|
asmlinkage void enter_from_user_mode(void);
|
||||||
void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
|
void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
|
||||||
void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
|
|
||||||
void do_undefinstr(struct pt_regs *regs);
|
void do_undefinstr(struct pt_regs *regs);
|
||||||
asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr);
|
asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr);
|
||||||
void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
|
void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
|
||||||
@ -47,7 +46,4 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr);
|
|||||||
void do_cp15instr(unsigned int esr, struct pt_regs *regs);
|
void do_cp15instr(unsigned int esr, struct pt_regs *regs);
|
||||||
void do_el0_svc(struct pt_regs *regs);
|
void do_el0_svc(struct pt_regs *regs);
|
||||||
void do_el0_svc_compat(struct pt_regs *regs);
|
void do_el0_svc_compat(struct pt_regs *regs);
|
||||||
void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr,
|
|
||||||
struct pt_regs *regs);
|
|
||||||
|
|
||||||
#endif /* __ASM_EXCEPTION_H */
|
#endif /* __ASM_EXCEPTION_H */
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
#ifdef CONFIG_ARM64_LSE_ATOMICS
|
#ifdef CONFIG_ARM64_LSE_ATOMICS
|
||||||
|
|
||||||
#define __LSE_PREAMBLE ".arch armv8-a+lse\n"
|
#define __LSE_PREAMBLE ".arch_extension lse\n"
|
||||||
|
|
||||||
#include <linux/compiler_types.h>
|
#include <linux/compiler_types.h>
|
||||||
#include <linux/export.h>
|
#include <linux/export.h>
|
||||||
|
@ -213,7 +213,7 @@ static inline unsigned long kaslr_offset(void)
|
|||||||
((__force __typeof__(addr))sign_extend64((__force u64)(addr), 55))
|
((__force __typeof__(addr))sign_extend64((__force u64)(addr), 55))
|
||||||
|
|
||||||
#define untagged_addr(addr) ({ \
|
#define untagged_addr(addr) ({ \
|
||||||
u64 __addr = (__force u64)addr; \
|
u64 __addr = (__force u64)(addr); \
|
||||||
__addr &= __untagged_addr(__addr); \
|
__addr &= __untagged_addr(__addr); \
|
||||||
(__force __typeof__(addr))__addr; \
|
(__force __typeof__(addr))__addr; \
|
||||||
})
|
})
|
||||||
|
@ -18,6 +18,10 @@
|
|||||||
* See:
|
* See:
|
||||||
* https://lore.kernel.org/lkml/20200110100612.GC2827@hirez.programming.kicks-ass.net
|
* https://lore.kernel.org/lkml/20200110100612.GC2827@hirez.programming.kicks-ass.net
|
||||||
*/
|
*/
|
||||||
#define vcpu_is_preempted(cpu) false
|
#define vcpu_is_preempted vcpu_is_preempted
|
||||||
|
static inline bool vcpu_is_preempted(int cpu)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* __ASM_SPINLOCK_H */
|
#endif /* __ASM_SPINLOCK_H */
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
#include <linux/sched.h>
|
#include <linux/sched.h>
|
||||||
#include <linux/types.h>
|
#include <linux/types.h>
|
||||||
|
|
||||||
|
#include <asm/archrandom.h>
|
||||||
#include <asm/cacheflush.h>
|
#include <asm/cacheflush.h>
|
||||||
#include <asm/fixmap.h>
|
#include <asm/fixmap.h>
|
||||||
#include <asm/kernel-pgtable.h>
|
#include <asm/kernel-pgtable.h>
|
||||||
|
@ -466,6 +466,13 @@ static void ssbs_thread_switch(struct task_struct *next)
|
|||||||
if (unlikely(next->flags & PF_KTHREAD))
|
if (unlikely(next->flags & PF_KTHREAD))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If all CPUs implement the SSBS extension, then we just need to
|
||||||
|
* context-switch the PSTATE field.
|
||||||
|
*/
|
||||||
|
if (cpu_have_feature(cpu_feature(SSBS)))
|
||||||
|
return;
|
||||||
|
|
||||||
/* If the mitigation is enabled, then we leave SSBS clear. */
|
/* If the mitigation is enabled, then we leave SSBS clear. */
|
||||||
if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) ||
|
if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) ||
|
||||||
test_tsk_thread_flag(next, TIF_SSBD))
|
test_tsk_thread_flag(next, TIF_SSBD))
|
||||||
@ -608,8 +615,6 @@ long get_tagged_addr_ctrl(void)
|
|||||||
* only prevents the tagged address ABI enabling via prctl() and does not
|
* only prevents the tagged address ABI enabling via prctl() and does not
|
||||||
* disable it for tasks that already opted in to the relaxed ABI.
|
* disable it for tasks that already opted in to the relaxed ABI.
|
||||||
*/
|
*/
|
||||||
static int zero;
|
|
||||||
static int one = 1;
|
|
||||||
|
|
||||||
static struct ctl_table tagged_addr_sysctl_table[] = {
|
static struct ctl_table tagged_addr_sysctl_table[] = {
|
||||||
{
|
{
|
||||||
@ -618,8 +623,8 @@ static struct ctl_table tagged_addr_sysctl_table[] = {
|
|||||||
.data = &tagged_addr_disabled,
|
.data = &tagged_addr_disabled,
|
||||||
.maxlen = sizeof(int),
|
.maxlen = sizeof(int),
|
||||||
.proc_handler = proc_dointvec_minmax,
|
.proc_handler = proc_dointvec_minmax,
|
||||||
.extra1 = &zero,
|
.extra1 = SYSCTL_ZERO,
|
||||||
.extra2 = &one,
|
.extra2 = SYSCTL_ONE,
|
||||||
},
|
},
|
||||||
{ }
|
{ }
|
||||||
};
|
};
|
||||||
|
@ -23,7 +23,7 @@
|
|||||||
#include <linux/irq.h>
|
#include <linux/irq.h>
|
||||||
#include <linux/delay.h>
|
#include <linux/delay.h>
|
||||||
#include <linux/clocksource.h>
|
#include <linux/clocksource.h>
|
||||||
#include <linux/clk-provider.h>
|
#include <linux/of_clk.h>
|
||||||
#include <linux/acpi.h>
|
#include <linux/acpi.h>
|
||||||
|
|
||||||
#include <clocksource/arm_arch_timer.h>
|
#include <clocksource/arm_arch_timer.h>
|
||||||
|
@ -9,7 +9,6 @@ config CSKY
|
|||||||
select ARCH_USE_QUEUED_RWLOCKS if NR_CPUS>2
|
select ARCH_USE_QUEUED_RWLOCKS if NR_CPUS>2
|
||||||
select COMMON_CLK
|
select COMMON_CLK
|
||||||
select CLKSRC_MMIO
|
select CLKSRC_MMIO
|
||||||
select CLKSRC_OF
|
|
||||||
select CSKY_MPINTC if CPU_CK860
|
select CSKY_MPINTC if CPU_CK860
|
||||||
select CSKY_MP_TIMER if CPU_CK860
|
select CSKY_MP_TIMER if CPU_CK860
|
||||||
select CSKY_APB_INTC
|
select CSKY_APB_INTC
|
||||||
@ -37,6 +36,7 @@ config CSKY
|
|||||||
select GX6605S_TIMER if CPU_CK610
|
select GX6605S_TIMER if CPU_CK610
|
||||||
select HAVE_ARCH_TRACEHOOK
|
select HAVE_ARCH_TRACEHOOK
|
||||||
select HAVE_ARCH_AUDITSYSCALL
|
select HAVE_ARCH_AUDITSYSCALL
|
||||||
|
select HAVE_COPY_THREAD_TLS
|
||||||
select HAVE_DYNAMIC_FTRACE
|
select HAVE_DYNAMIC_FTRACE
|
||||||
select HAVE_FUNCTION_TRACER
|
select HAVE_FUNCTION_TRACER
|
||||||
select HAVE_FUNCTION_GRAPH_TRACER
|
select HAVE_FUNCTION_GRAPH_TRACER
|
||||||
@ -47,8 +47,8 @@ config CSKY
|
|||||||
select HAVE_PERF_EVENTS
|
select HAVE_PERF_EVENTS
|
||||||
select HAVE_PERF_REGS
|
select HAVE_PERF_REGS
|
||||||
select HAVE_PERF_USER_STACK_DUMP
|
select HAVE_PERF_USER_STACK_DUMP
|
||||||
select HAVE_DMA_API_DEBUG
|
|
||||||
select HAVE_DMA_CONTIGUOUS
|
select HAVE_DMA_CONTIGUOUS
|
||||||
|
select HAVE_STACKPROTECTOR
|
||||||
select HAVE_SYSCALL_TRACEPOINTS
|
select HAVE_SYSCALL_TRACEPOINTS
|
||||||
select MAY_HAVE_SPARSE_IRQ
|
select MAY_HAVE_SPARSE_IRQ
|
||||||
select MODULES_USE_ELF_RELA if MODULES
|
select MODULES_USE_ELF_RELA if MODULES
|
||||||
@ -59,6 +59,11 @@ config CSKY
|
|||||||
select TIMER_OF
|
select TIMER_OF
|
||||||
select USB_ARCH_HAS_EHCI
|
select USB_ARCH_HAS_EHCI
|
||||||
select USB_ARCH_HAS_OHCI
|
select USB_ARCH_HAS_OHCI
|
||||||
|
select GENERIC_PCI_IOMAP
|
||||||
|
select HAVE_PCI
|
||||||
|
select PCI_DOMAINS_GENERIC if PCI
|
||||||
|
select PCI_SYSCALL if PCI
|
||||||
|
select PCI_MSI if PCI
|
||||||
|
|
||||||
config CPU_HAS_CACHEV2
|
config CPU_HAS_CACHEV2
|
||||||
bool
|
bool
|
||||||
@ -75,7 +80,7 @@ config CPU_HAS_TLBI
|
|||||||
config CPU_HAS_LDSTEX
|
config CPU_HAS_LDSTEX
|
||||||
bool
|
bool
|
||||||
help
|
help
|
||||||
For SMP, CPU needs "ldex&stex" instrcutions to atomic operations.
|
For SMP, CPU needs "ldex&stex" instructions for atomic operations.
|
||||||
|
|
||||||
config CPU_NEED_TLBSYNC
|
config CPU_NEED_TLBSYNC
|
||||||
bool
|
bool
|
||||||
@ -188,6 +193,40 @@ config CPU_PM_STOP
|
|||||||
bool "stop"
|
bool "stop"
|
||||||
endchoice
|
endchoice
|
||||||
|
|
||||||
|
menuconfig HAVE_TCM
|
||||||
|
bool "Tightly-Coupled/Sram Memory"
|
||||||
|
select GENERIC_ALLOCATOR
|
||||||
|
help
|
||||||
|
The implementation is not only used by TCM (Tightly-Coupled Memory)
|
||||||
|
but also used by SRAM on the SoC bus. It follows the existing Linux TCM
|
||||||
|
software interface, so that old TCM application code can be
|
||||||
|
re-used directly.
|
||||||
|
|
||||||
|
if HAVE_TCM
|
||||||
|
config ITCM_RAM_BASE
|
||||||
|
hex "ITCM ram base"
|
||||||
|
default 0xffffffff
|
||||||
|
|
||||||
|
config ITCM_NR_PAGES
|
||||||
|
int "Page count of ITCM size: NR*4KB"
|
||||||
|
range 1 256
|
||||||
|
default 32
|
||||||
|
|
||||||
|
config HAVE_DTCM
|
||||||
|
bool "DTCM Support"
|
||||||
|
|
||||||
|
config DTCM_RAM_BASE
|
||||||
|
hex "DTCM ram base"
|
||||||
|
depends on HAVE_DTCM
|
||||||
|
default 0xffffffff
|
||||||
|
|
||||||
|
config DTCM_NR_PAGES
|
||||||
|
int "Page count of DTCM size: NR*4KB"
|
||||||
|
depends on HAVE_DTCM
|
||||||
|
range 1 256
|
||||||
|
default 32
|
||||||
|
endif
|
||||||
|
|
||||||
config CPU_HAS_VDSP
|
config CPU_HAS_VDSP
|
||||||
bool "CPU has VDSP coprocessor"
|
bool "CPU has VDSP coprocessor"
|
||||||
depends on CPU_HAS_FPU && CPU_HAS_FPUV2
|
depends on CPU_HAS_FPU && CPU_HAS_FPUV2
|
||||||
@ -196,6 +235,10 @@ config CPU_HAS_FPU
|
|||||||
bool "CPU has FPU coprocessor"
|
bool "CPU has FPU coprocessor"
|
||||||
depends on CPU_CK807 || CPU_CK810 || CPU_CK860
|
depends on CPU_CK807 || CPU_CK810 || CPU_CK860
|
||||||
|
|
||||||
|
config CPU_HAS_ICACHE_INS
|
||||||
|
bool "CPU has Icache invalidate instructions"
|
||||||
|
depends on CPU_HAS_CACHEV2
|
||||||
|
|
||||||
config CPU_HAS_TEE
|
config CPU_HAS_TEE
|
||||||
bool "CPU has Trusted Execution Environment"
|
bool "CPU has Trusted Execution Environment"
|
||||||
depends on CPU_CK810
|
depends on CPU_CK810
|
||||||
@ -235,4 +278,6 @@ config HOTPLUG_CPU
|
|||||||
Say N if you want to disable CPU hotplug.
|
Say N if you want to disable CPU hotplug.
|
||||||
endmenu
|
endmenu
|
||||||
|
|
||||||
|
source "arch/csky/Kconfig.platforms"
|
||||||
|
|
||||||
source "kernel/Kconfig.hz"
|
source "kernel/Kconfig.hz"
|
||||||
|
9
arch/csky/Kconfig.platforms
Normal file
9
arch/csky/Kconfig.platforms
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
menu "Platform drivers selection"
|
||||||
|
|
||||||
|
config ARCH_CSKY_DW_APB_ICTL
|
||||||
|
bool "Select dw-apb interrupt controller"
|
||||||
|
select DW_APB_ICTL
|
||||||
|
default y
|
||||||
|
help
|
||||||
|
This enables support for snps dw-apb-ictl
|
||||||
|
endmenu
|
@ -48,9 +48,8 @@ extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, u
|
|||||||
|
|
||||||
#define flush_icache_page(vma, page) do {} while (0);
|
#define flush_icache_page(vma, page) do {} while (0);
|
||||||
#define flush_icache_range(start, end) cache_wbinv_range(start, end)
|
#define flush_icache_range(start, end) cache_wbinv_range(start, end)
|
||||||
|
#define flush_icache_mm_range(mm, start, end) cache_wbinv_range(start, end)
|
||||||
#define flush_icache_user_range(vma,page,addr,len) \
|
#define flush_icache_deferred(mm) do {} while (0);
|
||||||
flush_dcache_page(page)
|
|
||||||
|
|
||||||
#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
|
#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
|
||||||
do { \
|
do { \
|
||||||
|
@ -16,14 +16,16 @@
|
|||||||
#define LSAVE_A4 40
|
#define LSAVE_A4 40
|
||||||
#define LSAVE_A5 44
|
#define LSAVE_A5 44
|
||||||
|
|
||||||
|
#define usp ss1
|
||||||
|
|
||||||
.macro USPTOKSP
|
.macro USPTOKSP
|
||||||
mtcr sp, ss1
|
mtcr sp, usp
|
||||||
mfcr sp, ss0
|
mfcr sp, ss0
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
.macro KSPTOUSP
|
.macro KSPTOUSP
|
||||||
mtcr sp, ss0
|
mtcr sp, ss0
|
||||||
mfcr sp, ss1
|
mfcr sp, usp
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
.macro SAVE_ALL epc_inc
|
.macro SAVE_ALL epc_inc
|
||||||
@ -45,7 +47,13 @@
|
|||||||
add lr, r13
|
add lr, r13
|
||||||
stw lr, (sp, 8)
|
stw lr, (sp, 8)
|
||||||
|
|
||||||
|
mov lr, sp
|
||||||
|
addi lr, 32
|
||||||
|
addi lr, 32
|
||||||
|
addi lr, 16
|
||||||
|
bt 2f
|
||||||
mfcr lr, ss1
|
mfcr lr, ss1
|
||||||
|
2:
|
||||||
stw lr, (sp, 16)
|
stw lr, (sp, 16)
|
||||||
|
|
||||||
stw a0, (sp, 20)
|
stw a0, (sp, 20)
|
||||||
@ -79,9 +87,10 @@
|
|||||||
ldw a0, (sp, 12)
|
ldw a0, (sp, 12)
|
||||||
mtcr a0, epsr
|
mtcr a0, epsr
|
||||||
btsti a0, 31
|
btsti a0, 31
|
||||||
|
bt 1f
|
||||||
ldw a0, (sp, 16)
|
ldw a0, (sp, 16)
|
||||||
mtcr a0, ss1
|
mtcr a0, ss1
|
||||||
|
1:
|
||||||
ldw a0, (sp, 24)
|
ldw a0, (sp, 24)
|
||||||
ldw a1, (sp, 28)
|
ldw a1, (sp, 28)
|
||||||
ldw a2, (sp, 32)
|
ldw a2, (sp, 32)
|
||||||
@ -102,9 +111,9 @@
|
|||||||
addi sp, 32
|
addi sp, 32
|
||||||
addi sp, 8
|
addi sp, 8
|
||||||
|
|
||||||
bt 1f
|
bt 2f
|
||||||
KSPTOUSP
|
KSPTOUSP
|
||||||
1:
|
2:
|
||||||
rte
|
rte
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
|
@ -6,46 +6,80 @@
|
|||||||
#include <linux/mm.h>
|
#include <linux/mm.h>
|
||||||
#include <asm/cache.h>
|
#include <asm/cache.h>
|
||||||
|
|
||||||
void flush_icache_page(struct vm_area_struct *vma, struct page *page)
|
|
||||||
{
|
|
||||||
unsigned long start;
|
|
||||||
|
|
||||||
start = (unsigned long) kmap_atomic(page);
|
|
||||||
|
|
||||||
cache_wbinv_range(start, start + PAGE_SIZE);
|
|
||||||
|
|
||||||
kunmap_atomic((void *)start);
|
|
||||||
}
|
|
||||||
|
|
||||||
void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
|
|
||||||
unsigned long vaddr, int len)
|
|
||||||
{
|
|
||||||
unsigned long kaddr;
|
|
||||||
|
|
||||||
kaddr = (unsigned long) kmap_atomic(page) + (vaddr & ~PAGE_MASK);
|
|
||||||
|
|
||||||
cache_wbinv_range(kaddr, kaddr + len);
|
|
||||||
|
|
||||||
kunmap_atomic((void *)kaddr);
|
|
||||||
}
|
|
||||||
|
|
||||||
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
|
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
|
||||||
pte_t *pte)
|
pte_t *pte)
|
||||||
{
|
{
|
||||||
unsigned long addr, pfn;
|
unsigned long addr;
|
||||||
struct page *page;
|
struct page *page;
|
||||||
|
|
||||||
pfn = pte_pfn(*pte);
|
page = pfn_to_page(pte_pfn(*pte));
|
||||||
if (unlikely(!pfn_valid(pfn)))
|
if (page == ZERO_PAGE(0))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
page = pfn_to_page(pfn);
|
if (test_and_set_bit(PG_dcache_clean, &page->flags))
|
||||||
if (page == ZERO_PAGE(0))
|
|
||||||
return;
|
return;
|
||||||
|
|
||||||
addr = (unsigned long) kmap_atomic(page);
|
addr = (unsigned long) kmap_atomic(page);
|
||||||
|
|
||||||
cache_wbinv_range(addr, addr + PAGE_SIZE);
|
dcache_wb_range(addr, addr + PAGE_SIZE);
|
||||||
|
|
||||||
|
if (vma->vm_flags & VM_EXEC)
|
||||||
|
icache_inv_range(addr, addr + PAGE_SIZE);
|
||||||
|
|
||||||
kunmap_atomic((void *) addr);
|
kunmap_atomic((void *) addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void flush_icache_deferred(struct mm_struct *mm)
|
||||||
|
{
|
||||||
|
unsigned int cpu = smp_processor_id();
|
||||||
|
cpumask_t *mask = &mm->context.icache_stale_mask;
|
||||||
|
|
||||||
|
if (cpumask_test_cpu(cpu, mask)) {
|
||||||
|
cpumask_clear_cpu(cpu, mask);
|
||||||
|
/*
|
||||||
|
* Ensure the remote hart's writes are visible to this hart.
|
||||||
|
* This pairs with a barrier in flush_icache_mm.
|
||||||
|
*/
|
||||||
|
smp_mb();
|
||||||
|
local_icache_inv_all(NULL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void flush_icache_mm_range(struct mm_struct *mm,
|
||||||
|
unsigned long start, unsigned long end)
|
||||||
|
{
|
||||||
|
unsigned int cpu;
|
||||||
|
cpumask_t others, *mask;
|
||||||
|
|
||||||
|
preempt_disable();
|
||||||
|
|
||||||
|
#ifdef CONFIG_CPU_HAS_ICACHE_INS
|
||||||
|
if (mm == current->mm) {
|
||||||
|
icache_inv_range(start, end);
|
||||||
|
preempt_enable();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Mark every hart's icache as needing a flush for this MM. */
|
||||||
|
mask = &mm->context.icache_stale_mask;
|
||||||
|
cpumask_setall(mask);
|
||||||
|
|
||||||
|
/* Flush this hart's I$ now, and mark it as flushed. */
|
||||||
|
cpu = smp_processor_id();
|
||||||
|
cpumask_clear_cpu(cpu, mask);
|
||||||
|
local_icache_inv_all(NULL);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Flush the I$ of other harts concurrently executing, and mark them as
|
||||||
|
* flushed.
|
||||||
|
*/
|
||||||
|
cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
|
||||||
|
|
||||||
|
if (mm != current->active_mm || !cpumask_empty(&others)) {
|
||||||
|
on_each_cpu_mask(&others, local_icache_inv_all, NULL, 1);
|
||||||
|
cpumask_clear(mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
preempt_enable();
|
||||||
|
}
|
||||||
|
@ -13,24 +13,27 @@
|
|||||||
#define flush_cache_all() do { } while (0)
|
#define flush_cache_all() do { } while (0)
|
||||||
#define flush_cache_mm(mm) do { } while (0)
|
#define flush_cache_mm(mm) do { } while (0)
|
||||||
#define flush_cache_dup_mm(mm) do { } while (0)
|
#define flush_cache_dup_mm(mm) do { } while (0)
|
||||||
|
#define flush_cache_range(vma, start, end) do { } while (0)
|
||||||
#define flush_cache_range(vma, start, end) \
|
|
||||||
do { \
|
|
||||||
if (vma->vm_flags & VM_EXEC) \
|
|
||||||
icache_inv_all(); \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
#define flush_cache_page(vma, vmaddr, pfn) do { } while (0)
|
#define flush_cache_page(vma, vmaddr, pfn) do { } while (0)
|
||||||
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
|
|
||||||
#define flush_dcache_page(page) do { } while (0)
|
#define PG_dcache_clean PG_arch_1
|
||||||
|
|
||||||
|
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
|
||||||
|
static inline void flush_dcache_page(struct page *page)
|
||||||
|
{
|
||||||
|
if (test_bit(PG_dcache_clean, &page->flags))
|
||||||
|
clear_bit(PG_dcache_clean, &page->flags);
|
||||||
|
}
|
||||||
|
|
||||||
#define flush_dcache_mmap_lock(mapping) do { } while (0)
|
#define flush_dcache_mmap_lock(mapping) do { } while (0)
|
||||||
#define flush_dcache_mmap_unlock(mapping) do { } while (0)
|
#define flush_dcache_mmap_unlock(mapping) do { } while (0)
|
||||||
|
#define flush_icache_page(vma, page) do { } while (0)
|
||||||
|
|
||||||
#define flush_icache_range(start, end) cache_wbinv_range(start, end)
|
#define flush_icache_range(start, end) cache_wbinv_range(start, end)
|
||||||
|
|
||||||
void flush_icache_page(struct vm_area_struct *vma, struct page *page);
|
void flush_icache_mm_range(struct mm_struct *mm,
|
||||||
void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
|
unsigned long start, unsigned long end);
|
||||||
unsigned long vaddr, int len);
|
void flush_icache_deferred(struct mm_struct *mm);
|
||||||
|
|
||||||
#define flush_cache_vmap(start, end) do { } while (0)
|
#define flush_cache_vmap(start, end) do { } while (0)
|
||||||
#define flush_cache_vunmap(start, end) do { } while (0)
|
#define flush_cache_vunmap(start, end) do { } while (0)
|
||||||
@ -38,7 +41,13 @@ void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
|
|||||||
#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
|
#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
|
||||||
do { \
|
do { \
|
||||||
memcpy(dst, src, len); \
|
memcpy(dst, src, len); \
|
||||||
cache_wbinv_range((unsigned long)dst, (unsigned long)dst + len); \
|
if (vma->vm_flags & VM_EXEC) { \
|
||||||
|
dcache_wb_range((unsigned long)dst, \
|
||||||
|
(unsigned long)dst + len); \
|
||||||
|
flush_icache_mm_range(current->mm, \
|
||||||
|
(unsigned long)dst, \
|
||||||
|
(unsigned long)dst + len); \
|
||||||
|
} \
|
||||||
} while (0)
|
} while (0)
|
||||||
#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
|
#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
|
||||||
memcpy(dst, src, len)
|
memcpy(dst, src, len)
|
||||||
|
@ -31,7 +31,13 @@
|
|||||||
|
|
||||||
mfcr lr, epsr
|
mfcr lr, epsr
|
||||||
stw lr, (sp, 12)
|
stw lr, (sp, 12)
|
||||||
|
btsti lr, 31
|
||||||
|
bf 1f
|
||||||
|
addi lr, sp, 152
|
||||||
|
br 2f
|
||||||
|
1:
|
||||||
mfcr lr, usp
|
mfcr lr, usp
|
||||||
|
2:
|
||||||
stw lr, (sp, 16)
|
stw lr, (sp, 16)
|
||||||
|
|
||||||
stw a0, (sp, 20)
|
stw a0, (sp, 20)
|
||||||
@ -64,8 +70,10 @@
|
|||||||
mtcr a0, epc
|
mtcr a0, epc
|
||||||
ldw a0, (sp, 12)
|
ldw a0, (sp, 12)
|
||||||
mtcr a0, epsr
|
mtcr a0, epsr
|
||||||
|
btsti a0, 31
|
||||||
ldw a0, (sp, 16)
|
ldw a0, (sp, 16)
|
||||||
mtcr a0, usp
|
mtcr a0, usp
|
||||||
|
mtcr a0, ss0
|
||||||
|
|
||||||
#ifdef CONFIG_CPU_HAS_HILO
|
#ifdef CONFIG_CPU_HAS_HILO
|
||||||
ldw a0, (sp, 140)
|
ldw a0, (sp, 140)
|
||||||
@ -86,6 +94,9 @@
|
|||||||
addi sp, 40
|
addi sp, 40
|
||||||
ldm r16-r30, (sp)
|
ldm r16-r30, (sp)
|
||||||
addi sp, 72
|
addi sp, 72
|
||||||
|
bf 1f
|
||||||
|
mfcr sp, ss0
|
||||||
|
1:
|
||||||
rte
|
rte
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user