Merge branch 'perf/urgent' into perf/core, to pick up the latest fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
		
						commit
						1941011a8b
					
				
							
								
								
									
										4
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -100,6 +100,10 @@ modules.order | ||||
| /include/ksym/ | ||||
| /arch/*/include/generated/ | ||||
| 
 | ||||
| # Generated lkdtm tests | ||||
| /tools/testing/selftests/lkdtm/*.sh | ||||
| !/tools/testing/selftests/lkdtm/run.sh | ||||
| 
 | ||||
| # stgit generated dirs | ||||
| patches-* | ||||
| 
 | ||||
|  | ||||
							
								
								
									
										2
									
								
								COPYING
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								COPYING
									
									
									
									
									
								
							| @ -16,3 +16,5 @@ In addition, other licenses may also apply. Please see: | ||||
| 	Documentation/process/license-rules.rst | ||||
| 
 | ||||
| for more details. | ||||
| 
 | ||||
| All contributions to the Linux Kernel are subject to this COPYING file. | ||||
|  | ||||
							
								
								
									
										5
									
								
								CREDITS
									
									
									
									
									
								
							
							
						
						
									
										5
									
								
								CREDITS
									
									
									
									
									
								
							| @ -567,6 +567,11 @@ D: Original author of Amiga FFS filesystem | ||||
| S: Orlando, Florida | ||||
| S: USA | ||||
| 
 | ||||
| N: Paul Burton | ||||
| E: paulburton@kernel.org | ||||
| W: https://pburton.com | ||||
| D: MIPS maintainer 2018-2020 | ||||
| 
 | ||||
| N: Lennert Buytenhek | ||||
| E: kernel@wantstofly.org | ||||
| D: Original (2.4) rewrite of the ethernet bridging code | ||||
|  | ||||
| @ -62,6 +62,30 @@ Or more shorter, written as following:: | ||||
| In both styles, same key words are automatically merged when parsing it | ||||
| at boot time. So you can append similar trees or key-values. | ||||
| 
 | ||||
| Same-key Values | ||||
| --------------- | ||||
| 
 | ||||
| Two or more values or arrays are not allowed to share the same key. | ||||
| For example:: | ||||
| 
 | ||||
|  foo = bar, baz | ||||
|  foo = qux  # !ERROR! we can not re-define same key | ||||
| 
 | ||||
| If you want to append the value to an existing key as an array member, | ||||
| you can use ``+=`` operator. For example:: | ||||
| 
 | ||||
|  foo = bar, baz | ||||
|  foo += qux | ||||
| 
 | ||||
| In this case, the key ``foo`` has ``bar``, ``baz`` and ``qux``. | ||||
| 
 | ||||
| However, a sub-key and a value can not co-exist under a parent key. | ||||
| For example, the following config is NOT allowed:: | ||||
| 
 | ||||
|  foo = value1 | ||||
|  foo.bar = value2 # !ERROR! subkey "bar" and value "value1" can NOT co-exist | ||||
| 
 | ||||
| 
 | ||||
| Comments | ||||
| -------- | ||||
| 
 | ||||
| @ -102,9 +126,13 @@ Boot Kernel With a Boot Config | ||||
| ============================== | ||||
| 
 | ||||
| Since the boot configuration file is loaded with initrd, it will be added | ||||
| to the end of the initrd (initramfs) image file. The Linux kernel decodes | ||||
| the last part of the initrd image in memory to get the boot configuration | ||||
| data. | ||||
| to the end of the initrd (initramfs) image file with size, checksum and | ||||
| 12-byte magic word as below. | ||||
| 
 | ||||
| [initrd][bootconfig][size(u32)][checksum(u32)][#BOOTCONFIG\n] | ||||
| 
 | ||||
| The Linux kernel decodes the last part of the initrd image in memory to | ||||
| get the boot configuration data. | ||||
| Because of this "piggyback" method, there is no need to change or | ||||
| update the boot loader and the kernel image itself. | ||||
| 
 | ||||
|  | ||||
| @ -136,6 +136,10 @@ | ||||
| 			dynamic table installation which will install SSDT | ||||
| 			tables to /sys/firmware/acpi/tables/dynamic. | ||||
| 
 | ||||
| 	acpi_no_watchdog	[HW,ACPI,WDT] | ||||
| 			Ignore the ACPI-based watchdog interface (WDAT) and let | ||||
| 			a native driver control the watchdog device instead. | ||||
| 
 | ||||
| 	acpi_rsdp=	[ACPI,EFI,KEXEC] | ||||
| 			Pass the RSDP address to the kernel, mostly used | ||||
| 			on machines running EFI runtime service to boot the | ||||
|  | ||||
| @ -129,7 +129,7 @@ this logic. | ||||
| 
 | ||||
| As a single binary will need to support both 48-bit and 52-bit VA | ||||
| spaces, the VMEMMAP must be sized large enough for 52-bit VAs and | ||||
| also must be sized large enought to accommodate a fixed PAGE_OFFSET. | ||||
| also must be sized large enough to accommodate a fixed PAGE_OFFSET. | ||||
| 
 | ||||
| Most code in the kernel should not need to consider the VA_BITS, for | ||||
| code that does need to know the VA size the variables are | ||||
|  | ||||
| @ -44,8 +44,15 @@ The AArch64 Tagged Address ABI has two stages of relaxation depending | ||||
| how the user addresses are used by the kernel: | ||||
| 
 | ||||
| 1. User addresses not accessed by the kernel but used for address space | ||||
|    management (e.g. ``mmap()``, ``mprotect()``, ``madvise()``). The use | ||||
|    of valid tagged pointers in this context is always allowed. | ||||
|    management (e.g. ``mprotect()``, ``madvise()``). The use of valid | ||||
|    tagged pointers in this context is allowed with the exception of | ||||
|    ``brk()``, ``mmap()`` and the ``new_address`` argument to | ||||
|    ``mremap()`` as these have the potential to alias with existing | ||||
|    user addresses. | ||||
| 
 | ||||
|    NOTE: This behaviour changed in v5.6 and so some earlier kernels may | ||||
|    incorrectly accept valid tagged pointers for the ``brk()``, | ||||
|    ``mmap()`` and ``mremap()`` system calls. | ||||
| 
 | ||||
| 2. User addresses accessed by the kernel (e.g. ``write()``). This ABI | ||||
|    relaxation is disabled by default and the application thread needs to | ||||
|  | ||||
| @ -551,6 +551,7 @@ options to your ``.config``: | ||||
| Once the kernel is built and installed, a simple | ||||
| 
 | ||||
| .. code-block:: bash | ||||
| 
 | ||||
| 	modprobe example-test | ||||
| 
 | ||||
| ...will run the tests. | ||||
|  | ||||
| @ -43,9 +43,13 @@ properties: | ||||
|         - enum: | ||||
|           - allwinner,sun8i-h3-tcon-tv | ||||
|           - allwinner,sun50i-a64-tcon-tv | ||||
|           - allwinner,sun50i-h6-tcon-tv | ||||
|         - const: allwinner,sun8i-a83t-tcon-tv | ||||
| 
 | ||||
|       - items: | ||||
|         - enum: | ||||
|           - allwinner,sun50i-h6-tcon-tv | ||||
|         - const: allwinner,sun8i-r40-tcon-tv | ||||
| 
 | ||||
|   reg: | ||||
|     maxItems: 1 | ||||
| 
 | ||||
|  | ||||
| @ -1,9 +1,10 @@ | ||||
| Ilitek ILI210x/ILI2117/ILI251x touchscreen controller | ||||
| Ilitek ILI210x/ILI2117/ILI2120/ILI251x touchscreen controller | ||||
| 
 | ||||
| Required properties: | ||||
| - compatible: | ||||
|     ilitek,ili210x for ILI210x | ||||
|     ilitek,ili2117 for ILI2117 | ||||
|     ilitek,ili2120 for ILI2120 | ||||
|     ilitek,ili251x for ILI251x | ||||
| 
 | ||||
| - reg: The I2C address of the device | ||||
|  | ||||
| @ -33,24 +33,40 @@ properties: | ||||
|     maxItems: 1 | ||||
| 
 | ||||
|   clocks: | ||||
|     minItems: 2 | ||||
|     maxItems: 3 | ||||
|     items: | ||||
|       - description: The CSI interface clock | ||||
|       - description: The CSI ISP clock | ||||
|       - description: The CSI DRAM clock | ||||
|     oneOf: | ||||
|       - items: | ||||
|         - description: The CSI interface clock | ||||
|         - description: The CSI DRAM clock | ||||
| 
 | ||||
|       - items: | ||||
|         - description: The CSI interface clock | ||||
|         - description: The CSI ISP clock | ||||
|         - description: The CSI DRAM clock | ||||
| 
 | ||||
|   clock-names: | ||||
|     minItems: 2 | ||||
|     maxItems: 3 | ||||
|     items: | ||||
|       - const: bus | ||||
|       - const: isp | ||||
|       - const: ram | ||||
|     oneOf: | ||||
|       - items: | ||||
|         - const: bus | ||||
|         - const: ram | ||||
| 
 | ||||
|       - items: | ||||
|         - const: bus | ||||
|         - const: isp | ||||
|         - const: ram | ||||
| 
 | ||||
|   resets: | ||||
|     maxItems: 1 | ||||
| 
 | ||||
|   # FIXME: This should be made required eventually once every SoC will | ||||
|   # have the MBUS declared. | ||||
|   interconnects: | ||||
|     maxItems: 1 | ||||
| 
 | ||||
|   # FIXME: This should be made required eventually once every SoC will | ||||
|   # have the MBUS declared. | ||||
|   interconnect-names: | ||||
|     const: dma-mem | ||||
| 
 | ||||
|   # See ./video-interfaces.txt for details | ||||
|   port: | ||||
|     type: object | ||||
|  | ||||
| @ -347,6 +347,7 @@ examples: | ||||
|         interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>; | ||||
| 
 | ||||
|         #iommu-cells = <1>; | ||||
|         #reset-cells = <1>; | ||||
|     }; | ||||
| 
 | ||||
|     external-memory-controller@7001b000 { | ||||
| @ -363,20 +364,23 @@ examples: | ||||
|             timing-0 { | ||||
|                 clock-frequency = <12750000>; | ||||
| 
 | ||||
|                 nvidia,emc-zcal-cnt-long = <0x00000042>; | ||||
|                 nvidia,emc-auto-cal-interval = <0x001fffff>; | ||||
|                 nvidia,emc-ctt-term-ctrl = <0x00000802>; | ||||
|                 nvidia,emc-cfg = <0x73240000>; | ||||
|                 nvidia,emc-cfg-2 = <0x000008c5>; | ||||
|                 nvidia,emc-sel-dpd-ctrl = <0x00040128>; | ||||
|                 nvidia,emc-bgbias-ctl0 = <0x00000008>; | ||||
|                 nvidia,emc-auto-cal-config = <0xa1430000>; | ||||
|                 nvidia,emc-auto-cal-config2 = <0x00000000>; | ||||
|                 nvidia,emc-auto-cal-config3 = <0x00000000>; | ||||
|                 nvidia,emc-mode-reset = <0x80001221>; | ||||
|                 nvidia,emc-auto-cal-interval = <0x001fffff>; | ||||
|                 nvidia,emc-bgbias-ctl0 = <0x00000008>; | ||||
|                 nvidia,emc-cfg = <0x73240000>; | ||||
|                 nvidia,emc-cfg-2 = <0x000008c5>; | ||||
|                 nvidia,emc-ctt-term-ctrl = <0x00000802>; | ||||
|                 nvidia,emc-mode-1 = <0x80100003>; | ||||
|                 nvidia,emc-mode-2 = <0x80200008>; | ||||
|                 nvidia,emc-mode-4 = <0x00000000>; | ||||
|                 nvidia,emc-mode-reset = <0x80001221>; | ||||
|                 nvidia,emc-mrs-wait-cnt = <0x000e000e>; | ||||
|                 nvidia,emc-sel-dpd-ctrl = <0x00040128>; | ||||
|                 nvidia,emc-xm2dqspadctrl2 = <0x0130b118>; | ||||
|                 nvidia,emc-zcal-cnt-long = <0x00000042>; | ||||
|                 nvidia,emc-zcal-interval = <0x00000000>; | ||||
| 
 | ||||
|                 nvidia,emc-configuration = < | ||||
|                     0x00000000 /* EMC_RC */ | ||||
|  | ||||
| @ -124,7 +124,7 @@ not every application needs SDIO irq, e.g. MMC cards. | ||||
| 		pinctrl-1 = <&mmc1_idle>; | ||||
| 		pinctrl-2 = <&mmc1_sleep>; | ||||
| 		... | ||||
| 		interrupts-extended = <&intc 64 &gpio2 28 GPIO_ACTIVE_LOW>; | ||||
| 		interrupts-extended = <&intc 64 &gpio2 28 IRQ_TYPE_LEVEL_LOW>; | ||||
| 	}; | ||||
| 
 | ||||
| 	mmc1_idle : pinmux_cirq_pin { | ||||
|  | ||||
| @ -56,7 +56,6 @@ patternProperties: | ||||
| examples: | ||||
|   - | | ||||
|     davinci_mdio: mdio@5c030000 { | ||||
|         compatible = "ti,davinci_mdio"; | ||||
|         reg = <0x5c030000 0x1000>; | ||||
|         #address-cells = <1>; | ||||
|         #size-cells = <0>; | ||||
|  | ||||
| @ -71,9 +71,13 @@ b) Example for device tree:: | ||||
|             ipmb@10 { | ||||
|                     compatible = "ipmb-dev"; | ||||
|                     reg = <0x10>; | ||||
|                     i2c-protocol; | ||||
|             }; | ||||
|      }; | ||||
| 
 | ||||
| If data is to be transmitted using raw i2c block transfers instead of smbus, | ||||
| then "i2c-protocol" needs to be defined as above. | ||||
| 
 | ||||
| 2) Manually from Linux:: | ||||
| 
 | ||||
|      modprobe ipmb-dev-int | ||||
|  | ||||
| @ -134,7 +134,7 @@ Sequential zone files can only be written sequentially, starting from the file | ||||
| end, that is, write operations can only be append writes. Zonefs makes no | ||||
| attempt at accepting random writes and will fail any write request that has a | ||||
| start offset not corresponding to the end of the file, or to the end of the last | ||||
| write issued and still in-flight (for asynchrnous I/O operations). | ||||
| write issued and still in-flight (for asynchronous I/O operations). | ||||
| 
 | ||||
| Since dirty page writeback by the page cache does not guarantee a sequential | ||||
| write pattern, zonefs prevents buffered writes and writeable shared mappings | ||||
| @ -142,7 +142,7 @@ on sequential files. Only direct I/O writes are accepted for these files. | ||||
| zonefs relies on the sequential delivery of write I/O requests to the device | ||||
| implemented by the block layer elevator. An elevator implementing the sequential | ||||
| write feature for zoned block device (ELEVATOR_F_ZBD_SEQ_WRITE elevator feature) | ||||
| must be used. This type of elevator (e.g. mq-deadline) is the set by default | ||||
| must be used. This type of elevator (e.g. mq-deadline) is set by default | ||||
| for zoned block devices on device initialization. | ||||
| 
 | ||||
| There are no restrictions on the type of I/O used for read operations in | ||||
| @ -196,7 +196,7 @@ additional conditions that result in I/O errors. | ||||
|   may still happen in the case of a partial failure of a very large direct I/O | ||||
|   operation split into multiple BIOs/requests or asynchronous I/O operations. | ||||
|   If one of the write requests within the set of sequential write requests | ||||
|   issued to the device fails, all write requests after queued after it will | ||||
|   issued to the device fails, all write requests queued after it will | ||||
|   become unaligned and fail. | ||||
| 
 | ||||
| * Delayed write errors: similarly to regular block devices, if the device side | ||||
| @ -207,7 +207,7 @@ additional conditions that result in I/O errors. | ||||
|   causing all data to be dropped after the sector that caused the error. | ||||
| 
 | ||||
| All I/O errors detected by zonefs are notified to the user with an error code | ||||
| return for the system call that trigered or detected the error. The recovery | ||||
| return for the system call that triggered or detected the error. The recovery | ||||
| actions taken by zonefs in response to I/O errors depend on the I/O type (read | ||||
| vs write) and on the reason for the error (bad sector, unaligned writes or zone | ||||
| condition change). | ||||
| @ -222,7 +222,7 @@ condition change). | ||||
| * A zone condition change to read-only or offline also always triggers zonefs | ||||
|   I/O error recovery. | ||||
| 
 | ||||
| Zonefs minimal I/O error recovery may change a file size and a file access | ||||
| Zonefs minimal I/O error recovery may change a file size and file access | ||||
| permissions. | ||||
| 
 | ||||
| * File size changes: | ||||
| @ -237,7 +237,7 @@ permissions. | ||||
|   A file size may also be reduced to reflect a delayed write error detected on | ||||
|   fsync(): in this case, the amount of data effectively written in the zone may | ||||
|   be less than originally indicated by the file inode size. After such I/O | ||||
|   error, zonefs always fixes a file inode size to reflect the amount of data | ||||
|   error, zonefs always fixes the file inode size to reflect the amount of data | ||||
|   persistently stored in the file zone. | ||||
| 
 | ||||
| * Access permission changes: | ||||
| @ -281,11 +281,11 @@ Further notes: | ||||
|   permissions to read-only applies to all files. The file system is remounted | ||||
|   read-only. | ||||
| * Access permission and file size changes due to the device transitioning zones | ||||
|   to the offline condition are permanent. Remounting or reformating the device | ||||
|   to the offline condition are permanent. Remounting or reformatting the device | ||||
|   with mkfs.zonefs (mkzonefs) will not change back offline zone files to a good | ||||
|   state. | ||||
| * File access permission changes to read-only due to the device transitioning | ||||
|   zones to the read-only condition are permanent. Remounting or reformating | ||||
|   zones to the read-only condition are permanent. Remounting or reformatting | ||||
|   the device will not re-enable file write access. | ||||
| * File access permission changes implied by the remount-ro, zone-ro and | ||||
|   zone-offline mount options are temporary for zones in a good condition. | ||||
| @ -301,13 +301,13 @@ Mount options | ||||
| 
 | ||||
| zonefs defines the "errors=<behavior>" mount option to allow the user to specify | ||||
| zonefs behavior in response to I/O errors, inode size inconsistencies or zone | ||||
| condition chages. The defined behaviors are as follow: | ||||
| condition changes. The defined behaviors are as follows: | ||||
| * remount-ro (default) | ||||
| * zone-ro | ||||
| * zone-offline | ||||
| * repair | ||||
| 
 | ||||
| The I/O error actions defined for each behavior is detailed in the previous | ||||
| The I/O error actions defined for each behavior are detailed in the previous | ||||
| section. | ||||
| 
 | ||||
| Zonefs User Space Tools | ||||
|  | ||||
| @ -24,6 +24,7 @@ This driver implements support for Infineon Multi-phase XDPE122 family | ||||
| dual loop voltage regulators. | ||||
| The family includes XDPE12284 and XDPE12254 devices. | ||||
| The devices from this family are compliant with: | ||||
| 
 | ||||
| - Intel VR13 and VR13HC rev 1.3, IMVP8 rev 1.2 and IMVP9 rev 1.3 DC-DC | ||||
|   converter specification. | ||||
| - Intel SVID rev 1.9. protocol. | ||||
|  | ||||
| @ -765,7 +765,7 @@ is not sufficient this sometimes needs to be explicit. | ||||
| 	Example:: | ||||
| 
 | ||||
| 		#arch/x86/boot/Makefile | ||||
| 		subdir- := compressed/ | ||||
| 		subdir- := compressed | ||||
| 
 | ||||
| The above assignment instructs kbuild to descend down in the | ||||
| directory compressed/ when "make clean" is executed. | ||||
| @ -1379,9 +1379,6 @@ See subsequent chapter for the syntax of the Kbuild file. | ||||
| 	in arch/$(ARCH)/include/(uapi/)/asm, Kbuild will automatically generate | ||||
| 	a wrapper of the asm-generic one. | ||||
| 
 | ||||
| 	The convention is to list one subdir per line and | ||||
| 	preferably in alphabetic order. | ||||
| 
 | ||||
| 8 Kbuild Variables | ||||
| ================== | ||||
| 
 | ||||
|  | ||||
| @ -487,8 +487,9 @@ phy_register_fixup_for_id():: | ||||
| The stubs set one of the two matching criteria, and set the other one to | ||||
| match anything. | ||||
| 
 | ||||
| When phy_register_fixup() or \*_for_uid()/\*_for_id() is called at module, | ||||
| unregister fixup and free allocate memory are required. | ||||
| When phy_register_fixup() or \*_for_uid()/\*_for_id() is called at module load | ||||
| time, the module needs to unregister the fixup and free allocated memory when | ||||
| it's unloaded. | ||||
| 
 | ||||
| Call one of following function before unloading module:: | ||||
| 
 | ||||
|  | ||||
| @ -13,7 +13,6 @@ Power Management | ||||
|     drivers-testing | ||||
|     energy-model | ||||
|     freezing-of-tasks | ||||
|     interface | ||||
|     opp | ||||
|     pci | ||||
|     pm_qos_interface | ||||
|  | ||||
| @ -244,23 +244,23 @@ disclosure of a particular issue, unless requested by a response team or by | ||||
| an involved disclosed party. The current ambassadors list: | ||||
| 
 | ||||
|   ============= ======================================================== | ||||
|   ARM | ||||
|   ARM           Grant Likely <grant.likely@arm.com> | ||||
|   AMD		Tom Lendacky <tom.lendacky@amd.com> | ||||
|   IBM | ||||
|   Intel		Tony Luck <tony.luck@intel.com> | ||||
|   Qualcomm	Trilok Soni <tsoni@codeaurora.org> | ||||
| 
 | ||||
|   Microsoft	Sasha Levin <sashal@kernel.org> | ||||
|   Microsoft	James Morris <jamorris@linux.microsoft.com> | ||||
|   VMware | ||||
|   Xen		Andrew Cooper <andrew.cooper3@citrix.com> | ||||
| 
 | ||||
|   Canonical	Tyler Hicks <tyhicks@canonical.com> | ||||
|   Canonical	John Johansen <john.johansen@canonical.com> | ||||
|   Debian	Ben Hutchings <ben@decadent.org.uk> | ||||
|   Oracle	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | ||||
|   Red Hat	Josh Poimboeuf <jpoimboe@redhat.com> | ||||
|   SUSE		Jiri Kosina <jkosina@suse.cz> | ||||
| 
 | ||||
|   Amazon	Peter Bowen <pzb@amzn.com> | ||||
|   Amazon | ||||
|   Google	Kees Cook <keescook@chromium.org> | ||||
|   ============= ======================================================== | ||||
| 
 | ||||
|  | ||||
| @ -30,4 +30,4 @@ if [ -n "$parallel" ] ; then | ||||
| 	parallel="-j$parallel" | ||||
| fi | ||||
| 
 | ||||
| exec "$sphinx" "$parallel" "$@" | ||||
| exec "$sphinx" $parallel "$@" | ||||
|  | ||||
| @ -183,7 +183,7 @@ CVE分配 | ||||
|   VMware | ||||
|   Xen		Andrew Cooper <andrew.cooper3@citrix.com> | ||||
| 
 | ||||
|   Canonical	Tyler Hicks <tyhicks@canonical.com> | ||||
|   Canonical	John Johansen <john.johansen@canonical.com> | ||||
|   Debian	Ben Hutchings <ben@decadent.org.uk> | ||||
|   Oracle	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | ||||
|   Red Hat	Josh Poimboeuf <jpoimboe@redhat.com> | ||||
|  | ||||
| @ -1,9 +1,11 @@ | ||||
| ================== | ||||
| Guest halt polling | ||||
| ================== | ||||
| 
 | ||||
| The cpuidle_haltpoll driver, with the haltpoll governor, allows | ||||
| the guest vcpus to poll for a specified amount of time before | ||||
| halting. | ||||
| 
 | ||||
| This provides the following benefits to host side polling: | ||||
| 
 | ||||
| 	1) The POLL flag is set while polling is performed, which allows | ||||
| @ -29,18 +31,21 @@ Module Parameters | ||||
| The haltpoll governor has 5 tunable module parameters: | ||||
| 
 | ||||
| 1) guest_halt_poll_ns: | ||||
| 
 | ||||
| Maximum amount of time, in nanoseconds, that polling is | ||||
| performed before halting. | ||||
| 
 | ||||
| Default: 200000 | ||||
| 
 | ||||
| 2) guest_halt_poll_shrink: | ||||
| 
 | ||||
| Division factor used to shrink per-cpu guest_halt_poll_ns when | ||||
| wakeup event occurs after the global guest_halt_poll_ns. | ||||
| 
 | ||||
| Default: 2 | ||||
| 
 | ||||
| 3) guest_halt_poll_grow: | ||||
| 
 | ||||
| Multiplication factor used to grow per-cpu guest_halt_poll_ns | ||||
| when event occurs after per-cpu guest_halt_poll_ns | ||||
| but before global guest_halt_poll_ns. | ||||
| @ -48,6 +53,7 @@ but before global guest_halt_poll_ns. | ||||
| Default: 2 | ||||
| 
 | ||||
| 4) guest_halt_poll_grow_start: | ||||
| 
 | ||||
| The per-cpu guest_halt_poll_ns eventually reaches zero | ||||
| in case of an idle system. This value sets the initial | ||||
| per-cpu guest_halt_poll_ns when growing. This can | ||||
| @ -66,7 +72,7 @@ high once achieves global guest_halt_poll_ns value). | ||||
| 
 | ||||
| Default: Y | ||||
| 
 | ||||
| The module parameters can be set from the debugfs files in: | ||||
| The module parameters can be set from the debugfs files in:: | ||||
| 
 | ||||
| 	/sys/module/haltpoll/parameters/ | ||||
| 
 | ||||
| @ -74,5 +80,5 @@ Further Notes | ||||
| ============= | ||||
| 
 | ||||
| - Care should be taken when setting the guest_halt_poll_ns parameter as a | ||||
| large value has the potential to drive the cpu usage to 100% on a machine which | ||||
| would be almost entirely idle otherwise. | ||||
|   large value has the potential to drive the cpu usage to 100% on a machine | ||||
|   which would be almost entirely idle otherwise. | ||||
| @ -8,7 +8,9 @@ Linux Virtualization Support | ||||
|    :maxdepth: 2 | ||||
| 
 | ||||
|    kvm/index | ||||
|    uml/user_mode_linux | ||||
|    paravirt_ops | ||||
|    guest-halt-polling | ||||
| 
 | ||||
| .. only:: html and subproject | ||||
| 
 | ||||
|  | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -1,4 +1,8 @@ | ||||
| * Internal ABI between the kernel and HYP | ||||
| .. SPDX-License-Identifier: GPL-2.0 | ||||
| 
 | ||||
| ======================================= | ||||
| Internal ABI between the kernel and HYP | ||||
| ======================================= | ||||
| 
 | ||||
| This file documents the interaction between the Linux kernel and the | ||||
| hypervisor layer when running Linux as a hypervisor (for example | ||||
| @ -19,25 +23,31 @@ and only act on individual CPUs. | ||||
| Unless specified otherwise, any built-in hypervisor must implement | ||||
| these functions (see arch/arm{,64}/include/asm/virt.h): | ||||
| 
 | ||||
| * r0/x0 = HVC_SET_VECTORS | ||||
|   r1/x1 = vectors | ||||
| * :: | ||||
| 
 | ||||
|     r0/x0 = HVC_SET_VECTORS | ||||
|     r1/x1 = vectors | ||||
| 
 | ||||
|   Set HVBAR/VBAR_EL2 to 'vectors' to enable a hypervisor. 'vectors' | ||||
|   must be a physical address, and respect the alignment requirements | ||||
|   of the architecture. Only implemented by the initial stubs, not by | ||||
|   Linux hypervisors. | ||||
| 
 | ||||
| * r0/x0 = HVC_RESET_VECTORS | ||||
| * :: | ||||
| 
 | ||||
|     r0/x0 = HVC_RESET_VECTORS | ||||
| 
 | ||||
|   Turn HYP/EL2 MMU off, and reset HVBAR/VBAR_EL2 to the initial | ||||
|   stubs' exception vector value. This effectively disables an existing | ||||
|   hypervisor. | ||||
| 
 | ||||
| * r0/x0 = HVC_SOFT_RESTART | ||||
|   r1/x1 = restart address | ||||
|   x2 = x0's value when entering the next payload (arm64) | ||||
|   x3 = x1's value when entering the next payload (arm64) | ||||
|   x4 = x2's value when entering the next payload (arm64) | ||||
| * :: | ||||
| 
 | ||||
|     r0/x0 = HVC_SOFT_RESTART | ||||
|     r1/x1 = restart address | ||||
|     x2 = x0's value when entering the next payload (arm64) | ||||
|     x3 = x1's value when entering the next payload (arm64) | ||||
|     x4 = x2's value when entering the next payload (arm64) | ||||
| 
 | ||||
|   Mask all exceptions, disable the MMU, move the arguments into place | ||||
|   (arm64 only), and jump to the restart address while at HYP/EL2. This | ||||
							
								
								
									
										12
									
								
								Documentation/virt/kvm/arm/index.rst
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								Documentation/virt/kvm/arm/index.rst
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,12 @@ | ||||
| .. SPDX-License-Identifier: GPL-2.0 | ||||
| 
 | ||||
| === | ||||
| ARM | ||||
| === | ||||
| 
 | ||||
| .. toctree:: | ||||
|    :maxdepth: 2 | ||||
| 
 | ||||
|    hyp-abi | ||||
|    psci | ||||
|    pvtime | ||||
| @ -1,3 +1,9 @@ | ||||
| .. SPDX-License-Identifier: GPL-2.0 | ||||
| 
 | ||||
| ========================================= | ||||
| Power State Coordination Interface (PSCI) | ||||
| ========================================= | ||||
| 
 | ||||
| KVM implements the PSCI (Power State Coordination Interface) | ||||
| specification in order to provide services such as CPU on/off, reset | ||||
| and power-off to the guest. | ||||
| @ -30,32 +36,42 @@ The following register is defined: | ||||
|   - Affects the whole VM (even if the register view is per-vcpu) | ||||
| 
 | ||||
| * KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: | ||||
|   Holds the state of the firmware support to mitigate CVE-2017-5715, as | ||||
|   offered by KVM to the guest via a HVC call. The workaround is described | ||||
|   under SMCCC_ARCH_WORKAROUND_1 in [1]. | ||||
|     Holds the state of the firmware support to mitigate CVE-2017-5715, as | ||||
|     offered by KVM to the guest via a HVC call. The workaround is described | ||||
|     under SMCCC_ARCH_WORKAROUND_1 in [1]_. | ||||
| 
 | ||||
|   Accepted values are: | ||||
|     KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL: KVM does not offer | ||||
| 
 | ||||
|     KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL: | ||||
|       KVM does not offer | ||||
|       firmware support for the workaround. The mitigation status for the | ||||
|       guest is unknown. | ||||
|     KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL: The workaround HVC call is | ||||
|     KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL: | ||||
|       The workaround HVC call is | ||||
|       available to the guest and required for the mitigation. | ||||
|     KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED: The workaround HVC call | ||||
|     KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED: | ||||
|       The workaround HVC call | ||||
|       is available to the guest, but it is not needed on this VCPU. | ||||
| 
 | ||||
| * KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: | ||||
|   Holds the state of the firmware support to mitigate CVE-2018-3639, as | ||||
|   offered by KVM to the guest via a HVC call. The workaround is described | ||||
|   under SMCCC_ARCH_WORKAROUND_2 in [1]. | ||||
|     Holds the state of the firmware support to mitigate CVE-2018-3639, as | ||||
|     offered by KVM to the guest via a HVC call. The workaround is described | ||||
|     under SMCCC_ARCH_WORKAROUND_2 in [1]_. | ||||
| 
 | ||||
|   Accepted values are: | ||||
|     KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL: A workaround is not | ||||
| 
 | ||||
|     KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL: | ||||
|       A workaround is not | ||||
|       available. KVM does not offer firmware support for the workaround. | ||||
|     KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN: The workaround state is | ||||
|     KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN: | ||||
|       The workaround state is | ||||
|       unknown. KVM does not offer firmware support for the workaround. | ||||
|     KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: The workaround is available, | ||||
|     KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: | ||||
|       The workaround is available, | ||||
|       and can be disabled by a vCPU. If | ||||
|       KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED is set, it is active for | ||||
|       this vCPU. | ||||
|     KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: The workaround is | ||||
|       always active on this vCPU or it is not needed. | ||||
|     KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: | ||||
|       The workaround is always active on this vCPU or it is not needed. | ||||
| 
 | ||||
| [1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf | ||||
| .. [1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf | ||||
| @ -1,3 +1,6 @@ | ||||
| .. SPDX-License-Identifier: GPL-2.0 | ||||
| 
 | ||||
| =============================================== | ||||
| ARM Virtual Interrupt Translation Service (ITS) | ||||
| =============================================== | ||||
| 
 | ||||
| @ -12,22 +15,32 @@ There can be multiple ITS controllers per guest, each of them has to have | ||||
| a separate, non-overlapping MMIO region. | ||||
| 
 | ||||
| 
 | ||||
| Groups: | ||||
|   KVM_DEV_ARM_VGIC_GRP_ADDR | ||||
| Groups | ||||
| ====== | ||||
| 
 | ||||
| KVM_DEV_ARM_VGIC_GRP_ADDR | ||||
| ------------------------- | ||||
| 
 | ||||
|   Attributes: | ||||
|     KVM_VGIC_ITS_ADDR_TYPE (rw, 64-bit) | ||||
|       Base address in the guest physical address space of the GICv3 ITS | ||||
|       control register frame. | ||||
|       This address needs to be 64K aligned and the region covers 128K. | ||||
| 
 | ||||
|   Errors: | ||||
|     -E2BIG:  Address outside of addressable IPA range | ||||
|     -EINVAL: Incorrectly aligned address | ||||
|     -EEXIST: Address already configured | ||||
|     -EFAULT: Invalid user pointer for attr->addr. | ||||
|     -ENODEV: Incorrect attribute or the ITS is not supported. | ||||
| 
 | ||||
|     =======  ================================================= | ||||
|     -E2BIG   Address outside of addressable IPA range | ||||
|     -EINVAL  Incorrectly aligned address | ||||
|     -EEXIST  Address already configured | ||||
|     -EFAULT  Invalid user pointer for attr->addr. | ||||
|     -ENODEV  Incorrect attribute or the ITS is not supported. | ||||
|     =======  ================================================= | ||||
| 
 | ||||
| 
 | ||||
|   KVM_DEV_ARM_VGIC_GRP_CTRL | ||||
| KVM_DEV_ARM_VGIC_GRP_CTRL | ||||
| ------------------------- | ||||
| 
 | ||||
|   Attributes: | ||||
|     KVM_DEV_ARM_VGIC_CTRL_INIT | ||||
|       request the initialization of the ITS, no additional parameter in | ||||
| @ -58,16 +71,21 @@ Groups: | ||||
|       "ITS Restore Sequence". | ||||
| 
 | ||||
|   Errors: | ||||
|     -ENXIO:  ITS not properly configured as required prior to setting | ||||
|              this attribute | ||||
|     -ENOMEM: Memory shortage when allocating ITS internal data | ||||
|     -EINVAL: Inconsistent restored data | ||||
|     -EFAULT: Invalid guest ram access | ||||
|     -EBUSY:  One or more VCPUS are running | ||||
|     -EACCES: The virtual ITS is backed by a physical GICv4 ITS, and the | ||||
| 	     state is not available | ||||
| 
 | ||||
|   KVM_DEV_ARM_VGIC_GRP_ITS_REGS | ||||
|     =======  ========================================================== | ||||
|      -ENXIO  ITS not properly configured as required prior to setting | ||||
|              this attribute | ||||
|     -ENOMEM  Memory shortage when allocating ITS internal data | ||||
|     -EINVAL  Inconsistent restored data | ||||
|     -EFAULT  Invalid guest ram access | ||||
|     -EBUSY   One or more VCPUS are running | ||||
|     -EACCES  The virtual ITS is backed by a physical GICv4 ITS, and the | ||||
| 	     state is not available | ||||
|     =======  ========================================================== | ||||
| 
 | ||||
| KVM_DEV_ARM_VGIC_GRP_ITS_REGS | ||||
| ----------------------------- | ||||
| 
 | ||||
|   Attributes: | ||||
|       The attr field of kvm_device_attr encodes the offset of the | ||||
|       ITS register, relative to the ITS control frame base address | ||||
| @ -78,6 +96,7 @@ Groups: | ||||
|       be accessed with full length. | ||||
| 
 | ||||
|       Writes to read-only registers are ignored by the kernel except for: | ||||
| 
 | ||||
|       - GITS_CREADR. It must be restored otherwise commands in the queue | ||||
|         will be re-executed after restoring CWRITER. GITS_CREADR must be | ||||
|         restored before restoring the GITS_CTLR which is likely to enable the | ||||
| @ -91,30 +110,36 @@ Groups: | ||||
| 
 | ||||
|       For other registers, getting or setting a register has the same | ||||
|       effect as reading/writing the register on real hardware. | ||||
|   Errors: | ||||
|     -ENXIO: Offset does not correspond to any supported register | ||||
|     -EFAULT: Invalid user pointer for attr->addr | ||||
|     -EINVAL: Offset is not 64-bit aligned | ||||
|     -EBUSY: one or more VCPUS are running | ||||
| 
 | ||||
|  ITS Restore Sequence: | ||||
|  ------------------------- | ||||
|   Errors: | ||||
| 
 | ||||
|     =======  ==================================================== | ||||
|     -ENXIO   Offset does not correspond to any supported register | ||||
|     -EFAULT  Invalid user pointer for attr->addr | ||||
|     -EINVAL  Offset is not 64-bit aligned | ||||
|     -EBUSY   one or more VCPUS are running | ||||
|     =======  ==================================================== | ||||
| 
 | ||||
| ITS Restore Sequence: | ||||
| --------------------- | ||||
| 
 | ||||
| The following ordering must be followed when restoring the GIC and the ITS: | ||||
| 
 | ||||
| a) restore all guest memory and create vcpus | ||||
| b) restore all redistributors | ||||
| c) provide the ITS base address | ||||
|    (KVM_DEV_ARM_VGIC_GRP_ADDR) | ||||
| d) restore the ITS in the following order: | ||||
|    1. Restore GITS_CBASER | ||||
|    2. Restore all other GITS_ registers, except GITS_CTLR! | ||||
|    3. Load the ITS table data (KVM_DEV_ARM_ITS_RESTORE_TABLES) | ||||
|    4. Restore GITS_CTLR | ||||
| 
 | ||||
|      1. Restore GITS_CBASER | ||||
|      2. Restore all other ``GITS_`` registers, except GITS_CTLR! | ||||
|      3. Load the ITS table data (KVM_DEV_ARM_ITS_RESTORE_TABLES) | ||||
|      4. Restore GITS_CTLR | ||||
| 
 | ||||
| Then vcpus can be started. | ||||
| 
 | ||||
|  ITS Table ABI REV0: | ||||
|  ------------------- | ||||
| ITS Table ABI REV0: | ||||
| ------------------- | ||||
| 
 | ||||
|  Revision 0 of the ABI only supports the features of a virtual GICv3, and does | ||||
|  not support a virtual GICv4 with support for direct injection of virtual | ||||
| @ -125,12 +150,13 @@ Then vcpus can be started. | ||||
|  entries in the collection are listed in no particular order. | ||||
|  All entries are 8 bytes. | ||||
| 
 | ||||
|  Device Table Entry (DTE): | ||||
|  Device Table Entry (DTE):: | ||||
| 
 | ||||
|  bits:     | 63| 62 ... 49 | 48 ... 5 | 4 ... 0 | | ||||
|  values:   | V |   next    | ITT_addr |  Size   | | ||||
|    bits:     | 63| 62 ... 49 | 48 ... 5 | 4 ... 0 | | ||||
|    values:   | V |   next    | ITT_addr |  Size   | | ||||
| 
 | ||||
|  where: | ||||
| 
 | ||||
|  where; | ||||
|  - V indicates whether the entry is valid. If not, other fields | ||||
|    are not meaningful. | ||||
|  - next: equals 0 if this entry is the last one; otherwise it | ||||
| @ -140,32 +166,34 @@ Then vcpus can be started. | ||||
|  - Size specifies the supported number of bits for the EventID, | ||||
|    minus one | ||||
| 
 | ||||
|  Collection Table Entry (CTE): | ||||
|  Collection Table Entry (CTE):: | ||||
| 
 | ||||
|  bits:     | 63| 62 ..  52  | 51 ... 16 | 15  ...   0 | | ||||
|  values:   | V |    RES0    |  RDBase   |    ICID     | | ||||
|    bits:     | 63| 62 ..  52  | 51 ... 16 | 15  ...   0 | | ||||
|    values:   | V |    RES0    |  RDBase   |    ICID     | | ||||
| 
 | ||||
|  where: | ||||
| 
 | ||||
|  - V indicates whether the entry is valid. If not, other fields are | ||||
|    not meaningful. | ||||
|  - RES0: reserved field with Should-Be-Zero-or-Preserved behavior. | ||||
|  - RDBase is the PE number (GICR_TYPER.Processor_Number semantic). | ||||
|  - ICID is the collection ID | ||||
| 
 | ||||
|  Interrupt Translation Entry (ITE): | ||||
|  Interrupt Translation Entry (ITE):: | ||||
| 
 | ||||
|  bits:     | 63 ... 48 | 47 ... 16 | 15 ... 0 | | ||||
|  values:   |    next   |   pINTID  |  ICID    | | ||||
|    bits:     | 63 ... 48 | 47 ... 16 | 15 ... 0 | | ||||
|    values:   |    next   |   pINTID  |  ICID    | | ||||
| 
 | ||||
|  where: | ||||
| 
 | ||||
|  - next: equals 0 if this entry is the last one; otherwise it corresponds | ||||
|    to the EventID offset to the next ITE capped by 2^16 - 1. | ||||
|  - pINTID is the physical LPI ID; if zero, it means the entry is not valid | ||||
|    and other fields are not meaningful. | ||||
|  - ICID is the collection ID | ||||
| 
 | ||||
|  ITS Reset State: | ||||
|  ---------------- | ||||
| ITS Reset State: | ||||
| ---------------- | ||||
| 
 | ||||
| RESET returns the ITS to the same state that it was when first created and | ||||
| initialized. When the RESET command returns, the following things are | ||||
| @ -1,9 +1,12 @@ | ||||
| .. SPDX-License-Identifier: GPL-2.0 | ||||
| 
 | ||||
| ============================================================== | ||||
| ARM Virtual Generic Interrupt Controller v3 and later (VGICv3) | ||||
| ============================================================== | ||||
| 
 | ||||
| 
 | ||||
| Device types supported: | ||||
|   KVM_DEV_TYPE_ARM_VGIC_V3     ARM Generic Interrupt Controller v3.0 | ||||
|   - KVM_DEV_TYPE_ARM_VGIC_V3     ARM Generic Interrupt Controller v3.0 | ||||
| 
 | ||||
| Only one VGIC instance may be instantiated through this API.  The created VGIC | ||||
| will act as the VM interrupt controller, requiring emulated user-space devices | ||||
| @ -15,7 +18,8 @@ Creating a guest GICv3 device requires a host GICv3 as well. | ||||
| 
 | ||||
| Groups: | ||||
|   KVM_DEV_ARM_VGIC_GRP_ADDR | ||||
|   Attributes: | ||||
|    Attributes: | ||||
| 
 | ||||
|     KVM_VGIC_V3_ADDR_TYPE_DIST (rw, 64-bit) | ||||
|       Base address in the guest physical address space of the GICv3 distributor | ||||
|       register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V3. | ||||
| @ -29,21 +33,25 @@ Groups: | ||||
|       This address needs to be 64K aligned. | ||||
| 
 | ||||
|     KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION (rw, 64-bit) | ||||
|       The attribute data pointed to by kvm_device_attr.addr is a __u64 value: | ||||
|       bits:     | 63   ....  52  |  51   ....   16 | 15 - 12  |11 - 0 | ||||
|       values:   |     count      |       base      |  flags   | index | ||||
|       The attribute data pointed to by kvm_device_attr.addr is a __u64 value:: | ||||
| 
 | ||||
|         bits:     | 63   ....  52  |  51   ....   16 | 15 - 12  |11 - 0 | ||||
|         values:   |     count      |       base      |  flags   | index | ||||
| 
 | ||||
|       - index encodes the unique redistributor region index | ||||
|       - flags: reserved for future use, currently 0 | ||||
|       - base field encodes bits [51:16] of the guest physical base address | ||||
|         of the first redistributor in the region. | ||||
|       - count encodes the number of redistributors in the region. Must be | ||||
|         greater than 0. | ||||
| 
 | ||||
|       There are two 64K pages for each redistributor in the region and | ||||
|       redistributors are laid out contiguously within the region. Regions | ||||
|       are filled with redistributors in the index order. The sum of all | ||||
|       region count fields must be greater than or equal to the number of | ||||
|       VCPUs. Redistributor regions must be registered in the incremental | ||||
|       index order, starting from index 0. | ||||
| 
 | ||||
|       The characteristics of a specific redistributor region can be read | ||||
|       by presetting the index field in the attr data. | ||||
|       Only valid for KVM_DEV_TYPE_ARM_VGIC_V3. | ||||
| @ -52,23 +60,27 @@ Groups: | ||||
|   KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attributes. | ||||
| 
 | ||||
|   Errors: | ||||
|     -E2BIG:  Address outside of addressable IPA range | ||||
|     -EINVAL: Incorrectly aligned address, bad redistributor region | ||||
| 
 | ||||
|     =======  ============================================================= | ||||
|     -E2BIG   Address outside of addressable IPA range | ||||
|     -EINVAL  Incorrectly aligned address, bad redistributor region | ||||
|              count/index, mixed redistributor region attribute usage | ||||
|     -EEXIST: Address already configured | ||||
|     -ENOENT: Attempt to read the characteristics of a non existing | ||||
|     -EEXIST  Address already configured | ||||
|     -ENOENT  Attempt to read the characteristics of a non existing | ||||
|              redistributor region | ||||
|     -ENXIO:  The group or attribute is unknown/unsupported for this device | ||||
|     -ENXIO   The group or attribute is unknown/unsupported for this device | ||||
|              or hardware support is missing. | ||||
|     -EFAULT: Invalid user pointer for attr->addr. | ||||
|     -EFAULT  Invalid user pointer for attr->addr. | ||||
|     =======  ============================================================= | ||||
| 
 | ||||
| 
 | ||||
|   KVM_DEV_ARM_VGIC_GRP_DIST_REGS | ||||
|   KVM_DEV_ARM_VGIC_GRP_REDIST_REGS | ||||
|   Attributes: | ||||
|     The attr field of kvm_device_attr encodes two values: | ||||
|     bits:     | 63   ....  32  |  31   ....    0 | | ||||
|     values:   |      mpidr     |      offset     | | ||||
|   KVM_DEV_ARM_VGIC_GRP_DIST_REGS, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS | ||||
|    Attributes: | ||||
| 
 | ||||
|     The attr field of kvm_device_attr encodes two values:: | ||||
| 
 | ||||
|       bits:     | 63   ....  32  |  31   ....    0 | | ||||
|       values:   |      mpidr     |      offset     | | ||||
| 
 | ||||
|     All distributor regs are (rw, 32-bit) and kvm_device_attr.addr points to a | ||||
|     __u32 value.  64-bit registers must be accessed by separately accessing the | ||||
| @ -93,7 +105,8 @@ Groups: | ||||
|     redistributor is accessed.  The mpidr is ignored for the distributor. | ||||
| 
 | ||||
|     The mpidr encoding is based on the affinity information in the | ||||
|     architecture defined MPIDR, and the field is encoded as follows: | ||||
|     architecture defined MPIDR, and the field is encoded as follows:: | ||||
| 
 | ||||
|       | 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 | | ||||
|       |    Aff3    |    Aff2    |    Aff1    |    Aff0    | | ||||
| 
 | ||||
| @ -148,24 +161,30 @@ Groups: | ||||
|     ignored. | ||||
| 
 | ||||
|   Errors: | ||||
|     -ENXIO: Getting or setting this register is not yet supported | ||||
|     -EBUSY: One or more VCPUs are running | ||||
| 
 | ||||
|     ======  ===================================================== | ||||
|     -ENXIO  Getting or setting this register is not yet supported | ||||
|     -EBUSY  One or more VCPUs are running | ||||
|     ======  ===================================================== | ||||
| 
 | ||||
| 
 | ||||
|   KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS | ||||
|   Attributes: | ||||
|     The attr field of kvm_device_attr encodes two values: | ||||
|     bits:     | 63      ....       32 | 31  ....  16 | 15  ....  0 | | ||||
|     values:   |         mpidr         |      RES     |    instr    | | ||||
|    Attributes: | ||||
| 
 | ||||
|     The attr field of kvm_device_attr encodes two values:: | ||||
| 
 | ||||
|       bits:     | 63      ....       32 | 31  ....  16 | 15  ....  0 | | ||||
|       values:   |         mpidr         |      RES     |    instr    | | ||||
| 
 | ||||
|     The mpidr field encodes the CPU ID based on the affinity information in the | ||||
|     architecture defined MPIDR, and the field is encoded as follows: | ||||
|     architecture defined MPIDR, and the field is encoded as follows:: | ||||
| 
 | ||||
|       | 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 | | ||||
|       |    Aff3    |    Aff2    |    Aff1    |    Aff0    | | ||||
| 
 | ||||
|     The instr field encodes the system register to access based on the fields | ||||
|     defined in the A64 instruction set encoding for system register access | ||||
|     (RES means the bits are reserved for future use and should be zero): | ||||
|     (RES means the bits are reserved for future use and should be zero):: | ||||
| 
 | ||||
|       | 15 ... 14 | 13 ... 11 | 10 ... 7 | 6 ... 3 | 2 ... 0 | | ||||
|       |   Op 0    |    Op1    |    CRn   |   CRm   |   Op2   | | ||||
| @ -178,26 +197,35 @@ Groups: | ||||
| 
 | ||||
|     CPU interface registers access is not implemented for AArch32 mode. | ||||
|     Error -ENXIO is returned when accessed in AArch32 mode. | ||||
| 
 | ||||
|   Errors: | ||||
|     -ENXIO: Getting or setting this register is not yet supported | ||||
|     -EBUSY: VCPU is running | ||||
|     -EINVAL: Invalid mpidr or register value supplied | ||||
| 
 | ||||
|     =======  ===================================================== | ||||
|     -ENXIO   Getting or setting this register is not yet supported | ||||
|     -EBUSY   VCPU is running | ||||
|     -EINVAL  Invalid mpidr or register value supplied | ||||
|     =======  ===================================================== | ||||
| 
 | ||||
| 
 | ||||
|   KVM_DEV_ARM_VGIC_GRP_NR_IRQS | ||||
|   Attributes: | ||||
|    Attributes: | ||||
| 
 | ||||
|     A value describing the number of interrupts (SGI, PPI and SPI) for | ||||
|     this GIC instance, ranging from 64 to 1024, in increments of 32. | ||||
| 
 | ||||
|     kvm_device_attr.addr points to a __u32 value. | ||||
| 
 | ||||
|   Errors: | ||||
|     -EINVAL: Value set is out of the expected range | ||||
|     -EBUSY: Value has already be set. | ||||
| 
 | ||||
|     =======  ====================================== | ||||
|     -EINVAL  Value set is out of the expected range | ||||
|     -EBUSY   Value has already been set. | ||||
|     =======  ====================================== | ||||
| 
 | ||||
| 
 | ||||
|   KVM_DEV_ARM_VGIC_GRP_CTRL | ||||
|   Attributes: | ||||
|    Attributes: | ||||
| 
 | ||||
|     KVM_DEV_ARM_VGIC_CTRL_INIT | ||||
|       request the initialization of the VGIC, no additional parameter in | ||||
|       kvm_device_attr.addr. | ||||
| @ -205,20 +233,26 @@ Groups: | ||||
|       save all LPI pending bits into guest RAM pending tables. | ||||
| 
 | ||||
|       The first kB of the pending table is not altered by this operation. | ||||
| 
 | ||||
|   Errors: | ||||
|     -ENXIO: VGIC not properly configured as required prior to calling | ||||
|      this attribute | ||||
|     -ENODEV: no online VCPU | ||||
|     -ENOMEM: memory shortage when allocating vgic internal data | ||||
|     -EFAULT: Invalid guest ram access | ||||
|     -EBUSY:  One or more VCPUS are running | ||||
| 
 | ||||
|     =======  ======================================================== | ||||
|     -ENXIO   VGIC not properly configured as required prior to calling | ||||
|              this attribute | ||||
|     -ENODEV  no online VCPU | ||||
|     -ENOMEM  memory shortage when allocating vgic internal data | ||||
|     -EFAULT  Invalid guest ram access | ||||
|     -EBUSY   One or more VCPUS are running | ||||
|     =======  ======================================================== | ||||
| 
 | ||||
| 
 | ||||
|   KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO | ||||
|   Attributes: | ||||
|     The attr field of kvm_device_attr encodes the following values: | ||||
|     bits:     | 63      ....       32 | 31   ....    10 | 9  ....  0 | | ||||
|     values:   |         mpidr         |      info       |   vINTID   | | ||||
|    Attributes: | ||||
| 
 | ||||
|     The attr field of kvm_device_attr encodes the following values:: | ||||
| 
 | ||||
|       bits:     | 63      ....       32 | 31   ....    10 | 9  ....  0 | | ||||
|       values:   |         mpidr         |      info       |   vINTID   | | ||||
| 
 | ||||
|     The vINTID specifies which set of IRQs is reported on. | ||||
| 
 | ||||
| @ -228,6 +262,7 @@ Groups: | ||||
|       VGIC_LEVEL_INFO_LINE_LEVEL: | ||||
| 	Get/Set the input level of the IRQ line for a set of 32 contiguously | ||||
| 	numbered interrupts. | ||||
| 
 | ||||
| 	vINTID must be a multiple of 32. | ||||
| 
 | ||||
| 	kvm_device_attr.addr points to a __u32 value which will contain a | ||||
| @ -243,9 +278,14 @@ Groups: | ||||
|     reported with the same value regardless of the mpidr specified. | ||||
| 
 | ||||
|     The mpidr field encodes the CPU ID based on the affinity information in the | ||||
|     architecture defined MPIDR, and the field is encoded as follows: | ||||
|     architecture defined MPIDR, and the field is encoded as follows:: | ||||
| 
 | ||||
|       | 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 | | ||||
|       |    Aff3    |    Aff2    |    Aff1    |    Aff0    | | ||||
| 
 | ||||
|   Errors: | ||||
|     -EINVAL: vINTID is not multiple of 32 or | ||||
|      info field is not VGIC_LEVEL_INFO_LINE_LEVEL | ||||
| 
 | ||||
|     =======  ============================================= | ||||
|     -EINVAL  vINTID is not a multiple of 32 or info field | ||||
|              is not VGIC_LEVEL_INFO_LINE_LEVEL | ||||
|     =======  ============================================= | ||||
| @ -1,8 +1,12 @@ | ||||
| .. SPDX-License-Identifier: GPL-2.0 | ||||
| 
 | ||||
| ================================================== | ||||
| ARM Virtual Generic Interrupt Controller v2 (VGIC) | ||||
| ================================================== | ||||
| 
 | ||||
| Device types supported: | ||||
|   KVM_DEV_TYPE_ARM_VGIC_V2     ARM Generic Interrupt Controller v2.0 | ||||
| 
 | ||||
|   - KVM_DEV_TYPE_ARM_VGIC_V2     ARM Generic Interrupt Controller v2.0 | ||||
| 
 | ||||
| Only one VGIC instance may be instantiated through either this API or the | ||||
| legacy KVM_CREATE_IRQCHIP API.  The created VGIC will act as the VM interrupt | ||||
| @ -17,7 +21,8 @@ create both a GICv3 and GICv2 device on the same VM. | ||||
| 
 | ||||
| Groups: | ||||
|   KVM_DEV_ARM_VGIC_GRP_ADDR | ||||
|   Attributes: | ||||
|    Attributes: | ||||
| 
 | ||||
|     KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit) | ||||
|       Base address in the guest physical address space of the GIC distributor | ||||
|       register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2. | ||||
| @ -27,19 +32,25 @@ Groups: | ||||
|       Base address in the guest physical address space of the GIC virtual cpu | ||||
|       interface register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2. | ||||
|       This address needs to be 4K aligned and the region covers 4 KByte. | ||||
| 
 | ||||
|   Errors: | ||||
|     -E2BIG:  Address outside of addressable IPA range | ||||
|     -EINVAL: Incorrectly aligned address | ||||
|     -EEXIST: Address already configured | ||||
|     -ENXIO:  The group or attribute is unknown/unsupported for this device | ||||
| 
 | ||||
|     =======  ============================================================= | ||||
|     -E2BIG   Address outside of addressable IPA range | ||||
|     -EINVAL  Incorrectly aligned address | ||||
|     -EEXIST  Address already configured | ||||
|     -ENXIO   The group or attribute is unknown/unsupported for this device | ||||
|              or hardware support is missing. | ||||
|     -EFAULT: Invalid user pointer for attr->addr. | ||||
|     -EFAULT  Invalid user pointer for attr->addr. | ||||
|     =======  ============================================================= | ||||
| 
 | ||||
|   KVM_DEV_ARM_VGIC_GRP_DIST_REGS | ||||
|   Attributes: | ||||
|     The attr field of kvm_device_attr encodes two values: | ||||
|     bits:     | 63   ....  40 | 39 ..  32  |  31   ....    0 | | ||||
|     values:   |    reserved   | vcpu_index |      offset     | | ||||
|    Attributes: | ||||
| 
 | ||||
|     The attr field of kvm_device_attr encodes two values:: | ||||
| 
 | ||||
|       bits:     | 63   ....  40 | 39 ..  32  |  31   ....    0 | | ||||
|       values:   |    reserved   | vcpu_index |      offset     | | ||||
| 
 | ||||
|     All distributor regs are (rw, 32-bit) | ||||
| 
 | ||||
| @ -58,16 +69,22 @@ Groups: | ||||
|     KVM_DEV_ARM_VGIC_GRP_DIST_REGS and KVM_DEV_ARM_VGIC_GRP_CPU_REGS) to ensure | ||||
|     the expected behavior. Unless GICD_IIDR has been set from userspace, writes | ||||
|     to the interrupt group registers (GICD_IGROUPR) are ignored. | ||||
| 
 | ||||
|   Errors: | ||||
|     -ENXIO: Getting or setting this register is not yet supported | ||||
|     -EBUSY: One or more VCPUs are running | ||||
|     -EINVAL: Invalid vcpu_index supplied | ||||
| 
 | ||||
|     =======  ===================================================== | ||||
|     -ENXIO   Getting or setting this register is not yet supported | ||||
|     -EBUSY   One or more VCPUs are running | ||||
|     -EINVAL  Invalid vcpu_index supplied | ||||
|     =======  ===================================================== | ||||
| 
 | ||||
|   KVM_DEV_ARM_VGIC_GRP_CPU_REGS | ||||
|   Attributes: | ||||
|     The attr field of kvm_device_attr encodes two values: | ||||
|     bits:     | 63   ....  40 | 39 ..  32  |  31   ....    0 | | ||||
|     values:   |    reserved   | vcpu_index |      offset     | | ||||
|    Attributes: | ||||
| 
 | ||||
|     The attr field of kvm_device_attr encodes two values:: | ||||
| 
 | ||||
|       bits:     | 63   ....  40 | 39 ..  32  |  31   ....    0 | | ||||
|       values:   |    reserved   | vcpu_index |      offset     | | ||||
| 
 | ||||
|     All CPU interface regs are (rw, 32-bit) | ||||
| 
 | ||||
| @ -101,27 +118,39 @@ Groups: | ||||
|     value left by 3 places to obtain the actual priority mask level. | ||||
| 
 | ||||
|   Errors: | ||||
|     -ENXIO: Getting or setting this register is not yet supported | ||||
|     -EBUSY: One or more VCPUs are running | ||||
|     -EINVAL: Invalid vcpu_index supplied | ||||
| 
 | ||||
|     =======  ===================================================== | ||||
|     -ENXIO   Getting or setting this register is not yet supported | ||||
|     -EBUSY   One or more VCPUs are running | ||||
|     -EINVAL  Invalid vcpu_index supplied | ||||
|     =======  ===================================================== | ||||
| 
 | ||||
|   KVM_DEV_ARM_VGIC_GRP_NR_IRQS | ||||
|   Attributes: | ||||
|    Attributes: | ||||
| 
 | ||||
|     A value describing the number of interrupts (SGI, PPI and SPI) for | ||||
|     this GIC instance, ranging from 64 to 1024, in increments of 32. | ||||
| 
 | ||||
|   Errors: | ||||
|     -EINVAL: Value set is out of the expected range | ||||
|     -EBUSY: Value has already be set, or GIC has already been initialized | ||||
|             with default values. | ||||
| 
 | ||||
|     =======  =============================================================== | ||||
|     -EINVAL  Value set is out of the expected range | ||||
|     -EBUSY   Value has already been set, or GIC has already been initialized | ||||
|              with default values. | ||||
|     =======  =============================================================== | ||||
| 
 | ||||
|   KVM_DEV_ARM_VGIC_GRP_CTRL | ||||
|   Attributes: | ||||
|    Attributes: | ||||
| 
 | ||||
|     KVM_DEV_ARM_VGIC_CTRL_INIT | ||||
|       request the initialization of the VGIC or ITS, no additional parameter | ||||
|       in kvm_device_attr.addr. | ||||
| 
 | ||||
|   Errors: | ||||
|     -ENXIO: VGIC not properly configured as required prior to calling | ||||
|      this attribute | ||||
|     -ENODEV: no online VCPU | ||||
|     -ENOMEM: memory shortage when allocating vgic internal data | ||||
| 
 | ||||
|     =======  ========================================================= | ||||
|     -ENXIO   VGIC not properly configured as required prior to calling | ||||
|              this attribute | ||||
|     -ENODEV  no online VCPU | ||||
|     -ENOMEM  memory shortage when allocating vgic internal data | ||||
|     =======  ========================================================= | ||||
							
								
								
									
										19
									
								
								Documentation/virt/kvm/devices/index.rst
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								Documentation/virt/kvm/devices/index.rst
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,19 @@ | ||||
| .. SPDX-License-Identifier: GPL-2.0 | ||||
| 
 | ||||
| ======= | ||||
| Devices | ||||
| ======= | ||||
| 
 | ||||
| .. toctree:: | ||||
|    :maxdepth: 2 | ||||
| 
 | ||||
|    arm-vgic-its | ||||
|    arm-vgic | ||||
|    arm-vgic-v3 | ||||
|    mpic | ||||
|    s390_flic | ||||
|    vcpu | ||||
|    vfio | ||||
|    vm | ||||
|    xics | ||||
|    xive | ||||
| @ -1,9 +1,13 @@ | ||||
| .. SPDX-License-Identifier: GPL-2.0 | ||||
| 
 | ||||
| ========================= | ||||
| MPIC interrupt controller | ||||
| ========================= | ||||
| 
 | ||||
| Device types supported: | ||||
|   KVM_DEV_TYPE_FSL_MPIC_20     Freescale MPIC v2.0 | ||||
|   KVM_DEV_TYPE_FSL_MPIC_42     Freescale MPIC v4.2 | ||||
| 
 | ||||
|   - KVM_DEV_TYPE_FSL_MPIC_20     Freescale MPIC v2.0 | ||||
|   - KVM_DEV_TYPE_FSL_MPIC_42     Freescale MPIC v4.2 | ||||
| 
 | ||||
| Only one MPIC instance, of any type, may be instantiated.  The created | ||||
| MPIC will act as the system interrupt controller, connecting to each | ||||
| @ -11,7 +15,8 @@ vcpu's interrupt inputs. | ||||
| 
 | ||||
| Groups: | ||||
|   KVM_DEV_MPIC_GRP_MISC | ||||
|   Attributes: | ||||
|    Attributes: | ||||
| 
 | ||||
|     KVM_DEV_MPIC_BASE_ADDR (rw, 64-bit) | ||||
|       Base address of the 256 KiB MPIC register space.  Must be | ||||
|       naturally aligned.  A value of zero disables the mapping. | ||||
| @ -1,3 +1,6 @@ | ||||
| .. SPDX-License-Identifier: GPL-2.0 | ||||
| 
 | ||||
| ==================================== | ||||
| FLIC (floating interrupt controller) | ||||
| ==================================== | ||||
| 
 | ||||
| @ -31,8 +34,10 @@ Groups: | ||||
|     Copies all floating interrupts into a buffer provided by userspace. | ||||
|     When the buffer is too small it returns -ENOMEM, which is the indication | ||||
|     for userspace to try again with a bigger buffer. | ||||
| 
 | ||||
|     -ENOBUFS is returned when the allocation of a kernelspace buffer has | ||||
|     failed. | ||||
| 
 | ||||
|     -EFAULT is returned when copying data to userspace failed. | ||||
|     All interrupts remain pending, i.e. are not deleted from the list of | ||||
|     currently pending interrupts. | ||||
| @ -60,38 +65,41 @@ Groups: | ||||
| 
 | ||||
|   KVM_DEV_FLIC_ADAPTER_REGISTER | ||||
|     Register an I/O adapter interrupt source. Takes a kvm_s390_io_adapter | ||||
|     describing the adapter to register: | ||||
|     describing the adapter to register:: | ||||
| 
 | ||||
| struct kvm_s390_io_adapter { | ||||
| 	__u32 id; | ||||
| 	__u8 isc; | ||||
| 	__u8 maskable; | ||||
| 	__u8 swap; | ||||
| 	__u8 flags; | ||||
| }; | ||||
| 	struct kvm_s390_io_adapter { | ||||
| 		__u32 id; | ||||
| 		__u8 isc; | ||||
| 		__u8 maskable; | ||||
| 		__u8 swap; | ||||
| 		__u8 flags; | ||||
| 	}; | ||||
| 
 | ||||
|    id contains the unique id for the adapter, isc the I/O interruption subclass | ||||
|    to use, maskable whether this adapter may be masked (interrupts turned off), | ||||
|    swap whether the indicators need to be byte swapped, and flags contains | ||||
|    further characteristics of the adapter. | ||||
| 
 | ||||
|    Currently defined values for 'flags' are: | ||||
| 
 | ||||
|    - KVM_S390_ADAPTER_SUPPRESSIBLE: adapter is subject to AIS | ||||
|      (adapter-interrupt-suppression) facility. This flag only has an effect if | ||||
|      the AIS capability is enabled. | ||||
| 
 | ||||
|    Unknown flag values are ignored. | ||||
| 
 | ||||
| 
 | ||||
|   KVM_DEV_FLIC_ADAPTER_MODIFY | ||||
|     Modifies attributes of an existing I/O adapter interrupt source. Takes | ||||
|     a kvm_s390_io_adapter_req specifying the adapter and the operation: | ||||
|     a kvm_s390_io_adapter_req specifying the adapter and the operation:: | ||||
| 
 | ||||
| struct kvm_s390_io_adapter_req { | ||||
| 	__u32 id; | ||||
| 	__u8 type; | ||||
| 	__u8 mask; | ||||
| 	__u16 pad0; | ||||
| 	__u64 addr; | ||||
| }; | ||||
| 	struct kvm_s390_io_adapter_req { | ||||
| 		__u32 id; | ||||
| 		__u8 type; | ||||
| 		__u8 mask; | ||||
| 		__u16 pad0; | ||||
| 		__u64 addr; | ||||
| 	}; | ||||
| 
 | ||||
|     id specifies the adapter and type the operation. The supported operations | ||||
|     are: | ||||
| @ -103,8 +111,9 @@ struct kvm_s390_io_adapter_req { | ||||
|       perform a gmap translation for the guest address provided in addr, | ||||
|       pin a userspace page for the translated address and add it to the | ||||
|       list of mappings | ||||
|       Note: A new mapping will be created unconditionally; therefore, | ||||
|             the calling code should avoid making duplicate mappings. | ||||
| 
 | ||||
|       .. note:: A new mapping will be created unconditionally; therefore, | ||||
| 	        the calling code should avoid making duplicate mappings. | ||||
| 
 | ||||
|     KVM_S390_IO_ADAPTER_UNMAP | ||||
|       release a userspace page for the translated address specified in addr | ||||
| @ -112,16 +121,17 @@ struct kvm_s390_io_adapter_req { | ||||
| 
 | ||||
|   KVM_DEV_FLIC_AISM | ||||
|     modify the adapter-interruption-suppression mode for a given isc if the | ||||
|     AIS capability is enabled. Takes a kvm_s390_ais_req describing: | ||||
|     AIS capability is enabled. Takes a kvm_s390_ais_req describing:: | ||||
| 
 | ||||
| struct kvm_s390_ais_req { | ||||
| 	__u8 isc; | ||||
| 	__u16 mode; | ||||
| }; | ||||
| 	struct kvm_s390_ais_req { | ||||
| 		__u8 isc; | ||||
| 		__u16 mode; | ||||
| 	}; | ||||
| 
 | ||||
|     isc contains the target I/O interruption subclass, mode the target | ||||
|     adapter-interruption-suppression mode. The following modes are | ||||
|     currently supported: | ||||
| 
 | ||||
|     - KVM_S390_AIS_MODE_ALL: ALL-Interruptions Mode, i.e. airq injection | ||||
|       is always allowed; | ||||
|     - KVM_S390_AIS_MODE_SINGLE: SINGLE-Interruption Mode, i.e. airq | ||||
| @ -139,12 +149,12 @@ struct kvm_s390_ais_req { | ||||
| 
 | ||||
|   KVM_DEV_FLIC_AISM_ALL | ||||
|     Gets or sets the adapter-interruption-suppression mode for all ISCs. Takes | ||||
|     a kvm_s390_ais_all describing: | ||||
|     a kvm_s390_ais_all describing:: | ||||
| 
 | ||||
| struct kvm_s390_ais_all { | ||||
|        __u8 simm; /* Single-Interruption-Mode mask */ | ||||
|        __u8 nimm; /* No-Interruption-Mode mask * | ||||
| }; | ||||
| 	struct kvm_s390_ais_all { | ||||
| 	       __u8 simm; /* Single-Interruption-Mode mask */ | ||||
| 	       __u8 nimm; /* No-Interruption-Mode mask */ | ||||
| 	}; | ||||
| 
 | ||||
|     simm contains Single-Interruption-Mode mask for all ISCs, nimm contains | ||||
|     No-Interruption-Mode mask for all ISCs. Each bit in simm and nimm corresponds | ||||
| @ -159,5 +169,5 @@ ENXIO, as specified in the API documentation). It is not possible to conclude | ||||
| that a FLIC operation is unavailable based on the error code resulting from a | ||||
| usage attempt. | ||||
| 
 | ||||
| Note: The KVM_DEV_FLIC_CLEAR_IO_IRQ ioctl will return EINVAL in case a zero | ||||
| schid is specified. | ||||
| .. note:: The KVM_DEV_FLIC_CLEAR_IO_IRQ ioctl will return EINVAL in case a | ||||
| 	  zero schid is specified. | ||||
							
								
								
									
										114
									
								
								Documentation/virt/kvm/devices/vcpu.rst
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										114
									
								
								Documentation/virt/kvm/devices/vcpu.rst
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,114 @@ | ||||
| .. SPDX-License-Identifier: GPL-2.0 | ||||
| 
 | ||||
| ====================== | ||||
| Generic vcpu interface | ||||
| ====================== | ||||
| 
 | ||||
| The virtual cpu "device" also accepts the ioctls KVM_SET_DEVICE_ATTR, | ||||
| KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same struct | ||||
| kvm_device_attr as other devices, but targets VCPU-wide settings and controls. | ||||
| 
 | ||||
| The groups and attributes per virtual cpu, if any, are architecture specific. | ||||
| 
 | ||||
| 1. GROUP: KVM_ARM_VCPU_PMU_V3_CTRL | ||||
| ================================== | ||||
| 
 | ||||
| :Architectures: ARM64 | ||||
| 
 | ||||
| 1.1. ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_IRQ | ||||
| --------------------------------------- | ||||
| 
 | ||||
| :Parameters: in kvm_device_attr.addr the address for PMU overflow interrupt is a | ||||
| 	     pointer to an int | ||||
| 
 | ||||
| Returns: | ||||
| 
 | ||||
| 	 =======  ======================================================== | ||||
| 	 -EBUSY   The PMU overflow interrupt is already set | ||||
| 	 -ENXIO   The overflow interrupt not set when attempting to get it | ||||
| 	 -ENODEV  PMUv3 not supported | ||||
| 	 -EINVAL  Invalid PMU overflow interrupt number supplied or | ||||
| 		  trying to set the IRQ number without using an in-kernel | ||||
| 		  irqchip. | ||||
| 	 =======  ======================================================== | ||||
| 
 | ||||
| A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt | ||||
| number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt | ||||
| type must be the same for each vcpu. As a PPI, the interrupt number is the same for | ||||
| all vcpus, while as an SPI it must be a separate number per vcpu. | ||||
| 
 | ||||
| 1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT | ||||
| --------------------------------------- | ||||
| 
 | ||||
| :Parameters: no additional parameter in kvm_device_attr.addr | ||||
| 
 | ||||
| Returns: | ||||
| 
 | ||||
| 	 =======  ====================================================== | ||||
| 	 -ENODEV  PMUv3 not supported or GIC not initialized | ||||
| 	 -ENXIO   PMUv3 not properly configured or in-kernel irqchip not | ||||
| 		  configured as required prior to calling this attribute | ||||
| 	 -EBUSY   PMUv3 already initialized | ||||
| 	 =======  ====================================================== | ||||
| 
 | ||||
| Request the initialization of the PMUv3.  If using the PMUv3 with an in-kernel | ||||
| virtual GIC implementation, this must be done after initializing the in-kernel | ||||
| irqchip. | ||||
| 
 | ||||
| 
 | ||||
| 2. GROUP: KVM_ARM_VCPU_TIMER_CTRL | ||||
| ================================= | ||||
| 
 | ||||
| :Architectures: ARM, ARM64 | ||||
| 
 | ||||
| 2.1. ATTRIBUTES: KVM_ARM_VCPU_TIMER_IRQ_VTIMER, KVM_ARM_VCPU_TIMER_IRQ_PTIMER | ||||
| ----------------------------------------------------------------------------- | ||||
| 
 | ||||
| :Parameters: in kvm_device_attr.addr the address for the timer interrupt is a | ||||
| 	     pointer to an int | ||||
| 
 | ||||
| Returns: | ||||
| 
 | ||||
| 	 =======  ================================= | ||||
| 	 -EINVAL  Invalid timer interrupt number | ||||
| 	 -EBUSY   One or more VCPUs has already run | ||||
| 	 =======  ================================= | ||||
| 
 | ||||
| A value describing the architected timer interrupt number when connected to an | ||||
| in-kernel virtual GIC.  These must be a PPI (16 <= intid < 32).  Setting the | ||||
| attribute overrides the default values (see below). | ||||
| 
 | ||||
| =============================  ========================================== | ||||
| KVM_ARM_VCPU_TIMER_IRQ_VTIMER  The EL1 virtual timer intid (default: 27) | ||||
| KVM_ARM_VCPU_TIMER_IRQ_PTIMER  The EL1 physical timer intid (default: 30) | ||||
| =============================  ========================================== | ||||
| 
 | ||||
| Setting the same PPI for different timers will prevent the VCPUs from running. | ||||
| Setting the interrupt number on a VCPU configures all VCPUs created at that | ||||
| time to use the number provided for a given timer, overwriting any previously | ||||
| configured values on other VCPUs.  Userspace should configure the interrupt | ||||
| numbers on at least one VCPU after creating all VCPUs and before running any | ||||
| VCPUs. | ||||
| 
 | ||||
| 3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL | ||||
| ================================== | ||||
| 
 | ||||
| :Architectures: ARM64 | ||||
| 
 | ||||
| 3.1 ATTRIBUTE: KVM_ARM_VCPU_PVTIME_IPA | ||||
| -------------------------------------- | ||||
| 
 | ||||
| :Parameters: 64-bit base address | ||||
| 
 | ||||
| Returns: | ||||
| 
 | ||||
| 	 =======  ====================================== | ||||
| 	 -ENXIO   Stolen time not implemented | ||||
| 	 -EEXIST  Base address already set for this VCPU | ||||
| 	 -EINVAL  Base address not 64 byte aligned | ||||
| 	 =======  ====================================== | ||||
| 
 | ||||
| Specifies the base address of the stolen time structure for this VCPU. The | ||||
| base address must be 64 byte aligned and exist within a valid guest memory | ||||
| region. See Documentation/virt/kvm/arm/pvtime.txt for more information | ||||
| including the layout of the stolen time structure. | ||||
| @ -1,76 +0,0 @@ | ||||
| Generic vcpu interface | ||||
| ==================================== | ||||
| 
 | ||||
| The virtual cpu "device" also accepts the ioctls KVM_SET_DEVICE_ATTR, | ||||
| KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same struct | ||||
| kvm_device_attr as other devices, but targets VCPU-wide settings and controls. | ||||
| 
 | ||||
| The groups and attributes per virtual cpu, if any, are architecture specific. | ||||
| 
 | ||||
| 1. GROUP: KVM_ARM_VCPU_PMU_V3_CTRL | ||||
| Architectures: ARM64 | ||||
| 
 | ||||
| 1.1. ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_IRQ | ||||
| Parameters: in kvm_device_attr.addr the address for PMU overflow interrupt is a | ||||
|             pointer to an int | ||||
| Returns: -EBUSY: The PMU overflow interrupt is already set | ||||
|          -ENXIO: The overflow interrupt not set when attempting to get it | ||||
|          -ENODEV: PMUv3 not supported | ||||
|          -EINVAL: Invalid PMU overflow interrupt number supplied or | ||||
|                   trying to set the IRQ number without using an in-kernel | ||||
|                   irqchip. | ||||
| 
 | ||||
| A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt | ||||
| number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt | ||||
| type must be same for each vcpu. As a PPI, the interrupt number is the same for | ||||
| all vcpus, while as an SPI it must be a separate number per vcpu. | ||||
| 
 | ||||
| 1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT | ||||
| Parameters: no additional parameter in kvm_device_attr.addr | ||||
| Returns: -ENODEV: PMUv3 not supported or GIC not initialized | ||||
|          -ENXIO: PMUv3 not properly configured or in-kernel irqchip not | ||||
|                  configured as required prior to calling this attribute | ||||
|          -EBUSY: PMUv3 already initialized | ||||
| 
 | ||||
| Request the initialization of the PMUv3.  If using the PMUv3 with an in-kernel | ||||
| virtual GIC implementation, this must be done after initializing the in-kernel | ||||
| irqchip. | ||||
| 
 | ||||
| 
 | ||||
| 2. GROUP: KVM_ARM_VCPU_TIMER_CTRL | ||||
| Architectures: ARM,ARM64 | ||||
| 
 | ||||
| 2.1. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_VTIMER | ||||
| 2.2. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_PTIMER | ||||
| Parameters: in kvm_device_attr.addr the address for the timer interrupt is a | ||||
|             pointer to an int | ||||
| Returns: -EINVAL: Invalid timer interrupt number | ||||
|          -EBUSY:  One or more VCPUs has already run | ||||
| 
 | ||||
| A value describing the architected timer interrupt number when connected to an | ||||
| in-kernel virtual GIC.  These must be a PPI (16 <= intid < 32).  Setting the | ||||
| attribute overrides the default values (see below). | ||||
| 
 | ||||
| KVM_ARM_VCPU_TIMER_IRQ_VTIMER: The EL1 virtual timer intid (default: 27) | ||||
| KVM_ARM_VCPU_TIMER_IRQ_PTIMER: The EL1 physical timer intid (default: 30) | ||||
| 
 | ||||
| Setting the same PPI for different timers will prevent the VCPUs from running. | ||||
| Setting the interrupt number on a VCPU configures all VCPUs created at that | ||||
| time to use the number provided for a given timer, overwriting any previously | ||||
| configured values on other VCPUs.  Userspace should configure the interrupt | ||||
| numbers on at least one VCPU after creating all VCPUs and before running any | ||||
| VCPUs. | ||||
| 
 | ||||
| 3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL | ||||
| Architectures: ARM64 | ||||
| 
 | ||||
| 3.1 ATTRIBUTE: KVM_ARM_VCPU_PVTIME_IPA | ||||
| Parameters: 64-bit base address | ||||
| Returns: -ENXIO:  Stolen time not implemented | ||||
|          -EEXIST: Base address already set for this VCPU | ||||
|          -EINVAL: Base address not 64 byte aligned | ||||
| 
 | ||||
| Specifies the base address of the stolen time structure for this VCPU. The | ||||
| base address must be 64 byte aligned and exist within a valid guest memory | ||||
| region. See Documentation/virt/kvm/arm/pvtime.txt for more information | ||||
| including the layout of the stolen time structure. | ||||
| @ -1,8 +1,12 @@ | ||||
| .. SPDX-License-Identifier: GPL-2.0 | ||||
| 
 | ||||
| =================== | ||||
| VFIO virtual device | ||||
| =================== | ||||
| 
 | ||||
| Device types supported: | ||||
|   KVM_DEV_TYPE_VFIO | ||||
| 
 | ||||
|   - KVM_DEV_TYPE_VFIO | ||||
| 
 | ||||
| Only one VFIO instance may be created per VM.  The created device | ||||
| tracks VFIO groups in use by the VM and features of those groups | ||||
| @ -23,14 +27,15 @@ KVM_DEV_VFIO_GROUP attributes: | ||||
| 	for the VFIO group. | ||||
|   KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: attaches a guest visible TCE table | ||||
| 	allocated by sPAPR KVM. | ||||
| 	kvm_device_attr.addr points to a struct: | ||||
| 	kvm_device_attr.addr points to a struct:: | ||||
| 
 | ||||
| 	struct kvm_vfio_spapr_tce { | ||||
| 		__s32	groupfd; | ||||
| 		__s32	tablefd; | ||||
| 	}; | ||||
| 		struct kvm_vfio_spapr_tce { | ||||
| 			__s32	groupfd; | ||||
| 			__s32	tablefd; | ||||
| 		}; | ||||
| 
 | ||||
| 	where | ||||
| 	@groupfd is a file descriptor for a VFIO group; | ||||
| 	@tablefd is a file descriptor for a TCE table allocated via | ||||
| 		KVM_CREATE_SPAPR_TCE. | ||||
| 	where: | ||||
| 
 | ||||
| 	- @groupfd is a file descriptor for a VFIO group; | ||||
| 	- @tablefd is a file descriptor for a TCE table allocated via | ||||
| 	  KVM_CREATE_SPAPR_TCE. | ||||
| @ -1,5 +1,8 @@ | ||||
| .. SPDX-License-Identifier: GPL-2.0 | ||||
| 
 | ||||
| ==================== | ||||
| Generic vm interface | ||||
| ==================================== | ||||
| ==================== | ||||
| 
 | ||||
| The virtual machine "device" also accepts the ioctls KVM_SET_DEVICE_ATTR, | ||||
| KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same | ||||
| @ -10,30 +13,38 @@ The groups and attributes per virtual machine, if any, are architecture | ||||
| specific. | ||||
| 
 | ||||
| 1. GROUP: KVM_S390_VM_MEM_CTRL | ||||
| Architectures: s390 | ||||
| ============================== | ||||
| 
 | ||||
| :Architectures: s390 | ||||
| 
 | ||||
| 1.1. ATTRIBUTE: KVM_S390_VM_MEM_ENABLE_CMMA | ||||
| Parameters: none | ||||
| Returns: -EBUSY if a vcpu is already defined, otherwise 0 | ||||
| ------------------------------------------- | ||||
| 
 | ||||
| :Parameters: none | ||||
| :Returns: -EBUSY if a vcpu is already defined, otherwise 0 | ||||
| 
 | ||||
| Enables Collaborative Memory Management Assist (CMMA) for the virtual machine. | ||||
| 
 | ||||
| 1.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA | ||||
| Parameters: none | ||||
| Returns: -EINVAL if CMMA was not enabled | ||||
|          0 otherwise | ||||
| ---------------------------------------- | ||||
| 
 | ||||
| :Parameters: none | ||||
| :Returns: -EINVAL if CMMA was not enabled; | ||||
| 	  0 otherwise | ||||
| 
 | ||||
| Clear the CMMA status for all guest pages, so any pages the guest marked | ||||
| as unused are again used and may not be reclaimed by the host. | ||||
| 
 | ||||
| 1.3. ATTRIBUTE KVM_S390_VM_MEM_LIMIT_SIZE | ||||
| Parameters: in attr->addr the address for the new limit of guest memory | ||||
| Returns: -EFAULT if the given address is not accessible | ||||
|          -EINVAL if the virtual machine is of type UCONTROL | ||||
|          -E2BIG if the given guest memory is to big for that machine | ||||
|          -EBUSY if a vcpu is already defined | ||||
|          -ENOMEM if not enough memory is available for a new shadow guest mapping | ||||
|           0 otherwise | ||||
| ----------------------------------------- | ||||
| 
 | ||||
| :Parameters: in attr->addr the address for the new limit of guest memory | ||||
| :Returns: -EFAULT if the given address is not accessible; | ||||
| 	  -EINVAL if the virtual machine is of type UCONTROL; | ||||
| 	  -E2BIG if the given guest memory is too big for that machine; | ||||
| 	  -EBUSY if a vcpu is already defined; | ||||
| 	  -ENOMEM if not enough memory is available for a new shadow guest mapping; | ||||
| 	  0 otherwise. | ||||
| 
 | ||||
| Allows userspace to query the actual limit and set a new limit for | ||||
| the maximum guest memory size. The limit will be rounded up to | ||||
| @ -42,78 +53,92 @@ the number of page table levels. In the case that there is no limit we will set | ||||
| the limit to KVM_S390_NO_MEM_LIMIT (U64_MAX). | ||||
| 
 | ||||
| 2. GROUP: KVM_S390_VM_CPU_MODEL | ||||
| Architectures: s390 | ||||
| =============================== | ||||
| 
 | ||||
| :Architectures: s390 | ||||
| 
 | ||||
| 2.1. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE (r/o) | ||||
| --------------------------------------------- | ||||
| 
 | ||||
| Allows user space to retrieve machine and kvm specific cpu related information: | ||||
| Allows user space to retrieve machine and kvm specific cpu related information:: | ||||
| 
 | ||||
| struct kvm_s390_vm_cpu_machine { | ||||
|   struct kvm_s390_vm_cpu_machine { | ||||
|        __u64 cpuid;           # CPUID of host | ||||
|        __u32 ibc;             # IBC level range offered by host | ||||
|        __u8  pad[4]; | ||||
|        __u64 fac_mask[256];   # set of cpu facilities enabled by KVM | ||||
|        __u64 fac_list[256];   # set of cpu facilities offered by host | ||||
| } | ||||
|   } | ||||
| 
 | ||||
| Parameters: address of buffer to store the machine related cpu data | ||||
|             of type struct kvm_s390_vm_cpu_machine* | ||||
| Returns:    -EFAULT if the given address is not accessible from kernel space | ||||
| 	    -ENOMEM if not enough memory is available to process the ioctl | ||||
| 	    0 in case of success | ||||
| :Parameters: address of buffer to store the machine related cpu data | ||||
| 	     of type struct kvm_s390_vm_cpu_machine* | ||||
| :Returns:   -EFAULT if the given address is not accessible from kernel space; | ||||
| 	    -ENOMEM if not enough memory is available to process the ioctl; | ||||
| 	    0 in case of success. | ||||
| 
 | ||||
| 2.2. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR (r/w) | ||||
| ----------------------------------------------- | ||||
| 
 | ||||
| Allows user space to retrieve or request to change cpu related information for a vcpu: | ||||
| Allows user space to retrieve or request to change cpu related information for a vcpu:: | ||||
| 
 | ||||
| struct kvm_s390_vm_cpu_processor { | ||||
|   struct kvm_s390_vm_cpu_processor { | ||||
|        __u64 cpuid;           # CPUID currently (to be) used by this vcpu | ||||
|        __u16 ibc;             # IBC level currently (to be) used by this vcpu | ||||
|        __u8  pad[6]; | ||||
|        __u64 fac_list[256];   # set of cpu facilities currently (to be) used | ||||
|                               # by this vcpu | ||||
| } | ||||
| 			      # by this vcpu | ||||
|   } | ||||
| 
 | ||||
| KVM does not enforce or limit the cpu model data in any form. Take the information | ||||
| retrieved by means of KVM_S390_VM_CPU_MACHINE as hint for reasonable configuration | ||||
| setups. Instruction interceptions triggered by additionally set facility bits that | ||||
| are not handled by KVM need to be implemented in the VM driver code. | ||||
| 
 | ||||
| Parameters: address of buffer to store/set the processor related cpu | ||||
| 	    data of type struct kvm_s390_vm_cpu_processor*. | ||||
| Returns:    -EBUSY in case 1 or more vcpus are already activated (only in write case) | ||||
| 	    -EFAULT if the given address is not accessible from kernel space | ||||
| 	    -ENOMEM if not enough memory is available to process the ioctl | ||||
| 	    0 in case of success | ||||
| :Parameters: address of buffer to store/set the processor related cpu | ||||
| 	     data of type struct kvm_s390_vm_cpu_processor*. | ||||
| :Returns:  -EBUSY in case 1 or more vcpus are already activated (only in write case); | ||||
| 	   -EFAULT if the given address is not accessible from kernel space; | ||||
| 	   -ENOMEM if not enough memory is available to process the ioctl; | ||||
| 	   0 in case of success. | ||||
| 
 | ||||
| .. _KVM_S390_VM_CPU_MACHINE_FEAT: | ||||
| 
 | ||||
| 2.3. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_FEAT (r/o) | ||||
| -------------------------------------------------- | ||||
| 
 | ||||
| Allows user space to retrieve available cpu features. A feature is available if | ||||
| provided by the hardware and supported by kvm. In theory, cpu features could | ||||
| even be completely emulated by kvm. | ||||
| 
 | ||||
| struct kvm_s390_vm_cpu_feat { | ||||
|         __u64 feat[16]; # Bitmap (1 = feature available), MSB 0 bit numbering | ||||
| }; | ||||
| :: | ||||
| 
 | ||||
| Parameters: address of a buffer to load the feature list from. | ||||
| Returns:    -EFAULT if the given address is not accessible from kernel space. | ||||
| 	    0 in case of success. | ||||
|   struct kvm_s390_vm_cpu_feat { | ||||
| 	__u64 feat[16]; # Bitmap (1 = feature available), MSB 0 bit numbering | ||||
|   }; | ||||
| 
 | ||||
| :Parameters: address of a buffer to load the feature list from. | ||||
| :Returns:  -EFAULT if the given address is not accessible from kernel space; | ||||
| 	   0 in case of success. | ||||
| 
 | ||||
| 2.4. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_FEAT (r/w) | ||||
| ---------------------------------------------------- | ||||
| 
 | ||||
| Allows user space to retrieve or change enabled cpu features for all VCPUs of a | ||||
| VM. Features that are not available cannot be enabled. | ||||
| 
 | ||||
| See 2.3. for a description of the parameter struct. | ||||
| See :ref:`KVM_S390_VM_CPU_MACHINE_FEAT` for | ||||
| a description of the parameter struct. | ||||
| 
 | ||||
| Parameters: address of a buffer to store/load the feature list from. | ||||
| Returns:    -EFAULT if the given address is not accessible from kernel space. | ||||
| 	    -EINVAL if a cpu feature that is not available is to be enabled. | ||||
| 	    -EBUSY if at least one VCPU has already been defined. | ||||
| :Parameters: address of a buffer to store/load the feature list from. | ||||
| :Returns:   -EFAULT if the given address is not accessible from kernel space; | ||||
| 	    -EINVAL if a cpu feature that is not available is to be enabled; | ||||
| 	    -EBUSY if at least one VCPU has already been defined; | ||||
| 	    0 in case of success. | ||||
| 
 | ||||
| .. _KVM_S390_VM_CPU_MACHINE_SUBFUNC: | ||||
| 
 | ||||
| 2.5. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_SUBFUNC (r/o) | ||||
| ----------------------------------------------------- | ||||
| 
 | ||||
| Allows user space to retrieve available cpu subfunctions without any filtering | ||||
| done by a set IBC. These subfunctions are indicated to the guest VCPU via | ||||
| @ -126,7 +151,9 @@ contained in the returned struct. If the affected instruction | ||||
| indicates subfunctions via a "test bit" mechanism, the subfunction codes are | ||||
| contained in the returned struct in MSB 0 bit numbering. | ||||
| 
 | ||||
| struct kvm_s390_vm_cpu_subfunc { | ||||
| :: | ||||
| 
 | ||||
|   struct kvm_s390_vm_cpu_subfunc { | ||||
|        u8 plo[32];           # always valid (ESA/390 feature) | ||||
|        u8 ptff[16];          # valid with TOD-clock steering | ||||
|        u8 kmac[16];          # valid with Message-Security-Assist | ||||
| @ -143,13 +170,14 @@ struct kvm_s390_vm_cpu_subfunc { | ||||
|        u8 kma[16];           # valid with Message-Security-Assist-Extension 8 | ||||
|        u8 kdsa[16];          # valid with Message-Security-Assist-Extension 9 | ||||
|        u8 reserved[1792];    # reserved for future instructions | ||||
| }; | ||||
|   }; | ||||
| 
 | ||||
| Parameters: address of a buffer to load the subfunction blocks from. | ||||
| Returns:    -EFAULT if the given address is not accessible from kernel space. | ||||
| :Parameters: address of a buffer to load the subfunction blocks from. | ||||
| :Returns:   -EFAULT if the given address is not accessible from kernel space; | ||||
| 	    0 in case of success. | ||||
| 
 | ||||
| 2.6. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_SUBFUNC (r/w) | ||||
| ------------------------------------------------------- | ||||
| 
 | ||||
| Allows user space to retrieve or change cpu subfunctions to be indicated for | ||||
| all VCPUs of a VM. This attribute will only be available if kernel and | ||||
| @ -164,107 +192,125 @@ As long as no data has been written, a read will fail. The IBC will be used | ||||
| to determine available subfunctions in this case, this will guarantee backward | ||||
| compatibility. | ||||
| 
 | ||||
| See 2.5. for a description of the parameter struct. | ||||
| See :ref:`KVM_S390_VM_CPU_MACHINE_SUBFUNC` for a | ||||
| description of the parameter struct. | ||||
| 
 | ||||
| Parameters: address of a buffer to store/load the subfunction blocks from. | ||||
| Returns:    -EFAULT if the given address is not accessible from kernel space. | ||||
| 	    -EINVAL when reading, if there was no write yet. | ||||
| 	    -EBUSY if at least one VCPU has already been defined. | ||||
| :Parameters: address of a buffer to store/load the subfunction blocks from. | ||||
| :Returns:   -EFAULT if the given address is not accessible from kernel space; | ||||
| 	    -EINVAL when reading, if there was no write yet; | ||||
| 	    -EBUSY if at least one VCPU has already been defined; | ||||
| 	    0 in case of success. | ||||
| 
 | ||||
| 3. GROUP: KVM_S390_VM_TOD | ||||
| Architectures: s390 | ||||
| ========================= | ||||
| 
 | ||||
| :Architectures: s390 | ||||
| 
 | ||||
| 3.1. ATTRIBUTE: KVM_S390_VM_TOD_HIGH | ||||
| ------------------------------------ | ||||
| 
 | ||||
| Allows user space to set/get the TOD clock extension (u8) (superseded by | ||||
| KVM_S390_VM_TOD_EXT). | ||||
| 
 | ||||
| Parameters: address of a buffer in user space to store the data (u8) to | ||||
| Returns:    -EFAULT if the given address is not accessible from kernel space | ||||
| :Parameters: address of a buffer in user space to store the data (u8) to | ||||
| :Returns:   -EFAULT if the given address is not accessible from kernel space; | ||||
| 	    -EINVAL if setting the TOD clock extension to != 0 is not supported | ||||
| 
 | ||||
| 3.2. ATTRIBUTE: KVM_S390_VM_TOD_LOW | ||||
| ----------------------------------- | ||||
| 
 | ||||
| Allows user space to set/get bits 0-63 of the TOD clock register as defined in | ||||
| the POP (u64). | ||||
| 
 | ||||
| Parameters: address of a buffer in user space to store the data (u64) to | ||||
| Returns:    -EFAULT if the given address is not accessible from kernel space | ||||
| :Parameters: address of a buffer in user space to store the data (u64) to | ||||
| :Returns:    -EFAULT if the given address is not accessible from kernel space | ||||
| 
 | ||||
| 3.3. ATTRIBUTE: KVM_S390_VM_TOD_EXT | ||||
| ----------------------------------- | ||||
| 
 | ||||
| Allows user space to set/get bits 0-63 of the TOD clock register as defined in | ||||
| the POP (u64). If the guest CPU model supports the TOD clock extension (u8), it | ||||
| also allows user space to get/set it. If the guest CPU model does not support | ||||
| it, it is stored as 0 and not allowed to be set to a value != 0. | ||||
| 
 | ||||
| Parameters: address of a buffer in user space to store the data | ||||
|             (kvm_s390_vm_tod_clock) to | ||||
| Returns:    -EFAULT if the given address is not accessible from kernel space | ||||
| :Parameters: address of a buffer in user space to store the data | ||||
| 	     (kvm_s390_vm_tod_clock) to | ||||
| :Returns:   -EFAULT if the given address is not accessible from kernel space; | ||||
| 	    -EINVAL if setting the TOD clock extension to != 0 is not supported | ||||
| 
 | ||||
| 4. GROUP: KVM_S390_VM_CRYPTO | ||||
| Architectures: s390 | ||||
| ============================ | ||||
| 
 | ||||
| :Architectures: s390 | ||||
| 
 | ||||
| 4.1. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_AES_KW (w/o) | ||||
| ------------------------------------------------------ | ||||
| 
 | ||||
| Allows user space to enable aes key wrapping, including generating a new | ||||
| wrapping key. | ||||
| 
 | ||||
| Parameters: none | ||||
| Returns:    0 | ||||
| :Parameters: none | ||||
| :Returns:    0 | ||||
| 
 | ||||
| 4.2. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_DEA_KW (w/o) | ||||
| ------------------------------------------------------ | ||||
| 
 | ||||
| Allows user space to enable dea key wrapping, including generating a new | ||||
| wrapping key. | ||||
| 
 | ||||
| Parameters: none | ||||
| Returns:    0 | ||||
| :Parameters: none | ||||
| :Returns:    0 | ||||
| 
 | ||||
| 4.3. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_AES_KW (w/o) | ||||
| ------------------------------------------------------- | ||||
| 
 | ||||
| Allows user space to disable aes key wrapping, clearing the wrapping key. | ||||
| 
 | ||||
| Parameters: none | ||||
| Returns:    0 | ||||
| :Parameters: none | ||||
| :Returns:    0 | ||||
| 
 | ||||
| 4.4. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_DEA_KW (w/o) | ||||
| ------------------------------------------------------- | ||||
| 
 | ||||
| Allows user space to disable dea key wrapping, clearing the wrapping key. | ||||
| 
 | ||||
| Parameters: none | ||||
| Returns:    0 | ||||
| :Parameters: none | ||||
| :Returns:    0 | ||||
| 
 | ||||
| 5. GROUP: KVM_S390_VM_MIGRATION | ||||
| Architectures: s390 | ||||
| =============================== | ||||
| 
 | ||||
| :Architectures: s390 | ||||
| 
 | ||||
| 5.1. ATTRIBUTE: KVM_S390_VM_MIGRATION_STOP (w/o) | ||||
| ------------------------------------------------ | ||||
| 
 | ||||
| Allows userspace to stop migration mode, needed for PGSTE migration. | ||||
| Setting this attribute when migration mode is not active will have no | ||||
| effects. | ||||
| 
 | ||||
| Parameters: none | ||||
| Returns:    0 | ||||
| :Parameters: none | ||||
| :Returns:    0 | ||||
| 
 | ||||
| 5.2. ATTRIBUTE: KVM_S390_VM_MIGRATION_START (w/o) | ||||
| ------------------------------------------------- | ||||
| 
 | ||||
| Allows userspace to start migration mode, needed for PGSTE migration. | ||||
| Setting this attribute when migration mode is already active will have | ||||
| no effects. | ||||
| 
 | ||||
| Parameters: none | ||||
| Returns:    -ENOMEM if there is not enough free memory to start migration mode | ||||
| 	    -EINVAL if the state of the VM is invalid (e.g. no memory defined) | ||||
| :Parameters: none | ||||
| :Returns:   -ENOMEM if there is not enough free memory to start migration mode; | ||||
| 	    -EINVAL if the state of the VM is invalid (e.g. no memory defined); | ||||
| 	    0 in case of success. | ||||
| 
 | ||||
| 5.3. ATTRIBUTE: KVM_S390_VM_MIGRATION_STATUS (r/o) | ||||
| -------------------------------------------------- | ||||
| 
 | ||||
| Allows userspace to query the status of migration mode. | ||||
| 
 | ||||
| Parameters: address of a buffer in user space to store the data (u64) to; | ||||
| 	    the data itself is either 0 if migration mode is disabled or 1 | ||||
| 	    if it is enabled | ||||
| Returns:    -EFAULT if the given address is not accessible from kernel space | ||||
| :Parameters: address of a buffer in user space to store the data (u64) to; | ||||
| 	     the data itself is either 0 if migration mode is disabled or 1 | ||||
| 	     if it is enabled | ||||
| :Returns:   -EFAULT if the given address is not accessible from kernel space; | ||||
| 	    0 in case of success. | ||||
| @ -1,20 +1,31 @@ | ||||
| .. SPDX-License-Identifier: GPL-2.0 | ||||
| 
 | ||||
| ========================= | ||||
| XICS interrupt controller | ||||
| ========================= | ||||
| 
 | ||||
| Device type supported: KVM_DEV_TYPE_XICS | ||||
| 
 | ||||
| Groups: | ||||
|   1. KVM_DEV_XICS_GRP_SOURCES | ||||
|   Attributes: One per interrupt source, indexed by the source number. | ||||
|        Attributes: | ||||
| 
 | ||||
|          One per interrupt source, indexed by the source number. | ||||
|   2. KVM_DEV_XICS_GRP_CTRL | ||||
|   Attributes: | ||||
|     2.1 KVM_DEV_XICS_NR_SERVERS (write only) | ||||
|        Attributes: | ||||
| 
 | ||||
|          2.1 KVM_DEV_XICS_NR_SERVERS (write only) | ||||
| 
 | ||||
|   The kvm_device_attr.addr points to a __u32 value which is the number of | ||||
|   interrupt server numbers (ie, highest possible vcpu id plus one). | ||||
| 
 | ||||
|   Errors: | ||||
|     -EINVAL: Value greater than KVM_MAX_VCPU_ID. | ||||
|     -EFAULT: Invalid user pointer for attr->addr. | ||||
|     -EBUSY:  A vcpu is already connected to the device. | ||||
| 
 | ||||
|     =======  ========================================== | ||||
|     -EINVAL  Value greater than KVM_MAX_VCPU_ID. | ||||
|     -EFAULT  Invalid user pointer for attr->addr. | ||||
|     -EBUSY   A vcpu is already connected to the device. | ||||
|     =======  ========================================== | ||||
| 
 | ||||
| This device emulates the XICS (eXternal Interrupt Controller | ||||
| Specification) defined in PAPR.  The XICS has a set of interrupt | ||||
| @ -53,24 +64,29 @@ the interrupt source number.  The 64 bit state word has the following | ||||
| bitfields, starting from the least-significant end of the word: | ||||
| 
 | ||||
| * Destination (server number), 32 bits | ||||
| 
 | ||||
|   This specifies where the interrupt should be sent, and is the | ||||
|   interrupt server number specified for the destination vcpu. | ||||
| 
 | ||||
| * Priority, 8 bits | ||||
| 
 | ||||
|   This is the priority specified for this interrupt source, where 0 is | ||||
|   the highest priority and 255 is the lowest.  An interrupt with a | ||||
|   priority of 255 will never be delivered. | ||||
| 
 | ||||
| * Level sensitive flag, 1 bit | ||||
| 
 | ||||
|   This bit is 1 for a level-sensitive interrupt source, or 0 for | ||||
|   edge-sensitive (or MSI). | ||||
| 
 | ||||
| * Masked flag, 1 bit | ||||
| 
 | ||||
|   This bit is set to 1 if the interrupt is masked (cannot be delivered | ||||
|   regardless of its priority), for example by the ibm,int-off RTAS | ||||
|   call, or 0 if it is not masked. | ||||
| 
 | ||||
| * Pending flag, 1 bit | ||||
| 
 | ||||
|   This bit is 1 if the source has a pending interrupt, otherwise 0. | ||||
| 
 | ||||
| Only one XICS instance may be created per VM. | ||||
| @ -1,8 +1,11 @@ | ||||
| .. SPDX-License-Identifier: GPL-2.0 | ||||
| 
 | ||||
| =========================================================== | ||||
| POWER9 eXternal Interrupt Virtualization Engine (XIVE Gen1) | ||||
| ========================================================== | ||||
| =========================================================== | ||||
| 
 | ||||
| Device types supported: | ||||
|   KVM_DEV_TYPE_XIVE     POWER9 XIVE Interrupt Controller generation 1 | ||||
|   - KVM_DEV_TYPE_XIVE     POWER9 XIVE Interrupt Controller generation 1 | ||||
| 
 | ||||
| This device acts as a VM interrupt controller. It provides the KVM | ||||
| interface to configure the interrupt sources of a VM in the underlying | ||||
| @ -64,72 +67,100 @@ the legacy interrupt mode, referred as XICS (POWER7/8). | ||||
| 
 | ||||
| * Groups: | ||||
| 
 | ||||
|   1. KVM_DEV_XIVE_GRP_CTRL | ||||
|   Provides global controls on the device | ||||
| 1. KVM_DEV_XIVE_GRP_CTRL | ||||
|      Provides global controls on the device | ||||
| 
 | ||||
|   Attributes: | ||||
|     1.1 KVM_DEV_XIVE_RESET (write only) | ||||
|     Resets the interrupt controller configuration for sources and event | ||||
|     queues. To be used by kexec and kdump. | ||||
| 
 | ||||
|     Errors: none | ||||
| 
 | ||||
|     1.2 KVM_DEV_XIVE_EQ_SYNC (write only) | ||||
|     Sync all the sources and queues and mark the EQ pages dirty. This | ||||
|     is to make sure that a consistent memory state is captured when | ||||
|     migrating the VM. | ||||
| 
 | ||||
|     Errors: none | ||||
| 
 | ||||
|     1.3 KVM_DEV_XIVE_NR_SERVERS (write only) | ||||
|     The kvm_device_attr.addr points to a __u32 value which is the number of | ||||
|     interrupt server numbers (ie, highest possible vcpu id plus one). | ||||
|     Errors: | ||||
|       -EINVAL: Value greater than KVM_MAX_VCPU_ID. | ||||
|       -EFAULT: Invalid user pointer for attr->addr. | ||||
|       -EBUSY:  A vCPU is already connected to the device. | ||||
| 
 | ||||
|   2. KVM_DEV_XIVE_GRP_SOURCE (write only) | ||||
|   Initializes a new source in the XIVE device and mask it. | ||||
|     Errors: | ||||
| 
 | ||||
|       =======  ========================================== | ||||
|       -EINVAL  Value greater than KVM_MAX_VCPU_ID. | ||||
|       -EFAULT  Invalid user pointer for attr->addr. | ||||
|       -EBUSY   A vCPU is already connected to the device. | ||||
|       =======  ========================================== | ||||
| 
 | ||||
| 2. KVM_DEV_XIVE_GRP_SOURCE (write only) | ||||
|      Initializes a new source in the XIVE device and masks it. | ||||
| 
 | ||||
|   Attributes: | ||||
|     Interrupt source number  (64-bit) | ||||
|   The kvm_device_attr.addr points to a __u64 value: | ||||
|   bits:     | 63   ....  2 |   1   |   0 | ||||
|   values:   |    unused    | level | type | ||||
| 
 | ||||
|   The kvm_device_attr.addr points to a __u64 value:: | ||||
| 
 | ||||
|     bits:     | 63   ....  2 |   1   |   0 | ||||
|     values:   |    unused    | level | type | ||||
| 
 | ||||
|   - type:  0:MSI 1:LSI | ||||
|   - level: assertion level in case of an LSI. | ||||
|   Errors: | ||||
|     -E2BIG:  Interrupt source number is out of range | ||||
|     -ENOMEM: Could not create a new source block | ||||
|     -EFAULT: Invalid user pointer for attr->addr. | ||||
|     -ENXIO:  Could not allocate underlying HW interrupt | ||||
| 
 | ||||
|   3. KVM_DEV_XIVE_GRP_SOURCE_CONFIG (write only) | ||||
|   Configures source targeting | ||||
|   Errors: | ||||
| 
 | ||||
|     =======  ========================================== | ||||
|     -E2BIG   Interrupt source number is out of range | ||||
|     -ENOMEM  Could not create a new source block | ||||
|     -EFAULT  Invalid user pointer for attr->addr. | ||||
|     -ENXIO   Could not allocate underlying HW interrupt | ||||
|     =======  ========================================== | ||||
| 
 | ||||
| 3. KVM_DEV_XIVE_GRP_SOURCE_CONFIG (write only) | ||||
|      Configures source targeting | ||||
| 
 | ||||
|   Attributes: | ||||
|     Interrupt source number  (64-bit) | ||||
|   The kvm_device_attr.addr points to a __u64 value: | ||||
|   bits:     | 63   ....  33 |  32  | 31 .. 3 |  2 .. 0 | ||||
|   values:   |    eisn       | mask |  server | priority | ||||
| 
 | ||||
|   The kvm_device_attr.addr points to a __u64 value:: | ||||
| 
 | ||||
|     bits:     | 63   ....  33 |  32  | 31 .. 3 |  2 .. 0 | ||||
|     values:   |    eisn       | mask |  server | priority | ||||
| 
 | ||||
|   - priority: 0-7 interrupt priority level | ||||
|   - server: CPU number chosen to handle the interrupt | ||||
|   - mask: mask flag (unused) | ||||
|   - eisn: Effective Interrupt Source Number | ||||
|   Errors: | ||||
|     -ENOENT: Unknown source number | ||||
|     -EINVAL: Not initialized source number | ||||
|     -EINVAL: Invalid priority | ||||
|     -EINVAL: Invalid CPU number. | ||||
|     -EFAULT: Invalid user pointer for attr->addr. | ||||
|     -ENXIO:  CPU event queues not configured or configuration of the | ||||
|              underlying HW interrupt failed | ||||
|     -EBUSY:  No CPU available to serve interrupt | ||||
| 
 | ||||
|   4. KVM_DEV_XIVE_GRP_EQ_CONFIG (read-write) | ||||
|   Configures an event queue of a CPU | ||||
|   Errors: | ||||
| 
 | ||||
|     =======  ======================================================= | ||||
|     -ENOENT  Unknown source number | ||||
|     -EINVAL  Not initialized source number | ||||
|     -EINVAL  Invalid priority | ||||
|     -EINVAL  Invalid CPU number. | ||||
|     -EFAULT  Invalid user pointer for attr->addr. | ||||
|     -ENXIO   CPU event queues not configured or configuration of the | ||||
| 	     underlying HW interrupt failed | ||||
|     -EBUSY   No CPU available to serve interrupt | ||||
|     =======  ======================================================= | ||||
| 
 | ||||
| 4. KVM_DEV_XIVE_GRP_EQ_CONFIG (read-write) | ||||
|      Configures an event queue of a CPU | ||||
| 
 | ||||
|   Attributes: | ||||
|     EQ descriptor identifier (64-bit) | ||||
|   The EQ descriptor identifier is a tuple (server, priority) : | ||||
|   bits:     | 63   ....  32 | 31 .. 3 |  2 .. 0 | ||||
|   values:   |    unused     |  server | priority | ||||
|   The kvm_device_attr.addr points to : | ||||
| 
 | ||||
|   The EQ descriptor identifier is a tuple (server, priority):: | ||||
| 
 | ||||
|     bits:     | 63   ....  32 | 31 .. 3 |  2 .. 0 | ||||
|     values:   |    unused     |  server | priority | ||||
| 
 | ||||
|   The kvm_device_attr.addr points to:: | ||||
| 
 | ||||
|     struct kvm_ppc_xive_eq { | ||||
| 	__u32 flags; | ||||
| 	__u32 qshift; | ||||
| @ -138,8 +169,9 @@ the legacy interrupt mode, referred as XICS (POWER7/8). | ||||
| 	__u32 qindex; | ||||
| 	__u8  pad[40]; | ||||
|     }; | ||||
| 
 | ||||
|   - flags: queue flags | ||||
|     KVM_XIVE_EQ_ALWAYS_NOTIFY (required) | ||||
|       KVM_XIVE_EQ_ALWAYS_NOTIFY (required) | ||||
| 	forces notification without using the coalescing mechanism | ||||
| 	provided by the XIVE END ESBs. | ||||
|   - qshift: queue size (power of 2) | ||||
| @ -147,22 +179,31 @@ the legacy interrupt mode, referred as XICS (POWER7/8). | ||||
|   - qtoggle: current queue toggle bit | ||||
|   - qindex: current queue index | ||||
|   - pad: reserved for future use | ||||
|   Errors: | ||||
|     -ENOENT: Invalid CPU number | ||||
|     -EINVAL: Invalid priority | ||||
|     -EINVAL: Invalid flags | ||||
|     -EINVAL: Invalid queue size | ||||
|     -EINVAL: Invalid queue address | ||||
|     -EFAULT: Invalid user pointer for attr->addr. | ||||
|     -EIO:    Configuration of the underlying HW failed | ||||
| 
 | ||||
|   5. KVM_DEV_XIVE_GRP_SOURCE_SYNC (write only) | ||||
|   Synchronize the source to flush event notifications | ||||
|   Errors: | ||||
| 
 | ||||
|     =======  ========================================= | ||||
|     -ENOENT  Invalid CPU number | ||||
|     -EINVAL  Invalid priority | ||||
|     -EINVAL  Invalid flags | ||||
|     -EINVAL  Invalid queue size | ||||
|     -EINVAL  Invalid queue address | ||||
|     -EFAULT  Invalid user pointer for attr->addr. | ||||
|     -EIO     Configuration of the underlying HW failed | ||||
|     =======  ========================================= | ||||
| 
 | ||||
| 5. KVM_DEV_XIVE_GRP_SOURCE_SYNC (write only) | ||||
|      Synchronize the source to flush event notifications | ||||
| 
 | ||||
|   Attributes: | ||||
|     Interrupt source number  (64-bit) | ||||
| 
 | ||||
|   Errors: | ||||
|     -ENOENT: Unknown source number | ||||
|     -EINVAL: Not initialized source number | ||||
| 
 | ||||
|     =======  ============================= | ||||
|     -ENOENT  Unknown source number | ||||
|     -EINVAL  Not initialized source number | ||||
|     =======  ============================= | ||||
| 
 | ||||
| * VCPU state | ||||
| 
 | ||||
| @ -175,11 +216,12 @@ the legacy interrupt mode, referred as XICS (POWER7/8). | ||||
|   as it synthesizes the priorities of the pending interrupts. We | ||||
|   capture a bit more to report debug information. | ||||
| 
 | ||||
|   KVM_REG_PPC_VP_STATE (2 * 64bits) | ||||
|   bits:     |  63  ....  32  |  31  ....  0  | | ||||
|   values:   |   TIMA word0   |   TIMA word1  | | ||||
|   bits:     | 127       ..........       64  | | ||||
|   values:   |            unused              | | ||||
|   KVM_REG_PPC_VP_STATE (2 * 64bits):: | ||||
| 
 | ||||
|     bits:     |  63  ....  32  |  31  ....  0  | | ||||
|     values:   |   TIMA word0   |   TIMA word1  | | ||||
|     bits:     | 127       ..........       64  | | ||||
|     values:   |            unused              | | ||||
| 
 | ||||
| * Migration: | ||||
| 
 | ||||
| @ -196,7 +238,7 @@ the legacy interrupt mode, referred as XICS (POWER7/8). | ||||
|   3. Capture the state of the source targeting, the EQs configuration | ||||
|   and the state of thread interrupt context registers. | ||||
| 
 | ||||
|   Restore is similar : | ||||
|   Restore is similar: | ||||
| 
 | ||||
|   1. Restore the EQ configuration, as targeting depends on it. | ||||
|   2. Restore targeting | ||||
| @ -1,3 +1,6 @@ | ||||
| .. SPDX-License-Identifier: GPL-2.0 | ||||
| 
 | ||||
| =========================== | ||||
| The KVM halt polling system | ||||
| =========================== | ||||
| 
 | ||||
| @ -68,7 +71,8 @@ steady state polling interval but will only really do a good job for wakeups | ||||
| which come at an approximately constant rate, otherwise there will be constant | ||||
| adjustment of the polling interval. | ||||
| 
 | ||||
| [0] total block time: the time between when the halt polling function is | ||||
| [0] total block time: | ||||
| 		      the time between when the halt polling function is | ||||
| 		      invoked and a wakeup source received (irrespective of | ||||
| 		      whether the scheduler is invoked within that function). | ||||
| 
 | ||||
| @ -81,31 +85,32 @@ shrunk. These variables are defined in include/linux/kvm_host.h and as module | ||||
| parameters in virt/kvm/kvm_main.c, or arch/powerpc/kvm/book3s_hv.c in the | ||||
| powerpc kvm-hv case. | ||||
| 
 | ||||
| Module Parameter	|   Description		    |	     Default Value | ||||
| -------------------------------------------------------------------------------- | ||||
| halt_poll_ns		| The global max polling    | KVM_HALT_POLL_NS_DEFAULT | ||||
| 			| interval which defines    | | ||||
| 			| the ceiling value of the  | | ||||
| 			| polling interval for      | (per arch value) | ||||
| 			| each vcpu.		    | | ||||
| -------------------------------------------------------------------------------- | ||||
| halt_poll_ns_grow	| The value by which the    | 2 | ||||
| 			| halt polling interval is  | | ||||
| 			| multiplied in the	    | | ||||
| 			| grow_halt_poll_ns()	    | | ||||
| 			| function.		    | | ||||
| -------------------------------------------------------------------------------- | ||||
| halt_poll_ns_grow_start | The initial value to grow | 10000 | ||||
| 			| to from zero in the	    | | ||||
| 			| grow_halt_poll_ns()	    | | ||||
| 			| function.		    | | ||||
| -------------------------------------------------------------------------------- | ||||
| halt_poll_ns_shrink	| The value by which the    | 0 | ||||
| 			| halt polling interval is  | | ||||
| 			| divided in the	    | | ||||
| 			| shrink_halt_poll_ns()	    | | ||||
| 			| function.		    | | ||||
| -------------------------------------------------------------------------------- | ||||
| +-----------------------+---------------------------+-------------------------+ | ||||
| |Module Parameter	|   Description		    |	     Default Value    | | ||||
| +-----------------------+---------------------------+-------------------------+ | ||||
| |halt_poll_ns		| The global max polling    | KVM_HALT_POLL_NS_DEFAULT| | ||||
| |			| interval which defines    |			      | | ||||
| |			| the ceiling value of the  |			      | | ||||
| |			| polling interval for      | (per arch value)	      | | ||||
| |			| each vcpu.		    |			      | | ||||
| +-----------------------+---------------------------+-------------------------+ | ||||
| |halt_poll_ns_grow	| The value by which the    | 2			      | | ||||
| |			| halt polling interval is  |			      | | ||||
| |			| multiplied in the	    |			      | | ||||
| |			| grow_halt_poll_ns()	    |			      | | ||||
| |			| function.		    |			      | | ||||
| +-----------------------+---------------------------+-------------------------+ | ||||
| |halt_poll_ns_grow_start| The initial value to grow | 10000		      | | ||||
| |			| to from zero in the	    |			      | | ||||
| |			| grow_halt_poll_ns()	    |			      | | ||||
| |			| function.		    |			      | | ||||
| +-----------------------+---------------------------+-------------------------+ | ||||
| |halt_poll_ns_shrink	| The value by which the    | 0			      | | ||||
| |			| halt polling interval is  |			      | | ||||
| |			| divided in the	    |			      | | ||||
| |			| shrink_halt_poll_ns()	    |			      | | ||||
| |			| function.		    |			      | | ||||
| +-----------------------+---------------------------+-------------------------+ | ||||
| 
 | ||||
| These module parameters can be set from the debugfs files in: | ||||
| 
 | ||||
| @ -117,20 +122,19 @@ Note: that these module parameters are system wide values and are not able to | ||||
| Further Notes | ||||
| ============= | ||||
| 
 | ||||
| - Care should be taken when setting the halt_poll_ns module parameter as a | ||||
| large value has the potential to drive the cpu usage to 100% on a machine which | ||||
| would be almost entirely idle otherwise. This is because even if a guest has | ||||
| wakeups during which very little work is done and which are quite far apart, if | ||||
| the period is shorter than the global max polling interval (halt_poll_ns) then | ||||
| the host will always poll for the entire block time and thus cpu utilisation | ||||
| will go to 100%. | ||||
| - Care should be taken when setting the halt_poll_ns module parameter as a large value | ||||
|   has the potential to drive the cpu usage to 100% on a machine which would be almost | ||||
|   entirely idle otherwise. This is because even if a guest has wakeups during which very | ||||
|   little work is done and which are quite far apart, if the period is shorter than the | ||||
|   global max polling interval (halt_poll_ns) then the host will always poll for the | ||||
|   entire block time and thus cpu utilisation will go to 100%. | ||||
| 
 | ||||
| - Halt polling essentially presents a trade off between power usage and latency | ||||
| and the module parameters should be used to tune the affinity for this. Idle | ||||
| cpu time is essentially converted to host kernel time with the aim of decreasing | ||||
| latency when entering the guest. | ||||
| - Halt polling essentially presents a trade off between power usage and latency and | ||||
|   the module parameters should be used to tune the affinity for this. Idle cpu time is | ||||
|   essentially converted to host kernel time with the aim of decreasing latency when | ||||
|   entering the guest. | ||||
| 
 | ||||
| - Halt polling will only be conducted by the host when no other tasks are | ||||
| runnable on that cpu, otherwise the polling will cease immediately and | ||||
| schedule will be invoked to allow that other task to run. Thus this doesn't | ||||
| allow a guest to denial of service the cpu. | ||||
| - Halt polling will only be conducted by the host when no other tasks are runnable on | ||||
|   that cpu, otherwise the polling will cease immediately and schedule will be invoked to | ||||
|   allow that other task to run. Thus this doesn't allow a guest to cause a denial of | ||||
|   service on the cpu. | ||||
| @ -1,5 +1,9 @@ | ||||
| Linux KVM Hypercall: | ||||
| .. SPDX-License-Identifier: GPL-2.0 | ||||
| 
 | ||||
| =================== | ||||
| Linux KVM Hypercall | ||||
| =================== | ||||
| 
 | ||||
| X86: | ||||
|  KVM Hypercalls have a three-byte sequence of either the vmcall or the vmmcall | ||||
|  instruction. The hypervisor can replace it with instructions that are | ||||
| @ -20,7 +24,7 @@ S390: | ||||
|   For further information on the S390 diagnose call as supported by KVM, | ||||
|   refer to Documentation/virt/kvm/s390-diag.rst. | ||||
| 
 | ||||
|  PowerPC: | ||||
| PowerPC: | ||||
|   It uses R3-R10 and hypercall number in R11. R4-R11 are used as output registers. | ||||
|   Return value is placed in R3. | ||||
| 
 | ||||
| @ -34,7 +38,8 @@ MIPS: | ||||
|   the return value is placed in $2 (v0). | ||||
| 
 | ||||
| KVM Hypercalls Documentation | ||||
| =========================== | ||||
| ============================ | ||||
| 
 | ||||
| The template for each hypercall is: | ||||
| 1. Hypercall name. | ||||
| 2. Architecture(s) | ||||
| @ -43,56 +48,64 @@ The template for each hypercall is: | ||||
| 
 | ||||
| 1. KVM_HC_VAPIC_POLL_IRQ | ||||
| ------------------------ | ||||
| Architecture: x86 | ||||
| Status: active | ||||
| Purpose: Trigger guest exit so that the host can check for pending | ||||
| interrupts on reentry. | ||||
| 
 | ||||
| :Architecture: x86 | ||||
| :Status: active | ||||
| :Purpose: Trigger guest exit so that the host can check for pending | ||||
|           interrupts on reentry. | ||||
| 
 | ||||
| 2. KVM_HC_MMU_OP | ||||
| ------------------------ | ||||
| Architecture: x86 | ||||
| Status: deprecated. | ||||
| Purpose: Support MMU operations such as writing to PTE, | ||||
| flushing TLB, release PT. | ||||
| ---------------- | ||||
| 
 | ||||
| :Architecture: x86 | ||||
| :Status: deprecated. | ||||
| :Purpose: Support MMU operations such as writing to PTE, | ||||
|           flushing TLB, release PT. | ||||
| 
 | ||||
| 3. KVM_HC_FEATURES | ||||
| ------------------------ | ||||
| Architecture: PPC | ||||
| Status: active | ||||
| Purpose: Expose hypercall availability to the guest. On x86 platforms, cpuid | ||||
| used to enumerate which hypercalls are available. On PPC, either device tree | ||||
| based lookup ( which is also what EPAPR dictates) OR KVM specific enumeration | ||||
| mechanism (which is this hypercall) can be used. | ||||
| ------------------ | ||||
| 
 | ||||
| :Architecture: PPC | ||||
| :Status: active | ||||
| :Purpose: Expose hypercall availability to the guest. On x86 platforms, cpuid | ||||
|           is used to enumerate which hypercalls are available. On PPC, either | ||||
| 	  device tree based lookup (which is also what EPAPR dictates) | ||||
| 	  OR KVM specific enumeration mechanism (which is this hypercall) | ||||
| 	  can be used. | ||||
| 
 | ||||
| 4. KVM_HC_PPC_MAP_MAGIC_PAGE | ||||
| ------------------------ | ||||
| Architecture: PPC | ||||
| Status: active | ||||
| Purpose: To enable communication between the hypervisor and guest there is a | ||||
| shared page that contains parts of supervisor visible register state. | ||||
| The guest can map this shared page to access its supervisor register through | ||||
| memory using this hypercall. | ||||
| ---------------------------- | ||||
| 
 | ||||
| :Architecture: PPC | ||||
| :Status: active | ||||
| :Purpose: To enable communication between the hypervisor and guest there is a | ||||
| 	  shared page that contains parts of supervisor visible register state. | ||||
| 	  The guest can map this shared page to access its supervisor register | ||||
| 	  through memory using this hypercall. | ||||
| 
 | ||||
| 5. KVM_HC_KICK_CPU | ||||
| ------------------------ | ||||
| Architecture: x86 | ||||
| Status: active | ||||
| Purpose: Hypercall used to wakeup a vcpu from HLT state | ||||
| Usage example : A vcpu of a paravirtualized guest that is busywaiting in guest | ||||
| kernel mode for an event to occur (ex: a spinlock to become available) can | ||||
| execute HLT instruction once it has busy-waited for more than a threshold | ||||
| time-interval. Execution of HLT instruction would cause the hypervisor to put | ||||
| the vcpu to sleep until occurrence of an appropriate event. Another vcpu of the | ||||
| same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall, | ||||
| specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0) | ||||
| is used in the hypercall for future use. | ||||
| ------------------ | ||||
| 
 | ||||
| :Architecture: x86 | ||||
| :Status: active | ||||
| :Purpose: Hypercall used to wakeup a vcpu from HLT state | ||||
| :Usage example: | ||||
|   A vcpu of a paravirtualized guest that is busywaiting in guest | ||||
|   kernel mode for an event to occur (ex: a spinlock to become available) can | ||||
|   execute HLT instruction once it has busy-waited for more than a threshold | ||||
|   time-interval. Execution of HLT instruction would cause the hypervisor to put | ||||
|   the vcpu to sleep until occurrence of an appropriate event. Another vcpu of the | ||||
|   same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall, | ||||
|   specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0) | ||||
|   is used in the hypercall for future use. | ||||
| 
 | ||||
| 
 | ||||
| 6. KVM_HC_CLOCK_PAIRING | ||||
| ------------------------ | ||||
| Architecture: x86 | ||||
| Status: active | ||||
| Purpose: Hypercall used to synchronize host and guest clocks. | ||||
| ----------------------- | ||||
| :Architecture: x86 | ||||
| :Status: active | ||||
| :Purpose: Hypercall used to synchronize host and guest clocks. | ||||
| 
 | ||||
| Usage: | ||||
| 
 | ||||
| a0: guest physical address where host copies | ||||
| @ -101,6 +114,8 @@ a0: guest physical address where host copies | ||||
| a1: clock_type, ATM only KVM_CLOCK_PAIRING_WALLCLOCK (0) | ||||
| is supported (corresponding to the host's CLOCK_REALTIME clock). | ||||
| 
 | ||||
|        :: | ||||
| 
 | ||||
| 		struct kvm_clock_pairing { | ||||
| 			__s64 sec; | ||||
| 			__s64 nsec; | ||||
| @ -123,15 +138,16 @@ Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource, | ||||
| or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK. | ||||
| 
 | ||||
| 6. KVM_HC_SEND_IPI | ||||
| ------------------------ | ||||
| Architecture: x86 | ||||
| Status: active | ||||
| Purpose: Send IPIs to multiple vCPUs. | ||||
| ------------------ | ||||
| 
 | ||||
| a0: lower part of the bitmap of destination APIC IDs | ||||
| a1: higher part of the bitmap of destination APIC IDs | ||||
| a2: the lowest APIC ID in bitmap | ||||
| a3: APIC ICR | ||||
| :Architecture: x86 | ||||
| :Status: active | ||||
| :Purpose: Send IPIs to multiple vCPUs. | ||||
| 
 | ||||
| - a0: lower part of the bitmap of destination APIC IDs | ||||
| - a1: higher part of the bitmap of destination APIC IDs | ||||
| - a2: the lowest APIC ID in bitmap | ||||
| - a3: APIC ICR | ||||
| 
 | ||||
| The hypercall lets a guest send multicast IPIs, with at most 128 | ||||
| destinations per hypercall in 64-bit mode and 64 vCPUs per | ||||
| @ -143,12 +159,13 @@ corresponds to the APIC ID a2+1, and so on. | ||||
| Returns the number of CPUs to which the IPIs were delivered successfully. | ||||
| 
 | ||||
| 7. KVM_HC_SCHED_YIELD | ||||
| ------------------------ | ||||
| Architecture: x86 | ||||
| Status: active | ||||
| Purpose: Hypercall used to yield if the IPI target vCPU is preempted | ||||
| --------------------- | ||||
| 
 | ||||
| :Architecture: x86 | ||||
| :Status: active | ||||
| :Purpose: Hypercall used to yield if the IPI target vCPU is preempted | ||||
| 
 | ||||
| a0: destination APIC ID | ||||
| 
 | ||||
| Usage example: When sending a call-function IPI-many to vCPUs, yield if | ||||
| any of the IPI target vCPUs was preempted. | ||||
| :Usage example: When sending a call-function IPI-many to vCPUs, yield if | ||||
| 	        any of the IPI target vCPUs was preempted. | ||||
| @ -7,6 +7,22 @@ KVM | ||||
| .. toctree:: | ||||
|    :maxdepth: 2 | ||||
| 
 | ||||
|    api | ||||
|    amd-memory-encryption | ||||
|    cpuid | ||||
|    halt-polling | ||||
|    hypercalls | ||||
|    locking | ||||
|    mmu | ||||
|    msr | ||||
|    nested-vmx | ||||
|    ppc-pv | ||||
|    s390-diag | ||||
|    timekeeping | ||||
|    vcpu-requests | ||||
| 
 | ||||
|    review-checklist | ||||
| 
 | ||||
|    arm/index | ||||
| 
 | ||||
|    devices/index | ||||
|  | ||||
							
								
								
									
										243
									
								
								Documentation/virt/kvm/locking.rst
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										243
									
								
								Documentation/virt/kvm/locking.rst
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,243 @@ | ||||
| .. SPDX-License-Identifier: GPL-2.0 | ||||
| 
 | ||||
| ================= | ||||
| KVM Lock Overview | ||||
| ================= | ||||
| 
 | ||||
| 1. Acquisition Orders | ||||
| --------------------- | ||||
| 
 | ||||
| The acquisition orders for mutexes are as follows: | ||||
| 
 | ||||
| - kvm->lock is taken outside vcpu->mutex | ||||
| 
 | ||||
| - kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock | ||||
| 
 | ||||
| - kvm->slots_lock is taken outside kvm->irq_lock, though acquiring | ||||
|   them together is quite rare. | ||||
| 
 | ||||
| On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock. | ||||
| 
 | ||||
| Everything else is a leaf: no other lock is taken inside the critical | ||||
| sections. | ||||
| 
 | ||||
| 2. Exception | ||||
| ------------ | ||||
| 
 | ||||
| Fast page fault: | ||||
| 
 | ||||
| Fast page fault is the fast path which fixes the guest page fault out of | ||||
| the mmu-lock on x86. Currently, the page fault can be fast in one of the | ||||
| following two cases: | ||||
| 
 | ||||
| 1. Access Tracking: The SPTE is not present, but it is marked for access | ||||
|    tracking i.e. the SPTE_SPECIAL_MASK is set. That means we need to | ||||
|    restore the saved R/X bits. This is described in more detail later below. | ||||
| 
 | ||||
| 2. Write-Protection: The SPTE is present and the fault is | ||||
|    caused by write-protect. That means we just need to change the W bit of | ||||
|    the spte. | ||||
| 
 | ||||
| What we use to avoid all the races is the SPTE_HOST_WRITEABLE bit and | ||||
| SPTE_MMU_WRITEABLE bit on the spte: | ||||
| 
 | ||||
| - SPTE_HOST_WRITEABLE means the gfn is writable on host. | ||||
| - SPTE_MMU_WRITEABLE means the gfn is writable on mmu. The bit is set when | ||||
|   the gfn is writable on guest mmu and it is not write-protected by shadow | ||||
|   page write-protection. | ||||
| 
 | ||||
| On fast page fault path, we will use cmpxchg to atomically set the spte W | ||||
| bit if spte.SPTE_HOST_WRITEABLE = 1 and spte.SPTE_WRITE_PROTECT = 1, or | ||||
| restore the saved R/X bits if VMX_EPT_TRACK_ACCESS mask is set, or both. This | ||||
| is safe because any change to these bits can be detected by cmpxchg. | ||||
| 
 | ||||
| But we need to carefully check these cases: | ||||
| 
 | ||||
| 1) The mapping from gfn to pfn | ||||
| 
 | ||||
| The mapping from gfn to pfn may be changed since we can only ensure the pfn | ||||
| is not changed during cmpxchg. This is an ABA problem; for example, the case | ||||
| below can happen: | ||||
| 
 | ||||
| +------------------------------------------------------------------------+ | ||||
| | At the beginning::                                                     | | ||||
| |                                                                        | | ||||
| |	gpte = gfn1                                                      | | ||||
| |	gfn1 is mapped to pfn1 on host                                   | | ||||
| |	spte is the shadow page table entry corresponding with gpte and  | | ||||
| |	spte = pfn1                                                      | | ||||
| +------------------------------------------------------------------------+ | ||||
| | On fast page fault path:                                               | | ||||
| +------------------------------------+-----------------------------------+ | ||||
| | CPU 0:                             | CPU 1:                            | | ||||
| +------------------------------------+-----------------------------------+ | ||||
| | ::                                 |                                   | | ||||
| |                                    |                                   | | ||||
| |   old_spte = *spte;                |                                   | | ||||
| +------------------------------------+-----------------------------------+ | ||||
| |                                    | pfn1 is swapped out::             | | ||||
| |                                    |                                   | | ||||
| |                                    |    spte = 0;                      | | ||||
| |                                    |                                   | | ||||
| |                                    | pfn1 is re-alloced for gfn2.      | | ||||
| |                                    |                                   | | ||||
| |                                    | gpte is changed to point to       | | ||||
| |                                    | gfn2 by the guest::               | | ||||
| |                                    |                                   | | ||||
| |                                    |    spte = pfn1;                   | | ||||
| +------------------------------------+-----------------------------------+ | ||||
| | ::                                                                     | | ||||
| |                                                                        | | ||||
| |   if (cmpxchg(spte, old_spte, old_spte+W)                              | | ||||
| |	mark_page_dirty(vcpu->kvm, gfn1)                                 | | ||||
| |            OOPS!!!                                                     | | ||||
| +------------------------------------------------------------------------+ | ||||
| 
 | ||||
| We dirty-log for gfn1, that means gfn2 is lost in dirty-bitmap. | ||||
| 
 | ||||
| For direct sp, we can easily avoid it since the spte of direct sp is fixed | ||||
| to gfn. For indirect sp, before we do cmpxchg, we call gfn_to_pfn_atomic() | ||||
| to pin gfn to pfn, because after gfn_to_pfn_atomic(): | ||||
| 
 | ||||
| - We have held the refcount of pfn that means the pfn can not be freed and | ||||
|   be reused for another gfn. | ||||
| - The pfn is writable that means it can not be shared between different gfns | ||||
|   by KSM. | ||||
| 
 | ||||
| Then, we can ensure the dirty bitmap is correctly set for a gfn. | ||||
| 
 | ||||
| Currently, to simplify things, we disable fast page fault for | ||||
| indirect shadow page. | ||||
| 
 | ||||
| 2) Dirty bit tracking | ||||
| 
 | ||||
| In the original code, the spte can be fast updated (non-atomically) if the | ||||
| spte is read-only and the Accessed bit has already been set since the | ||||
| Accessed bit and Dirty bit can not be lost. | ||||
| 
 | ||||
| But it is not true after fast page fault since the spte can be marked | ||||
| writable between reading spte and updating spte, as in the case below: | ||||
| 
 | ||||
| +------------------------------------------------------------------------+ | ||||
| | At the beginning::                                                     | | ||||
| |                                                                        | | ||||
| |	spte.W = 0                                                       | | ||||
| |	spte.Accessed = 1                                                | | ||||
| +------------------------------------+-----------------------------------+ | ||||
| | CPU 0:                             | CPU 1:                            | | ||||
| +------------------------------------+-----------------------------------+ | ||||
| | In mmu_spte_clear_track_bits()::   |                                   | | ||||
| |                                    |                                   | | ||||
| |  old_spte = *spte;                 |                                   | | ||||
| |                                    |                                   | | ||||
| |                                    |                                   | | ||||
| |  /* 'if' condition is satisfied. */|                                   | | ||||
| |  if (old_spte.Accessed == 1 &&     |                                   | | ||||
| |       old_spte.W == 0)             |                                   | | ||||
| |     spte = 0ull;                   |                                   | | ||||
| +------------------------------------+-----------------------------------+ | ||||
| |                                    | on fast page fault path::         | | ||||
| |                                    |                                   | | ||||
| |                                    |    spte.W = 1                     | | ||||
| |                                    |                                   | | ||||
| |                                    | memory write on the spte::        | | ||||
| |                                    |                                   | | ||||
| |                                    |    spte.Dirty = 1                 | | ||||
| +------------------------------------+-----------------------------------+ | ||||
| |  ::                                |                                   | | ||||
| |                                    |                                   | | ||||
| |   else                             |                                   | | ||||
| |     old_spte = xchg(spte, 0ull)    |                                   | | ||||
| |   if (old_spte.Accessed == 1)      |                                   | | ||||
| |     kvm_set_pfn_accessed(spte.pfn);|                                   | | ||||
| |   if (old_spte.Dirty == 1)         |                                   | | ||||
| |     kvm_set_pfn_dirty(spte.pfn);   |                                   | | ||||
| |     OOPS!!!                        |                                   | | ||||
| +------------------------------------+-----------------------------------+ | ||||
| 
 | ||||
| The Dirty bit is lost in this case. | ||||
| 
 | ||||
| In order to avoid this kind of issue, we always treat the spte as "volatile" | ||||
| if it can be updated out of mmu-lock, see spte_has_volatile_bits(), it means, | ||||
| the spte is always atomically updated in this case. | ||||
| 
 | ||||
| 3) flush tlbs due to spte updated | ||||
| 
 | ||||
| If the spte is updated from writable to readonly, we should flush all TLBs, | ||||
| otherwise rmap_write_protect will find a read-only spte, even though the | ||||
| writable spte might be cached on a CPU's TLB. | ||||
| 
 | ||||
| As mentioned before, the spte can be updated to writable out of mmu-lock on | ||||
| fast page fault path. In order to easily audit the path, we check whether TLBs | ||||
| need to be flushed for this reason in mmu_spte_update() since this is a common | ||||
| function to update spte (present -> present). | ||||
| 
 | ||||
| Since the spte is "volatile" if it can be updated out of mmu-lock, we always | ||||
| atomically update the spte, so the race caused by fast page fault is avoided. | ||||
| See the comments in spte_has_volatile_bits() and mmu_spte_update(). | ||||
| 
 | ||||
| Lockless Access Tracking: | ||||
| 
 | ||||
| This is used for Intel CPUs that are using EPT but do not support the EPT A/D | ||||
| bits. In this case, when the KVM MMU notifier is called to track accesses to a | ||||
| page (via kvm_mmu_notifier_clear_flush_young), it marks the PTE as not-present | ||||
| by clearing the RWX bits in the PTE and storing the original R & X bits in | ||||
| some unused/ignored bits. In addition, the SPTE_SPECIAL_MASK is also set on the | ||||
| PTE (using the ignored bit 62). When the VM tries to access the page later on, | ||||
| a fault is generated and the fast page fault mechanism described above is used | ||||
| to atomically restore the PTE to a Present state. The W bit is not saved when | ||||
| the PTE is marked for access tracking and during restoration to the Present | ||||
| state, the W bit is set depending on whether or not it was a write access. If | ||||
| it wasn't, then the W bit will remain clear until a write access happens, at | ||||
| which time it will be set using the Dirty tracking mechanism described above. | ||||
| 
 | ||||
| 3. Reference | ||||
| ------------ | ||||
| 
 | ||||
| :Name:		kvm_lock | ||||
| :Type:		mutex | ||||
| :Arch:		any | ||||
| :Protects:	- vm_list | ||||
| 
 | ||||
| :Name:		kvm_count_lock | ||||
| :Type:		raw_spinlock_t | ||||
| :Arch:		any | ||||
| :Protects:	- hardware virtualization enable/disable | ||||
| :Comment:	'raw' because hardware enabling/disabling must be atomic with | ||||
| 		respect to migration. | ||||
| 
 | ||||
| :Name:		kvm_arch::tsc_write_lock | ||||
| :Type:		raw_spinlock | ||||
| :Arch:		x86 | ||||
| :Protects:	- kvm_arch::{last_tsc_write,last_tsc_nsec,last_tsc_offset} | ||||
| 		- tsc offset in vmcb | ||||
| :Comment:	'raw' because updating the tsc offsets must not be preempted. | ||||
| 
 | ||||
| :Name:		kvm->mmu_lock | ||||
| :Type:		spinlock_t | ||||
| :Arch:		any | ||||
| :Protects:	- shadow page/shadow tlb entry | ||||
| :Comment:	it is a spinlock since it is used in mmu notifier. | ||||
| 
 | ||||
| :Name:		kvm->srcu | ||||
| :Type:		srcu lock | ||||
| :Arch:		any | ||||
| :Protects:	- kvm->memslots | ||||
| 		- kvm->buses | ||||
| :Comment:	The srcu read lock must be held while accessing memslots (e.g. | ||||
| 		when using gfn_to_* functions) and while accessing in-kernel | ||||
| 		MMIO/PIO address->device structure mapping (kvm->buses). | ||||
| 		The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu | ||||
| 		if it is needed by multiple functions. | ||||
| 
 | ||||
| :Name:		blocked_vcpu_on_cpu_lock | ||||
| :Type:		spinlock_t | ||||
| :Arch:		x86 | ||||
| :Protects:	blocked_vcpu_on_cpu | ||||
| :Comment:	This is a per-CPU lock and it is used for VT-d posted-interrupts. | ||||
| 		When VT-d posted-interrupts is supported and the VM has assigned | ||||
| 		devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu | ||||
| 		protected by blocked_vcpu_on_cpu_lock. When VT-d hardware issues | ||||
| 		a wakeup notification event because external interrupts from the | ||||
| 		assigned devices have happened, we find the vCPU on the list | ||||
| 		and wake it up. | ||||
| @ -1,215 +0,0 @@ | ||||
| KVM Lock Overview | ||||
| ================= | ||||
| 
 | ||||
| 1. Acquisition Orders | ||||
| --------------------- | ||||
| 
 | ||||
| The acquisition orders for mutexes are as follows: | ||||
| 
 | ||||
| - kvm->lock is taken outside vcpu->mutex | ||||
| 
 | ||||
| - kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock | ||||
| 
 | ||||
| - kvm->slots_lock is taken outside kvm->irq_lock, though acquiring | ||||
|   them together is quite rare. | ||||
| 
 | ||||
| On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock. | ||||
| 
 | ||||
| Everything else is a leaf: no other lock is taken inside the critical | ||||
| sections. | ||||
| 
 | ||||
| 2: Exception | ||||
| ------------ | ||||
| 
 | ||||
| Fast page fault: | ||||
| 
 | ||||
| Fast page fault is the fast path which fixes the guest page fault out of | ||||
| the mmu-lock on x86. Currently, the page fault can be fast in one of the | ||||
| following two cases: | ||||
| 
 | ||||
| 1. Access Tracking: The SPTE is not present, but it is marked for access | ||||
| tracking i.e. the SPTE_SPECIAL_MASK is set. That means we need to | ||||
| restore the saved R/X bits. This is described in more detail later below. | ||||
| 
 | ||||
| 2. Write-Protection: The SPTE is present and the fault is | ||||
| caused by write-protect. That means we just need to change the W bit of the  | ||||
| spte. | ||||
| 
 | ||||
| What we use to avoid all the race is the SPTE_HOST_WRITEABLE bit and | ||||
| SPTE_MMU_WRITEABLE bit on the spte: | ||||
| - SPTE_HOST_WRITEABLE means the gfn is writable on host. | ||||
| - SPTE_MMU_WRITEABLE means the gfn is writable on mmu. The bit is set when | ||||
|   the gfn is writable on guest mmu and it is not write-protected by shadow | ||||
|   page write-protection. | ||||
| 
 | ||||
| On fast page fault path, we will use cmpxchg to atomically set the spte W | ||||
| bit if spte.SPTE_HOST_WRITEABLE = 1 and spte.SPTE_WRITE_PROTECT = 1, or  | ||||
| restore the saved R/X bits if VMX_EPT_TRACK_ACCESS mask is set, or both. This | ||||
| is safe because whenever changing these bits can be detected by cmpxchg. | ||||
| 
 | ||||
| But we need carefully check these cases: | ||||
| 1): The mapping from gfn to pfn | ||||
| The mapping from gfn to pfn may be changed since we can only ensure the pfn | ||||
| is not changed during cmpxchg. This is a ABA problem, for example, below case | ||||
| will happen: | ||||
| 
 | ||||
| At the beginning: | ||||
| gpte = gfn1 | ||||
| gfn1 is mapped to pfn1 on host | ||||
| spte is the shadow page table entry corresponding with gpte and | ||||
| spte = pfn1 | ||||
| 
 | ||||
|    VCPU 0                           VCPU0 | ||||
| on fast page fault path: | ||||
| 
 | ||||
|    old_spte = *spte; | ||||
|                                  pfn1 is swapped out: | ||||
|                                     spte = 0; | ||||
| 
 | ||||
|                                  pfn1 is re-alloced for gfn2. | ||||
| 
 | ||||
|                                  gpte is changed to point to | ||||
|                                  gfn2 by the guest: | ||||
|                                     spte = pfn1; | ||||
| 
 | ||||
|    if (cmpxchg(spte, old_spte, old_spte+W) | ||||
| 	mark_page_dirty(vcpu->kvm, gfn1) | ||||
|              OOPS!!! | ||||
| 
 | ||||
| We dirty-log for gfn1, that means gfn2 is lost in dirty-bitmap. | ||||
| 
 | ||||
| For direct sp, we can easily avoid it since the spte of direct sp is fixed | ||||
| to gfn. For indirect sp, before we do cmpxchg, we call gfn_to_pfn_atomic() | ||||
| to pin gfn to pfn, because after gfn_to_pfn_atomic(): | ||||
| - We have held the refcount of pfn that means the pfn can not be freed and | ||||
|   be reused for another gfn. | ||||
| - The pfn is writable that means it can not be shared between different gfns | ||||
|   by KSM. | ||||
| 
 | ||||
| Then, we can ensure the dirty bitmaps is correctly set for a gfn. | ||||
| 
 | ||||
| Currently, to simplify the whole things, we disable fast page fault for | ||||
| indirect shadow page. | ||||
| 
 | ||||
| 2): Dirty bit tracking | ||||
| In the origin code, the spte can be fast updated (non-atomically) if the | ||||
| spte is read-only and the Accessed bit has already been set since the | ||||
| Accessed bit and Dirty bit can not be lost. | ||||
| 
 | ||||
| But it is not true after fast page fault since the spte can be marked | ||||
| writable between reading spte and updating spte. Like below case: | ||||
| 
 | ||||
| At the beginning: | ||||
| spte.W = 0 | ||||
| spte.Accessed = 1 | ||||
| 
 | ||||
|    VCPU 0                                       VCPU0 | ||||
| In mmu_spte_clear_track_bits(): | ||||
| 
 | ||||
|    old_spte = *spte; | ||||
| 
 | ||||
|    /* 'if' condition is satisfied. */ | ||||
|    if (old_spte.Accessed == 1 && | ||||
|         old_spte.W == 0) | ||||
|       spte = 0ull; | ||||
|                                          on fast page fault path: | ||||
|                                              spte.W = 1 | ||||
|                                          memory write on the spte: | ||||
|                                              spte.Dirty = 1 | ||||
| 
 | ||||
| 
 | ||||
|    else | ||||
|       old_spte = xchg(spte, 0ull) | ||||
| 
 | ||||
| 
 | ||||
|    if (old_spte.Accessed == 1) | ||||
|       kvm_set_pfn_accessed(spte.pfn); | ||||
|    if (old_spte.Dirty == 1) | ||||
|       kvm_set_pfn_dirty(spte.pfn); | ||||
|       OOPS!!! | ||||
| 
 | ||||
| The Dirty bit is lost in this case. | ||||
| 
 | ||||
| In order to avoid this kind of issue, we always treat the spte as "volatile" | ||||
| if it can be updated out of mmu-lock, see spte_has_volatile_bits(), it means, | ||||
| the spte is always atomically updated in this case. | ||||
| 
 | ||||
| 3): flush tlbs due to spte updated | ||||
| If the spte is updated from writable to readonly, we should flush all TLBs, | ||||
| otherwise rmap_write_protect will find a read-only spte, even though the | ||||
| writable spte might be cached on a CPU's TLB. | ||||
| 
 | ||||
| As mentioned before, the spte can be updated to writable out of mmu-lock on | ||||
| fast page fault path, in order to easily audit the path, we see if TLBs need | ||||
| be flushed caused by this reason in mmu_spte_update() since this is a common | ||||
| function to update spte (present -> present). | ||||
| 
 | ||||
| Since the spte is "volatile" if it can be updated out of mmu-lock, we always | ||||
| atomically update the spte, the race caused by fast page fault can be avoided, | ||||
| See the comments in spte_has_volatile_bits() and mmu_spte_update(). | ||||
| 
 | ||||
| Lockless Access Tracking: | ||||
| 
 | ||||
| This is used for Intel CPUs that are using EPT but do not support the EPT A/D | ||||
| bits. In this case, when the KVM MMU notifier is called to track accesses to a | ||||
| page (via kvm_mmu_notifier_clear_flush_young), it marks the PTE as not-present | ||||
| by clearing the RWX bits in the PTE and storing the original R & X bits in | ||||
| some unused/ignored bits. In addition, the SPTE_SPECIAL_MASK is also set on the | ||||
| PTE (using the ignored bit 62). When the VM tries to access the page later on, | ||||
| a fault is generated and the fast page fault mechanism described above is used | ||||
| to atomically restore the PTE to a Present state. The W bit is not saved when | ||||
| the PTE is marked for access tracking and during restoration to the Present | ||||
| state, the W bit is set depending on whether or not it was a write access. If | ||||
| it wasn't, then the W bit will remain clear until a write access happens, at  | ||||
| which time it will be set using the Dirty tracking mechanism described above. | ||||
| 
 | ||||
| 3. Reference | ||||
| ------------ | ||||
| 
 | ||||
| Name:		kvm_lock | ||||
| Type:		mutex | ||||
| Arch:		any | ||||
| Protects:	- vm_list | ||||
| 
 | ||||
| Name:		kvm_count_lock | ||||
| Type:		raw_spinlock_t | ||||
| Arch:		any | ||||
| Protects:	- hardware virtualization enable/disable | ||||
| Comment:	'raw' because hardware enabling/disabling must be atomic /wrt | ||||
| 		migration. | ||||
| 
 | ||||
| Name:		kvm_arch::tsc_write_lock | ||||
| Type:		raw_spinlock | ||||
| Arch:		x86 | ||||
| Protects:	- kvm_arch::{last_tsc_write,last_tsc_nsec,last_tsc_offset} | ||||
| 		- tsc offset in vmcb | ||||
| Comment:	'raw' because updating the tsc offsets must not be preempted. | ||||
| 
 | ||||
| Name:		kvm->mmu_lock | ||||
| Type:		spinlock_t | ||||
| Arch:		any | ||||
| Protects:	-shadow page/shadow tlb entry | ||||
| Comment:	it is a spinlock since it is used in mmu notifier. | ||||
| 
 | ||||
| Name:		kvm->srcu | ||||
| Type:		srcu lock | ||||
| Arch:		any | ||||
| Protects:	- kvm->memslots | ||||
| 		- kvm->buses | ||||
| Comment:	The srcu read lock must be held while accessing memslots (e.g. | ||||
| 		when using gfn_to_* functions) and while accessing in-kernel | ||||
| 		MMIO/PIO address->device structure mapping (kvm->buses). | ||||
| 		The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu | ||||
| 		if it is needed by multiple functions. | ||||
| 
 | ||||
| Name:		blocked_vcpu_on_cpu_lock | ||||
| Type:		spinlock_t | ||||
| Arch:		x86 | ||||
| Protects:	blocked_vcpu_on_cpu | ||||
| Comment:	This is a per-CPU lock and it is used for VT-d posted-interrupts. | ||||
| 		When VT-d posted-interrupts is supported and the VM has assigned | ||||
| 		devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu | ||||
| 		protected by blocked_vcpu_on_cpu_lock, when VT-d hardware issues | ||||
| 		wakeup notification event since external interrupts from the | ||||
| 		assigned devices happens, we will find the vCPU on the list to | ||||
| 		wakeup. | ||||
| @ -1,3 +1,6 @@ | ||||
| .. SPDX-License-Identifier: GPL-2.0 | ||||
| 
 | ||||
| ====================== | ||||
| The x86 kvm shadow mmu | ||||
| ====================== | ||||
| 
 | ||||
| @ -7,27 +10,37 @@ physical addresses to host physical addresses. | ||||
| 
 | ||||
| The mmu code attempts to satisfy the following requirements: | ||||
| 
 | ||||
| - correctness: the guest should not be able to determine that it is running | ||||
| - correctness: | ||||
| 	       the guest should not be able to determine that it is running | ||||
|                on an emulated mmu except for timing (we attempt to comply | ||||
|                with the specification, not emulate the characteristics of | ||||
|                a particular implementation such as tlb size) | ||||
| - security:    the guest must not be able to touch host memory not assigned | ||||
| - security: | ||||
| 	       the guest must not be able to touch host memory not assigned | ||||
|                to it | ||||
| - performance: minimize the performance penalty imposed by the mmu | ||||
| - scaling:     need to scale to large memory and large vcpu guests | ||||
| - hardware:    support the full range of x86 virtualization hardware | ||||
| - integration: Linux memory management code must be in control of guest memory | ||||
| - performance: | ||||
|                minimize the performance penalty imposed by the mmu | ||||
| - scaling: | ||||
|                need to scale to large memory and large vcpu guests | ||||
| - hardware: | ||||
|                support the full range of x86 virtualization hardware | ||||
| - integration: | ||||
|                Linux memory management code must be in control of guest memory | ||||
|                so that swapping, page migration, page merging, transparent | ||||
|                hugepages, and similar features work without change | ||||
| - dirty tracking: report writes to guest memory to enable live migration | ||||
| - dirty tracking: | ||||
|                report writes to guest memory to enable live migration | ||||
|                and framebuffer-based displays | ||||
| - footprint:   keep the amount of pinned kernel memory low (most memory | ||||
| - footprint: | ||||
|                keep the amount of pinned kernel memory low (most memory | ||||
|                should be shrinkable) | ||||
| - reliability:  avoid multipage or GFP_ATOMIC allocations | ||||
| - reliability: | ||||
|                avoid multipage or GFP_ATOMIC allocations | ||||
| 
 | ||||
| Acronyms | ||||
| ======== | ||||
| 
 | ||||
| ====  ==================================================================== | ||||
| pfn   host page frame number | ||||
| hpa   host physical address | ||||
| hva   host virtual address | ||||
| @ -41,6 +54,7 @@ pte   page table entry (used also to refer generically to paging structure | ||||
| gpte  guest pte (referring to gfns) | ||||
| spte  shadow pte (referring to pfns) | ||||
| tdp   two dimensional paging (vendor neutral term for NPT and EPT) | ||||
| ====  ==================================================================== | ||||
| 
 | ||||
| Virtual and real hardware supported | ||||
| =================================== | ||||
| @ -90,11 +104,13 @@ Events | ||||
| The mmu is driven by events, some from the guest, some from the host. | ||||
| 
 | ||||
| Guest generated events: | ||||
| 
 | ||||
| - writes to control registers (especially cr3) | ||||
| - invlpg/invlpga instruction execution | ||||
| - access to missing or protected translations | ||||
| 
 | ||||
| Host generated events: | ||||
| 
 | ||||
| - changes in the gpa->hpa translation (either through gpa->hva changes or | ||||
|   through hva->hpa changes) | ||||
| - memory pressure (the shrinker) | ||||
| @ -117,16 +133,19 @@ Leaf ptes point at guest pages. | ||||
| The following table shows translations encoded by leaf ptes, with higher-level | ||||
| translations in parentheses: | ||||
| 
 | ||||
|  Non-nested guests: | ||||
|  Non-nested guests:: | ||||
| 
 | ||||
|   nonpaging:     gpa->hpa | ||||
|   paging:        gva->gpa->hpa | ||||
|   paging, tdp:   (gva->)gpa->hpa | ||||
|  Nested guests: | ||||
| 
 | ||||
|  Nested guests:: | ||||
| 
 | ||||
|   non-tdp:       ngva->gpa->hpa  (*) | ||||
|   tdp:           (ngva->)ngpa->gpa->hpa | ||||
| 
 | ||||
| (*) the guest hypervisor will encode the ngva->gpa translation into its page | ||||
|     tables if npt is not present | ||||
|   (*) the guest hypervisor will encode the ngva->gpa translation into its page | ||||
|       tables if npt is not present | ||||
| 
 | ||||
| Shadow pages contain the following information: | ||||
|   role.level: | ||||
| @ -291,28 +310,41 @@ Handling a page fault is performed as follows: | ||||
| 
 | ||||
|  - if the RSV bit of the error code is set, the page fault is caused by guest | ||||
|    accessing MMIO and cached MMIO information is available. | ||||
| 
 | ||||
|    - walk shadow page table | ||||
|    - check for valid generation number in the spte (see "Fast invalidation of | ||||
|      MMIO sptes" below) | ||||
|    - cache the information to vcpu->arch.mmio_gva, vcpu->arch.mmio_access and | ||||
|      vcpu->arch.mmio_gfn, and call the emulator | ||||
| 
 | ||||
|  - If both P bit and R/W bit of error code are set, this could possibly | ||||
|    be handled as a "fast page fault" (fixed without taking the MMU lock).  See | ||||
|    the description in Documentation/virt/kvm/locking.rst. | ||||
| 
 | ||||
|  - if needed, walk the guest page tables to determine the guest translation | ||||
|    (gva->gpa or ngpa->gpa) | ||||
| 
 | ||||
|    - if permissions are insufficient, reflect the fault back to the guest | ||||
| 
 | ||||
|  - determine the host page | ||||
| 
 | ||||
|    - if this is an mmio request, there is no host page; cache the info to | ||||
|      vcpu->arch.mmio_gva, vcpu->arch.mmio_access and vcpu->arch.mmio_gfn | ||||
| 
 | ||||
|  - walk the shadow page table to find the spte for the translation, | ||||
|    instantiating missing intermediate page tables as necessary | ||||
| 
 | ||||
|    - If this is an mmio request, cache the mmio info to the spte and set some | ||||
|      reserved bit on the spte (see callers of kvm_mmu_set_mmio_spte_mask) | ||||
| 
 | ||||
|  - try to unsynchronize the page | ||||
| 
 | ||||
|    - if successful, we can let the guest continue and modify the gpte | ||||
| 
 | ||||
|  - emulate the instruction | ||||
| 
 | ||||
|    - if failed, unshadow the page and let the guest continue | ||||
| 
 | ||||
|  - update any translations that were modified by the instruction | ||||
| 
 | ||||
| invlpg handling: | ||||
| @ -324,10 +356,12 @@ invlpg handling: | ||||
| Guest control register updates: | ||||
| 
 | ||||
| - mov to cr3 | ||||
| 
 | ||||
|   - look up new shadow roots | ||||
|   - synchronize newly reachable shadow pages | ||||
| 
 | ||||
| - mov to cr0/cr4/efer | ||||
| 
 | ||||
|   - set up mmu context for new paging mode | ||||
|   - look up new shadow roots | ||||
|   - synchronize newly reachable shadow pages | ||||
| @ -358,6 +392,7 @@ on fault type: | ||||
| (user write faults generate a #PF) | ||||
| 
 | ||||
| In the first case there are two additional complications: | ||||
| 
 | ||||
| - if CR4.SMEP is enabled: since we've turned the page into a kernel page, | ||||
|   the kernel may now execute it.  We handle this by also setting spte.nx. | ||||
|   If we get a user fetch or read fault, we'll change spte.u=1 and | ||||
| @ -446,4 +481,3 @@ Further reading | ||||
| 
 | ||||
| - NPT presentation from KVM Forum 2008 | ||||
|   http://www.linux-kvm.org/images/c/c8/KvmForum2008%24kdf2008_21.pdf | ||||
| 
 | ||||
| @ -1,6 +1,10 @@ | ||||
| KVM-specific MSRs. | ||||
| Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010 | ||||
| ===================================================== | ||||
| .. SPDX-License-Identifier: GPL-2.0 | ||||
| 
 | ||||
| ================= | ||||
| KVM-specific MSRs | ||||
| ================= | ||||
| 
 | ||||
| :Author: Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010 | ||||
| 
 | ||||
| KVM makes use of some custom MSRs to service some requests. | ||||
| 
 | ||||
| @ -9,34 +13,39 @@ Custom MSRs have a range reserved for them, that goes from | ||||
| but they are deprecated and their use is discouraged. | ||||
| 
 | ||||
| Custom MSR list | ||||
| -------- | ||||
| --------------- | ||||
| 
 | ||||
| The current supported Custom MSR list is: | ||||
| 
 | ||||
| MSR_KVM_WALL_CLOCK_NEW:   0x4b564d00 | ||||
| MSR_KVM_WALL_CLOCK_NEW: | ||||
| 	0x4b564d00 | ||||
| 
 | ||||
| 	data: 4-byte alignment physical address of a memory area which must be | ||||
| data: | ||||
| 	4-byte alignment physical address of a memory area which must be | ||||
| 	in guest RAM. This memory is expected to hold a copy of the following | ||||
| 	structure: | ||||
| 	structure:: | ||||
| 
 | ||||
| 	struct pvclock_wall_clock { | ||||
| 	 struct pvclock_wall_clock { | ||||
| 		u32   version; | ||||
| 		u32   sec; | ||||
| 		u32   nsec; | ||||
| 	} __attribute__((__packed__)); | ||||
| 	  } __attribute__((__packed__)); | ||||
| 
 | ||||
| 	whose data will be filled in by the hypervisor. The hypervisor is only | ||||
| 	guaranteed to update this data at the moment of MSR write. | ||||
| 	Users that want to reliably query this information more than once have | ||||
| 	to write more than once to this MSR. Fields have the following meanings: | ||||
| 
 | ||||
| 		version: guest has to check version before and after grabbing | ||||
| 	version: | ||||
| 		guest has to check version before and after grabbing | ||||
| 		time information and check that they are both equal and even. | ||||
| 		An odd version indicates an in-progress update. | ||||
| 
 | ||||
| 		sec: number of seconds for wallclock at time of boot. | ||||
| 	sec: | ||||
| 		 number of seconds for wallclock at time of boot. | ||||
| 
 | ||||
| 		nsec: number of nanoseconds for wallclock at time of boot. | ||||
| 	nsec: | ||||
| 		 number of nanoseconds for wallclock at time of boot. | ||||
| 
 | ||||
| 	In order to get the current wallclock time, the system_time from | ||||
| 	MSR_KVM_SYSTEM_TIME_NEW needs to be added. | ||||
| @ -47,13 +56,15 @@ MSR_KVM_WALL_CLOCK_NEW:   0x4b564d00 | ||||
| 	Availability of this MSR must be checked via bit 3 in 0x40000001 cpuid | ||||
| 	leaf prior to usage. | ||||
| 
 | ||||
| MSR_KVM_SYSTEM_TIME_NEW:  0x4b564d01 | ||||
| MSR_KVM_SYSTEM_TIME_NEW: | ||||
| 	0x4b564d01 | ||||
| 
 | ||||
| 	data: 4-byte aligned physical address of a memory area which must be in | ||||
| data: | ||||
| 	4-byte aligned physical address of a memory area which must be in | ||||
| 	guest RAM, plus an enable bit in bit 0. This memory is expected to hold | ||||
| 	a copy of the following structure: | ||||
| 	a copy of the following structure:: | ||||
| 
 | ||||
| 	struct pvclock_vcpu_time_info { | ||||
| 	  struct pvclock_vcpu_time_info { | ||||
| 		u32   version; | ||||
| 		u32   pad0; | ||||
| 		u64   tsc_timestamp; | ||||
| @ -62,7 +73,7 @@ MSR_KVM_SYSTEM_TIME_NEW:  0x4b564d01 | ||||
| 		s8    tsc_shift; | ||||
| 		u8    flags; | ||||
| 		u8    pad[2]; | ||||
| 	} __attribute__((__packed__)); /* 32 bytes */ | ||||
| 	  } __attribute__((__packed__)); /* 32 bytes */ | ||||
| 
 | ||||
| 	whose data will be filled in by the hypervisor periodically. Only one | ||||
| 	write, or registration, is needed for each VCPU. The interval between | ||||
| @ -72,23 +83,28 @@ MSR_KVM_SYSTEM_TIME_NEW:  0x4b564d01 | ||||
| 
 | ||||
| 	Fields have the following meanings: | ||||
| 
 | ||||
| 		version: guest has to check version before and after grabbing | ||||
| 	version: | ||||
| 		guest has to check version before and after grabbing | ||||
| 		time information and check that they are both equal and even. | ||||
| 		An odd version indicates an in-progress update. | ||||
| 
 | ||||
| 		tsc_timestamp: the tsc value at the current VCPU at the time | ||||
| 	tsc_timestamp: | ||||
| 		the tsc value at the current VCPU at the time | ||||
| 		of the update of this structure. Guests can subtract this value | ||||
| 		from current tsc to derive a notion of elapsed time since the | ||||
| 		structure update. | ||||
| 
 | ||||
| 		system_time: a host notion of monotonic time, including sleep | ||||
| 	system_time: | ||||
| 		a host notion of monotonic time, including sleep | ||||
| 		time at the time this structure was last updated. Unit is | ||||
| 		nanoseconds. | ||||
| 
 | ||||
| 		tsc_to_system_mul: multiplier to be used when converting | ||||
| 	tsc_to_system_mul: | ||||
| 		multiplier to be used when converting | ||||
| 		tsc-related quantity to nanoseconds | ||||
| 
 | ||||
| 		tsc_shift: shift to be used when converting tsc-related | ||||
| 	tsc_shift: | ||||
| 		shift to be used when converting tsc-related | ||||
| 		quantity to nanoseconds. This shift will ensure that | ||||
| 		multiplication with tsc_to_system_mul does not overflow. | ||||
| 		A positive value denotes a left shift, a negative value | ||||
| @ -96,7 +112,7 @@ MSR_KVM_SYSTEM_TIME_NEW:  0x4b564d01 | ||||
| 
 | ||||
| 		The conversion from tsc to nanoseconds involves an additional | ||||
| 		right shift by 32 bits. With this information, guests can | ||||
| 		derive per-CPU time by doing: | ||||
| 		derive per-CPU time by doing:: | ||||
| 
 | ||||
| 			time = (current_tsc - tsc_timestamp) | ||||
| 			if (tsc_shift >= 0) | ||||
| @ -106,29 +122,34 @@ MSR_KVM_SYSTEM_TIME_NEW:  0x4b564d01 | ||||
| 			time = (time * tsc_to_system_mul) >> 32 | ||||
| 			time = time + system_time | ||||
| 
 | ||||
| 		flags: bits in this field indicate extended capabilities | ||||
| 	flags: | ||||
| 		bits in this field indicate extended capabilities | ||||
| 		coordinated between the guest and the hypervisor. Availability | ||||
| 		of specific flags has to be checked in 0x40000001 cpuid leaf. | ||||
| 		Current flags are: | ||||
| 
 | ||||
| 		 flag bit   | cpuid bit    | meaning | ||||
| 		------------------------------------------------------------- | ||||
| 			    |	           | time measures taken across | ||||
| 		     0      |	   24      | multiple cpus are guaranteed to | ||||
| 			    |		   | be monotonic | ||||
| 		------------------------------------------------------------- | ||||
| 			    |		   | guest vcpu has been paused by | ||||
| 		     1	    |	  N/A	   | the host | ||||
| 			    |		   | See 4.70 in api.txt | ||||
| 		------------------------------------------------------------- | ||||
| 
 | ||||
| 		+-----------+--------------+----------------------------------+ | ||||
| 		| flag bit  | cpuid bit    | meaning			      | | ||||
| 		+-----------+--------------+----------------------------------+ | ||||
| 		|	    |		   | time measures taken across       | | ||||
| 		|    0      |	   24      | multiple cpus are guaranteed to  | | ||||
| 		|	    |		   | be monotonic		      | | ||||
| 		+-----------+--------------+----------------------------------+ | ||||
| 		|	    |		   | guest vcpu has been paused by    | | ||||
| 		|    1	    |	  N/A	   | the host			      | | ||||
| 		|	    |		   | See 4.70 in api.txt	      | | ||||
| 		+-----------+--------------+----------------------------------+ | ||||
| 
 | ||||
| 	Availability of this MSR must be checked via bit 3 in 0x40000001 cpuid | ||||
| 	leaf prior to usage. | ||||
| 
 | ||||
| 
 | ||||
| MSR_KVM_WALL_CLOCK:  0x11 | ||||
| MSR_KVM_WALL_CLOCK: | ||||
| 	0x11 | ||||
| 
 | ||||
| 	data and functioning: same as MSR_KVM_WALL_CLOCK_NEW. Use that instead. | ||||
| data and functioning: | ||||
| 	same as MSR_KVM_WALL_CLOCK_NEW. Use that instead. | ||||
| 
 | ||||
| 	This MSR falls outside the reserved KVM range and may be removed in the | ||||
| 	future. Its usage is deprecated. | ||||
| @ -136,9 +157,11 @@ MSR_KVM_WALL_CLOCK:  0x11 | ||||
| 	Availability of this MSR must be checked via bit 0 in 0x40000001 cpuid | ||||
| 	leaf prior to usage. | ||||
| 
 | ||||
| MSR_KVM_SYSTEM_TIME: 0x12 | ||||
| MSR_KVM_SYSTEM_TIME: | ||||
| 	0x12 | ||||
| 
 | ||||
| 	data and functioning: same as MSR_KVM_SYSTEM_TIME_NEW. Use that instead. | ||||
| data and functioning: | ||||
| 	same as MSR_KVM_SYSTEM_TIME_NEW. Use that instead. | ||||
| 
 | ||||
| 	This MSR falls outside the reserved KVM range and may be removed in the | ||||
| 	future. Its usage is deprecated. | ||||
| @ -146,7 +169,7 @@ MSR_KVM_SYSTEM_TIME: 0x12 | ||||
| 	Availability of this MSR must be checked via bit 0 in 0x40000001 cpuid | ||||
| 	leaf prior to usage. | ||||
| 
 | ||||
| 	The suggested algorithm for detecting kvmclock presence is then: | ||||
| 	The suggested algorithm for detecting kvmclock presence is then:: | ||||
| 
 | ||||
| 		if (!kvm_para_available())    /* refer to cpuid.txt */ | ||||
| 			return NON_PRESENT; | ||||
| @ -163,8 +186,11 @@ MSR_KVM_SYSTEM_TIME: 0x12 | ||||
| 		} else | ||||
| 			return NON_PRESENT; | ||||
| 
 | ||||
| MSR_KVM_ASYNC_PF_EN: 0x4b564d02 | ||||
| 	data: Bits 63-6 hold 64-byte aligned physical address of a | ||||
| MSR_KVM_ASYNC_PF_EN: | ||||
| 	0x4b564d02 | ||||
| 
 | ||||
| data: | ||||
| 	Bits 63-6 hold 64-byte aligned physical address of a | ||||
| 	64 byte memory area which must be in guest RAM and must be | ||||
| 	zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1 | ||||
| 	when asynchronous page faults are enabled on the vcpu 0 when | ||||
| @ -200,20 +226,22 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02 | ||||
| 	Currently type 2 APF will be always delivered on the same vcpu as | ||||
| 	type 1 was, but guest should not rely on that. | ||||
| 
 | ||||
| MSR_KVM_STEAL_TIME: 0x4b564d03 | ||||
| MSR_KVM_STEAL_TIME: | ||||
| 	0x4b564d03 | ||||
| 
 | ||||
| 	data: 64-byte alignment physical address of a memory area which must be | ||||
| data: | ||||
| 	64-byte aligned physical address of a memory area which must be | ||||
| 	in guest RAM, plus an enable bit in bit 0. This memory is expected to | ||||
| 	hold a copy of the following structure: | ||||
| 	hold a copy of the following structure:: | ||||
| 
 | ||||
| 	struct kvm_steal_time { | ||||
| 	  struct kvm_steal_time { | ||||
| 		__u64 steal; | ||||
| 		__u32 version; | ||||
| 		__u32 flags; | ||||
| 		__u8  preempted; | ||||
| 		__u8  u8_pad[3]; | ||||
| 		__u32 pad[11]; | ||||
| 	} | ||||
| 	  } | ||||
| 
 | ||||
| 	whose data will be filled in by the hypervisor periodically. Only one | ||||
| 	write, or registration, is needed for each VCPU. The interval between | ||||
| @ -224,25 +252,32 @@ MSR_KVM_STEAL_TIME: 0x4b564d03 | ||||
| 
 | ||||
| 	Fields have the following meanings: | ||||
| 
 | ||||
| 		version: a sequence counter. In other words, guest has to check | ||||
| 	version: | ||||
| 		a sequence counter. In other words, guest has to check | ||||
| 		this field before and after grabbing time information and make | ||||
| 		sure they are both equal and even. An odd version indicates an | ||||
| 		in-progress update. | ||||
| 
 | ||||
| 		flags: At this point, always zero. May be used to indicate | ||||
| 	flags: | ||||
| 		At this point, always zero. May be used to indicate | ||||
| 		changes in this structure in the future. | ||||
| 
 | ||||
| 		steal: the amount of time in which this vCPU did not run, in | ||||
| 	steal: | ||||
| 		the amount of time in which this vCPU did not run, in | ||||
| 		nanoseconds. Time during which the vcpu is idle will not be | ||||
| 		reported as steal time. | ||||
| 
 | ||||
| 		preempted: indicate the vCPU who owns this struct is running or | ||||
| 	preempted: | ||||
| 		indicates whether the vCPU that owns this struct is running or | ||||
| 		not. Non-zero values mean the vCPU has been preempted. Zero | ||||
| 		means the vCPU is not preempted. NOTE, it is always zero if the | ||||
| 		hypervisor doesn't support this field. | ||||
| 
 | ||||
| MSR_KVM_EOI_EN: 0x4b564d04 | ||||
| 	data: Bit 0 is 1 when PV end of interrupt is enabled on the vcpu; 0 | ||||
| MSR_KVM_EOI_EN: | ||||
| 	0x4b564d04 | ||||
| 
 | ||||
| data: | ||||
| 	Bit 0 is 1 when PV end of interrupt is enabled on the vcpu; 0 | ||||
| 	when disabled.  Bit 1 is reserved and must be zero.  When PV end of | ||||
| 	interrupt is enabled (bit 0 set), bits 63-2 hold a 4-byte aligned | ||||
| 	physical address of a 4 byte memory area which must be in guest RAM and | ||||
| @ -274,11 +309,13 @@ MSR_KVM_EOI_EN: 0x4b564d04 | ||||
| 	clear it using a single CPU instruction, such as test and clear, or | ||||
| 	compare and exchange. | ||||
| 
 | ||||
| MSR_KVM_POLL_CONTROL: 0x4b564d05 | ||||
| MSR_KVM_POLL_CONTROL: | ||||
| 	0x4b564d05 | ||||
| 
 | ||||
| 	Control host-side polling. | ||||
| 
 | ||||
| 	data: Bit 0 enables (1) or disables (0) host-side HLT polling logic. | ||||
| data: | ||||
| 	Bit 0 enables (1) or disables (0) host-side HLT polling logic. | ||||
| 
 | ||||
| 	KVM guests can request the host not to poll on HLT, for example if | ||||
| 	they are performing polling themselves. | ||||
| 
 | ||||
| @ -1,3 +1,6 @@ | ||||
| .. SPDX-License-Identifier: GPL-2.0 | ||||
| 
 | ||||
| ========== | ||||
| Nested VMX | ||||
| ========== | ||||
| 
 | ||||
| @ -41,9 +44,9 @@ No modifications are required to user space (qemu). However, qemu's default | ||||
| emulated CPU type (qemu64) does not list the "VMX" CPU feature, so it must be | ||||
| explicitly enabled, by giving qemu one of the following options: | ||||
| 
 | ||||
|      -cpu host              (emulated CPU has all features of the real CPU) | ||||
|      - -cpu host             (emulated CPU has all features of the real CPU) | ||||
| 
 | ||||
|      -cpu qemu64,+vmx       (add just the vmx feature to a named CPU type) | ||||
|      - -cpu qemu64,+vmx      (add just the vmx feature to a named CPU type) | ||||
| 
 | ||||
| 
 | ||||
| ABIs | ||||
| @ -75,6 +78,8 @@ of this structure changes, this can break live migration across KVM versions. | ||||
| VMCS12_REVISION (from vmx.c) should be changed if struct vmcs12 or its inner | ||||
| struct shadow_vmcs is ever changed. | ||||
| 
 | ||||
| :: | ||||
| 
 | ||||
| 	typedef u64 natural_width; | ||||
| 	struct __packed vmcs12 { | ||||
| 		/* According to the Intel spec, a VMCS region must start with | ||||
| @ -220,21 +225,21 @@ Authors | ||||
| ------- | ||||
| 
 | ||||
| These patches were written by: | ||||
|      Abel Gordon, abelg <at> il.ibm.com | ||||
|      Nadav Har'El, nyh <at> il.ibm.com | ||||
|      Orit Wasserman, oritw <at> il.ibm.com | ||||
|      Ben-Ami Yassor, benami <at> il.ibm.com | ||||
|      Muli Ben-Yehuda, muli <at> il.ibm.com | ||||
|     - Abel Gordon, abelg <at> il.ibm.com | ||||
|     - Nadav Har'El, nyh <at> il.ibm.com | ||||
|     - Orit Wasserman, oritw <at> il.ibm.com | ||||
|     - Ben-Ami Yassor, benami <at> il.ibm.com | ||||
|     - Muli Ben-Yehuda, muli <at> il.ibm.com | ||||
| 
 | ||||
| With contributions by: | ||||
|      Anthony Liguori, aliguori <at> us.ibm.com | ||||
|      Mike Day, mdday <at> us.ibm.com | ||||
|      Michael Factor, factor <at> il.ibm.com | ||||
|      Zvi Dubitzky, dubi <at> il.ibm.com | ||||
|     - Anthony Liguori, aliguori <at> us.ibm.com | ||||
|     - Mike Day, mdday <at> us.ibm.com | ||||
|     - Michael Factor, factor <at> il.ibm.com | ||||
|     - Zvi Dubitzky, dubi <at> il.ibm.com | ||||
| 
 | ||||
| And valuable reviews by: | ||||
|      Avi Kivity, avi <at> redhat.com | ||||
|      Gleb Natapov, gleb <at> redhat.com | ||||
|      Marcelo Tosatti, mtosatti <at> redhat.com | ||||
|      Kevin Tian, kevin.tian <at> intel.com | ||||
|      and others. | ||||
|     - Avi Kivity, avi <at> redhat.com | ||||
|     - Gleb Natapov, gleb <at> redhat.com | ||||
|     - Marcelo Tosatti, mtosatti <at> redhat.com | ||||
|     - Kevin Tian, kevin.tian <at> intel.com | ||||
|     - and others. | ||||
| @ -1,3 +1,6 @@ | ||||
| .. SPDX-License-Identifier: GPL-2.0 | ||||
| 
 | ||||
| ================================= | ||||
| The PPC KVM paravirtual interface | ||||
| ================================= | ||||
| 
 | ||||
| @ -34,8 +37,9 @@ up the hypercall. To call a hypercall, just call these instructions. | ||||
| 
 | ||||
| The parameters are as follows: | ||||
| 
 | ||||
|         ========	================	================ | ||||
| 	Register	IN			OUT | ||||
| 
 | ||||
|         ========	================	================ | ||||
| 	r0		-			volatile | ||||
| 	r3		1st parameter		Return code | ||||
| 	r4		2nd parameter		1st output value | ||||
| @ -47,6 +51,7 @@ The parameters are as follows: | ||||
| 	r10		8th parameter		7th output value | ||||
| 	r11		hypercall number	8th output value | ||||
| 	r12		-			volatile | ||||
|         ========	================	================ | ||||
| 
 | ||||
| Hypercall definitions are shared in generic code, so the same hypercall numbers | ||||
| apply for x86 and powerpc alike with the exception that each KVM hypercall | ||||
| @ -54,11 +59,13 @@ also needs to be ORed with the KVM vendor code which is (42 << 16). | ||||
| 
 | ||||
| Return codes can be as follows: | ||||
| 
 | ||||
| 	====		========================= | ||||
| 	Code		Meaning | ||||
| 
 | ||||
| 	====		========================= | ||||
| 	0		Success | ||||
| 	12		Hypercall not implemented | ||||
| 	<0		Error | ||||
| 	====		========================= | ||||
| 
 | ||||
| The magic page | ||||
| ============== | ||||
| @ -72,7 +79,7 @@ desired location. The first parameter indicates the effective address when the | ||||
| MMU is enabled. The second parameter indicates the address in real mode, if | ||||
| applicable to the target. For now, we always map the page to -4096. This way we | ||||
| can access it using absolute load and store functions. The following | ||||
| instruction reads the first field of the magic page: | ||||
| instruction reads the first field of the magic page:: | ||||
| 
 | ||||
| 	ld	rX, -4096(0) | ||||
| 
 | ||||
| @ -93,8 +100,10 @@ a bitmap of available features inside the magic page. | ||||
| 
 | ||||
| The following enhancements to the magic page are currently available: | ||||
| 
 | ||||
|   ============================  ======================================= | ||||
|   KVM_MAGIC_FEAT_SR		Maps SR registers r/w in the magic page | ||||
|   KVM_MAGIC_FEAT_MAS0_TO_SPRG7	Maps MASn, ESR, PIR and high SPRGs | ||||
|   ============================  ======================================= | ||||
| 
 | ||||
| For enhanced features in the magic page, please check for the existence of the | ||||
| feature before using them! | ||||
| @ -121,8 +130,8 @@ when entering the guest or don't have any impact on the hypervisor's behavior. | ||||
| 
 | ||||
| The following bits are safe to be set inside the guest: | ||||
| 
 | ||||
|   MSR_EE | ||||
|   MSR_RI | ||||
|   - MSR_EE | ||||
|   - MSR_RI | ||||
| 
 | ||||
| If any other bit changes in the MSR, please still use mtmsr(d). | ||||
| 
 | ||||
| @ -138,9 +147,9 @@ guest. Implementing any of those mappings is optional, as the instruction traps | ||||
| also act on the shared page. So calling privileged instructions still works as | ||||
| before. | ||||
| 
 | ||||
| ======================= ================================ | ||||
| From			To | ||||
| ====			== | ||||
| 
 | ||||
| ======================= ================================ | ||||
| mfmsr	rX		ld	rX, magic_page->msr | ||||
| mfsprg	rX, 0		ld	rX, magic_page->sprg0 | ||||
| mfsprg	rX, 1		ld	rX, magic_page->sprg1 | ||||
| @ -173,7 +182,7 @@ mtsrin	rX, rY		b	<special mtsrin section> | ||||
| 
 | ||||
| [BookE only] | ||||
| wrteei	[0|1]		b	<special wrteei section> | ||||
| 
 | ||||
| ======================= ================================ | ||||
| 
 | ||||
| Some instructions require more logic to determine what's going on than a load | ||||
| or store instruction can deliver. To enable patching of those, we keep some | ||||
| @ -191,6 +200,7 @@ for example. | ||||
| 
 | ||||
| Hypercall ABIs in KVM on PowerPC | ||||
| ================================= | ||||
| 
 | ||||
| 1) KVM hypercalls (ePAPR) | ||||
| 
 | ||||
| These are ePAPR compliant hypercall implementations (mentioned above). Even | ||||
| @ -1,3 +1,6 @@ | ||||
| .. SPDX-License-Identifier: GPL-2.0 | ||||
| 
 | ||||
| ================================ | ||||
| Review checklist for kvm patches | ||||
| ================================ | ||||
| 
 | ||||
| @ -1,3 +1,6 @@ | ||||
| .. SPDX-License-Identifier: GPL-2.0 | ||||
| 
 | ||||
| ============================= | ||||
| The s390 DIAGNOSE call on KVM | ||||
| ============================= | ||||
| 
 | ||||
| @ -16,12 +19,12 @@ DIAGNOSE calls by the guest cause a mandatory intercept. This implies | ||||
| all supported DIAGNOSE calls need to be handled by either KVM or its | ||||
| userspace. | ||||
| 
 | ||||
| All DIAGNOSE calls supported by KVM use the RS-a format: | ||||
| All DIAGNOSE calls supported by KVM use the RS-a format:: | ||||
| 
 | ||||
| -------------------------------------- | ||||
| |  '83'  | R1 | R3 | B2 |     D2     | | ||||
| -------------------------------------- | ||||
| 0        8    12   16   20           31 | ||||
|   -------------------------------------- | ||||
|   |  '83'  | R1 | R3 | B2 |     D2     | | ||||
|   -------------------------------------- | ||||
|   0        8    12   16   20           31 | ||||
| 
 | ||||
| The second-operand address (obtained by the base/displacement calculation) | ||||
| is not used to address data. Instead, bits 48-63 of this address specify | ||||
| @ -1,17 +1,21 @@ | ||||
| .. SPDX-License-Identifier: GPL-2.0 | ||||
| 
 | ||||
| 	Timekeeping Virtualization for X86-Based Architectures | ||||
| ====================================================== | ||||
| Timekeeping Virtualization for X86-Based Architectures | ||||
| ====================================================== | ||||
| 
 | ||||
| 	Zachary Amsden <zamsden@redhat.com> | ||||
| 	Copyright (c) 2010, Red Hat.  All rights reserved. | ||||
| :Author: Zachary Amsden <zamsden@redhat.com> | ||||
| :Copyright: (c) 2010, Red Hat.  All rights reserved. | ||||
| 
 | ||||
| 1) Overview | ||||
| 2) Timing Devices | ||||
| 3) TSC Hardware | ||||
| 4) Virtualization Problems | ||||
| .. Contents | ||||
| 
 | ||||
| ========================================================================= | ||||
|    1) Overview | ||||
|    2) Timing Devices | ||||
|    3) TSC Hardware | ||||
|    4) Virtualization Problems | ||||
| 
 | ||||
| 1) Overview | ||||
| 1. Overview | ||||
| =========== | ||||
| 
 | ||||
| One of the most complicated parts of the X86 platform, and specifically, | ||||
| the virtualization of this platform is the plethora of timing devices available | ||||
| @ -27,15 +31,15 @@ The purpose of this document is to collect data and information relevant to | ||||
| timekeeping which may be difficult to find elsewhere, specifically, | ||||
| information relevant to KVM and hardware-based virtualization. | ||||
| 
 | ||||
| ========================================================================= | ||||
| 
 | ||||
| 2) Timing Devices | ||||
| 2. Timing Devices | ||||
| ================= | ||||
| 
 | ||||
| First we discuss the basic hardware devices available.  TSC and the related | ||||
| KVM clock are special enough to warrant a full exposition and are described in | ||||
| the following section. | ||||
| 
 | ||||
| 2.1) i8254 - PIT | ||||
| 2.1. i8254 - PIT | ||||
| ---------------- | ||||
| 
 | ||||
| One of the first timer devices available is the programmable interrupt timer, | ||||
| or PIT.  The PIT has a fixed frequency 1.193182 MHz base clock and three | ||||
| @ -50,13 +54,13 @@ The PIT uses I/O ports 0x40 - 0x43.  Access to the 16-bit counters is done | ||||
| using single or multiple byte access to the I/O ports.  There are 6 modes | ||||
| available, but not all modes are available to all timers, as only timer 2 | ||||
| has a connected gate input, required for modes 1 and 5.  The gate line is | ||||
| controlled by port 61h, bit 0, as illustrated in the following diagram. | ||||
| controlled by port 61h, bit 0, as illustrated in the following diagram:: | ||||
| 
 | ||||
|  --------------             ---------------- | ||||
| |              |           |                | | ||||
| |  1.1932 MHz  |---------->| CLOCK      OUT | ---------> IRQ 0 | ||||
| |    Clock     |   |       |                | | ||||
|  --------------    |    +->| GATE  TIMER 0  | | ||||
|   --------------             ---------------- | ||||
|   |            |           |                | | ||||
|   |  1.1932 MHz|---------->| CLOCK      OUT | ---------> IRQ 0 | ||||
|   |    Clock   |   |       |                | | ||||
|   --------------   |    +->| GATE  TIMER 0  | | ||||
|                    |        ---------------- | ||||
|                    | | ||||
|                    |        ---------------- | ||||
| @ -70,29 +74,33 @@ controlled by port 61h, bit 0, as illustrated in the following diagram. | ||||
|                    |       |                | | ||||
|                    |------>| CLOCK      OUT | ---------> Port 61h, bit 5 | ||||
|                            |                |      | | ||||
| Port 61h, bit 0 ---------->| GATE  TIMER 2  |       \_.----   ____ | ||||
|   Port 61h, bit 0 -------->| GATE  TIMER 2  |       \_.----   ____ | ||||
|                             ----------------         _|    )--|LPF|---Speaker | ||||
|                                                     / *----   \___/ | ||||
| Port 61h, bit 1 -----------------------------------/ | ||||
|   Port 61h, bit 1 ---------------------------------/ | ||||
| 
 | ||||
| The timer modes are now described. | ||||
| 
 | ||||
| Mode 0: Single Timeout.   This is a one-shot software timeout that counts down | ||||
| Mode 0: Single Timeout. | ||||
|  This is a one-shot software timeout that counts down | ||||
|  when the gate is high (always true for timers 0 and 1).  When the count | ||||
|  reaches zero, the output goes high. | ||||
| 
 | ||||
| Mode 1: Triggered One-shot.  The output is initially set high.  When the gate | ||||
| Mode 1: Triggered One-shot. | ||||
|  The output is initially set high.  When the gate | ||||
|  line is set high, a countdown is initiated (which does not stop if the gate is | ||||
|  lowered), during which the output is set low.  When the count reaches zero, | ||||
|  the output goes high. | ||||
| 
 | ||||
| Mode 2: Rate Generator.  The output is initially set high.  When the countdown | ||||
| Mode 2: Rate Generator. | ||||
|  The output is initially set high.  When the countdown | ||||
|  reaches 1, the output goes low for one count and then returns high.  The value | ||||
|  is reloaded and the countdown automatically resumes.  If the gate line goes | ||||
|  low, the count is halted.  If the output is low when the gate is lowered, the | ||||
|  output automatically goes high (this only affects timer 2). | ||||
| 
 | ||||
| Mode 3: Square Wave.   This generates a high / low square wave.  The count | ||||
| Mode 3: Square Wave. | ||||
|  This generates a high / low square wave.  The count | ||||
|  determines the length of the pulse, which alternates between high and low | ||||
|  when zero is reached.  The count only proceeds when gate is high and is | ||||
|  automatically reloaded on reaching zero.  The count is decremented twice at | ||||
| @ -103,12 +111,14 @@ Mode 3: Square Wave.   This generates a high / low square wave.  The count | ||||
|  values are not observed when reading.  This is the intended mode for timer 2, | ||||
|  which generates sine-like tones by low-pass filtering the square wave output. | ||||
| 
 | ||||
| Mode 4: Software Strobe.  After programming this mode and loading the counter, | ||||
| Mode 4: Software Strobe. | ||||
|  After programming this mode and loading the counter, | ||||
|  the output remains high until the counter reaches zero.  Then the output | ||||
|  goes low for 1 clock cycle and returns high.  The counter is not reloaded. | ||||
|  Counting only occurs when gate is high. | ||||
| 
 | ||||
| Mode 5: Hardware Strobe.  After programming and loading the counter, the | ||||
| Mode 5: Hardware Strobe. | ||||
|  After programming and loading the counter, the | ||||
|  output remains high.  When the gate is raised, a countdown is initiated | ||||
|  (which does not stop if the gate is lowered).  When the counter reaches zero, | ||||
|  the output goes low for 1 clock cycle and then returns high.  The counter is | ||||
| @ -118,49 +128,49 @@ In addition to normal binary counting, the PIT supports BCD counting.  The | ||||
| command port, 0x43 is used to set the counter and mode for each of the three | ||||
| timers. | ||||
| 
 | ||||
| PIT commands, issued to port 0x43, using the following bit encoding: | ||||
| PIT commands, issued to port 0x43, using the following bit encoding:: | ||||
| 
 | ||||
| Bit 7-4: Command (See table below) | ||||
| Bit 3-1: Mode (000 = Mode 0, 101 = Mode 5, 11X = undefined) | ||||
| Bit 0  : Binary (0) / BCD (1) | ||||
|   Bit 7-4: Command (See table below) | ||||
|   Bit 3-1: Mode (000 = Mode 0, 101 = Mode 5, 11X = undefined) | ||||
|   Bit 0  : Binary (0) / BCD (1) | ||||
| 
 | ||||
| Command table: | ||||
| Command table:: | ||||
| 
 | ||||
| 0000 - Latch Timer 0 count for port 0x40 | ||||
|   0000 - Latch Timer 0 count for port 0x40 | ||||
| 	sample and hold the count to be read in port 0x40; | ||||
| 	additional commands ignored until counter is read; | ||||
| 	mode bits ignored. | ||||
| 
 | ||||
| 0001 - Set Timer 0 LSB mode for port 0x40 | ||||
|   0001 - Set Timer 0 LSB mode for port 0x40 | ||||
| 	set timer to read LSB only and force MSB to zero; | ||||
| 	mode bits set timer mode | ||||
| 
 | ||||
| 0010 - Set Timer 0 MSB mode for port 0x40 | ||||
|   0010 - Set Timer 0 MSB mode for port 0x40 | ||||
| 	set timer to read MSB only and force LSB to zero; | ||||
| 	mode bits set timer mode | ||||
| 
 | ||||
| 0011 - Set Timer 0 16-bit mode for port 0x40 | ||||
|   0011 - Set Timer 0 16-bit mode for port 0x40 | ||||
| 	set timer to read / write LSB first, then MSB; | ||||
| 	mode bits set timer mode | ||||
| 
 | ||||
| 0100 - Latch Timer 1 count for port 0x41 - as described above | ||||
| 0101 - Set Timer 1 LSB mode for port 0x41 - as described above | ||||
| 0110 - Set Timer 1 MSB mode for port 0x41 - as described above | ||||
| 0111 - Set Timer 1 16-bit mode for port 0x41 - as described above | ||||
|   0100 - Latch Timer 1 count for port 0x41 - as described above | ||||
|   0101 - Set Timer 1 LSB mode for port 0x41 - as described above | ||||
|   0110 - Set Timer 1 MSB mode for port 0x41 - as described above | ||||
|   0111 - Set Timer 1 16-bit mode for port 0x41 - as described above | ||||
| 
 | ||||
| 1000 - Latch Timer 2 count for port 0x42 - as described above | ||||
| 1001 - Set Timer 2 LSB mode for port 0x42 - as described above | ||||
| 1010 - Set Timer 2 MSB mode for port 0x42 - as described above | ||||
| 1011 - Set Timer 2 16-bit mode for port 0x42 as described above | ||||
|   1000 - Latch Timer 2 count for port 0x42 - as described above | ||||
|   1001 - Set Timer 2 LSB mode for port 0x42 - as described above | ||||
|   1010 - Set Timer 2 MSB mode for port 0x42 - as described above | ||||
|   1011 - Set Timer 2 16-bit mode for port 0x42 - as described above | ||||
| 
 | ||||
| 1101 - General counter latch | ||||
|   1101 - General counter latch | ||||
| 	Latch combination of counters into corresponding ports | ||||
| 	Bit 3 = Counter 2 | ||||
| 	Bit 2 = Counter 1 | ||||
| 	Bit 1 = Counter 0 | ||||
| 	Bit 0 = Unused | ||||
| 
 | ||||
| 1110 - Latch timer status | ||||
|   1110 - Latch timer status | ||||
| 	Latch combination of counter mode into corresponding ports | ||||
| 	Bit 3 = Counter 2 | ||||
| 	Bit 2 = Counter 1 | ||||
| @ -177,7 +187,8 @@ Command table: | ||||
| 	Bit 3-1 = Mode | ||||
| 	Bit 0 = Binary (0) / BCD mode (1) | ||||
| 
 | ||||
| 2.2) RTC | ||||
| 2.2. RTC | ||||
| -------- | ||||
| 
 | ||||
| The second device which was available in the original PC was the MC146818 real | ||||
| time clock.  The original device is now obsolete, and usually emulated by the | ||||
| @ -201,21 +212,21 @@ in progress, as indicated in the status register. | ||||
| The clock uses a 32.768kHz crystal, so bits 6-4 of register A should be | ||||
| programmed to a 32kHz divider if the RTC is to count seconds. | ||||
| 
 | ||||
| This is the RAM map originally used for the RTC/CMOS: | ||||
| This is the RAM map originally used for the RTC/CMOS:: | ||||
| 
 | ||||
| Location    Size    Description | ||||
| ------------------------------------------ | ||||
| 00h         byte    Current second (BCD) | ||||
| 01h         byte    Seconds alarm (BCD) | ||||
| 02h         byte    Current minute (BCD) | ||||
| 03h         byte    Minutes alarm (BCD) | ||||
| 04h         byte    Current hour (BCD) | ||||
| 05h         byte    Hours alarm (BCD) | ||||
| 06h         byte    Current day of week (BCD) | ||||
| 07h         byte    Current day of month (BCD) | ||||
| 08h         byte    Current month (BCD) | ||||
| 09h         byte    Current year (BCD) | ||||
| 0Ah         byte    Register A | ||||
|   Location    Size    Description | ||||
|   ------------------------------------------ | ||||
|   00h         byte    Current second (BCD) | ||||
|   01h         byte    Seconds alarm (BCD) | ||||
|   02h         byte    Current minute (BCD) | ||||
|   03h         byte    Minutes alarm (BCD) | ||||
|   04h         byte    Current hour (BCD) | ||||
|   05h         byte    Hours alarm (BCD) | ||||
|   06h         byte    Current day of week (BCD) | ||||
|   07h         byte    Current day of month (BCD) | ||||
|   08h         byte    Current month (BCD) | ||||
|   09h         byte    Current year (BCD) | ||||
|   0Ah         byte    Register A | ||||
|                        bit 7   = Update in progress | ||||
|                        bit 6-4 = Divider for clock | ||||
|                                   000 = 4.194 MHz | ||||
| @ -234,7 +245,7 @@ Location    Size    Description | ||||
|                                  1101 = 125 mS | ||||
|                                  1110 = 250 mS | ||||
|                                  1111 = 500 mS | ||||
| 0Bh         byte    Register B | ||||
|   0Bh         byte    Register B | ||||
|                        bit 7   = Run (0) / Halt (1) | ||||
|                        bit 6   = Periodic interrupt enable | ||||
|                        bit 5   = Alarm interrupt enable | ||||
| @ -243,19 +254,20 @@ Location    Size    Description | ||||
|                        bit 2   = BCD calendar (0) / Binary (1) | ||||
|                        bit 1   = 12-hour mode (0) / 24-hour mode (1) | ||||
|                        bit 0   = 0 (DST off) / 1 (DST enabled) | ||||
| OCh         byte    Register C (read only) | ||||
|   0Ch         byte    Register C (read only) | ||||
|                        bit 7   = interrupt request flag (IRQF) | ||||
|                        bit 6   = periodic interrupt flag (PF) | ||||
|                        bit 5   = alarm interrupt flag (AF) | ||||
|                        bit 4   = update interrupt flag (UF) | ||||
|                        bit 3-0 = reserved | ||||
| ODh         byte    Register D (read only) | ||||
|   0Dh         byte    Register D (read only) | ||||
|                        bit 7   = RTC has power | ||||
|                        bit 6-0 = reserved | ||||
| 32h         byte    Current century BCD (*) | ||||
|   32h         byte    Current century BCD (*) | ||||
|   (*) location vendor specific and now determined from ACPI global tables | ||||
| 
 | ||||
| 2.3) APIC | ||||
| 2.3. APIC | ||||
| --------- | ||||
| 
 | ||||
| On Pentium and later processors, an on-board timer is available to each CPU | ||||
| as part of the Advanced Programmable Interrupt Controller.  The APIC is | ||||
| @ -276,7 +288,8 @@ timer is programmed through the LVT (local vector timer) register, is capable | ||||
| of one-shot or periodic operation, and is based on the bus clock divided down | ||||
| by the programmable divider register. | ||||
| 
 | ||||
| 2.4) HPET | ||||
| 2.4. HPET | ||||
| --------- | ||||
| 
 | ||||
| HPET is quite complex, and was originally intended to replace the PIT / RTC | ||||
| support of the X86 PC.  It remains to be seen whether that will be the case, as | ||||
| @ -297,7 +310,8 @@ indicated through ACPI tables by the BIOS. | ||||
| Detailed specification of the HPET is beyond the current scope of this | ||||
| document, as it is also very well documented elsewhere. | ||||
| 
 | ||||
| 2.5) Offboard Timers | ||||
| 2.5. Offboard Timers | ||||
| -------------------- | ||||
| 
 | ||||
| Several cards, both proprietary (watchdog boards) and commonplace (e1000) have | ||||
| timing chips built into the cards which may have registers which are accessible | ||||
| @ -307,9 +321,8 @@ general frowned upon as not playing by the agreed rules of the game.  Such a | ||||
| timer device would require additional support to be virtualized properly and is | ||||
| not considered important at this time as no known operating system does this. | ||||
| 
 | ||||
| ========================================================================= | ||||
| 
 | ||||
| 3) TSC Hardware | ||||
| 3. TSC Hardware | ||||
| =============== | ||||
| 
 | ||||
| The TSC or time stamp counter is relatively simple in theory; it counts | ||||
| instruction cycles issued by the processor, which can be used as a measure of | ||||
| @ -340,7 +353,8 @@ allows the guest visible TSC to be offset by a constant.  Newer implementations | ||||
| promise to allow the TSC to additionally be scaled, but this hardware is not | ||||
| yet widely available. | ||||
| 
 | ||||
| 3.1) TSC synchronization | ||||
| 3.1. TSC synchronization | ||||
| ------------------------ | ||||
| 
 | ||||
| The TSC is a CPU-local clock in most implementations.  This means, on SMP | ||||
| platforms, the TSCs of different CPUs may start at different times depending | ||||
| @ -357,7 +371,8 @@ practice, getting a perfectly synchronized TSC will not be possible unless all | ||||
| values are read from the same clock, which generally only is possible on single | ||||
| socket systems or those with special hardware support. | ||||
| 
 | ||||
| 3.2) TSC and CPU hotplug | ||||
| 3.2. TSC and CPU hotplug | ||||
| ------------------------ | ||||
| 
 | ||||
| As touched on already, CPUs which arrive later than the boot time of the system | ||||
| may not have a TSC value that is synchronized with the rest of the system. | ||||
| @ -367,7 +382,8 @@ a guarantee.  This can have the effect of bringing a system from a state where | ||||
| TSC is synchronized back to a state where TSC synchronization flaws, however | ||||
| small, may be exposed to the OS and any virtualization environment. | ||||
| 
 | ||||
| 3.3) TSC and multi-socket / NUMA | ||||
| 3.3. TSC and multi-socket / NUMA | ||||
| -------------------------------- | ||||
| 
 | ||||
| Multi-socket systems, especially large multi-socket systems are likely to have | ||||
| individual clocksources rather than a single, universally distributed clock. | ||||
| @ -385,7 +401,8 @@ standards for telecommunications and computer equipment. | ||||
| It is recommended not to trust the TSCs to remain synchronized on NUMA or | ||||
| multiple socket systems for these reasons. | ||||
| 
 | ||||
| 3.4) TSC and C-states | ||||
| 3.4. TSC and C-states | ||||
| --------------------- | ||||
| 
 | ||||
| C-states, or idling states of the processor, especially C1E and deeper sleep | ||||
| states may be problematic for TSC as well.  The TSC may stop advancing in such | ||||
| @ -396,7 +413,8 @@ based on CPU and chipset identifications. | ||||
| The TSC in such a case may be corrected by catching it up to a known external | ||||
| clocksource. | ||||
| 
 | ||||
| 3.5) TSC frequency change / P-states | ||||
| 3.5. TSC frequency change / P-states | ||||
| ------------------------------------ | ||||
| 
 | ||||
| To make things slightly more interesting, some CPUs may change frequency.  They | ||||
| may or may not run the TSC at the same rate, and because the frequency change | ||||
| @ -416,14 +434,16 @@ other processors.  In such cases, the TSC on halted CPUs could advance faster | ||||
| than that of non-halted processors.  AMD Turion processors are known to have | ||||
| this problem. | ||||
| 
 | ||||
| 3.6) TSC and STPCLK / T-states | ||||
| 3.6. TSC and STPCLK / T-states | ||||
| ------------------------------ | ||||
| 
 | ||||
| External signals given to the processor may also have the effect of stopping | ||||
| the TSC.  This is typically done for thermal emergency power control to prevent | ||||
| an overheating condition, and typically, there is no way to detect that this | ||||
| condition has happened. | ||||
| 
 | ||||
| 3.7) TSC virtualization - VMX | ||||
| 3.7. TSC virtualization - VMX | ||||
| ----------------------------- | ||||
| 
 | ||||
| VMX provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP | ||||
| instructions, which is enough for full virtualization of TSC in any manner.  In | ||||
| @ -431,14 +451,16 @@ addition, VMX allows passing through the host TSC plus an additional TSC_OFFSET | ||||
| field specified in the VMCS.  Special instructions must be used to read and | ||||
| write the VMCS field. | ||||
| 
 | ||||
| 3.8) TSC virtualization - SVM | ||||
| 3.8. TSC virtualization - SVM | ||||
| ----------------------------- | ||||
| 
 | ||||
| SVM provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP | ||||
| instructions, which is enough for full virtualization of TSC in any manner.  In | ||||
| addition, SVM allows passing through the host TSC plus an additional offset | ||||
| field specified in the SVM control block. | ||||
| 
 | ||||
| 3.9) TSC feature bits in Linux | ||||
| 3.9. TSC feature bits in Linux | ||||
| ------------------------------ | ||||
| 
 | ||||
| In summary, there is no way to guarantee the TSC remains in perfect | ||||
| synchronization unless it is explicitly guaranteed by the architecture.  Even | ||||
| @ -448,13 +470,16 @@ despite being locally consistent. | ||||
| The following feature bits are used by Linux to signal various TSC attributes, | ||||
| but they can only be taken to be meaningful for UP or single node systems. | ||||
| 
 | ||||
| X86_FEATURE_TSC 		: The TSC is available in hardware | ||||
| X86_FEATURE_RDTSCP		: The RDTSCP instruction is available | ||||
| X86_FEATURE_CONSTANT_TSC 	: The TSC rate is unchanged with P-states | ||||
| X86_FEATURE_NONSTOP_TSC		: The TSC does not stop in C-states | ||||
| X86_FEATURE_TSC_RELIABLE	: TSC sync checks are skipped (VMware) | ||||
| =========================	======================================= | ||||
| X86_FEATURE_TSC			The TSC is available in hardware | ||||
| X86_FEATURE_RDTSCP		The RDTSCP instruction is available | ||||
| X86_FEATURE_CONSTANT_TSC	The TSC rate is unchanged with P-states | ||||
| X86_FEATURE_NONSTOP_TSC		The TSC does not stop in C-states | ||||
| X86_FEATURE_TSC_RELIABLE	TSC sync checks are skipped (VMware) | ||||
| =========================	======================================= | ||||
| 
 | ||||
| 4) Virtualization Problems | ||||
| 4. Virtualization Problems | ||||
| ========================== | ||||
| 
 | ||||
| Timekeeping is especially problematic for virtualization because a number of | ||||
| challenges arise.  The most obvious problem is that time is now shared between | ||||
| @ -473,7 +498,8 @@ BIOS, but not in such an extreme fashion.  However, the fact that SMM mode may | ||||
| cause similar problems to virtualization makes it a good justification for | ||||
| solving many of these problems on bare metal. | ||||
| 
 | ||||
| 4.1) Interrupt clocking | ||||
| 4.1. Interrupt clocking | ||||
| ----------------------- | ||||
| 
 | ||||
| One of the most immediate problems that occurs with legacy operating systems | ||||
| is that the system timekeeping routines are often designed to keep track of | ||||
| @ -502,7 +528,8 @@ thus requires interrupt slewing to keep proper time.  It does use a low enough | ||||
| rate (ed: is it 18.2 Hz?) however that it has not yet been a problem in | ||||
| practice. | ||||
| 
 | ||||
| 4.2) TSC sampling and serialization | ||||
| 4.2. TSC sampling and serialization | ||||
| ----------------------------------- | ||||
| 
 | ||||
| As the highest precision time source available, the cycle counter of the CPU | ||||
| has aroused much interest from developers.  As explained above, this timer has | ||||
| @ -524,7 +551,8 @@ it may be necessary for an implementation to guard against "backwards" reads of | ||||
| the TSC as seen from other CPUs, even in an otherwise perfectly synchronized | ||||
| system. | ||||
| 
 | ||||
| 4.3) Timespec aliasing | ||||
| 4.3. Timespec aliasing | ||||
| ---------------------- | ||||
| 
 | ||||
| Additionally, this lack of serialization from the TSC poses another challenge | ||||
| when using results of the TSC when measured against another time source.  As | ||||
| @ -548,7 +576,8 @@ This aliasing requires care in the computation and recalibration of kvmclock | ||||
| and any other values derived from TSC computation (such as TSC virtualization | ||||
| itself). | ||||
| 
 | ||||
| 4.4) Migration | ||||
| 4.4. Migration | ||||
| -------------- | ||||
| 
 | ||||
| Migration of a virtual machine raises problems for timekeeping in two ways. | ||||
| First, the migration itself may take time, during which interrupts cannot be | ||||
| @ -566,7 +595,8 @@ always be caught up to the original rate.  KVM clock avoids these problems by | ||||
| simply storing multipliers and offsets against the TSC for the guest to convert | ||||
| back into nanosecond resolution values. | ||||
| 
 | ||||
| 4.5) Scheduling | ||||
| 4.5. Scheduling | ||||
| --------------- | ||||
| 
 | ||||
| Since scheduling may be based on precise timing and firing of interrupts, the | ||||
| scheduling algorithms of an operating system may be adversely affected by | ||||
| @ -579,7 +609,8 @@ In an attempt to work around this, several implementations have provided a | ||||
| paravirtualized scheduler clock, which reveals the true amount of CPU time for | ||||
| which a virtual machine has been running. | ||||
| 
 | ||||
| 4.6) Watchdogs | ||||
| 4.6. Watchdogs | ||||
| -------------- | ||||
| 
 | ||||
| Watchdog timers, such as the lockup detector in Linux, may fire accidentally when | ||||
| running under hardware virtualization due to timer interrupts being delayed or | ||||
| @ -587,7 +618,8 @@ misinterpretation of the passage of real time.  Usually, these warnings are | ||||
| spurious and can be ignored, but in some circumstances it may be necessary to | ||||
| disable such detection. | ||||
| 
 | ||||
| 4.7) Delays and precision timing | ||||
| 4.7. Delays and precision timing | ||||
| -------------------------------- | ||||
| 
 | ||||
| Precise timing and delays may not be possible in a virtualized system.  This | ||||
| can happen if the system is controlling physical hardware, or issues delays to | ||||
| @ -600,7 +632,8 @@ The second issue may cause performance problems, but this is unlikely to be a | ||||
| significant issue.  In many cases these delays may be eliminated through | ||||
| configuration or paravirtualization. | ||||
| 
 | ||||
| 4.8) Covert channels and leaks | ||||
| 4.8. Covert channels and leaks | ||||
| ------------------------------ | ||||
| 
 | ||||
| In addition to the above problems, time information will inevitably leak to the | ||||
| guest about the host in anything but a perfect implementation of virtualized | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -19,7 +19,6 @@ x86-specific Documentation | ||||
|    tlb | ||||
|    mtrr | ||||
|    pat | ||||
|    intel_mpx | ||||
|    intel-iommu | ||||
|    intel_txt | ||||
|    amd-memory-encryption | ||||
|  | ||||
							
								
								
									
										93
									
								
								MAINTAINERS
									
									
									
									
									
								
							
							
						
						
									
										93
									
								
								MAINTAINERS
									
									
									
									
									
								
							| @ -2796,11 +2796,11 @@ F:	drivers/block/aoe/ | ||||
| 
 | ||||
| ATHEROS 71XX/9XXX GPIO DRIVER | ||||
| M:	Alban Bedel <albeu@free.fr> | ||||
| S:	Maintained | ||||
| W:	https://github.com/AlbanBedel/linux | ||||
| T:	git git://github.com/AlbanBedel/linux | ||||
| S:	Maintained | ||||
| F:	drivers/gpio/gpio-ath79.c | ||||
| F:	Documentation/devicetree/bindings/gpio/gpio-ath79.txt | ||||
| F:	drivers/gpio/gpio-ath79.c | ||||
| 
 | ||||
| ATHEROS 71XX/9XXX USB PHY DRIVER | ||||
| M:	Alban Bedel <albeu@free.fr> | ||||
| @ -3422,8 +3422,8 @@ BROADCOM BRCMSTB GPIO DRIVER | ||||
| M:	Gregory Fong <gregory.0xf0@gmail.com> | ||||
| L:	bcm-kernel-feedback-list@broadcom.com | ||||
| S:	Supported | ||||
| F:	drivers/gpio/gpio-brcmstb.c | ||||
| F:	Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.txt | ||||
| F:	drivers/gpio/gpio-brcmstb.c | ||||
| 
 | ||||
| BROADCOM BRCMSTB I2C DRIVER | ||||
| M:	Kamal Dasu <kdasu.kdev@gmail.com> | ||||
| @ -3481,8 +3481,8 @@ BROADCOM KONA GPIO DRIVER | ||||
| M:	Ray Jui <rjui@broadcom.com> | ||||
| L:	bcm-kernel-feedback-list@broadcom.com | ||||
| S:	Supported | ||||
| F:	drivers/gpio/gpio-bcm-kona.c | ||||
| F:	Documentation/devicetree/bindings/gpio/brcm,kona-gpio.txt | ||||
| F:	drivers/gpio/gpio-bcm-kona.c | ||||
| 
 | ||||
| BROADCOM NETXTREME-E ROCE DRIVER | ||||
| M:	Selvin Xavier <selvin.xavier@broadcom.com> | ||||
| @ -3597,8 +3597,8 @@ F:	sound/pci/bt87x.c | ||||
| 
 | ||||
| BT8XXGPIO DRIVER | ||||
| M:	Michael Buesch <m@bues.ch> | ||||
| W:	http://bu3sch.de/btgpio.php | ||||
| S:	Maintained | ||||
| W:	http://bu3sch.de/btgpio.php | ||||
| F:	drivers/gpio/gpio-bt8xx.c | ||||
| 
 | ||||
| BTRFS FILE SYSTEM | ||||
| @ -3649,6 +3649,7 @@ F:	sound/pci/oxygen/ | ||||
| 
 | ||||
| C-SKY ARCHITECTURE | ||||
| M:	Guo Ren <guoren@kernel.org> | ||||
| L:	linux-csky@vger.kernel.org | ||||
| T:	git https://github.com/c-sky/csky-linux.git | ||||
| S:	Supported | ||||
| F:	arch/csky/ | ||||
| @ -3909,7 +3910,7 @@ S:	Supported | ||||
| F:	Documentation/filesystems/ceph.txt | ||||
| F:	fs/ceph/ | ||||
| 
 | ||||
| CERTIFICATE HANDLING: | ||||
| CERTIFICATE HANDLING | ||||
| M:	David Howells <dhowells@redhat.com> | ||||
| M:	David Woodhouse <dwmw2@infradead.org> | ||||
| L:	keyrings@vger.kernel.org | ||||
| @ -3919,7 +3920,7 @@ F:	certs/ | ||||
| F:	scripts/sign-file.c | ||||
| F:	scripts/extract-cert.c | ||||
| 
 | ||||
| CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM: | ||||
| CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM | ||||
| L:	devel@driverdev.osuosl.org | ||||
| S:	Obsolete | ||||
| F:	drivers/staging/wusbcore/ | ||||
| @ -5932,12 +5933,12 @@ S:	Maintained | ||||
| F:	drivers/media/dvb-frontends/ec100* | ||||
| 
 | ||||
| ECRYPT FILE SYSTEM | ||||
| M:	Tyler Hicks <tyhicks@canonical.com> | ||||
| M:	Tyler Hicks <code@tyhicks.com> | ||||
| L:	ecryptfs@vger.kernel.org | ||||
| W:	http://ecryptfs.org | ||||
| W:	https://launchpad.net/ecryptfs | ||||
| T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tyhicks/ecryptfs.git | ||||
| S:	Supported | ||||
| S:	Odd Fixes | ||||
| F:	Documentation/filesystems/ecryptfs.txt | ||||
| F:	fs/ecryptfs/ | ||||
| 
 | ||||
| @ -7047,7 +7048,7 @@ L:	kvm@vger.kernel.org | ||||
| S:	Supported | ||||
| F:	drivers/uio/uio_pci_generic.c | ||||
| 
 | ||||
| GENERIC VDSO LIBRARY: | ||||
| GENERIC VDSO LIBRARY | ||||
| M:	Andy Lutomirski <luto@kernel.org> | ||||
| M:	Thomas Gleixner <tglx@linutronix.de> | ||||
| M:	Vincenzo Frascino <vincenzo.frascino@arm.com> | ||||
| @ -7143,18 +7144,18 @@ GPIO SUBSYSTEM | ||||
| M:	Linus Walleij <linus.walleij@linaro.org> | ||||
| M:	Bartosz Golaszewski <bgolaszewski@baylibre.com> | ||||
| L:	linux-gpio@vger.kernel.org | ||||
| T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git | ||||
| S:	Maintained | ||||
| T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git | ||||
| F:	Documentation/ABI/obsolete/sysfs-gpio | ||||
| F:	Documentation/ABI/testing/gpio-cdev | ||||
| F:	Documentation/admin-guide/gpio/ | ||||
| F:	Documentation/devicetree/bindings/gpio/ | ||||
| F:	Documentation/driver-api/gpio/ | ||||
| F:	Documentation/admin-guide/gpio/ | ||||
| F:	Documentation/ABI/testing/gpio-cdev | ||||
| F:	Documentation/ABI/obsolete/sysfs-gpio | ||||
| F:	drivers/gpio/ | ||||
| F:	include/asm-generic/gpio.h | ||||
| F:	include/linux/gpio/ | ||||
| F:	include/linux/gpio.h | ||||
| F:	include/linux/of_gpio.h | ||||
| F:	include/asm-generic/gpio.h | ||||
| F:	include/uapi/linux/gpio.h | ||||
| F:	tools/gpio/ | ||||
| 
 | ||||
| @ -8055,8 +8056,8 @@ F:	drivers/scsi/ips.* | ||||
| ICH LPC AND GPIO DRIVER | ||||
| M:	Peter Tyser <ptyser@xes-inc.com> | ||||
| S:	Maintained | ||||
| F:	drivers/mfd/lpc_ich.c | ||||
| F:	drivers/gpio/gpio-ich.c | ||||
| F:	drivers/mfd/lpc_ich.c | ||||
| 
 | ||||
| ICY I2C DRIVER | ||||
| M:	Max Staudt <max@enpas.org> | ||||
| @ -8392,7 +8393,7 @@ M:	Joonas Lahtinen <joonas.lahtinen@linux.intel.com> | ||||
| M:	Rodrigo Vivi <rodrigo.vivi@intel.com> | ||||
| L:	intel-gfx@lists.freedesktop.org | ||||
| W:	https://01.org/linuxgraphics/ | ||||
| B:	https://01.org/linuxgraphics/documentation/how-report-bugs | ||||
| B:	https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs | ||||
| C:	irc://chat.freenode.net/intel-gfx | ||||
| Q:	http://patchwork.freedesktop.org/project/intel-gfx/ | ||||
| T:	git git://anongit.freedesktop.org/drm-intel | ||||
| @ -9278,7 +9279,7 @@ F:	include/keys/trusted-type.h | ||||
| F:	security/keys/trusted.c | ||||
| F:	include/keys/trusted.h | ||||
| 
 | ||||
| KEYS/KEYRINGS: | ||||
| KEYS/KEYRINGS | ||||
| M:	David Howells <dhowells@redhat.com> | ||||
| M:	Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> | ||||
| L:	keyrings@vger.kernel.org | ||||
| @ -11114,14 +11115,12 @@ S:	Maintained | ||||
| F:	drivers/usb/image/microtek.* | ||||
| 
 | ||||
| MIPS | ||||
| M:	Ralf Baechle <ralf@linux-mips.org> | ||||
| M:	Paul Burton <paulburton@kernel.org> | ||||
| M:	Thomas Bogendoerfer <tsbogend@alpha.franken.de> | ||||
| L:	linux-mips@vger.kernel.org | ||||
| W:	http://www.linux-mips.org/ | ||||
| T:	git git://git.linux-mips.org/pub/scm/ralf/linux.git | ||||
| T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mips/linux.git | ||||
| Q:	http://patchwork.linux-mips.org/project/linux-mips/list/ | ||||
| S:	Supported | ||||
| S:	Maintained | ||||
| F:	Documentation/devicetree/bindings/mips/ | ||||
| F:	Documentation/mips/ | ||||
| F:	arch/mips/ | ||||
| @ -11484,7 +11483,7 @@ F:	drivers/scsi/mac_scsi.* | ||||
| F:	drivers/scsi/sun3_scsi.* | ||||
| F:	drivers/scsi/sun3_scsi_vme.c | ||||
| 
 | ||||
| NCSI LIBRARY: | ||||
| NCSI LIBRARY | ||||
| M:	Samuel Mendoza-Jonas <sam@mendozajonas.com> | ||||
| S:	Maintained | ||||
| F:	net/ncsi/ | ||||
| @ -12740,7 +12739,7 @@ M:	Tom Joseph <tjoseph@cadence.com> | ||||
| L:	linux-pci@vger.kernel.org | ||||
| S:	Maintained | ||||
| F:	Documentation/devicetree/bindings/pci/cdns,*.txt | ||||
| F:	drivers/pci/controller/pcie-cadence* | ||||
| F:	drivers/pci/controller/cadence/ | ||||
| 
 | ||||
| PCI DRIVER FOR FREESCALE LAYERSCAPE | ||||
| M:	Minghuan Lian <minghuan.Lian@nxp.com> | ||||
| @ -13512,7 +13511,7 @@ L:	linuxppc-dev@lists.ozlabs.org | ||||
| S:	Maintained | ||||
| F:	drivers/block/ps3vram.c | ||||
| 
 | ||||
| PSAMPLE PACKET SAMPLING SUPPORT: | ||||
| PSAMPLE PACKET SAMPLING SUPPORT | ||||
| M:	Yotam Gigi <yotam.gi@gmail.com> | ||||
| S:	Maintained | ||||
| F:	net/psample | ||||
| @ -14582,10 +14581,10 @@ F:	drivers/media/pci/saa7146/ | ||||
| F:	include/media/drv-intf/saa7146* | ||||
| 
 | ||||
| SAFESETID SECURITY MODULE | ||||
| M:     Micah Morton <mortonm@chromium.org> | ||||
| S:     Supported | ||||
| F:     security/safesetid/ | ||||
| F:     Documentation/admin-guide/LSM/SafeSetID.rst | ||||
| M:	Micah Morton <mortonm@chromium.org> | ||||
| S:	Supported | ||||
| F:	security/safesetid/ | ||||
| F:	Documentation/admin-guide/LSM/SafeSetID.rst | ||||
| 
 | ||||
| SAMSUNG AUDIO (ASoC) DRIVERS | ||||
| M:	Krzysztof Kozlowski <krzk@kernel.org> | ||||
| @ -16075,8 +16074,8 @@ F:	Documentation/devicetree/bindings/reset/snps,axs10x-reset.txt | ||||
| SYNOPSYS CREG GPIO DRIVER | ||||
| M:	Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com> | ||||
| S:	Maintained | ||||
| F:	drivers/gpio/gpio-creg-snps.c | ||||
| F:	Documentation/devicetree/bindings/gpio/snps,creg-gpio.txt | ||||
| F:	drivers/gpio/gpio-creg-snps.c | ||||
| 
 | ||||
| SYNOPSYS DESIGNWARE 8250 UART DRIVER | ||||
| R:	Andy Shevchenko <andriy.shevchenko@linux.intel.com> | ||||
| @ -16087,8 +16086,8 @@ SYNOPSYS DESIGNWARE APB GPIO DRIVER | ||||
| M:	Hoan Tran <hoan@os.amperecomputing.com> | ||||
| L:	linux-gpio@vger.kernel.org | ||||
| S:	Maintained | ||||
| F:	drivers/gpio/gpio-dwapb.c | ||||
| F:	Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt | ||||
| F:	drivers/gpio/gpio-dwapb.c | ||||
| 
 | ||||
| SYNOPSYS DESIGNWARE AXI DMAC DRIVER | ||||
| M:	Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com> | ||||
| @ -16552,8 +16551,8 @@ M:	Michael Jamet <michael.jamet@intel.com> | ||||
| M:	Mika Westerberg <mika.westerberg@linux.intel.com> | ||||
| M:	Yehezkel Bernat <YehezkelShB@gmail.com> | ||||
| L:	linux-usb@vger.kernel.org | ||||
| T:	git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git | ||||
| S:	Maintained | ||||
| T:	git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git | ||||
| F:	Documentation/admin-guide/thunderbolt.rst | ||||
| F:	drivers/thunderbolt/ | ||||
| F:	include/linux/thunderbolt.h | ||||
| @ -17080,7 +17079,7 @@ S:	Maintained | ||||
| F:	Documentation/admin-guide/ufs.rst | ||||
| F:	fs/ufs/ | ||||
| 
 | ||||
| UHID USERSPACE HID IO DRIVER: | ||||
| UHID USERSPACE HID IO DRIVER | ||||
| M:	David Herrmann <dh.herrmann@googlemail.com> | ||||
| L:	linux-input@vger.kernel.org | ||||
| S:	Maintained | ||||
| @ -17094,18 +17093,18 @@ S:	Maintained | ||||
| F:	drivers/usb/common/ulpi.c | ||||
| F:	include/linux/ulpi/ | ||||
| 
 | ||||
| ULTRA-WIDEBAND (UWB) SUBSYSTEM: | ||||
| ULTRA-WIDEBAND (UWB) SUBSYSTEM | ||||
| L:	devel@driverdev.osuosl.org | ||||
| S:	Obsolete | ||||
| F:	drivers/staging/uwb/ | ||||
| 
 | ||||
| UNICODE SUBSYSTEM: | ||||
| UNICODE SUBSYSTEM | ||||
| M:	Gabriel Krisman Bertazi <krisman@collabora.com> | ||||
| L:	linux-fsdevel@vger.kernel.org | ||||
| S:	Supported | ||||
| F:	fs/unicode/ | ||||
| 
 | ||||
| UNICORE32 ARCHITECTURE: | ||||
| UNICORE32 ARCHITECTURE | ||||
| M:	Guan Xuetao <gxt@pku.edu.cn> | ||||
| W:	http://mprc.pku.edu.cn/~guanxuetao/linux | ||||
| S:	Maintained | ||||
| @ -17392,11 +17391,14 @@ F:	drivers/usb/ | ||||
| F:	include/linux/usb.h | ||||
| F:	include/linux/usb/ | ||||
| 
 | ||||
| USB TYPEC PI3USB30532 MUX DRIVER | ||||
| M:	Hans de Goede <hdegoede@redhat.com> | ||||
| USB TYPEC BUS FOR ALTERNATE MODES | ||||
| M:	Heikki Krogerus <heikki.krogerus@linux.intel.com> | ||||
| L:	linux-usb@vger.kernel.org | ||||
| S:	Maintained | ||||
| F:	drivers/usb/typec/mux/pi3usb30532.c | ||||
| F:	Documentation/ABI/testing/sysfs-bus-typec | ||||
| F:	Documentation/driver-api/usb/typec_bus.rst | ||||
| F:	drivers/usb/typec/altmodes/ | ||||
| F:	include/linux/usb/typec_altmode.h | ||||
| 
 | ||||
| USB TYPEC CLASS | ||||
| M:	Heikki Krogerus <heikki.krogerus@linux.intel.com> | ||||
| @ -17407,14 +17409,11 @@ F:	Documentation/driver-api/usb/typec.rst | ||||
| F:	drivers/usb/typec/ | ||||
| F:	include/linux/usb/typec.h | ||||
| 
 | ||||
| USB TYPEC BUS FOR ALTERNATE MODES | ||||
| M:	Heikki Krogerus <heikki.krogerus@linux.intel.com> | ||||
| USB TYPEC PI3USB30532 MUX DRIVER | ||||
| M:	Hans de Goede <hdegoede@redhat.com> | ||||
| L:	linux-usb@vger.kernel.org | ||||
| S:	Maintained | ||||
| F:	Documentation/ABI/testing/sysfs-bus-typec | ||||
| F:	Documentation/driver-api/usb/typec_bus.rst | ||||
| F:	drivers/usb/typec/altmodes/ | ||||
| F:	include/linux/usb/typec_altmode.h | ||||
| F:	drivers/usb/typec/mux/pi3usb30532.c | ||||
| 
 | ||||
| USB TYPEC PORT CONTROLLER DRIVERS | ||||
| M:	Guenter Roeck <linux@roeck-us.net> | ||||
| @ -17791,7 +17790,7 @@ F:	include/linux/vbox_utils.h | ||||
| F:	include/uapi/linux/vbox*.h | ||||
| F:	drivers/virt/vboxguest/ | ||||
| 
 | ||||
| VIRTUAL BOX SHARED FOLDER VFS DRIVER: | ||||
| VIRTUAL BOX SHARED FOLDER VFS DRIVER | ||||
| M:	Hans de Goede <hdegoede@redhat.com> | ||||
| L:	linux-fsdevel@vger.kernel.org | ||||
| S:	Maintained | ||||
| @ -18414,8 +18413,8 @@ M:	Nandor Han <nandor.han@ge.com> | ||||
| M:	Semi Malinen <semi.malinen@ge.com> | ||||
| L:	linux-gpio@vger.kernel.org | ||||
| S:	Maintained | ||||
| F:	drivers/gpio/gpio-xra1403.c | ||||
| F:	Documentation/devicetree/bindings/gpio/gpio-xra1403.txt | ||||
| F:	drivers/gpio/gpio-xra1403.c | ||||
| 
 | ||||
| XTENSA XTFPGA PLATFORM SUPPORT | ||||
| M:	Max Filippov <jcmvbkbc@gmail.com> | ||||
|  | ||||
							
								
								
									
										6
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										6
									
								
								Makefile
									
									
									
									
									
								
							| @ -2,7 +2,7 @@ | ||||
| VERSION = 5 | ||||
| PATCHLEVEL = 6 | ||||
| SUBLEVEL = 0 | ||||
| EXTRAVERSION = -rc1 | ||||
| EXTRAVERSION = -rc4 | ||||
| NAME = Kleptomaniac Octopus | ||||
| 
 | ||||
| # *DOCUMENTATION*
 | ||||
| @ -68,6 +68,7 @@ unexport GREP_OPTIONS | ||||
| #
 | ||||
| # If KBUILD_VERBOSE equals 0 then the above command will be hidden.
 | ||||
| # If KBUILD_VERBOSE equals 1 then the above command is displayed.
 | ||||
| # If KBUILD_VERBOSE equals 2 then give the reason why each target is rebuilt.
 | ||||
| #
 | ||||
| # To put more focus on warnings, be less verbose as default
 | ||||
| # Use 'make V=1' to see the full commands
 | ||||
| @ -1238,7 +1239,7 @@ ifneq ($(dtstree),) | ||||
| %.dtb: include/config/kernel.release scripts_dtc | ||||
| 	$(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@ | ||||
| 
 | ||||
| PHONY += dtbs dtbs_install dt_binding_check | ||||
| PHONY += dtbs dtbs_install dtbs_check | ||||
| dtbs dtbs_check: include/config/kernel.release scripts_dtc | ||||
| 	$(Q)$(MAKE) $(build)=$(dtstree) | ||||
| 
 | ||||
| @ -1258,6 +1259,7 @@ PHONY += scripts_dtc | ||||
| scripts_dtc: scripts_basic | ||||
| 	$(Q)$(MAKE) $(build)=scripts/dtc | ||||
| 
 | ||||
| PHONY += dt_binding_check | ||||
| dt_binding_check: scripts_dtc | ||||
| 	$(Q)$(MAKE) $(build)=Documentation/devicetree/bindings | ||||
| 
 | ||||
|  | ||||
| @ -178,9 +178,6 @@ | ||||
| 			phy-mode = "rgmii"; | ||||
| 			pinctrl-0 = <&pinctrl_rgmii1 &pinctrl_rgmii1_mdio_1>; | ||||
| 
 | ||||
| 			snps,phy-bus-name = "stmmac"; | ||||
| 			snps,phy-bus-id = <0>; | ||||
| 			snps,phy-addr = <0>; | ||||
| 			snps,reset-gpio = <&pio0 7 0>; | ||||
| 			snps,reset-active-low; | ||||
| 			snps,reset-delays-us = <0 10000 1000000>; | ||||
|  | ||||
| @ -46,7 +46,7 @@ | ||||
| 			/* DAC */ | ||||
| 			format = "i2s"; | ||||
| 			mclk-fs = <256>; | ||||
| 			frame-inversion = <1>; | ||||
| 			frame-inversion; | ||||
| 			cpu { | ||||
| 				sound-dai = <&sti_uni_player2>; | ||||
| 			}; | ||||
|  | ||||
| @ -11,8 +11,6 @@ CONFIG_SLAB=y | ||||
| CONFIG_MODULES=y | ||||
| CONFIG_MODULE_UNLOAD=y | ||||
| # CONFIG_BLK_DEV_BSG is not set | ||||
| # CONFIG_IOSCHED_DEADLINE is not set | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| CONFIG_ARCH_PXA=y | ||||
| CONFIG_ARCH_GUMSTIX=y | ||||
| CONFIG_PCCARD=y | ||||
|  | ||||
| @ -25,7 +25,6 @@ CONFIG_EMBEDDED=y | ||||
| CONFIG_PROFILING=y | ||||
| CONFIG_MODULES=y | ||||
| CONFIG_MODULE_UNLOAD=y | ||||
| # CONFIG_IOSCHED_DEADLINE is not set | ||||
| CONFIG_ARCH_AXXIA=y | ||||
| CONFIG_GPIO_PCA953X=y | ||||
| CONFIG_ARM_LPAE=y | ||||
|  | ||||
| @ -7,7 +7,6 @@ CONFIG_EMBEDDED=y | ||||
| CONFIG_SLOB=y | ||||
| CONFIG_JUMP_LABEL=y | ||||
| CONFIG_PARTITION_ADVANCED=y | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| CONFIG_ARCH_CLPS711X=y | ||||
| CONFIG_ARCH_AUTCPU12=y | ||||
| CONFIG_ARCH_CDB89712=y | ||||
|  | ||||
| @ -17,7 +17,7 @@ CONFIG_MODULE_UNLOAD=y | ||||
| CONFIG_MODULE_FORCE_UNLOAD=y | ||||
| CONFIG_MODVERSIONS=y | ||||
| # CONFIG_BLK_DEV_BSG is not set | ||||
| CONFIG_IOSCHED_CFQ=m | ||||
| CONFIG_IOSCHED_BFQ=m | ||||
| CONFIG_ARCH_MULTI_V6=y | ||||
| #CONFIG_ARCH_MULTI_V7 is not set | ||||
| CONFIG_ARCH_CNS3XXX=y | ||||
|  | ||||
| @ -43,7 +43,6 @@ CONFIG_USB_ANNOUNCE_NEW_DEVICES=y | ||||
| CONFIG_USB_MON=y | ||||
| CONFIG_USB_STORAGE=y | ||||
| CONFIG_MMC=y | ||||
| # CONFIG_MMC_BLOCK_BOUNCE is not set | ||||
| CONFIG_MMC_PXA=y | ||||
| CONFIG_EXT3_FS=y | ||||
| CONFIG_NFS_FS=y | ||||
|  | ||||
| @ -7,8 +7,6 @@ CONFIG_EXPERT=y | ||||
| # CONFIG_BASE_FULL is not set | ||||
| # CONFIG_EPOLL is not set | ||||
| CONFIG_SLOB=y | ||||
| # CONFIG_IOSCHED_DEADLINE is not set | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| CONFIG_ARCH_SA1100=y | ||||
| CONFIG_SA1100_COLLIE=y | ||||
| CONFIG_PCCARD=y | ||||
|  | ||||
| @ -15,8 +15,6 @@ CONFIG_MODULE_UNLOAD=y | ||||
| CONFIG_MODULE_FORCE_UNLOAD=y | ||||
| CONFIG_MODVERSIONS=y | ||||
| CONFIG_PARTITION_ADVANCED=y | ||||
| # CONFIG_IOSCHED_DEADLINE is not set | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| CONFIG_ARCH_MULTIPLATFORM=y | ||||
| CONFIG_ARCH_MULTI_V7=n | ||||
| CONFIG_ARCH_MULTI_V5=y | ||||
|  | ||||
| @ -12,8 +12,6 @@ CONFIG_EMBEDDED=y | ||||
| # CONFIG_VM_EVENT_COUNTERS is not set | ||||
| # CONFIG_SLUB_DEBUG is not set | ||||
| # CONFIG_BLK_DEV_BSG is not set | ||||
| # CONFIG_IOSCHED_DEADLINE is not set | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| # CONFIG_MMU is not set | ||||
| CONFIG_ARM_SINGLE_ARMV7M=y | ||||
| CONFIG_ARCH_EFM32=y | ||||
|  | ||||
| @ -11,7 +11,6 @@ CONFIG_MODULE_UNLOAD=y | ||||
| CONFIG_MODULE_FORCE_UNLOAD=y | ||||
| # CONFIG_BLK_DEV_BSG is not set | ||||
| CONFIG_PARTITION_ADVANCED=y | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| CONFIG_ARCH_EP93XX=y | ||||
| CONFIG_CRUNCH=y | ||||
| CONFIG_MACH_ADSSPHERE=y | ||||
|  | ||||
| @ -9,8 +9,6 @@ CONFIG_MODULES=y | ||||
| CONFIG_MODULE_UNLOAD=y | ||||
| CONFIG_MODULE_FORCE_UNLOAD=y | ||||
| # CONFIG_BLK_DEV_BSG is not set | ||||
| # CONFIG_IOSCHED_DEADLINE is not set | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| CONFIG_ARCH_PXA=y | ||||
| CONFIG_ARCH_PXA_ESERIES=y | ||||
| # CONFIG_ARM_THUMB is not set | ||||
|  | ||||
| @ -14,7 +14,6 @@ CONFIG_MODULE_UNLOAD=y | ||||
| CONFIG_MODULE_FORCE_UNLOAD=y | ||||
| CONFIG_MODVERSIONS=y | ||||
| # CONFIG_BLK_DEV_BSG is not set | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| CONFIG_ARCH_PXA=y | ||||
| CONFIG_PXA_EZX=y | ||||
| CONFIG_NO_HZ=y | ||||
|  | ||||
| @ -5,8 +5,6 @@ CONFIG_LOG_BUF_SHIFT=14 | ||||
| CONFIG_BLK_DEV_INITRD=y | ||||
| CONFIG_MODULES=y | ||||
| # CONFIG_BLK_DEV_BSG is not set | ||||
| # CONFIG_IOSCHED_DEADLINE is not set | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| CONFIG_ARCH_SA1100=y | ||||
| CONFIG_SA1100_H3600=y | ||||
| CONFIG_PCCARD=y | ||||
|  | ||||
| @ -10,7 +10,6 @@ CONFIG_MODULES=y | ||||
| CONFIG_MODULE_UNLOAD=y | ||||
| CONFIG_MODULE_FORCE_UNLOAD=y | ||||
| # CONFIG_BLK_DEV_BSG is not set | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| CONFIG_ARCH_PXA=y | ||||
| CONFIG_MACH_H5000=y | ||||
| CONFIG_AEABI=y | ||||
|  | ||||
| @ -13,7 +13,6 @@ CONFIG_MODULE_UNLOAD=y | ||||
| CONFIG_MODULE_FORCE_UNLOAD=y | ||||
| CONFIG_MODVERSIONS=y | ||||
| # CONFIG_BLK_DEV_BSG is not set | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| CONFIG_ARCH_PXA=y | ||||
| CONFIG_MACH_INTELMOTE2=y | ||||
| CONFIG_NO_HZ=y | ||||
|  | ||||
| @ -32,8 +32,6 @@ CONFIG_KPROBES=y | ||||
| CONFIG_MODULES=y | ||||
| CONFIG_MODULE_UNLOAD=y | ||||
| # CONFIG_BLK_DEV_BSG is not set | ||||
| # CONFIG_IOSCHED_DEADLINE is not set | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| CONFIG_NET=y | ||||
| CONFIG_PACKET=y | ||||
| CONFIG_UNIX=y | ||||
|  | ||||
| @ -1,4 +1,3 @@ | ||||
| CONFIG_CROSS_COMPILE="arm-linux-gnueabihf-" | ||||
| CONFIG_HIGH_RES_TIMERS=y | ||||
| CONFIG_PREEMPT=y | ||||
| CONFIG_BLK_DEV_INITRD=y | ||||
| @ -28,10 +27,7 @@ CONFIG_FLASH_SIZE=0x00080000 | ||||
| CONFIG_ZBOOT_ROM_TEXT=0x0 | ||||
| CONFIG_ZBOOT_ROM_BSS=0x0 | ||||
| CONFIG_ARM_APPENDED_DTB=y | ||||
| # CONFIG_LBDAF is not set | ||||
| # CONFIG_BLK_DEV_BSG is not set | ||||
| # CONFIG_IOSCHED_DEADLINE is not set | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| CONFIG_BINFMT_FLAT=y | ||||
| CONFIG_BINFMT_ZFLAT=y | ||||
| CONFIG_BINFMT_SHARED_FLAT=y | ||||
|  | ||||
| @ -9,8 +9,6 @@ CONFIG_SLAB=y | ||||
| CONFIG_MODULES=y | ||||
| CONFIG_MODULE_UNLOAD=y | ||||
| # CONFIG_BLK_DEV_BSG is not set | ||||
| # CONFIG_IOSCHED_DEADLINE is not set | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| CONFIG_ARCH_PXA=y | ||||
| CONFIG_MACH_H4700=y | ||||
| CONFIG_MACH_MAGICIAN=y | ||||
|  | ||||
| @ -15,7 +15,6 @@ CONFIG_EMBEDDED=y | ||||
| # CONFIG_SLUB_DEBUG is not set | ||||
| # CONFIG_COMPAT_BRK is not set | ||||
| # CONFIG_BLK_DEV_BSG is not set | ||||
| # CONFIG_IOSCHED_DEADLINE is not set | ||||
| CONFIG_ARCH_MULTI_V4=y | ||||
| # CONFIG_ARCH_MULTI_V7 is not set | ||||
| CONFIG_ARCH_MOXART=y | ||||
|  | ||||
| @ -25,8 +25,6 @@ CONFIG_MODULE_UNLOAD=y | ||||
| CONFIG_MODULE_FORCE_UNLOAD=y | ||||
| CONFIG_MODVERSIONS=y | ||||
| CONFIG_BLK_DEV_INTEGRITY=y | ||||
| # CONFIG_IOSCHED_DEADLINE is not set | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| CONFIG_NET=y | ||||
| CONFIG_PACKET=y | ||||
| CONFIG_UNIX=y | ||||
|  | ||||
| @ -18,8 +18,6 @@ CONFIG_MODULES=y | ||||
| CONFIG_MODULE_UNLOAD=y | ||||
| CONFIG_MODULE_FORCE_UNLOAD=y | ||||
| # CONFIG_BLK_DEV_BSG is not set | ||||
| # CONFIG_IOSCHED_DEADLINE is not set | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| CONFIG_ARCH_OMAP=y | ||||
| CONFIG_ARCH_OMAP1=y | ||||
| CONFIG_OMAP_RESET_CLOCKS=y | ||||
|  | ||||
| @ -7,8 +7,6 @@ CONFIG_SLAB=y | ||||
| CONFIG_MODULES=y | ||||
| CONFIG_MODULE_UNLOAD=y | ||||
| # CONFIG_BLK_DEV_BSG is not set | ||||
| # CONFIG_IOSCHED_DEADLINE is not set | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| CONFIG_ARCH_PXA=y | ||||
| CONFIG_ARCH_PXA_PALM=y | ||||
| # CONFIG_MACH_PALMTX is not set | ||||
|  | ||||
| @ -13,8 +13,6 @@ CONFIG_MODULES=y | ||||
| CONFIG_MODULE_UNLOAD=y | ||||
| CONFIG_MODULE_FORCE_UNLOAD=y | ||||
| # CONFIG_BLK_DEV_BSG is not set | ||||
| # CONFIG_IOSCHED_DEADLINE is not set | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| CONFIG_ARCH_PXA=y | ||||
| CONFIG_MACH_PCM027=y | ||||
| CONFIG_MACH_PCM990_BASEBOARD=y | ||||
|  | ||||
| @ -6,8 +6,6 @@ CONFIG_EXPERT=y | ||||
| # CONFIG_HOTPLUG is not set | ||||
| # CONFIG_SHMEM is not set | ||||
| CONFIG_MODULES=y | ||||
| # CONFIG_IOSCHED_DEADLINE is not set | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| CONFIG_ARCH_SA1100=y | ||||
| CONFIG_SA1100_PLEB=y | ||||
| CONFIG_ZBOOT_ROM_TEXT=0x0 | ||||
|  | ||||
| @ -8,7 +8,6 @@ CONFIG_SLAB=y | ||||
| CONFIG_MODULES=y | ||||
| CONFIG_MODULE_UNLOAD=y | ||||
| # CONFIG_BLK_DEV_BSG is not set | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| CONFIG_ARCH_MULTI_V6=y | ||||
| CONFIG_ARCH_REALVIEW=y | ||||
| CONFIG_MACH_REALVIEW_EB=y | ||||
|  | ||||
| @ -14,8 +14,6 @@ CONFIG_MODULE_FORCE_LOAD=y | ||||
| CONFIG_MODULE_UNLOAD=y | ||||
| CONFIG_MODULE_FORCE_UNLOAD=y | ||||
| # CONFIG_BLK_DEV_BSG is not set | ||||
| # CONFIG_IOSCHED_DEADLINE is not set | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| CONFIG_ARCH_AT91=y | ||||
| CONFIG_SOC_SAMA5D2=y | ||||
| CONFIG_SOC_SAMA5D3=y | ||||
| @ -182,7 +180,6 @@ CONFIG_USB_GADGET=y | ||||
| CONFIG_USB_ATMEL_USBA=y | ||||
| CONFIG_USB_G_SERIAL=y | ||||
| CONFIG_MMC=y | ||||
| # CONFIG_MMC_BLOCK_BOUNCE is not set | ||||
| CONFIG_MMC_SDHCI=y | ||||
| CONFIG_MMC_SDHCI_PLTFM=y | ||||
| CONFIG_MMC_SDHCI_OF_AT91=y | ||||
|  | ||||
| @ -14,8 +14,6 @@ CONFIG_EMBEDDED=y | ||||
| # CONFIG_VM_EVENT_COUNTERS is not set | ||||
| # CONFIG_SLUB_DEBUG is not set | ||||
| # CONFIG_BLK_DEV_BSG is not set | ||||
| # CONFIG_IOSCHED_DEADLINE is not set | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| # CONFIG_MMU is not set | ||||
| CONFIG_ARCH_STM32=y | ||||
| CONFIG_CPU_V7M_NUM_IRQ=240 | ||||
|  | ||||
| @ -85,6 +85,7 @@ CONFIG_BATTERY_AXP20X=y | ||||
| CONFIG_AXP20X_POWER=y | ||||
| CONFIG_THERMAL=y | ||||
| CONFIG_CPU_THERMAL=y | ||||
| CONFIG_SUN8I_THERMAL=y | ||||
| CONFIG_WATCHDOG=y | ||||
| CONFIG_SUNXI_WATCHDOG=y | ||||
| CONFIG_MFD_AC100=y | ||||
|  | ||||
| @ -11,7 +11,6 @@ CONFIG_MODULES=y | ||||
| CONFIG_MODULE_UNLOAD=y | ||||
| # CONFIG_BLK_DEV_BSG is not set | ||||
| CONFIG_PARTITION_ADVANCED=y | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| # CONFIG_ARCH_MULTI_V7 is not set | ||||
| CONFIG_ARCH_U300=y | ||||
| CONFIG_MACH_U300_SPIDUMMY=y | ||||
| @ -46,7 +45,6 @@ CONFIG_FB=y | ||||
| CONFIG_BACKLIGHT_CLASS_DEVICE=y | ||||
| # CONFIG_USB_SUPPORT is not set | ||||
| CONFIG_MMC=y | ||||
| # CONFIG_MMC_BLOCK_BOUNCE is not set | ||||
| CONFIG_MMC_ARMMMCI=y | ||||
| CONFIG_RTC_CLASS=y | ||||
| # CONFIG_RTC_HCTOSYS is not set | ||||
|  | ||||
| @ -15,8 +15,6 @@ CONFIG_OPROFILE=y | ||||
| CONFIG_MODULES=y | ||||
| CONFIG_MODULE_UNLOAD=y | ||||
| # CONFIG_BLK_DEV_BSG is not set | ||||
| # CONFIG_IOSCHED_DEADLINE is not set | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| CONFIG_ARCH_VEXPRESS=y | ||||
| CONFIG_ARCH_VEXPRESS_DCSCB=y | ||||
| CONFIG_ARCH_VEXPRESS_TC2_PM=y | ||||
|  | ||||
| @ -9,7 +9,6 @@ CONFIG_SLAB=y | ||||
| CONFIG_MODULES=y | ||||
| CONFIG_MODULE_UNLOAD=y | ||||
| # CONFIG_BLK_DEV_BSG is not set | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| CONFIG_ARCH_PXA=y | ||||
| CONFIG_ARCH_VIPER=y | ||||
| CONFIG_IWMMXT=y | ||||
|  | ||||
| @ -4,7 +4,6 @@ CONFIG_LOG_BUF_SHIFT=13 | ||||
| CONFIG_MODULES=y | ||||
| CONFIG_MODULE_UNLOAD=y | ||||
| # CONFIG_BLK_DEV_BSG is not set | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| CONFIG_ARCH_PXA=y | ||||
| CONFIG_MACH_ARCOM_ZEUS=y | ||||
| CONFIG_PCCARD=m | ||||
| @ -137,7 +136,6 @@ CONFIG_USB_MASS_STORAGE=m | ||||
| CONFIG_USB_G_SERIAL=m | ||||
| CONFIG_USB_G_PRINTER=m | ||||
| CONFIG_MMC=y | ||||
| # CONFIG_MMC_BLOCK_BOUNCE is not set | ||||
| CONFIG_MMC_PXA=y | ||||
| CONFIG_NEW_LEDS=y | ||||
| CONFIG_LEDS_CLASS=m | ||||
|  | ||||
| @ -16,7 +16,6 @@ CONFIG_EMBEDDED=y | ||||
| CONFIG_PERF_EVENTS=y | ||||
| CONFIG_SLAB=y | ||||
| # CONFIG_BLK_DEV_BSG is not set | ||||
| # CONFIG_IOSCHED_CFQ is not set | ||||
| CONFIG_ARCH_ZX=y | ||||
| CONFIG_SOC_ZX296702=y | ||||
| # CONFIG_SWP_EMULATE is not set | ||||
|  | ||||
| @ -392,9 +392,6 @@ static inline void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) {} | ||||
| static inline void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) {} | ||||
| static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {} | ||||
| 
 | ||||
| static inline void kvm_arm_vhe_guest_enter(void) {} | ||||
| static inline void kvm_arm_vhe_guest_exit(void) {} | ||||
| 
 | ||||
| #define KVM_BP_HARDEN_UNKNOWN		-1 | ||||
| #define KVM_BP_HARDEN_WA_NEEDED		0 | ||||
| #define KVM_BP_HARDEN_NOT_REQUIRED	1 | ||||
|  | ||||
| @ -11,7 +11,7 @@ config ARCH_NPCM7XX | ||||
| 	depends on ARCH_MULTI_V7 | ||||
| 	select PINCTRL_NPCM7XX | ||||
| 	select NPCM7XX_TIMER | ||||
| 	select ARCH_REQUIRE_GPIOLIB | ||||
| 	select GPIOLIB | ||||
| 	select CACHE_L2X0 | ||||
| 	select ARM_GIC | ||||
| 	select HAVE_ARM_TWD if SMP | ||||
|  | ||||
| @ -161,10 +161,10 @@ | ||||
| 		bus-range = <0x0 0x1>; | ||||
| 		reg = <0x0 0x40000000 0x0 0x10000000>; | ||||
| 		ranges = <0x2000000 0x0 0x50000000 0x0 0x50000000 0x0 0x10000000>; | ||||
| 		interrupt-map = <0 0 0 1 &gic GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>, | ||||
| 				<0 0 0 2 &gic GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>, | ||||
| 				<0 0 0 3 &gic GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>, | ||||
| 				<0 0 0 4 &gic GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>; | ||||
| 		interrupt-map = <0 0 0 1 &gic 0 0 GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>, | ||||
| 				<0 0 0 2 &gic 0 0 GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>, | ||||
| 				<0 0 0 3 &gic 0 0 GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>, | ||||
| 				<0 0 0 4 &gic 0 0 GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>; | ||||
| 		interrupt-map-mask = <0x0 0x0 0x0 0x7>; | ||||
| 		msi-map = <0x0 &its 0x0 0x10000>; | ||||
| 		iommu-map = <0x0 &smmu 0x0 0x10000>; | ||||
|  | ||||
| @ -452,6 +452,7 @@ CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y | ||||
| CONFIG_CPU_THERMAL=y | ||||
| CONFIG_THERMAL_EMULATION=y | ||||
| CONFIG_QORIQ_THERMAL=m | ||||
| CONFIG_SUN8I_THERMAL=y | ||||
| CONFIG_ROCKCHIP_THERMAL=m | ||||
| CONFIG_RCAR_THERMAL=y | ||||
| CONFIG_RCAR_GEN3_THERMAL=y | ||||
| @ -547,6 +548,7 @@ CONFIG_ROCKCHIP_DW_MIPI_DSI=y | ||||
| CONFIG_ROCKCHIP_INNO_HDMI=y | ||||
| CONFIG_DRM_RCAR_DU=m | ||||
| CONFIG_DRM_SUN4I=m | ||||
| CONFIG_DRM_SUN6I_DSI=m | ||||
| CONFIG_DRM_SUN8I_DW_HDMI=m | ||||
| CONFIG_DRM_SUN8I_MIXER=m | ||||
| CONFIG_DRM_MSM=m | ||||
| @ -681,7 +683,7 @@ CONFIG_RTC_DRV_SNVS=m | ||||
| CONFIG_RTC_DRV_IMX_SC=m | ||||
| CONFIG_RTC_DRV_XGENE=y | ||||
| CONFIG_DMADEVICES=y | ||||
| CONFIG_DMA_BCM2835=m | ||||
| CONFIG_DMA_BCM2835=y | ||||
| CONFIG_DMA_SUN6I=m | ||||
| CONFIG_FSL_EDMA=y | ||||
| CONFIG_IMX_SDMA=y | ||||
|  | ||||
| @ -32,7 +32,7 @@ static inline void gic_write_eoir(u32 irq) | ||||
| 	isb(); | ||||
| } | ||||
| 
 | ||||
| static inline void gic_write_dir(u32 irq) | ||||
| static __always_inline void gic_write_dir(u32 irq) | ||||
| { | ||||
| 	write_sysreg_s(irq, SYS_ICC_DIR_EL1); | ||||
| 	isb(); | ||||
|  | ||||
| @ -69,7 +69,7 @@ static inline int icache_is_aliasing(void) | ||||
| 	return test_bit(ICACHEF_ALIASING, &__icache_flags); | ||||
| } | ||||
| 
 | ||||
| static inline int icache_is_vpipt(void) | ||||
| static __always_inline int icache_is_vpipt(void) | ||||
| { | ||||
| 	return test_bit(ICACHEF_VPIPT, &__icache_flags); | ||||
| } | ||||
|  | ||||
| @ -145,7 +145,7 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *, | ||||
| #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 | ||||
| extern void flush_dcache_page(struct page *); | ||||
| 
 | ||||
| static inline void __flush_icache_all(void) | ||||
| static __always_inline void __flush_icache_all(void) | ||||
| { | ||||
| 	if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC)) | ||||
| 		return; | ||||
|  | ||||
| @ -435,13 +435,13 @@ cpuid_feature_extract_signed_field(u64 features, int field) | ||||
| 	return cpuid_feature_extract_signed_field_width(features, field, 4); | ||||
| } | ||||
| 
 | ||||
| static inline unsigned int __attribute_const__ | ||||
| static __always_inline unsigned int __attribute_const__ | ||||
| cpuid_feature_extract_unsigned_field_width(u64 features, int field, int width) | ||||
| { | ||||
| 	return (u64)(features << (64 - width - field)) >> (64 - width); | ||||
| } | ||||
| 
 | ||||
| static inline unsigned int __attribute_const__ | ||||
| static __always_inline unsigned int __attribute_const__ | ||||
| cpuid_feature_extract_unsigned_field(u64 features, int field) | ||||
| { | ||||
| 	return cpuid_feature_extract_unsigned_field_width(features, field, 4); | ||||
| @ -564,7 +564,7 @@ static inline bool system_supports_mixed_endian(void) | ||||
| 	return val == 0x1; | ||||
| } | ||||
| 
 | ||||
| static inline bool system_supports_fpsimd(void) | ||||
| static __always_inline bool system_supports_fpsimd(void) | ||||
| { | ||||
| 	return !cpus_have_const_cap(ARM64_HAS_NO_FPSIMD); | ||||
| } | ||||
| @ -575,13 +575,13 @@ static inline bool system_uses_ttbr0_pan(void) | ||||
| 		!cpus_have_const_cap(ARM64_HAS_PAN); | ||||
| } | ||||
| 
 | ||||
| static inline bool system_supports_sve(void) | ||||
| static __always_inline bool system_supports_sve(void) | ||||
| { | ||||
| 	return IS_ENABLED(CONFIG_ARM64_SVE) && | ||||
| 		cpus_have_const_cap(ARM64_SVE); | ||||
| } | ||||
| 
 | ||||
| static inline bool system_supports_cnp(void) | ||||
| static __always_inline bool system_supports_cnp(void) | ||||
| { | ||||
| 	return IS_ENABLED(CONFIG_ARM64_CNP) && | ||||
| 		cpus_have_const_cap(ARM64_HAS_CNP); | ||||
|  | ||||
| @ -33,7 +33,6 @@ static inline u32 disr_to_esr(u64 disr) | ||||
| 
 | ||||
| asmlinkage void enter_from_user_mode(void); | ||||
| void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); | ||||
| void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); | ||||
| void do_undefinstr(struct pt_regs *regs); | ||||
| asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr); | ||||
| void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, | ||||
| @ -47,7 +46,4 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr); | ||||
| void do_cp15instr(unsigned int esr, struct pt_regs *regs); | ||||
| void do_el0_svc(struct pt_regs *regs); | ||||
| void do_el0_svc_compat(struct pt_regs *regs); | ||||
| void do_el0_ia_bp_hardening(unsigned long addr,  unsigned int esr, | ||||
| 			    struct pt_regs *regs); | ||||
| 
 | ||||
| #endif	/* __ASM_EXCEPTION_H */ | ||||
|  | ||||
Some files were not shown because too many files have changed in this diff Show More
		Loading…
	
		Reference in New Issue
	
	Block a user