s390 updates for 5.20 merge window
- Rework copy_oldmem_page() callback to take an iov_iter. This includes few prerequisite updates and fixes to the oldmem reading code. - Rework cpufeature implementation to allow for various CPU feature indications, which is not only limited to hardware capabilities, but also allows CPU facilities. - Use the cpufeature rework to autoload Ultravisor module when CPU facility 158 is available. - Add ELF note type for encrypted CPU state of a protected virtual CPU. The zgetdump tool from s390-tools package will decrypt the CPU state using a Customer Communication Key and overwrite respective notes to make the data accessible for crash and other debugging tools. - Use vzalloc() instead of vmalloc() + memset() in ChaCha20 crypto test. - Fix incorrect recovery of kretprobe modified return address in stacktrace. - Switch the NMI handler to use generic irqentry_nmi_enter() and irqentry_nmi_exit() helper functions. - Rework the cryptographic Adjunct Processors (AP) pass-through design to support dynamic changes to the AP matrix of a running guest as well as to implement more of the AP architecture. - Minor boot code cleanups. - Grammar and typo fixes to hmcdrv and tape drivers. -----BEGIN PGP SIGNATURE----- iI0EABYIADUWIQQrtrZiYVkVzKQcYivNdxKlNrRb8AUCYu4dRBccYWdvcmRlZXZA bGludXguaWJtLmNvbQAKCRDNdxKlNrRb8DnlAP45Sk4cE35T+Z0vdHE2f0uMXE/p uHNjS3fDZOQVFJ2jZwEA99xPF5qPCttbR/b1VHsMSb30684IT1A4PC7y05kgfAw= =jCc3 -----END PGP SIGNATURE----- Merge tag 's390-5.20-1' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux Pull s390 updates from Alexander Gordeev: - Rework copy_oldmem_page() callback to take an iov_iter. This includes a few prerequisite updates and fixes to the oldmem reading code. - Rework cpufeature implementation to allow for various CPU feature indications, which is not only limited to hardware capabilities, but also allows CPU facilities. - Use the cpufeature rework to autoload Ultravisor module when CPU facility 158 is available. 
- Add ELF note type for encrypted CPU state of a protected virtual CPU. The zgetdump tool from s390-tools package will decrypt the CPU state using a Customer Communication Key and overwrite respective notes to make the data accessible for crash and other debugging tools. - Use vzalloc() instead of vmalloc() + memset() in ChaCha20 crypto test. - Fix incorrect recovery of kretprobe modified return address in stacktrace. - Switch the NMI handler to use generic irqentry_nmi_enter() and irqentry_nmi_exit() helper functions. - Rework the cryptographic Adjunct Processors (AP) pass-through design to support dynamic changes to the AP matrix of a running guest as well as to implement more of the AP architecture. - Minor boot code cleanups. - Grammar and typo fixes to hmcdrv and tape drivers. * tag 's390-5.20-1' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (46 commits) Revert "s390/smp: enforce lowcore protection on CPU restart" Revert "s390/smp: rework absolute lowcore access" Revert "s390/smp,ptdump: add absolute lowcore markers" s390/unwind: fix fgraph return address recovery s390/nmi: use irqentry_nmi_enter()/irqentry_nmi_exit() s390: add ELF note type for encrypted CPU state of a PV VCPU s390/smp,ptdump: add absolute lowcore markers s390/smp: rework absolute lowcore access s390/setup: rearrange absolute lowcore initialization s390/boot: cleanup adjust_to_uv_max() function s390/smp: enforce lowcore protection on CPU restart s390/tape: fix comment typo s390/hmcdrv: fix Kconfig "its" grammar s390/docs: fix warnings for vfio_ap driver doc s390/docs: fix warnings for vfio_ap driver lock usage doc s390/crash: support multi-segment iterators s390/crash: use static swap buffer for copy_to_user_real() s390/crash: move copy_to_user_real() to crash_dump.c s390/zcore: fix race when reading from hardware system area s390/crash: fix incorrect number of bytes to copy to user space ...
This commit is contained in:
commit
24cb958695
@ -12,6 +12,7 @@ s390 Architecture
|
|||||||
qeth
|
qeth
|
||||||
s390dbf
|
s390dbf
|
||||||
vfio-ap
|
vfio-ap
|
||||||
|
vfio-ap-locking
|
||||||
vfio-ccw
|
vfio-ccw
|
||||||
zfcpdump
|
zfcpdump
|
||||||
common_io
|
common_io
|
||||||
|
115
Documentation/s390/vfio-ap-locking.rst
Normal file
115
Documentation/s390/vfio-ap-locking.rst
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
======================
|
||||||
|
VFIO AP Locks Overview
|
||||||
|
======================
|
||||||
|
This document describes the locks that are pertinent to the secure operation
|
||||||
|
of the vfio_ap device driver. Throughout this document, the following variables
|
||||||
|
will be used to denote instances of the structures herein described:
|
||||||
|
|
||||||
|
.. code-block:: c
|
||||||
|
|
||||||
|
struct ap_matrix_dev *matrix_dev;
|
||||||
|
struct ap_matrix_mdev *matrix_mdev;
|
||||||
|
struct kvm *kvm;
|
||||||
|
|
||||||
|
The Matrix Devices Lock (drivers/s390/crypto/vfio_ap_private.h)
|
||||||
|
---------------------------------------------------------------
|
||||||
|
|
||||||
|
.. code-block:: c
|
||||||
|
|
||||||
|
struct ap_matrix_dev {
|
||||||
|
...
|
||||||
|
struct list_head mdev_list;
|
||||||
|
struct mutex mdevs_lock;
|
||||||
|
...
|
||||||
|
}
|
||||||
|
|
||||||
|
The Matrix Devices Lock (matrix_dev->mdevs_lock) is implemented as a global
|
||||||
|
mutex contained within the single object of struct ap_matrix_dev. This lock
|
||||||
|
controls access to all fields contained within each matrix_mdev
|
||||||
|
(matrix_dev->mdev_list). This lock must be held while reading from, writing to
|
||||||
|
or using the data from a field contained within a matrix_mdev instance
|
||||||
|
representing one of the vfio_ap device driver's mediated devices.
|
||||||
|
|
||||||
|
The KVM Lock (include/linux/kvm_host.h)
|
||||||
|
---------------------------------------
|
||||||
|
|
||||||
|
.. code-block:: c
|
||||||
|
|
||||||
|
struct kvm {
|
||||||
|
...
|
||||||
|
struct mutex lock;
|
||||||
|
...
|
||||||
|
}
|
||||||
|
|
||||||
|
The KVM Lock (kvm->lock) controls access to the state data for a KVM guest. This
|
||||||
|
lock must be held by the vfio_ap device driver while one or more AP adapters,
|
||||||
|
domains or control domains are being plugged into or unplugged from the guest.
|
||||||
|
|
||||||
|
The KVM pointer is stored in the matrix_mdev instance
|
||||||
|
(matrix_mdev->kvm = kvm) containing the state of the mediated device that has
|
||||||
|
been attached to the KVM guest.
|
||||||
|
|
||||||
|
The Guests Lock (drivers/s390/crypto/vfio_ap_private.h)
|
||||||
|
-----------------------------------------------------------
|
||||||
|
|
||||||
|
.. code-block:: c
|
||||||
|
|
||||||
|
struct ap_matrix_dev {
|
||||||
|
...
|
||||||
|
struct list_head mdev_list;
|
||||||
|
struct mutex guests_lock;
|
||||||
|
...
|
||||||
|
}
|
||||||
|
|
||||||
|
The Guests Lock (matrix_dev->guests_lock) controls access to the
|
||||||
|
matrix_mdev instances (matrix_dev->mdev_list) that represent mediated devices
|
||||||
|
that hold the state for the mediated devices that have been attached to a
|
||||||
|
KVM guest. This lock must be held:
|
||||||
|
|
||||||
|
1. To control access to the KVM pointer (matrix_mdev->kvm) while the vfio_ap
|
||||||
|
device driver is using it to plug/unplug AP devices passed through to the KVM
|
||||||
|
guest.
|
||||||
|
|
||||||
|
2. To add matrix_mdev instances to or remove them from matrix_dev->mdev_list.
|
||||||
|
This is necessary to ensure the proper locking order when the list is perused
|
||||||
|
to find an ap_matrix_mdev instance for the purpose of plugging/unplugging
|
||||||
|
AP devices passed through to a KVM guest.
|
||||||
|
|
||||||
|
For example, when a queue device is removed from the vfio_ap device driver,
|
||||||
|
if the adapter is passed through to a KVM guest, it will have to be
|
||||||
|
unplugged. In order to figure out whether the adapter is passed through,
|
||||||
|
the matrix_mdev object to which the queue is assigned will have to be
|
||||||
|
found. The KVM pointer (matrix_mdev->kvm) can then be used to determine if
|
||||||
|
the mediated device is passed through (matrix_mdev->kvm != NULL) and if so,
|
||||||
|
to unplug the adapter.
|
||||||
|
|
||||||
|
It is not necessary to take the Guests Lock to access the KVM pointer if the
|
||||||
|
pointer is not used to plug/unplug devices passed through to the KVM guest;
|
||||||
|
however, in this case, the Matrix Devices Lock (matrix_dev->mdevs_lock) must be
|
||||||
|
held in order to access the KVM pointer since it is set and cleared under the
|
||||||
|
protection of the Matrix Devices Lock. A case in point is the function that
|
||||||
|
handles interception of the PQAP(AQIC) instruction sub-function. This handler
|
||||||
|
needs to access the KVM pointer only for the purposes of setting or clearing IRQ
|
||||||
|
resources, so only the matrix_dev->mdevs_lock needs to be held.
|
||||||
|
|
||||||
|
The PQAP Hook Lock (arch/s390/include/asm/kvm_host.h)
|
||||||
|
-----------------------------------------------------
|
||||||
|
|
||||||
|
.. code-block:: c
|
||||||
|
|
||||||
|
typedef int (*crypto_hook)(struct kvm_vcpu *vcpu);
|
||||||
|
|
||||||
|
struct kvm_s390_crypto {
|
||||||
|
...
|
||||||
|
struct rw_semaphore pqap_hook_rwsem;
|
||||||
|
crypto_hook *pqap_hook;
|
||||||
|
...
|
||||||
|
};
|
||||||
|
|
||||||
|
The PQAP Hook Lock is a r/w semaphore that controls access to the function
|
||||||
|
pointer of the handler ``(*kvm->arch.crypto.pqap_hook)`` to invoke when the
|
||||||
|
PQAP(AQIC) instruction sub-function is intercepted by the host. The lock must be
|
||||||
|
held in write mode when pqap_hook value is set, and in read mode when the
|
||||||
|
pqap_hook function is called.
|
@ -123,27 +123,24 @@ Let's now take a look at how AP instructions executed on a guest are interpreted
|
|||||||
by the hardware.
|
by the hardware.
|
||||||
|
|
||||||
A satellite control block called the Crypto Control Block (CRYCB) is attached to
|
A satellite control block called the Crypto Control Block (CRYCB) is attached to
|
||||||
our main hardware virtualization control block. The CRYCB contains three fields
|
our main hardware virtualization control block. The CRYCB contains an AP Control
|
||||||
to identify the adapters, usage domains and control domains assigned to the KVM
|
Block (APCB) that has three fields to identify the adapters, usage domains and
|
||||||
guest:
|
control domains assigned to the KVM guest:
|
||||||
|
|
||||||
* The AP Mask (APM) field is a bit mask that identifies the AP adapters assigned
|
* The AP Mask (APM) field is a bit mask that identifies the AP adapters assigned
|
||||||
to the KVM guest. Each bit in the mask, from left to right (i.e. from most
|
to the KVM guest. Each bit in the mask, from left to right, corresponds to
|
||||||
significant to least significant bit in big endian order), corresponds to
|
|
||||||
an APID from 0-255. If a bit is set, the corresponding adapter is valid for
|
an APID from 0-255. If a bit is set, the corresponding adapter is valid for
|
||||||
use by the KVM guest.
|
use by the KVM guest.
|
||||||
|
|
||||||
* The AP Queue Mask (AQM) field is a bit mask identifying the AP usage domains
|
* The AP Queue Mask (AQM) field is a bit mask identifying the AP usage domains
|
||||||
assigned to the KVM guest. Each bit in the mask, from left to right (i.e. from
|
assigned to the KVM guest. Each bit in the mask, from left to right,
|
||||||
most significant to least significant bit in big endian order), corresponds to
|
corresponds to an AP queue index (APQI) from 0-255. If a bit is set, the
|
||||||
an AP queue index (APQI) from 0-255. If a bit is set, the corresponding queue
|
corresponding queue is valid for use by the KVM guest.
|
||||||
is valid for use by the KVM guest.
|
|
||||||
|
|
||||||
* The AP Domain Mask field is a bit mask that identifies the AP control domains
|
* The AP Domain Mask field is a bit mask that identifies the AP control domains
|
||||||
assigned to the KVM guest. The ADM bit mask controls which domains can be
|
assigned to the KVM guest. The ADM bit mask controls which domains can be
|
||||||
changed by an AP command-request message sent to a usage domain from the
|
changed by an AP command-request message sent to a usage domain from the
|
||||||
guest. Each bit in the mask, from left to right (i.e. from most significant to
|
guest. Each bit in the mask, from left to right, corresponds to a domain from
|
||||||
least significant bit in big endian order), corresponds to a domain from
|
|
||||||
0-255. If a bit is set, the corresponding domain can be modified by an AP
|
0-255. If a bit is set, the corresponding domain can be modified by an AP
|
||||||
command-request message sent to a usage domain.
|
command-request message sent to a usage domain.
|
||||||
|
|
||||||
@ -151,10 +148,10 @@ If you recall from the description of an AP Queue, AP instructions include
|
|||||||
an APQN to identify the AP queue to which an AP command-request message is to be
|
an APQN to identify the AP queue to which an AP command-request message is to be
|
||||||
sent (NQAP and PQAP instructions), or from which a command-reply message is to
|
sent (NQAP and PQAP instructions), or from which a command-reply message is to
|
||||||
be received (DQAP instruction). The validity of an APQN is defined by the matrix
|
be received (DQAP instruction). The validity of an APQN is defined by the matrix
|
||||||
calculated from the APM and AQM; it is the cross product of all assigned adapter
|
calculated from the APM and AQM; it is the Cartesian product of all assigned
|
||||||
numbers (APM) with all assigned queue indexes (AQM). For example, if adapters 1
|
adapter numbers (APM) with all assigned queue indexes (AQM). For example, if
|
||||||
and 2 and usage domains 5 and 6 are assigned to a guest, the APQNs (1,5), (1,6),
|
adapters 1 and 2 and usage domains 5 and 6 are assigned to a guest, the APQNs
|
||||||
(2,5) and (2,6) will be valid for the guest.
|
(1,5), (1,6), (2,5) and (2,6) will be valid for the guest.
|
||||||
|
|
||||||
The APQNs can provide secure key functionality - i.e., a private key is stored
|
The APQNs can provide secure key functionality - i.e., a private key is stored
|
||||||
on the adapter card for each of its domains - so each APQN must be assigned to
|
on the adapter card for each of its domains - so each APQN must be assigned to
|
||||||
@ -192,7 +189,7 @@ The design introduces three new objects:
|
|||||||
|
|
||||||
1. AP matrix device
|
1. AP matrix device
|
||||||
2. VFIO AP device driver (vfio_ap.ko)
|
2. VFIO AP device driver (vfio_ap.ko)
|
||||||
3. VFIO AP mediated matrix pass-through device
|
3. VFIO AP mediated pass-through device
|
||||||
|
|
||||||
The VFIO AP device driver
|
The VFIO AP device driver
|
||||||
-------------------------
|
-------------------------
|
||||||
@ -200,12 +197,13 @@ The VFIO AP (vfio_ap) device driver serves the following purposes:
|
|||||||
|
|
||||||
1. Provides the interfaces to secure APQNs for exclusive use of KVM guests.
|
1. Provides the interfaces to secure APQNs for exclusive use of KVM guests.
|
||||||
|
|
||||||
2. Sets up the VFIO mediated device interfaces to manage a mediated matrix
|
2. Sets up the VFIO mediated device interfaces to manage a vfio_ap mediated
|
||||||
device and creates the sysfs interfaces for assigning adapters, usage
|
device and creates the sysfs interfaces for assigning adapters, usage
|
||||||
domains, and control domains comprising the matrix for a KVM guest.
|
domains, and control domains comprising the matrix for a KVM guest.
|
||||||
|
|
||||||
3. Configures the APM, AQM and ADM in the CRYCB referenced by a KVM guest's
|
3. Configures the APM, AQM and ADM in the APCB contained in the CRYCB referenced
|
||||||
SIE state description to grant the guest access to a matrix of AP devices
|
by a KVM guest's SIE state description to grant the guest access to a matrix
|
||||||
|
of AP devices
|
||||||
|
|
||||||
Reserve APQNs for exclusive use of KVM guests
|
Reserve APQNs for exclusive use of KVM guests
|
||||||
---------------------------------------------
|
---------------------------------------------
|
||||||
@ -235,10 +233,10 @@ reserved::
|
|||||||
| | 8 probe | |
|
| | 8 probe | |
|
||||||
+--------^---------+ +--^--^------------+
|
+--------^---------+ +--^--^------------+
|
||||||
6 edit | | |
|
6 edit | | |
|
||||||
apmask | +-----------------------------+ | 9 mdev create
|
apmask | +-----------------------------+ | 11 mdev create
|
||||||
aqmask | | 1 modprobe |
|
aqmask | | 1 modprobe |
|
||||||
+--------+-----+---+ +----------------+-+ +----------------+
|
+--------+-----+---+ +----------------+-+ +----------------+
|
||||||
| | | |8 create | mediated |
|
| | | |10 create| mediated |
|
||||||
| admin | | VFIO device core |---------> matrix |
|
| admin | | VFIO device core |---------> matrix |
|
||||||
| + | | | device |
|
| + | | | device |
|
||||||
+------+-+---------+ +--------^---------+ +--------^-------+
|
+------+-+---------+ +--------^---------+ +--------^-------+
|
||||||
@ -246,14 +244,14 @@ reserved::
|
|||||||
| | 9 create vfio_ap-passthrough | |
|
| | 9 create vfio_ap-passthrough | |
|
||||||
| +------------------------------+ |
|
| +------------------------------+ |
|
||||||
+-------------------------------------------------------------+
|
+-------------------------------------------------------------+
|
||||||
10 assign adapter/domain/control domain
|
12 assign adapter/domain/control domain
|
||||||
|
|
||||||
The process for reserving an AP queue for use by a KVM guest is:
|
The process for reserving an AP queue for use by a KVM guest is:
|
||||||
|
|
||||||
1. The administrator loads the vfio_ap device driver
|
1. The administrator loads the vfio_ap device driver
|
||||||
2. The vfio-ap driver during its initialization will register a single 'matrix'
|
2. The vfio-ap driver during its initialization will register a single 'matrix'
|
||||||
device with the device core. This will serve as the parent device for
|
device with the device core. This will serve as the parent device for
|
||||||
all mediated matrix devices used to configure an AP matrix for a guest.
|
all vfio_ap mediated devices used to configure an AP matrix for a guest.
|
||||||
3. The /sys/devices/vfio_ap/matrix device is created by the device core
|
3. The /sys/devices/vfio_ap/matrix device is created by the device core
|
||||||
4. The vfio_ap device driver will register with the AP bus for AP queue devices
|
4. The vfio_ap device driver will register with the AP bus for AP queue devices
|
||||||
of type 10 and higher (CEX4 and newer). The driver will provide the vfio_ap
|
of type 10 and higher (CEX4 and newer). The driver will provide the vfio_ap
|
||||||
@ -269,24 +267,24 @@ The process for reserving an AP queue for use by a KVM guest is:
|
|||||||
default zcrypt cex4queue driver.
|
default zcrypt cex4queue driver.
|
||||||
8. The AP bus probes the vfio_ap device driver to bind the queues reserved for
|
8. The AP bus probes the vfio_ap device driver to bind the queues reserved for
|
||||||
it.
|
it.
|
||||||
9. The administrator creates a passthrough type mediated matrix device to be
|
9. The administrator creates a passthrough type vfio_ap mediated device to be
|
||||||
used by a guest
|
used by a guest
|
||||||
10. The administrator assigns the adapters, usage domains and control domains
|
10. The administrator assigns the adapters, usage domains and control domains
|
||||||
to be exclusively used by a guest.
|
to be exclusively used by a guest.
|
||||||
|
|
||||||
Set up the VFIO mediated device interfaces
|
Set up the VFIO mediated device interfaces
|
||||||
------------------------------------------
|
------------------------------------------
|
||||||
The VFIO AP device driver utilizes the common interface of the VFIO mediated
|
The VFIO AP device driver utilizes the common interfaces of the VFIO mediated
|
||||||
device core driver to:
|
device core driver to:
|
||||||
|
|
||||||
* Register an AP mediated bus driver to add a mediated matrix device to and
|
* Register an AP mediated bus driver to add a vfio_ap mediated device to and
|
||||||
remove it from a VFIO group.
|
remove it from a VFIO group.
|
||||||
* Create and destroy a mediated matrix device
|
* Create and destroy a vfio_ap mediated device
|
||||||
* Add a mediated matrix device to and remove it from the AP mediated bus driver
|
* Add a vfio_ap mediated device to and remove it from the AP mediated bus driver
|
||||||
* Add a mediated matrix device to and remove it from an IOMMU group
|
* Add a vfio_ap mediated device to and remove it from an IOMMU group
|
||||||
|
|
||||||
The following high-level block diagram shows the main components and interfaces
|
The following high-level block diagram shows the main components and interfaces
|
||||||
of the VFIO AP mediated matrix device driver::
|
of the VFIO AP mediated device driver::
|
||||||
|
|
||||||
+-------------+
|
+-------------+
|
||||||
| |
|
| |
|
||||||
@ -343,7 +341,7 @@ matrix device.
|
|||||||
* device_api:
|
* device_api:
|
||||||
the mediated device type's API
|
the mediated device type's API
|
||||||
* available_instances:
|
* available_instances:
|
||||||
the number of mediated matrix passthrough devices
|
the number of vfio_ap mediated passthrough devices
|
||||||
that can be created
|
that can be created
|
||||||
* device_api:
|
* device_api:
|
||||||
specifies the VFIO API
|
specifies the VFIO API
|
||||||
@ -351,29 +349,37 @@ matrix device.
|
|||||||
This attribute group identifies the user-defined sysfs attributes of the
|
This attribute group identifies the user-defined sysfs attributes of the
|
||||||
mediated device. When a device is registered with the VFIO mediated device
|
mediated device. When a device is registered with the VFIO mediated device
|
||||||
framework, the sysfs attribute files identified in the 'mdev_attr_groups'
|
framework, the sysfs attribute files identified in the 'mdev_attr_groups'
|
||||||
structure will be created in the mediated matrix device's directory. The
|
structure will be created in the vfio_ap mediated device's directory. The
|
||||||
sysfs attributes for a mediated matrix device are:
|
sysfs attributes for a vfio_ap mediated device are:
|
||||||
|
|
||||||
assign_adapter / unassign_adapter:
|
assign_adapter / unassign_adapter:
|
||||||
Write-only attributes for assigning/unassigning an AP adapter to/from the
|
Write-only attributes for assigning/unassigning an AP adapter to/from the
|
||||||
mediated matrix device. To assign/unassign an adapter, the APID of the
|
vfio_ap mediated device. To assign/unassign an adapter, the APID of the
|
||||||
adapter is echoed to the respective attribute file.
|
adapter is echoed into the respective attribute file.
|
||||||
assign_domain / unassign_domain:
|
assign_domain / unassign_domain:
|
||||||
Write-only attributes for assigning/unassigning an AP usage domain to/from
|
Write-only attributes for assigning/unassigning an AP usage domain to/from
|
||||||
the mediated matrix device. To assign/unassign a domain, the domain
|
the vfio_ap mediated device. To assign/unassign a domain, the domain
|
||||||
number of the usage domain is echoed to the respective attribute
|
number of the usage domain is echoed into the respective attribute
|
||||||
file.
|
file.
|
||||||
matrix:
|
matrix:
|
||||||
A read-only file for displaying the APQNs derived from the cross product
|
A read-only file for displaying the APQNs derived from the Cartesian
|
||||||
of the adapter and domain numbers assigned to the mediated matrix device.
|
product of the adapter and domain numbers assigned to the vfio_ap mediated
|
||||||
|
device.
|
||||||
|
guest_matrix:
|
||||||
|
A read-only file for displaying the APQNs derived from the Cartesian
|
||||||
|
product of the adapter and domain numbers assigned to the APM and AQM
|
||||||
|
fields respectively of the KVM guest's CRYCB. This may differ from the
|
||||||
|
APQNs assigned to the vfio_ap mediated device if any APQN does not
|
||||||
|
reference a queue device bound to the vfio_ap device driver (i.e., the
|
||||||
|
queue is not in the host's AP configuration).
|
||||||
assign_control_domain / unassign_control_domain:
|
assign_control_domain / unassign_control_domain:
|
||||||
Write-only attributes for assigning/unassigning an AP control domain
|
Write-only attributes for assigning/unassigning an AP control domain
|
||||||
to/from the mediated matrix device. To assign/unassign a control domain,
|
to/from the vfio_ap mediated device. To assign/unassign a control domain,
|
||||||
the ID of the domain to be assigned/unassigned is echoed to the respective
|
the ID of the domain to be assigned/unassigned is echoed into the
|
||||||
attribute file.
|
respective attribute file.
|
||||||
control_domains:
|
control_domains:
|
||||||
A read-only file for displaying the control domain numbers assigned to the
|
A read-only file for displaying the control domain numbers assigned to the
|
||||||
mediated matrix device.
|
vfio_ap mediated device.
|
||||||
|
|
||||||
* functions:
|
* functions:
|
||||||
|
|
||||||
@ -383,45 +389,75 @@ matrix device.
|
|||||||
* Store the reference to the KVM structure for the guest using the mdev
|
* Store the reference to the KVM structure for the guest using the mdev
|
||||||
* Store the AP matrix configuration for the adapters, domains, and control
|
* Store the AP matrix configuration for the adapters, domains, and control
|
||||||
domains assigned via the corresponding sysfs attributes files
|
domains assigned via the corresponding sysfs attributes files
|
||||||
|
* Store the AP matrix configuration for the adapters, domains and control
|
||||||
|
domains available to a guest. A guest may not be provided access to APQNs
|
||||||
|
referencing queue devices that do not exist, or are not bound to the
|
||||||
|
vfio_ap device driver.
|
||||||
|
|
||||||
remove:
|
remove:
|
||||||
deallocates the mediated matrix device's ap_matrix_mdev structure. This will
|
deallocates the vfio_ap mediated device's ap_matrix_mdev structure.
|
||||||
be allowed only if a running guest is not using the mdev.
|
This will be allowed only if a running guest is not using the mdev.
|
||||||
|
|
||||||
* callback interfaces
|
* callback interfaces
|
||||||
|
|
||||||
open:
|
open_device:
|
||||||
The vfio_ap driver uses this callback to register a
|
The vfio_ap driver uses this callback to register a
|
||||||
VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the mdev matrix
|
VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the matrix mdev
|
||||||
device. The open is invoked when QEMU connects the VFIO iommu group
|
devices. The open_device callback is invoked by userspace to connect the
|
||||||
for the mdev matrix device to the MDEV bus. Access to the KVM structure used
|
VFIO iommu group for the matrix mdev device to the MDEV bus. Access to the
|
||||||
to configure the KVM guest is provided via this callback. The KVM structure,
|
KVM structure used to configure the KVM guest is provided via this callback.
|
||||||
is used to configure the guest's access to the AP matrix defined via the
|
The KVM structure is used to configure the guest's access to the AP matrix
|
||||||
mediated matrix device's sysfs attribute files.
|
defined via the vfio_ap mediated device's sysfs attribute files.
|
||||||
release:
|
|
||||||
unregisters the VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the
|
|
||||||
mdev matrix device and deconfigures the guest's AP matrix.
|
|
||||||
|
|
||||||
Configure the APM, AQM and ADM in the CRYCB
|
close_device:
|
||||||
-------------------------------------------
|
unregisters the VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the
|
||||||
Configuring the AP matrix for a KVM guest will be performed when the
|
matrix mdev device and deconfigures the guest's AP matrix.
|
||||||
|
|
||||||
|
ioctl:
|
||||||
|
this callback handles the VFIO_DEVICE_GET_INFO and VFIO_DEVICE_RESET ioctls
|
||||||
|
defined by the vfio framework.
|
||||||
|
|
||||||
|
Configure the guest's AP resources
|
||||||
|
----------------------------------
|
||||||
|
Configuring the AP resources for a KVM guest will be performed when the
|
||||||
VFIO_GROUP_NOTIFY_SET_KVM notifier callback is invoked. The notifier
|
VFIO_GROUP_NOTIFY_SET_KVM notifier callback is invoked. The notifier
|
||||||
function is called when QEMU connects to KVM. The guest's AP matrix is
|
function is called when userspace connects to KVM. The guest's AP resources are
|
||||||
configured via it's CRYCB by:
|
configured via its APCB by:
|
||||||
|
|
||||||
* Setting the bits in the APM corresponding to the APIDs assigned to the
|
* Setting the bits in the APM corresponding to the APIDs assigned to the
|
||||||
mediated matrix device via its 'assign_adapter' interface.
|
vfio_ap mediated device via its 'assign_adapter' interface.
|
||||||
* Setting the bits in the AQM corresponding to the domains assigned to the
|
* Setting the bits in the AQM corresponding to the domains assigned to the
|
||||||
mediated matrix device via its 'assign_domain' interface.
|
vfio_ap mediated device via its 'assign_domain' interface.
|
||||||
* Setting the bits in the ADM corresponding to the domain dIDs assigned to the
|
* Setting the bits in the ADM corresponding to the domain dIDs assigned to the
|
||||||
mediated matrix device via its 'assign_control_domains' interface.
|
vfio_ap mediated device via its 'assign_control_domain' interface.
|
||||||
|
|
||||||
|
The linux device model precludes passing a device through to a KVM guest that
|
||||||
|
is not bound to the device driver facilitating its pass-through. Consequently,
|
||||||
|
an APQN that does not reference a queue device bound to the vfio_ap device
|
||||||
|
driver will not be assigned to a KVM guest's matrix. The AP architecture,
|
||||||
|
however, does not provide a means to filter individual APQNs from the guest's
|
||||||
|
matrix, so the adapters, domains and control domains assigned to a vfio_ap
|
||||||
|
mediated device via its sysfs 'assign_adapter', 'assign_domain' and
|
||||||
|
'assign_control_domain' interfaces will be filtered before providing the AP
|
||||||
|
configuration to a guest:
|
||||||
|
|
||||||
|
* The APIDs of the adapters, the APQIs of the domains and the domain numbers of
|
||||||
|
the control domains assigned to the matrix mdev that are not also assigned to
|
||||||
|
the host's AP configuration will be filtered.
|
||||||
|
|
||||||
|
* Each APQN derived from the Cartesian product of the APIDs and APQIs assigned
|
||||||
|
to the vfio_ap mdev is examined and if any one of them does not reference a
|
||||||
|
queue device bound to the vfio_ap device driver, the adapter will not be
|
||||||
|
plugged into the guest (i.e., the bit corresponding to its APID will not be
|
||||||
|
set in the APM of the guest's APCB).
|
||||||
|
|
||||||
The CPU model features for AP
|
The CPU model features for AP
|
||||||
-----------------------------
|
-----------------------------
|
||||||
The AP stack relies on the presence of the AP instructions as well as two
|
The AP stack relies on the presence of the AP instructions as well as three
|
||||||
facilities: The AP Facilities Test (APFT) facility; and the AP Query
|
facilities: The AP Facilities Test (APFT) facility; the AP Query
|
||||||
Configuration Information (QCI) facility. These features/facilities are made
|
Configuration Information (QCI) facility; and the AP Queue Interruption Control
|
||||||
available to a KVM guest via the following CPU model features:
|
facility. These features/facilities are made available to a KVM guest via the
|
||||||
|
following CPU model features:
|
||||||
|
|
||||||
1. ap: Indicates whether the AP instructions are installed on the guest. This
|
1. ap: Indicates whether the AP instructions are installed on the guest. This
|
||||||
feature will be enabled by KVM only if the AP instructions are installed
|
feature will be enabled by KVM only if the AP instructions are installed
|
||||||
@ -435,24 +471,28 @@ available to a KVM guest via the following CPU model features:
|
|||||||
can be made available to the guest only if it is available on the host (i.e.,
|
can be made available to the guest only if it is available on the host (i.e.,
|
||||||
facility bit 12 is set).
|
facility bit 12 is set).
|
||||||
|
|
||||||
|
4. apqi: Indicates AP Queue Interruption Control facility is available on the
|
||||||
|
guest. This facility can be made available to the guest only if it is
|
||||||
|
available on the host (i.e., facility bit 65 is set).
|
||||||
|
|
||||||
Note: If the user chooses to specify a CPU model different than the 'host'
|
Note: If the user chooses to specify a CPU model different than the 'host'
|
||||||
model to QEMU, the CPU model features and facilities need to be turned on
|
model to QEMU, the CPU model features and facilities need to be turned on
|
||||||
explicitly; for example::
|
explicitly; for example::
|
||||||
|
|
||||||
/usr/bin/qemu-system-s390x ... -cpu z13,ap=on,apqci=on,apft=on
|
/usr/bin/qemu-system-s390x ... -cpu z13,ap=on,apqci=on,apft=on,apqi=on
|
||||||
|
|
||||||
A guest can be precluded from using AP features/facilities by turning them off
|
A guest can be precluded from using AP features/facilities by turning them off
|
||||||
explicitly; for example::
|
explicitly; for example::
|
||||||
|
|
||||||
/usr/bin/qemu-system-s390x ... -cpu host,ap=off,apqci=off,apft=off
|
/usr/bin/qemu-system-s390x ... -cpu host,ap=off,apqci=off,apft=off,apqi=off
|
||||||
|
|
||||||
Note: If the APFT facility is turned off (apft=off) for the guest, the guest
|
Note: If the APFT facility is turned off (apft=off) for the guest, the guest
|
||||||
will not see any AP devices. The zcrypt device drivers that register for type 10
|
will not see any AP devices. The zcrypt device drivers on the guest that
|
||||||
and newer AP devices - i.e., the cex4card and cex4queue device drivers - need
|
register for type 10 and newer AP devices - i.e., the cex4card and cex4queue
|
||||||
the APFT facility to ascertain the facilities installed on a given AP device. If
|
device drivers - need the APFT facility to ascertain the facilities installed on
|
||||||
the APFT facility is not installed on the guest, then the probe of device
|
a given AP device. If the APFT facility is not installed on the guest, then no
|
||||||
drivers will fail since only type 10 and newer devices can be configured for
|
adapter or domain devices will get created by the AP bus running on the
|
||||||
guest use.
|
guest because only type 10 and newer devices can be configured for guest use.
|
||||||
|
|
||||||
Example
|
Example
|
||||||
=======
|
=======
|
||||||
@ -471,7 +511,7 @@ CARD.DOMAIN TYPE MODE
|
|||||||
05.00ab CEX5C CCA-Coproc
|
05.00ab CEX5C CCA-Coproc
|
||||||
06 CEX5A Accelerator
|
06 CEX5A Accelerator
|
||||||
06.0004 CEX5A Accelerator
|
06.0004 CEX5A Accelerator
|
||||||
06.00ab CEX5C CCA-Coproc
|
06.00ab CEX5A Accelerator
|
||||||
=========== ===== ============
|
=========== ===== ============
|
||||||
|
|
||||||
Guest2
|
Guest2
|
||||||
@ -479,9 +519,9 @@ Guest2
|
|||||||
=========== ===== ============
|
=========== ===== ============
|
||||||
CARD.DOMAIN TYPE MODE
|
CARD.DOMAIN TYPE MODE
|
||||||
=========== ===== ============
|
=========== ===== ============
|
||||||
05 CEX5A Accelerator
|
05 CEX5C CCA-Coproc
|
||||||
05.0047 CEX5A Accelerator
|
05.0047 CEX5C CCA-Coproc
|
||||||
05.00ff CEX5A Accelerator
|
05.00ff CEX5C CCA-Coproc
|
||||||
=========== ===== ============
|
=========== ===== ============
|
||||||
|
|
||||||
Guest3
|
Guest3
|
||||||
@ -529,40 +569,56 @@ These are the steps:
|
|||||||
|
|
||||||
2. Secure the AP queues to be used by the three guests so that the host can not
|
2. Secure the AP queues to be used by the three guests so that the host can not
|
||||||
access them. To secure them, there are two sysfs files that specify
|
access them. To secure them, there are two sysfs files that specify
|
||||||
bitmasks marking a subset of the APQN range as 'usable by the default AP
|
bitmasks marking a subset of the APQN range as usable only by the default AP
|
||||||
queue device drivers' or 'not usable by the default device drivers' and thus
|
queue device drivers. All remaining APQNs are available for use by
|
||||||
available for use by the vfio_ap device driver'. The location of the sysfs
|
any other device driver. The vfio_ap device driver is currently the only
|
||||||
files containing the masks are::
|
non-default device driver. The location of the sysfs files containing the
|
||||||
|
masks are::
|
||||||
|
|
||||||
/sys/bus/ap/apmask
|
/sys/bus/ap/apmask
|
||||||
/sys/bus/ap/aqmask
|
/sys/bus/ap/aqmask
|
||||||
|
|
||||||
The 'apmask' is a 256-bit mask that identifies a set of AP adapter IDs
|
The 'apmask' is a 256-bit mask that identifies a set of AP adapter IDs
|
||||||
(APID). Each bit in the mask, from left to right (i.e., from most significant
|
(APID). Each bit in the mask, from left to right, corresponds to an APID from
|
||||||
to least significant bit in big endian order), corresponds to an APID from
|
0-255. If a bit is set, the APID belongs to the subset of APQNs marked as
|
||||||
0-255. If a bit is set, the APID is marked as usable only by the default AP
|
available only to the default AP queue device drivers.
|
||||||
queue device drivers; otherwise, the APID is usable by the vfio_ap
|
|
||||||
device driver.
|
|
||||||
|
|
||||||
The 'aqmask' is a 256-bit mask that identifies a set of AP queue indexes
|
The 'aqmask' is a 256-bit mask that identifies a set of AP queue indexes
|
||||||
(APQI). Each bit in the mask, from left to right (i.e., from most significant
|
(APQI). Each bit in the mask, from left to right, corresponds to an APQI from
|
||||||
to least significant bit in big endian order), corresponds to an APQI from
|
0-255. If a bit is set, the APQI belongs to the subset of APQNs marked as
|
||||||
0-255. If a bit is set, the APQI is marked as usable only by the default AP
|
available only to the default AP queue device drivers.
|
||||||
queue device drivers; otherwise, the APQI is usable by the vfio_ap device
|
|
||||||
driver.
|
|
||||||
|
|
||||||
Take, for example, the following mask::
|
The Cartesian product of the APIDs corresponding to the bits set in the
|
||||||
|
apmask and the APQIs corresponding to the bits set in the aqmask comprise
|
||||||
|
the subset of APQNs that can be used only by the host default device drivers.
|
||||||
|
All other APQNs are available to the non-default device drivers such as the
|
||||||
|
vfio_ap driver.
|
||||||
|
|
||||||
0x7dffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
|
Take, for example, the following masks::
|
||||||
|
|
||||||
It indicates:
|
apmask:
|
||||||
|
0x7d00000000000000000000000000000000000000000000000000000000000000
|
||||||
|
|
||||||
1, 2, 3, 4, 5, and 7-255 belong to the default drivers' pool, and 0 and 6
|
aqmask:
|
||||||
belong to the vfio_ap device driver's pool.
|
0x8000000000000000000000000000000000000000000000000000000000000000
|
||||||
|
|
||||||
|
The masks indicate:
|
||||||
|
|
||||||
|
* Adapters 1, 2, 3, 4, 5, and 7 are available for use by the host default
|
||||||
|
device drivers.
|
||||||
|
|
||||||
|
* Domain 0 is available for use by the host default device drivers
|
||||||
|
|
||||||
|
* The subset of APQNs available for use only by the default host device
|
||||||
|
drivers are:
|
||||||
|
|
||||||
|
(1,0), (2,0), (3,0), (4,0), (5,0) and (7,0)
|
||||||
|
|
||||||
|
* All other APQNs are available for use by the non-default device drivers.
|
||||||
|
|
||||||
The APQN of each AP queue device assigned to the linux host is checked by the
|
The APQN of each AP queue device assigned to the linux host is checked by the
|
||||||
AP bus against the set of APQNs derived from the cross product of APIDs
|
AP bus against the set of APQNs derived from the Cartesian product of APIDs
|
||||||
and APQIs marked as usable only by the default AP queue device drivers. If a
|
and APQIs marked as available to the default AP queue device drivers. If a
|
||||||
match is detected, only the default AP queue device drivers will be probed;
|
match is detected, only the default AP queue device drivers will be probed;
|
||||||
otherwise, the vfio_ap device driver will be probed.
|
otherwise, the vfio_ap device driver will be probed.
|
||||||
|
|
||||||
@ -579,8 +635,7 @@ These are the steps:
|
|||||||
|
|
||||||
0x4100000000000000000000000000000000000000000000000000000000000000
|
0x4100000000000000000000000000000000000000000000000000000000000000
|
||||||
|
|
||||||
Keep in mind that the mask reads from left to right (i.e., most
|
Keep in mind that the mask reads from left to right, so the mask
|
||||||
significant to least significant bit in big endian order), so the mask
|
|
||||||
above identifies device numbers 1 and 7 (01000001).
|
above identifies device numbers 1 and 7 (01000001).
|
||||||
|
|
||||||
If the string is longer than the mask, the operation is terminated with
|
If the string is longer than the mask, the operation is terminated with
|
||||||
@ -626,11 +681,22 @@ These are the steps:
|
|||||||
default drivers pool: adapter 0-15, domain 1
|
default drivers pool: adapter 0-15, domain 1
|
||||||
alternate drivers pool: adapter 16-255, domains 0, 2-255
|
alternate drivers pool: adapter 16-255, domains 0, 2-255
|
||||||
|
|
||||||
|
**Note:**
|
||||||
|
Changing a mask such that one or more APQNs will be taken from a vfio_ap
|
||||||
|
mediated device (see below) will fail with an error (EBUSY). A message
|
||||||
|
is logged to the kernel ring buffer which can be viewed with the 'dmesg'
|
||||||
|
command. The output identifies each APQN flagged as 'in use' and identifies
|
||||||
|
the vfio_ap mediated device to which it is assigned; for example:
|
||||||
|
|
||||||
|
Userspace may not re-assign queue 05.0054 already assigned to 62177883-f1bb-47f0-914d-32a22e3a8804
|
||||||
|
Userspace may not re-assign queue 04.0054 already assigned to cef03c3c-903d-4ecc-9a83-40694cb8aee4
|
||||||
|
|
||||||
Securing the APQNs for our example
|
Securing the APQNs for our example
|
||||||
----------------------------------
|
----------------------------------
|
||||||
To secure the AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004, 06.0047,
|
To secure the AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004, 06.0047,
|
||||||
06.00ab, and 06.00ff for use by the vfio_ap device driver, the corresponding
|
06.00ab, and 06.00ff for use by the vfio_ap device driver, the corresponding
|
||||||
APQNs can either be removed from the default masks::
|
APQNs can be removed from the default masks using either of the following
|
||||||
|
commands::
|
||||||
|
|
||||||
echo -5,-6 > /sys/bus/ap/apmask
|
echo -5,-6 > /sys/bus/ap/apmask
|
||||||
|
|
||||||
@ -683,7 +749,7 @@ Securing the APQNs for our example
|
|||||||
|
|
||||||
/sys/devices/vfio_ap/matrix/
|
/sys/devices/vfio_ap/matrix/
|
||||||
--- [mdev_supported_types]
|
--- [mdev_supported_types]
|
||||||
------ [vfio_ap-passthrough] (passthrough mediated matrix device type)
|
------ [vfio_ap-passthrough] (passthrough vfio_ap mediated device type)
|
||||||
--------- create
|
--------- create
|
||||||
--------- [devices]
|
--------- [devices]
|
||||||
|
|
||||||
@ -734,6 +800,9 @@ Securing the APQNs for our example
|
|||||||
----------------unassign_control_domain
|
----------------unassign_control_domain
|
||||||
----------------unassign_domain
|
----------------unassign_domain
|
||||||
|
|
||||||
|
Note: The vfio_ap mdevs do not persist across reboots unless the
|
||||||
|
mdevctl tool is used to create and persist them.
|
||||||
|
|
||||||
4. The administrator now needs to configure the matrixes for the mediated
|
4. The administrator now needs to configure the matrixes for the mediated
|
||||||
devices $uuid1 (for Guest1), $uuid2 (for Guest2) and $uuid3 (for Guest3).
|
devices $uuid1 (for Guest1), $uuid2 (for Guest2) and $uuid3 (for Guest3).
|
||||||
|
|
||||||
@ -755,6 +824,10 @@ Securing the APQNs for our example
|
|||||||
|
|
||||||
cat matrix
|
cat matrix
|
||||||
|
|
||||||
|
To display the matrix that is or will be assigned to Guest1::
|
||||||
|
|
||||||
|
cat guest_matrix
|
||||||
|
|
||||||
This is how the matrix is configured for Guest2::
|
This is how the matrix is configured for Guest2::
|
||||||
|
|
||||||
echo 5 > assign_adapter
|
echo 5 > assign_adapter
|
||||||
@ -774,17 +847,24 @@ Securing the APQNs for our example
|
|||||||
higher than the maximum is specified, the operation will terminate with
|
higher than the maximum is specified, the operation will terminate with
|
||||||
an error (ENODEV).
|
an error (ENODEV).
|
||||||
|
|
||||||
* All APQNs that can be derived from the adapter ID and the IDs of
|
Note: The maximum adapter number can be obtained via the sysfs
|
||||||
the previously assigned domains must be bound to the vfio_ap device
|
/sys/bus/ap/ap_max_adapter_id attribute file.
|
||||||
driver. If no domains have yet been assigned, then there must be at least
|
|
||||||
one APQN with the specified APID bound to the vfio_ap driver. If no such
|
|
||||||
APQNs are bound to the driver, the operation will terminate with an
|
|
||||||
error (EADDRNOTAVAIL).
|
|
||||||
|
|
||||||
No APQN that can be derived from the adapter ID and the IDs of the
|
* Each APQN derived from the Cartesian product of the APID of the adapter
|
||||||
previously assigned domains can be assigned to another mediated matrix
|
being assigned and the APQIs of the domains previously assigned:
|
||||||
device. If an APQN is assigned to another mediated matrix device, the
|
|
||||||
operation will terminate with an error (EADDRINUSE).
|
- Must only be available to the vfio_ap device driver as specified in the
|
||||||
|
sysfs /sys/bus/ap/apmask and /sys/bus/ap/aqmask attribute files. If even
|
||||||
|
one APQN is reserved for use by the host device driver, the operation
|
||||||
|
will terminate with an error (EADDRNOTAVAIL).
|
||||||
|
|
||||||
|
- Must NOT be assigned to another vfio_ap mediated device. If even one APQN
|
||||||
|
is assigned to another vfio_ap mediated device, the operation will
|
||||||
|
terminate with an error (EBUSY).
|
||||||
|
|
||||||
|
- Must NOT be assigned while the sysfs /sys/bus/ap/apmask and
|
||||||
|
/sys/bus/ap/aqmask attribute files are being edited or the operation may
|
||||||
|
terminate with an error (EBUSY).
|
||||||
|
|
||||||
In order to successfully assign a domain:
|
In order to successfully assign a domain:
|
||||||
|
|
||||||
@ -793,41 +873,50 @@ Securing the APQNs for our example
|
|||||||
higher than the maximum is specified, the operation will terminate with
|
higher than the maximum is specified, the operation will terminate with
|
||||||
an error (ENODEV).
|
an error (ENODEV).
|
||||||
|
|
||||||
* All APQNs that can be derived from the domain ID and the IDs of
|
Note: The maximum domain number can be obtained via the sysfs
|
||||||
the previously assigned adapters must be bound to the vfio_ap device
|
/sys/bus/ap/ap_max_domain_id attribute file.
|
||||||
driver. If no domains have yet been assigned, then there must be at least
|
|
||||||
one APQN with the specified APQI bound to the vfio_ap driver. If no such
|
|
||||||
APQNs are bound to the driver, the operation will terminate with an
|
|
||||||
error (EADDRNOTAVAIL).
|
|
||||||
|
|
||||||
No APQN that can be derived from the domain ID and the IDs of the
|
* Each APQN derived from the Cartesian product of the APQI of the domain
|
||||||
previously assigned adapters can be assigned to another mediated matrix
|
being assigned and the APIDs of the adapters previously assigned:
|
||||||
device. If an APQN is assigned to another mediated matrix device, the
|
|
||||||
operation will terminate with an error (EADDRINUSE).
|
|
||||||
|
|
||||||
In order to successfully assign a control domain, the domain number
|
- Must only be available to the vfio_ap device driver as specified in the
|
||||||
specified must represent a value from 0 up to the maximum domain number
|
sysfs /sys/bus/ap/apmask and /sys/bus/ap/aqmask attribute files. If even
|
||||||
configured for the system. If a control domain number higher than the maximum
|
one APQN is reserved for use by the host device driver, the operation
|
||||||
is specified, the operation will terminate with an error (ENODEV).
|
will terminate with an error (EADDRNOTAVAIL).
|
||||||
|
|
||||||
|
- Must NOT be assigned to another vfio_ap mediated device. If even one APQN
|
||||||
|
is assigned to another vfio_ap mediated device, the operation will
|
||||||
|
terminate with an error (EBUSY).
|
||||||
|
|
||||||
|
- Must NOT be assigned while the sysfs /sys/bus/ap/apmask and
|
||||||
|
/sys/bus/ap/aqmask attribute files are being edited or the operation may
|
||||||
|
terminate with an error (EBUSY).
|
||||||
|
|
||||||
|
In order to successfully assign a control domain:
|
||||||
|
|
||||||
|
* The domain number specified must represent a value from 0 up to the maximum
|
||||||
|
domain number configured for the system. If a control domain number higher
|
||||||
|
than the maximum is specified, the operation will terminate with an
|
||||||
|
error (ENODEV).
|
||||||
|
|
||||||
5. Start Guest1::
|
5. Start Guest1::
|
||||||
|
|
||||||
/usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
|
/usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on,apqi=on \
|
||||||
-device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid1 ...
|
-device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid1 ...
|
||||||
|
|
||||||
6. Start Guest2::
|
6. Start Guest2::
|
||||||
|
|
||||||
/usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
|
/usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on,apqi=on \
|
||||||
-device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid2 ...
|
-device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid2 ...
|
||||||
|
|
||||||
7. Start Guest3::
|
7. Start Guest3::
|
||||||
|
|
||||||
/usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
|
/usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on,apqi=on \
|
||||||
-device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid3 ...
|
-device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid3 ...
|
||||||
|
|
||||||
When the guest is shut down, the mediated matrix devices may be removed.
|
When the guest is shut down, the vfio_ap mediated devices may be removed.
|
||||||
|
|
||||||
Using our example again, to remove the mediated matrix device $uuid1::
|
Using our example again, to remove the vfio_ap mediated device $uuid1::
|
||||||
|
|
||||||
/sys/devices/vfio_ap/matrix/
|
/sys/devices/vfio_ap/matrix/
|
||||||
--- [mdev_supported_types]
|
--- [mdev_supported_types]
|
||||||
@ -840,26 +929,143 @@ Using our example again, to remove the mediated matrix device $uuid1::
|
|||||||
|
|
||||||
echo 1 > remove
|
echo 1 > remove
|
||||||
|
|
||||||
This will remove all of the mdev matrix device's sysfs structures including
|
This will remove all of the matrix mdev device's sysfs structures including
|
||||||
the mdev device itself. To recreate and reconfigure the mdev matrix device,
|
the mdev device itself. To recreate and reconfigure the matrix mdev device,
|
||||||
all of the steps starting with step 3 will have to be performed again. Note
|
all of the steps starting with step 3 will have to be performed again. Note
|
||||||
that the remove will fail if a guest using the mdev is still running.
|
that the remove will fail if a guest using the vfio_ap mdev is still running.
|
||||||
|
|
||||||
It is not necessary to remove an mdev matrix device, but one may want to
|
It is not necessary to remove a vfio_ap mdev, but one may want to
|
||||||
remove it if no guest will use it during the remaining lifetime of the linux
|
remove it if no guest will use it during the remaining lifetime of the linux
|
||||||
host. If the mdev matrix device is removed, one may want to also reconfigure
|
host. If the vfio_ap mdev is removed, one may want to also reconfigure
|
||||||
the pool of adapters and queues reserved for use by the default drivers.
|
the pool of adapters and queues reserved for use by the default drivers.
|
||||||
|
|
||||||
|
Hot plug/unplug support:
|
||||||
|
========================
|
||||||
|
An adapter, domain or control domain may be hot plugged into a running KVM
|
||||||
|
guest by assigning it to the vfio_ap mediated device being used by the guest if
|
||||||
|
the following conditions are met:
|
||||||
|
|
||||||
|
* The adapter, domain or control domain must also be assigned to the host's
|
||||||
|
AP configuration.
|
||||||
|
|
||||||
|
* Each APQN derived from the Cartesian product comprised of the APID of the
|
||||||
|
adapter being assigned and the APQIs of the domains assigned must reference a
|
||||||
|
queue device bound to the vfio_ap device driver.
|
||||||
|
|
||||||
|
* To hot plug a domain, each APQN derived from the Cartesian product
|
||||||
|
comprised of the APQI of the domain being assigned and the APIDs of the
|
||||||
|
adapters assigned must reference a queue device bound to the vfio_ap device
|
||||||
|
driver.
|
||||||
|
|
||||||
|
An adapter, domain or control domain may be hot unplugged from a running KVM
|
||||||
|
guest by unassigning it from the vfio_ap mediated device being used by the
|
||||||
|
guest.
|
||||||
|
|
||||||
|
Over-provisioning of AP queues for a KVM guest:
|
||||||
|
===============================================
|
||||||
|
Over-provisioning is defined herein as the assignment of adapters or domains to
|
||||||
|
a vfio_ap mediated device that do not reference AP devices in the host's AP
|
||||||
|
configuration. The idea here is that when the adapter or domain becomes
|
||||||
|
available, it will be automatically hot-plugged into the KVM guest using
|
||||||
|
the vfio_ap mediated device to which it is assigned as long as each new APQN
|
||||||
|
resulting from plugging it in references a queue device bound to the vfio_ap
|
||||||
|
device driver.
|
||||||
|
|
||||||
Limitations
|
Limitations
|
||||||
===========
|
===========
|
||||||
* The KVM/kernel interfaces do not provide a way to prevent restoring an APQN
|
Live guest migration is not supported for guests using AP devices without
|
||||||
to the default drivers pool of a queue that is still assigned to a mediated
|
intervention by a system administrator. Before a KVM guest can be migrated,
|
||||||
device in use by a guest. It is incumbent upon the administrator to
|
the vfio_ap mediated device must be removed. Unfortunately, it can not be
|
||||||
ensure there is no mediated device in use by a guest to which the APQN is
|
removed manually (i.e., echo 1 > /sys/devices/vfio_ap/matrix/$UUID/remove) while
|
||||||
assigned lest the host be given access to the private data of the AP queue
|
the mdev is in use by a KVM guest. If the guest is being emulated by QEMU,
|
||||||
device such as a private key configured specifically for the guest.
|
its mdev can be hot unplugged from the guest in one of two ways:
|
||||||
|
|
||||||
* Dynamically modifying the AP matrix for a running guest (which would amount to
|
1. If the KVM guest was started with libvirt, you can hot unplug the mdev via
|
||||||
hot(un)plug of AP devices for the guest) is currently not supported
|
the following commands:
|
||||||
|
|
||||||
* Live guest migration is not supported for guests using AP devices.
|
virsh detach-device <guestname> <path-to-device-xml>
|
||||||
|
|
||||||
|
For example, to hot unplug mdev 62177883-f1bb-47f0-914d-32a22e3a8804 from
|
||||||
|
the guest named 'my-guest':
|
||||||
|
|
||||||
|
virsh detach-device my-guest ~/config/my-guest-hostdev.xml
|
||||||
|
|
||||||
|
The contents of my-guest-hostdev.xml:
|
||||||
|
|
||||||
|
.. code-block:: xml
|
||||||
|
|
||||||
|
<hostdev mode='subsystem' type='mdev' managed='no' model='vfio-ap'>
|
||||||
|
<source>
|
||||||
|
<address uuid='62177883-f1bb-47f0-914d-32a22e3a8804'/>
|
||||||
|
</source>
|
||||||
|
</hostdev>
|
||||||
|
|
||||||
|
|
||||||
|
virsh qemu-monitor-command <guest-name> --hmp "device_del <device-id>"
|
||||||
|
|
||||||
|
For example, to hot unplug the vfio_ap mediated device identified on the
|
||||||
|
qemu command line with 'id=hostdev0' from the guest named 'my-guest':
|
||||||
|
|
||||||
|
.. code-block:: sh
|
||||||
|
|
||||||
|
virsh qemu-monitor-command my-guest --hmp "device_del hostdev0"
|
||||||
|
|
||||||
|
2. A vfio_ap mediated device can be hot unplugged by attaching the qemu monitor
|
||||||
|
to the guest and using the following qemu monitor command:
|
||||||
|
|
||||||
|
(QEMU) device_del id=<device-id>
|
||||||
|
|
||||||
|
For example, to hot unplug the vfio_ap mediated device that was specified
|
||||||
|
on the qemu command line with 'id=hostdev0' when the guest was started:
|
||||||
|
|
||||||
|
(QEMU) device_del id=hostdev0
|
||||||
|
|
||||||
|
After live migration of the KVM guest completes, an AP configuration can be
|
||||||
|
restored to the KVM guest by hot plugging a vfio_ap mediated device on the target
|
||||||
|
system into the guest in one of two ways:
|
||||||
|
|
||||||
|
1. If the KVM guest was started with libvirt, you can hot plug a matrix mediated
|
||||||
|
device into the guest via the following virsh commands:
|
||||||
|
|
||||||
|
virsh attach-device <guestname> <path-to-device-xml>
|
||||||
|
|
||||||
|
For example, to hot plug mdev 62177883-f1bb-47f0-914d-32a22e3a8804 into
|
||||||
|
the guest named 'my-guest':
|
||||||
|
|
||||||
|
virsh attach-device my-guest ~/config/my-guest-hostdev.xml
|
||||||
|
|
||||||
|
The contents of my-guest-hostdev.xml:
|
||||||
|
|
||||||
|
.. code-block:: xml
|
||||||
|
|
||||||
|
<hostdev mode='subsystem' type='mdev' managed='no' model='vfio-ap'>
|
||||||
|
<source>
|
||||||
|
<address uuid='62177883-f1bb-47f0-914d-32a22e3a8804'/>
|
||||||
|
</source>
|
||||||
|
</hostdev>
|
||||||
|
|
||||||
|
|
||||||
|
virsh qemu-monitor-command <guest-name> --hmp \
|
||||||
|
"device_add vfio-ap,sysfsdev=<path-to-mdev>,id=<device-id>"
|
||||||
|
|
||||||
|
For example, to hot plug the vfio_ap mediated device
|
||||||
|
62177883-f1bb-47f0-914d-32a22e3a8804 into the guest named 'my-guest' with
|
||||||
|
device-id hostdev0:
|
||||||
|
|
||||||
|
virsh qemu-monitor-command my-guest --hmp \
|
||||||
|
"device_add vfio-ap,\
|
||||||
|
sysfsdev=/sys/devices/vfio_ap/matrix/62177883-f1bb-47f0-914d-32a22e3a8804,\
|
||||||
|
id=hostdev0"
|
||||||
|
|
||||||
|
2. A vfio_ap mediated device can be hot plugged by attaching the qemu monitor
|
||||||
|
to the guest and using the following qemu monitor command:
|
||||||
|
|
||||||
|
(qemu) device_add "vfio-ap,sysfsdev=<path-to-mdev>,id=<device-id>"
|
||||||
|
|
||||||
|
For example, to plug the vfio_ap mediated device
|
||||||
|
62177883-f1bb-47f0-914d-32a22e3a8804 into the guest with the device-id
|
||||||
|
hostdev0:
|
||||||
|
|
||||||
|
(QEMU) device_add "vfio-ap,\
|
||||||
|
sysfsdev=/sys/devices/vfio_ap/matrix/62177883-f1bb-47f0-914d-32a22e3a8804,\
|
||||||
|
id=hostdev0"
|
||||||
|
@ -17808,7 +17808,7 @@ M: Jason Herne <jjherne@linux.ibm.com>
|
|||||||
L: linux-s390@vger.kernel.org
|
L: linux-s390@vger.kernel.org
|
||||||
S: Supported
|
S: Supported
|
||||||
W: http://www.ibm.com/developerworks/linux/linux390/
|
W: http://www.ibm.com/developerworks/linux/linux390/
|
||||||
F: Documentation/s390/vfio-ap.rst
|
F: Documentation/s390/vfio-ap*
|
||||||
F: drivers/s390/crypto/vfio_ap*
|
F: drivers/s390/crypto/vfio_ap*
|
||||||
|
|
||||||
S390 VFIO-CCW DRIVER
|
S390 VFIO-CCW DRIVER
|
||||||
|
@ -152,6 +152,7 @@ static void setup_kernel_memory_layout(void)
|
|||||||
unsigned long vmemmap_start;
|
unsigned long vmemmap_start;
|
||||||
unsigned long rte_size;
|
unsigned long rte_size;
|
||||||
unsigned long pages;
|
unsigned long pages;
|
||||||
|
unsigned long vmax;
|
||||||
|
|
||||||
pages = ident_map_size / PAGE_SIZE;
|
pages = ident_map_size / PAGE_SIZE;
|
||||||
/* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
|
/* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
|
||||||
@ -163,10 +164,10 @@ static void setup_kernel_memory_layout(void)
|
|||||||
vmalloc_size > _REGION2_SIZE ||
|
vmalloc_size > _REGION2_SIZE ||
|
||||||
vmemmap_start + vmemmap_size + vmalloc_size + MODULES_LEN >
|
vmemmap_start + vmemmap_size + vmalloc_size + MODULES_LEN >
|
||||||
_REGION2_SIZE) {
|
_REGION2_SIZE) {
|
||||||
MODULES_END = _REGION1_SIZE;
|
vmax = _REGION1_SIZE;
|
||||||
rte_size = _REGION2_SIZE;
|
rte_size = _REGION2_SIZE;
|
||||||
} else {
|
} else {
|
||||||
MODULES_END = _REGION2_SIZE;
|
vmax = _REGION2_SIZE;
|
||||||
rte_size = _REGION3_SIZE;
|
rte_size = _REGION3_SIZE;
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
@ -174,11 +175,12 @@ static void setup_kernel_memory_layout(void)
|
|||||||
* secure storage limit, so that any vmalloc allocation
|
* secure storage limit, so that any vmalloc allocation
|
||||||
* we do could be used to back secure guest storage.
|
* we do could be used to back secure guest storage.
|
||||||
*/
|
*/
|
||||||
adjust_to_uv_max(&MODULES_END);
|
vmax = adjust_to_uv_max(vmax);
|
||||||
#ifdef CONFIG_KASAN
|
#ifdef CONFIG_KASAN
|
||||||
/* force vmalloc and modules below kasan shadow */
|
/* force vmalloc and modules below kasan shadow */
|
||||||
MODULES_END = min(MODULES_END, KASAN_SHADOW_START);
|
vmax = min(vmax, KASAN_SHADOW_START);
|
||||||
#endif
|
#endif
|
||||||
|
MODULES_END = vmax;
|
||||||
MODULES_VADDR = MODULES_END - MODULES_LEN;
|
MODULES_VADDR = MODULES_END - MODULES_LEN;
|
||||||
VMALLOC_END = MODULES_VADDR;
|
VMALLOC_END = MODULES_VADDR;
|
||||||
|
|
||||||
|
@ -57,10 +57,11 @@ void uv_query_info(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if IS_ENABLED(CONFIG_KVM)
|
#if IS_ENABLED(CONFIG_KVM)
|
||||||
void adjust_to_uv_max(unsigned long *vmax)
|
unsigned long adjust_to_uv_max(unsigned long limit)
|
||||||
{
|
{
|
||||||
if (is_prot_virt_host() && uv_info.max_sec_stor_addr)
|
if (is_prot_virt_host() && uv_info.max_sec_stor_addr)
|
||||||
*vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr);
|
limit = min_t(unsigned long, limit, uv_info.max_sec_stor_addr);
|
||||||
|
return limit;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int is_prot_virt_host_capable(void)
|
static int is_prot_virt_host_capable(void)
|
||||||
|
@ -3,10 +3,13 @@
|
|||||||
#define BOOT_UV_H
|
#define BOOT_UV_H
|
||||||
|
|
||||||
#if IS_ENABLED(CONFIG_KVM)
|
#if IS_ENABLED(CONFIG_KVM)
|
||||||
void adjust_to_uv_max(unsigned long *vmax);
|
unsigned long adjust_to_uv_max(unsigned long limit);
|
||||||
void sanitize_prot_virt_host(void);
|
void sanitize_prot_virt_host(void);
|
||||||
#else
|
#else
|
||||||
static inline void adjust_to_uv_max(unsigned long *vmax) {}
|
static inline unsigned long adjust_to_uv_max(unsigned long limit)
|
||||||
|
{
|
||||||
|
return limit;
|
||||||
|
}
|
||||||
static inline void sanitize_prot_virt_host(void) {}
|
static inline void sanitize_prot_virt_host(void) {}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1049,7 +1049,7 @@ out_err:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
module_cpu_feature_match(MSA, aes_s390_init);
|
module_cpu_feature_match(S390_CPU_FEATURE_MSA, aes_s390_init);
|
||||||
module_exit(aes_s390_fini);
|
module_exit(aes_s390_fini);
|
||||||
|
|
||||||
MODULE_ALIAS_CRYPTO("aes-all");
|
MODULE_ALIAS_CRYPTO("aes-all");
|
||||||
|
@ -121,7 +121,7 @@ static void __exit chacha_mod_fini(void)
|
|||||||
crypto_unregister_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs));
|
crypto_unregister_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs));
|
||||||
}
|
}
|
||||||
|
|
||||||
module_cpu_feature_match(VXRS, chacha_mod_init);
|
module_cpu_feature_match(S390_CPU_FEATURE_VXRS, chacha_mod_init);
|
||||||
module_exit(chacha_mod_fini);
|
module_exit(chacha_mod_fini);
|
||||||
|
|
||||||
MODULE_DESCRIPTION("ChaCha20 stream cipher");
|
MODULE_DESCRIPTION("ChaCha20 stream cipher");
|
||||||
|
@ -298,7 +298,7 @@ static void __exit crc_vx_mod_exit(void)
|
|||||||
crypto_unregister_shashes(crc32_vx_algs, ARRAY_SIZE(crc32_vx_algs));
|
crypto_unregister_shashes(crc32_vx_algs, ARRAY_SIZE(crc32_vx_algs));
|
||||||
}
|
}
|
||||||
|
|
||||||
module_cpu_feature_match(VXRS, crc_vx_mod_init);
|
module_cpu_feature_match(S390_CPU_FEATURE_VXRS, crc_vx_mod_init);
|
||||||
module_exit(crc_vx_mod_exit);
|
module_exit(crc_vx_mod_exit);
|
||||||
|
|
||||||
MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>");
|
MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>");
|
||||||
|
@ -492,7 +492,7 @@ out_err:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
module_cpu_feature_match(MSA, des_s390_init);
|
module_cpu_feature_match(S390_CPU_FEATURE_MSA, des_s390_init);
|
||||||
module_exit(des_s390_exit);
|
module_exit(des_s390_exit);
|
||||||
|
|
||||||
MODULE_ALIAS_CRYPTO("des");
|
MODULE_ALIAS_CRYPTO("des");
|
||||||
|
@ -145,7 +145,7 @@ static void __exit ghash_mod_exit(void)
|
|||||||
crypto_unregister_shash(&ghash_alg);
|
crypto_unregister_shash(&ghash_alg);
|
||||||
}
|
}
|
||||||
|
|
||||||
module_cpu_feature_match(MSA, ghash_mod_init);
|
module_cpu_feature_match(S390_CPU_FEATURE_MSA, ghash_mod_init);
|
||||||
module_exit(ghash_mod_exit);
|
module_exit(ghash_mod_exit);
|
||||||
|
|
||||||
MODULE_ALIAS_CRYPTO("ghash");
|
MODULE_ALIAS_CRYPTO("ghash");
|
||||||
|
@ -907,5 +907,5 @@ static void __exit prng_exit(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
module_cpu_feature_match(MSA, prng_init);
|
module_cpu_feature_match(S390_CPU_FEATURE_MSA, prng_init);
|
||||||
module_exit(prng_exit);
|
module_exit(prng_exit);
|
||||||
|
@ -95,7 +95,7 @@ static void __exit sha1_s390_fini(void)
|
|||||||
crypto_unregister_shash(&alg);
|
crypto_unregister_shash(&alg);
|
||||||
}
|
}
|
||||||
|
|
||||||
module_cpu_feature_match(MSA, sha1_s390_init);
|
module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha1_s390_init);
|
||||||
module_exit(sha1_s390_fini);
|
module_exit(sha1_s390_fini);
|
||||||
|
|
||||||
MODULE_ALIAS_CRYPTO("sha1");
|
MODULE_ALIAS_CRYPTO("sha1");
|
||||||
|
@ -134,7 +134,7 @@ static void __exit sha256_s390_fini(void)
|
|||||||
crypto_unregister_shash(&sha256_alg);
|
crypto_unregister_shash(&sha256_alg);
|
||||||
}
|
}
|
||||||
|
|
||||||
module_cpu_feature_match(MSA, sha256_s390_init);
|
module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha256_s390_init);
|
||||||
module_exit(sha256_s390_fini);
|
module_exit(sha256_s390_fini);
|
||||||
|
|
||||||
MODULE_ALIAS_CRYPTO("sha256");
|
MODULE_ALIAS_CRYPTO("sha256");
|
||||||
|
@ -137,7 +137,7 @@ static void __exit sha3_256_s390_fini(void)
|
|||||||
crypto_unregister_shash(&sha3_256_alg);
|
crypto_unregister_shash(&sha3_256_alg);
|
||||||
}
|
}
|
||||||
|
|
||||||
module_cpu_feature_match(MSA, sha3_256_s390_init);
|
module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha3_256_s390_init);
|
||||||
module_exit(sha3_256_s390_fini);
|
module_exit(sha3_256_s390_fini);
|
||||||
|
|
||||||
MODULE_ALIAS_CRYPTO("sha3-256");
|
MODULE_ALIAS_CRYPTO("sha3-256");
|
||||||
|
@ -147,7 +147,7 @@ static void __exit fini(void)
|
|||||||
crypto_unregister_shash(&sha3_384_alg);
|
crypto_unregister_shash(&sha3_384_alg);
|
||||||
}
|
}
|
||||||
|
|
||||||
module_cpu_feature_match(MSA, init);
|
module_cpu_feature_match(S390_CPU_FEATURE_MSA, init);
|
||||||
module_exit(fini);
|
module_exit(fini);
|
||||||
|
|
||||||
MODULE_LICENSE("GPL");
|
MODULE_LICENSE("GPL");
|
||||||
|
@ -142,7 +142,7 @@ static void __exit fini(void)
|
|||||||
crypto_unregister_shash(&sha384_alg);
|
crypto_unregister_shash(&sha384_alg);
|
||||||
}
|
}
|
||||||
|
|
||||||
module_cpu_feature_match(MSA, init);
|
module_cpu_feature_match(S390_CPU_FEATURE_MSA, init);
|
||||||
module_exit(fini);
|
module_exit(fini);
|
||||||
|
|
||||||
MODULE_LICENSE("GPL");
|
MODULE_LICENSE("GPL");
|
||||||
|
@ -2,28 +2,21 @@
|
|||||||
/*
|
/*
|
||||||
* Module interface for CPU features
|
* Module interface for CPU features
|
||||||
*
|
*
|
||||||
* Copyright IBM Corp. 2015
|
* Copyright IBM Corp. 2015, 2022
|
||||||
* Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
|
* Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef __ASM_S390_CPUFEATURE_H
|
#ifndef __ASM_S390_CPUFEATURE_H
|
||||||
#define __ASM_S390_CPUFEATURE_H
|
#define __ASM_S390_CPUFEATURE_H
|
||||||
|
|
||||||
#include <asm/elf.h>
|
enum {
|
||||||
|
S390_CPU_FEATURE_MSA,
|
||||||
|
S390_CPU_FEATURE_VXRS,
|
||||||
|
S390_CPU_FEATURE_UV,
|
||||||
|
MAX_CPU_FEATURES
|
||||||
|
};
|
||||||
|
|
||||||
/* Hardware features on Linux on z Systems are indicated by facility bits that
|
#define cpu_feature(feature) (feature)
|
||||||
* are mapped to the so-called machine flags. Particular machine flags are
|
|
||||||
* then used to define ELF hardware capabilities; most notably hardware flags
|
|
||||||
* that are essential for user space / glibc.
|
|
||||||
*
|
|
||||||
* Restrict the set of exposed CPU features to ELF hardware capabilities for
|
|
||||||
* now. Additional machine flags can be indicated by values larger than
|
|
||||||
* MAX_ELF_HWCAP_FEATURES.
|
|
||||||
*/
|
|
||||||
#define MAX_ELF_HWCAP_FEATURES (8 * sizeof(elf_hwcap))
|
|
||||||
#define MAX_CPU_FEATURES MAX_ELF_HWCAP_FEATURES
|
|
||||||
|
|
||||||
#define cpu_feature(feat) ilog2(HWCAP_ ## feat)
|
|
||||||
|
|
||||||
int cpu_have_feature(unsigned int nr);
|
int cpu_have_feature(unsigned int nr);
|
||||||
|
|
||||||
|
@ -42,18 +42,4 @@ typedef struct {
|
|||||||
.context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \
|
.context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \
|
||||||
.context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list),
|
.context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list),
|
||||||
|
|
||||||
static inline int tprot(unsigned long addr)
|
|
||||||
{
|
|
||||||
int rc = -EFAULT;
|
|
||||||
|
|
||||||
asm volatile(
|
|
||||||
" tprot 0(%1),0\n"
|
|
||||||
"0: ipm %0\n"
|
|
||||||
" srl %0,28\n"
|
|
||||||
"1:\n"
|
|
||||||
EX_TABLE(0b,1b)
|
|
||||||
: "+d" (rc) : "a" (addr) : "cc");
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -8,6 +8,8 @@
|
|||||||
#ifndef _ASM_S390_OS_INFO_H
|
#ifndef _ASM_S390_OS_INFO_H
|
||||||
#define _ASM_S390_OS_INFO_H
|
#define _ASM_S390_OS_INFO_H
|
||||||
|
|
||||||
|
#include <linux/uio.h>
|
||||||
|
|
||||||
#define OS_INFO_VERSION_MAJOR 1
|
#define OS_INFO_VERSION_MAJOR 1
|
||||||
#define OS_INFO_VERSION_MINOR 1
|
#define OS_INFO_VERSION_MINOR 1
|
||||||
#define OS_INFO_MAGIC 0x4f53494e464f535aULL /* OSINFOSZ */
|
#define OS_INFO_MAGIC 0x4f53494e464f535aULL /* OSINFOSZ */
|
||||||
@ -39,7 +41,20 @@ u32 os_info_csum(struct os_info *os_info);
|
|||||||
|
|
||||||
#ifdef CONFIG_CRASH_DUMP
|
#ifdef CONFIG_CRASH_DUMP
|
||||||
void *os_info_old_entry(int nr, unsigned long *size);
|
void *os_info_old_entry(int nr, unsigned long *size);
|
||||||
int copy_oldmem_kernel(void *dst, unsigned long src, size_t count);
|
size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count);
|
||||||
|
|
||||||
|
static inline int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
|
||||||
|
{
|
||||||
|
struct iov_iter iter;
|
||||||
|
struct kvec kvec;
|
||||||
|
|
||||||
|
kvec.iov_base = dst;
|
||||||
|
kvec.iov_len = count;
|
||||||
|
iov_iter_kvec(&iter, WRITE, &kvec, 1, count);
|
||||||
|
if (copy_oldmem_iter(&iter, src, count) < count)
|
||||||
|
return -EFAULT;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
static inline void *os_info_old_entry(int nr, unsigned long *size)
|
static inline void *os_info_old_entry(int nr, unsigned long *size)
|
||||||
{
|
{
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
#define EXT_SCCB_READ_CPU (3 * PAGE_SIZE)
|
#define EXT_SCCB_READ_CPU (3 * PAGE_SIZE)
|
||||||
|
|
||||||
#ifndef __ASSEMBLY__
|
#ifndef __ASSEMBLY__
|
||||||
|
#include <linux/uio.h>
|
||||||
#include <asm/chpid.h>
|
#include <asm/chpid.h>
|
||||||
#include <asm/cpu.h>
|
#include <asm/cpu.h>
|
||||||
|
|
||||||
@ -146,8 +147,7 @@ int sclp_pci_deconfigure(u32 fid);
|
|||||||
int sclp_ap_configure(u32 apid);
|
int sclp_ap_configure(u32 apid);
|
||||||
int sclp_ap_deconfigure(u32 apid);
|
int sclp_ap_deconfigure(u32 apid);
|
||||||
int sclp_pci_report(struct zpci_report_error_header *report, u32 fh, u32 fid);
|
int sclp_pci_report(struct zpci_report_error_header *report, u32 fh, u32 fid);
|
||||||
int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count);
|
size_t memcpy_hsa_iter(struct iov_iter *iter, unsigned long src, size_t count);
|
||||||
int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count);
|
|
||||||
void sclp_ocf_cpc_name_copy(char *dst);
|
void sclp_ocf_cpc_name_copy(char *dst);
|
||||||
|
|
||||||
static inline int sclp_get_core_info(struct sclp_core_info *info, int early)
|
static inline int sclp_get_core_info(struct sclp_core_info *info, int early)
|
||||||
|
@ -285,7 +285,6 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo
|
|||||||
return __clear_user(to, n);
|
return __clear_user(to, n);
|
||||||
}
|
}
|
||||||
|
|
||||||
int copy_to_user_real(void __user *dest, unsigned long src, unsigned long count);
|
|
||||||
void *s390_kernel_write(void *dst, const void *src, size_t size);
|
void *s390_kernel_write(void *dst, const void *src, size_t size);
|
||||||
|
|
||||||
int __noreturn __put_kernel_bad(void);
|
int __noreturn __put_kernel_bad(void);
|
||||||
|
@ -47,7 +47,7 @@ struct unwind_state {
|
|||||||
static inline unsigned long unwind_recover_ret_addr(struct unwind_state *state,
|
static inline unsigned long unwind_recover_ret_addr(struct unwind_state *state,
|
||||||
unsigned long ip)
|
unsigned long ip)
|
||||||
{
|
{
|
||||||
ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL);
|
ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, (void *)state->sp);
|
||||||
if (is_kretprobe_trampoline(ip))
|
if (is_kretprobe_trampoline(ip))
|
||||||
ip = kretprobe_find_ret_addr(state->task, (void *)state->sp, &state->kr_cur);
|
ip = kretprobe_find_ret_addr(state->task, (void *)state->sp, &state->kr_cur);
|
||||||
return ip;
|
return ip;
|
||||||
|
@ -35,7 +35,7 @@ CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls
|
|||||||
|
|
||||||
obj-y := traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o
|
obj-y := traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o
|
||||||
obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
|
obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
|
||||||
obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o
|
obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o
|
||||||
obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o
|
obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o
|
||||||
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
|
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
|
||||||
obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
|
obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
|
||||||
|
46
arch/s390/kernel/cpufeature.c
Normal file
46
arch/s390/kernel/cpufeature.c
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
/*
|
||||||
|
* Copyright IBM Corp. 2022
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/cpufeature.h>
|
||||||
|
#include <linux/bug.h>
|
||||||
|
#include <asm/elf.h>
|
||||||
|
|
||||||
|
enum {
|
||||||
|
TYPE_HWCAP,
|
||||||
|
TYPE_FACILITY,
|
||||||
|
};
|
||||||
|
|
||||||
|
struct s390_cpu_feature {
|
||||||
|
unsigned int type : 4;
|
||||||
|
unsigned int num : 28;
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct s390_cpu_feature s390_cpu_features[MAX_CPU_FEATURES] = {
|
||||||
|
[S390_CPU_FEATURE_MSA] = {.type = TYPE_HWCAP, .num = HWCAP_NR_MSA},
|
||||||
|
[S390_CPU_FEATURE_VXRS] = {.type = TYPE_HWCAP, .num = HWCAP_NR_VXRS},
|
||||||
|
[S390_CPU_FEATURE_UV] = {.type = TYPE_FACILITY, .num = 158},
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* cpu_have_feature - Test CPU features on module initialization
|
||||||
|
*/
|
||||||
|
int cpu_have_feature(unsigned int num)
|
||||||
|
{
|
||||||
|
struct s390_cpu_feature *feature;
|
||||||
|
|
||||||
|
if (WARN_ON_ONCE(num >= MAX_CPU_FEATURES))
|
||||||
|
return 0;
|
||||||
|
feature = &s390_cpu_features[num];
|
||||||
|
switch (feature->type) {
|
||||||
|
case TYPE_HWCAP:
|
||||||
|
return !!(elf_hwcap & BIT(feature->num));
|
||||||
|
case TYPE_FACILITY:
|
||||||
|
return test_facility(feature->num);
|
||||||
|
default:
|
||||||
|
WARN_ON_ONCE(1);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(cpu_have_feature);
|
@ -53,6 +53,8 @@ struct save_area {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static LIST_HEAD(dump_save_areas);
|
static LIST_HEAD(dump_save_areas);
|
||||||
|
static DEFINE_MUTEX(memcpy_real_mutex);
|
||||||
|
static char memcpy_real_buf[PAGE_SIZE];
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Allocate a save area
|
* Allocate a save area
|
||||||
@ -63,7 +65,7 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu)
|
|||||||
|
|
||||||
sa = memblock_alloc(sizeof(*sa), 8);
|
sa = memblock_alloc(sizeof(*sa), 8);
|
||||||
if (!sa)
|
if (!sa)
|
||||||
panic("Failed to allocate save area\n");
|
return NULL;
|
||||||
|
|
||||||
if (is_boot_cpu)
|
if (is_boot_cpu)
|
||||||
list_add(&sa->list, &dump_save_areas);
|
list_add(&sa->list, &dump_save_areas);
|
||||||
@ -114,38 +116,35 @@ void __init save_area_add_vxrs(struct save_area *sa, __vector128 *vxrs)
|
|||||||
memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128));
|
memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
static size_t copy_to_iter_real(struct iov_iter *iter, unsigned long src, size_t count)
|
||||||
* Return physical address for virtual address
|
|
||||||
*/
|
|
||||||
static inline void *load_real_addr(void *addr)
|
|
||||||
{
|
{
|
||||||
unsigned long real_addr;
|
size_t len, copied, res = 0;
|
||||||
|
|
||||||
asm volatile(
|
mutex_lock(&memcpy_real_mutex);
|
||||||
" lra %0,0(%1)\n"
|
while (count) {
|
||||||
" jz 0f\n"
|
len = min(PAGE_SIZE, count);
|
||||||
" la %0,0\n"
|
if (memcpy_real(memcpy_real_buf, src, len))
|
||||||
"0:"
|
break;
|
||||||
: "=a" (real_addr) : "a" (addr) : "cc");
|
copied = copy_to_iter(memcpy_real_buf, len, iter);
|
||||||
return (void *)real_addr;
|
count -= copied;
|
||||||
|
src += copied;
|
||||||
|
res += copied;
|
||||||
|
if (copied < len)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
mutex_unlock(&memcpy_real_mutex);
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count)
|
||||||
* Copy memory of the old, dumped system to a kernel space virtual address
|
|
||||||
*/
|
|
||||||
int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
|
|
||||||
{
|
{
|
||||||
unsigned long len;
|
size_t len, copied, res = 0;
|
||||||
void *ra;
|
|
||||||
int rc;
|
|
||||||
|
|
||||||
while (count) {
|
while (count) {
|
||||||
if (!oldmem_data.start && src < sclp.hsa_size) {
|
if (!oldmem_data.start && src < sclp.hsa_size) {
|
||||||
/* Copy from zfcp/nvme dump HSA area */
|
/* Copy from zfcp/nvme dump HSA area */
|
||||||
len = min(count, sclp.hsa_size - src);
|
len = min(count, sclp.hsa_size - src);
|
||||||
rc = memcpy_hsa_kernel(dst, src, len);
|
copied = memcpy_hsa_iter(iter, src, len);
|
||||||
if (rc)
|
|
||||||
return rc;
|
|
||||||
} else {
|
} else {
|
||||||
/* Check for swapped kdump oldmem areas */
|
/* Check for swapped kdump oldmem areas */
|
||||||
if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) {
|
if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) {
|
||||||
@ -157,57 +156,15 @@ int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
|
|||||||
} else {
|
} else {
|
||||||
len = count;
|
len = count;
|
||||||
}
|
}
|
||||||
if (is_vmalloc_or_module_addr(dst)) {
|
copied = copy_to_iter_real(iter, src, len);
|
||||||
ra = load_real_addr(dst);
|
|
||||||
len = min(PAGE_SIZE - offset_in_page(ra), len);
|
|
||||||
} else {
|
|
||||||
ra = dst;
|
|
||||||
}
|
|
||||||
if (memcpy_real(ra, src, len))
|
|
||||||
return -EFAULT;
|
|
||||||
}
|
}
|
||||||
dst += len;
|
count -= copied;
|
||||||
src += len;
|
src += copied;
|
||||||
count -= len;
|
res += copied;
|
||||||
|
if (copied < len)
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
return 0;
|
return res;
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Copy memory of the old, dumped system to a user space virtual address
|
|
||||||
*/
|
|
||||||
static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
|
|
||||||
{
|
|
||||||
unsigned long len;
|
|
||||||
int rc;
|
|
||||||
|
|
||||||
while (count) {
|
|
||||||
if (!oldmem_data.start && src < sclp.hsa_size) {
|
|
||||||
/* Copy from zfcp/nvme dump HSA area */
|
|
||||||
len = min(count, sclp.hsa_size - src);
|
|
||||||
rc = memcpy_hsa_user(dst, src, len);
|
|
||||||
if (rc)
|
|
||||||
return rc;
|
|
||||||
} else {
|
|
||||||
/* Check for swapped kdump oldmem areas */
|
|
||||||
if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) {
|
|
||||||
src -= oldmem_data.start;
|
|
||||||
len = min(count, oldmem_data.size - src);
|
|
||||||
} else if (oldmem_data.start && src < oldmem_data.size) {
|
|
||||||
len = min(count, oldmem_data.size - src);
|
|
||||||
src += oldmem_data.start;
|
|
||||||
} else {
|
|
||||||
len = count;
|
|
||||||
}
|
|
||||||
rc = copy_to_user_real(dst, src, count);
|
|
||||||
if (rc)
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
dst += len;
|
|
||||||
src += len;
|
|
||||||
count -= len;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -217,26 +174,9 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize,
|
|||||||
unsigned long offset)
|
unsigned long offset)
|
||||||
{
|
{
|
||||||
unsigned long src;
|
unsigned long src;
|
||||||
int rc;
|
|
||||||
|
|
||||||
if (!(iter_is_iovec(iter) || iov_iter_is_kvec(iter)))
|
|
||||||
return -EINVAL;
|
|
||||||
/* Multi-segment iterators are not supported */
|
|
||||||
if (iter->nr_segs > 1)
|
|
||||||
return -EINVAL;
|
|
||||||
if (!csize)
|
|
||||||
return 0;
|
|
||||||
src = pfn_to_phys(pfn) + offset;
|
src = pfn_to_phys(pfn) + offset;
|
||||||
|
return copy_oldmem_iter(iter, src, csize);
|
||||||
/* XXX: pass the iov_iter down to a common function */
|
|
||||||
if (iter_is_iovec(iter))
|
|
||||||
rc = copy_oldmem_user(iter->iov->iov_base, src, csize);
|
|
||||||
else
|
|
||||||
rc = copy_oldmem_kernel(iter->kvec->iov_base, src, csize);
|
|
||||||
if (rc < 0)
|
|
||||||
return rc;
|
|
||||||
iov_iter_advance(iter, csize);
|
|
||||||
return csize;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
#include <linux/kernel_stat.h>
|
#include <linux/kernel_stat.h>
|
||||||
#include <linux/init.h>
|
#include <linux/init.h>
|
||||||
#include <linux/errno.h>
|
#include <linux/errno.h>
|
||||||
|
#include <linux/entry-common.h>
|
||||||
#include <linux/hardirq.h>
|
#include <linux/hardirq.h>
|
||||||
#include <linux/log2.h>
|
#include <linux/log2.h>
|
||||||
#include <linux/kprobes.h>
|
#include <linux/kprobes.h>
|
||||||
@ -397,11 +398,12 @@ int notrace s390_do_machine_check(struct pt_regs *regs)
|
|||||||
static unsigned long long last_ipd;
|
static unsigned long long last_ipd;
|
||||||
struct mcck_struct *mcck;
|
struct mcck_struct *mcck;
|
||||||
unsigned long long tmp;
|
unsigned long long tmp;
|
||||||
|
irqentry_state_t irq_state;
|
||||||
union mci mci;
|
union mci mci;
|
||||||
unsigned long mcck_dam_code;
|
unsigned long mcck_dam_code;
|
||||||
int mcck_pending = 0;
|
int mcck_pending = 0;
|
||||||
|
|
||||||
nmi_enter();
|
irq_state = irqentry_nmi_enter(regs);
|
||||||
|
|
||||||
if (user_mode(regs))
|
if (user_mode(regs))
|
||||||
update_timer_mcck();
|
update_timer_mcck();
|
||||||
@ -504,14 +506,14 @@ int notrace s390_do_machine_check(struct pt_regs *regs)
|
|||||||
clear_cpu_flag(CIF_MCCK_GUEST);
|
clear_cpu_flag(CIF_MCCK_GUEST);
|
||||||
|
|
||||||
if (user_mode(regs) && mcck_pending) {
|
if (user_mode(regs) && mcck_pending) {
|
||||||
nmi_exit();
|
irqentry_nmi_exit(regs, irq_state);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mcck_pending)
|
if (mcck_pending)
|
||||||
schedule_mcck_handler();
|
schedule_mcck_handler();
|
||||||
|
|
||||||
nmi_exit();
|
irqentry_nmi_exit(regs, irq_state);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
NOKPROBE_SYMBOL(s390_do_machine_check);
|
NOKPROBE_SYMBOL(s390_do_machine_check);
|
||||||
|
@ -8,7 +8,6 @@
|
|||||||
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
|
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
|
||||||
|
|
||||||
#include <linux/stop_machine.h>
|
#include <linux/stop_machine.h>
|
||||||
#include <linux/cpufeature.h>
|
|
||||||
#include <linux/bitops.h>
|
#include <linux/bitops.h>
|
||||||
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
||||||
#include <linux/random.h>
|
#include <linux/random.h>
|
||||||
@ -96,15 +95,6 @@ void cpu_init(void)
|
|||||||
enter_lazy_tlb(&init_mm, current);
|
enter_lazy_tlb(&init_mm, current);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* cpu_have_feature - Test CPU features on module initialization
|
|
||||||
*/
|
|
||||||
int cpu_have_feature(unsigned int num)
|
|
||||||
{
|
|
||||||
return elf_hwcap & (1UL << num);
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL(cpu_have_feature);
|
|
||||||
|
|
||||||
static void show_facilities(struct seq_file *m)
|
static void show_facilities(struct seq_file *m)
|
||||||
{
|
{
|
||||||
unsigned int bit;
|
unsigned int bit;
|
||||||
|
@ -474,19 +474,18 @@ static void __init setup_lowcore_dat_off(void)
|
|||||||
lc->restart_data = 0;
|
lc->restart_data = 0;
|
||||||
lc->restart_source = -1U;
|
lc->restart_source = -1U;
|
||||||
|
|
||||||
mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
|
|
||||||
if (!mcck_stack)
|
|
||||||
panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
|
|
||||||
__func__, THREAD_SIZE, THREAD_SIZE);
|
|
||||||
lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET;
|
|
||||||
|
|
||||||
/* Setup absolute zero lowcore */
|
|
||||||
put_abs_lowcore(restart_stack, lc->restart_stack);
|
put_abs_lowcore(restart_stack, lc->restart_stack);
|
||||||
put_abs_lowcore(restart_fn, lc->restart_fn);
|
put_abs_lowcore(restart_fn, lc->restart_fn);
|
||||||
put_abs_lowcore(restart_data, lc->restart_data);
|
put_abs_lowcore(restart_data, lc->restart_data);
|
||||||
put_abs_lowcore(restart_source, lc->restart_source);
|
put_abs_lowcore(restart_source, lc->restart_source);
|
||||||
put_abs_lowcore(restart_psw, lc->restart_psw);
|
put_abs_lowcore(restart_psw, lc->restart_psw);
|
||||||
|
|
||||||
|
mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
|
||||||
|
if (!mcck_stack)
|
||||||
|
panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
|
||||||
|
__func__, THREAD_SIZE, THREAD_SIZE);
|
||||||
|
lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET;
|
||||||
|
|
||||||
lc->spinlock_lockval = arch_spin_lockval(0);
|
lc->spinlock_lockval = arch_spin_lockval(0);
|
||||||
lc->spinlock_index = 0;
|
lc->spinlock_index = 0;
|
||||||
arch_spin_lock_setup(0);
|
arch_spin_lock_setup(0);
|
||||||
|
@ -171,32 +171,6 @@ void memcpy_absolute(void *dest, void *src, size_t count)
|
|||||||
arch_local_irq_restore(flags);
|
arch_local_irq_restore(flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Copy memory from kernel (real) to user (virtual)
|
|
||||||
*/
|
|
||||||
int copy_to_user_real(void __user *dest, unsigned long src, unsigned long count)
|
|
||||||
{
|
|
||||||
int offs = 0, size, rc;
|
|
||||||
char *buf;
|
|
||||||
|
|
||||||
buf = (char *) __get_free_page(GFP_KERNEL);
|
|
||||||
if (!buf)
|
|
||||||
return -ENOMEM;
|
|
||||||
rc = -EFAULT;
|
|
||||||
while (offs < count) {
|
|
||||||
size = min(PAGE_SIZE, count - offs);
|
|
||||||
if (memcpy_real(buf, src + offs, size))
|
|
||||||
goto out;
|
|
||||||
if (copy_to_user(dest + offs, buf, size))
|
|
||||||
goto out;
|
|
||||||
offs += size;
|
|
||||||
}
|
|
||||||
rc = 0;
|
|
||||||
out:
|
|
||||||
free_page((unsigned long) buf);
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check if physical address is within prefix or zero page
|
* Check if physical address is within prefix or zero page
|
||||||
*/
|
*/
|
||||||
|
@ -252,5 +252,5 @@ static void __exit trng_exit(void)
|
|||||||
trng_debug_exit();
|
trng_debug_exit();
|
||||||
}
|
}
|
||||||
|
|
||||||
module_cpu_feature_match(MSA, trng_init);
|
module_cpu_feature_match(S390_CPU_FEATURE_MSA, trng_init);
|
||||||
module_exit(trng_exit);
|
module_exit(trng_exit);
|
||||||
|
@ -89,7 +89,7 @@ config HMC_DRV
|
|||||||
Management Console (HMC) drive CD/DVD-ROM. It is available as a
|
Management Console (HMC) drive CD/DVD-ROM. It is available as a
|
||||||
module, called 'hmcdrv', and also as kernel built-in. There is one
|
module, called 'hmcdrv', and also as kernel built-in. There is one
|
||||||
optional parameter for this module: cachesize=N, which modifies the
|
optional parameter for this module: cachesize=N, which modifies the
|
||||||
transfer cache size from it's default value 0.5MB to N bytes. If N
|
transfer cache size from its default value 0.5MB to N bytes. If N
|
||||||
is zero, then no caching is performed.
|
is zero, then no caching is performed.
|
||||||
|
|
||||||
config SCLP_OFB
|
config SCLP_OFB
|
||||||
|
@ -548,7 +548,7 @@ tape_34xx_unit_check(struct tape_device *device, struct tape_request *request,
|
|||||||
case 0x2e:
|
case 0x2e:
|
||||||
/*
|
/*
|
||||||
* Not capable. This indicates either that the drive fails
|
* Not capable. This indicates either that the drive fails
|
||||||
* reading the format id mark or that that format specified
|
* reading the format id mark or that format specified
|
||||||
* is not supported by the drive.
|
* is not supported by the drive.
|
||||||
*/
|
*/
|
||||||
dev_warn (&device->cdev->dev, "The tape unit cannot process "
|
dev_warn (&device->cdev->dev, "The tape unit cannot process "
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
#include <linux/stddef.h>
|
#include <linux/stddef.h>
|
||||||
#include <linux/vmalloc.h>
|
#include <linux/vmalloc.h>
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
|
#include <linux/cpufeature.h>
|
||||||
|
|
||||||
#include <asm/uvdevice.h>
|
#include <asm/uvdevice.h>
|
||||||
#include <asm/uv.h>
|
#include <asm/uv.h>
|
||||||
@ -244,12 +245,10 @@ static void __exit uvio_dev_exit(void)
|
|||||||
|
|
||||||
static int __init uvio_dev_init(void)
|
static int __init uvio_dev_init(void)
|
||||||
{
|
{
|
||||||
if (!test_facility(158))
|
|
||||||
return -ENXIO;
|
|
||||||
return misc_register(&uvio_dev_miscdev);
|
return misc_register(&uvio_dev_miscdev);
|
||||||
}
|
}
|
||||||
|
|
||||||
module_init(uvio_dev_init);
|
module_cpu_feature_match(S390_CPU_FEATURE_UV, uvio_dev_init);
|
||||||
module_exit(uvio_dev_exit);
|
module_exit(uvio_dev_exit);
|
||||||
|
|
||||||
MODULE_AUTHOR("IBM Corporation");
|
MODULE_AUTHOR("IBM Corporation");
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
#include <linux/debugfs.h>
|
#include <linux/debugfs.h>
|
||||||
#include <linux/panic_notifier.h>
|
#include <linux/panic_notifier.h>
|
||||||
#include <linux/reboot.h>
|
#include <linux/reboot.h>
|
||||||
|
#include <linux/uio.h>
|
||||||
|
|
||||||
#include <asm/asm-offsets.h>
|
#include <asm/asm-offsets.h>
|
||||||
#include <asm/ipl.h>
|
#include <asm/ipl.h>
|
||||||
@ -50,36 +51,41 @@ static struct dentry *zcore_reipl_file;
|
|||||||
static struct dentry *zcore_hsa_file;
|
static struct dentry *zcore_hsa_file;
|
||||||
static struct ipl_parameter_block *zcore_ipl_block;
|
static struct ipl_parameter_block *zcore_ipl_block;
|
||||||
|
|
||||||
|
static DEFINE_MUTEX(hsa_buf_mutex);
|
||||||
static char hsa_buf[PAGE_SIZE] __aligned(PAGE_SIZE);
|
static char hsa_buf[PAGE_SIZE] __aligned(PAGE_SIZE);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copy memory from HSA to user memory (not reentrant):
|
* Copy memory from HSA to iterator (not reentrant):
|
||||||
*
|
*
|
||||||
* @dest: User buffer where memory should be copied to
|
* @iter: Iterator where memory should be copied to
|
||||||
* @src: Start address within HSA where data should be copied
|
* @src: Start address within HSA where data should be copied
|
||||||
* @count: Size of buffer, which should be copied
|
* @count: Size of buffer, which should be copied
|
||||||
*/
|
*/
|
||||||
int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count)
|
size_t memcpy_hsa_iter(struct iov_iter *iter, unsigned long src, size_t count)
|
||||||
{
|
{
|
||||||
unsigned long offset, bytes;
|
size_t bytes, copied, res = 0;
|
||||||
|
unsigned long offset;
|
||||||
|
|
||||||
if (!hsa_available)
|
if (!hsa_available)
|
||||||
return -ENODATA;
|
return 0;
|
||||||
|
|
||||||
|
mutex_lock(&hsa_buf_mutex);
|
||||||
while (count) {
|
while (count) {
|
||||||
if (sclp_sdias_copy(hsa_buf, src / PAGE_SIZE + 2, 1)) {
|
if (sclp_sdias_copy(hsa_buf, src / PAGE_SIZE + 2, 1)) {
|
||||||
TRACE("sclp_sdias_copy() failed\n");
|
TRACE("sclp_sdias_copy() failed\n");
|
||||||
return -EIO;
|
break;
|
||||||
}
|
}
|
||||||
offset = src % PAGE_SIZE;
|
offset = src % PAGE_SIZE;
|
||||||
bytes = min(PAGE_SIZE - offset, count);
|
bytes = min(PAGE_SIZE - offset, count);
|
||||||
if (copy_to_user(dest, hsa_buf + offset, bytes))
|
copied = copy_to_iter(hsa_buf + offset, bytes, iter);
|
||||||
return -EFAULT;
|
count -= copied;
|
||||||
src += bytes;
|
src += copied;
|
||||||
dest += bytes;
|
res += copied;
|
||||||
count -= bytes;
|
if (copied < bytes)
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
return 0;
|
mutex_unlock(&hsa_buf_mutex);
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -89,25 +95,16 @@ int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count)
|
|||||||
* @src: Start address within HSA where data should be copied
|
* @src: Start address within HSA where data should be copied
|
||||||
* @count: Size of buffer, which should be copied
|
* @count: Size of buffer, which should be copied
|
||||||
*/
|
*/
|
||||||
int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count)
|
static inline int memcpy_hsa_kernel(void *dst, unsigned long src, size_t count)
|
||||||
{
|
{
|
||||||
unsigned long offset, bytes;
|
struct iov_iter iter;
|
||||||
|
struct kvec kvec;
|
||||||
|
|
||||||
if (!hsa_available)
|
kvec.iov_base = dst;
|
||||||
return -ENODATA;
|
kvec.iov_len = count;
|
||||||
|
iov_iter_kvec(&iter, WRITE, &kvec, 1, count);
|
||||||
while (count) {
|
if (memcpy_hsa_iter(&iter, src, count) < count)
|
||||||
if (sclp_sdias_copy(hsa_buf, src / PAGE_SIZE + 2, 1)) {
|
return -EIO;
|
||||||
TRACE("sclp_sdias_copy() failed\n");
|
|
||||||
return -EIO;
|
|
||||||
}
|
|
||||||
offset = src % PAGE_SIZE;
|
|
||||||
bytes = min(PAGE_SIZE - offset, count);
|
|
||||||
memcpy(dest, hsa_buf + offset, bytes);
|
|
||||||
src += bytes;
|
|
||||||
dest += bytes;
|
|
||||||
count -= bytes;
|
|
||||||
}
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -838,6 +838,17 @@ static void ap_bus_revise_bindings(void)
|
|||||||
bus_for_each_dev(&ap_bus_type, NULL, NULL, __ap_revise_reserved);
|
bus_for_each_dev(&ap_bus_type, NULL, NULL, __ap_revise_reserved);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ap_owned_by_def_drv: indicates whether an AP adapter is reserved for the
|
||||||
|
* default host driver or not.
|
||||||
|
* @card: the APID of the adapter card to check
|
||||||
|
* @queue: the APQI of the queue to check
|
||||||
|
*
|
||||||
|
* Note: the ap_perms_mutex must be locked by the caller of this function.
|
||||||
|
*
|
||||||
|
* Return: an int specifying whether the AP adapter is reserved for the host (1)
|
||||||
|
* or not (0).
|
||||||
|
*/
|
||||||
int ap_owned_by_def_drv(int card, int queue)
|
int ap_owned_by_def_drv(int card, int queue)
|
||||||
{
|
{
|
||||||
int rc = 0;
|
int rc = 0;
|
||||||
@ -845,25 +856,31 @@ int ap_owned_by_def_drv(int card, int queue)
|
|||||||
if (card < 0 || card >= AP_DEVICES || queue < 0 || queue >= AP_DOMAINS)
|
if (card < 0 || card >= AP_DEVICES || queue < 0 || queue >= AP_DOMAINS)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
mutex_lock(&ap_perms_mutex);
|
|
||||||
|
|
||||||
if (test_bit_inv(card, ap_perms.apm) &&
|
if (test_bit_inv(card, ap_perms.apm) &&
|
||||||
test_bit_inv(queue, ap_perms.aqm))
|
test_bit_inv(queue, ap_perms.aqm))
|
||||||
rc = 1;
|
rc = 1;
|
||||||
|
|
||||||
mutex_unlock(&ap_perms_mutex);
|
|
||||||
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(ap_owned_by_def_drv);
|
EXPORT_SYMBOL(ap_owned_by_def_drv);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ap_apqn_in_matrix_owned_by_def_drv: indicates whether every APQN contained in
|
||||||
|
* a set is reserved for the host drivers
|
||||||
|
* or not.
|
||||||
|
* @apm: a bitmap specifying a set of APIDs comprising the APQNs to check
|
||||||
|
* @aqm: a bitmap specifying a set of APQIs comprising the APQNs to check
|
||||||
|
*
|
||||||
|
* Note: the ap_perms_mutex must be locked by the caller of this function.
|
||||||
|
*
|
||||||
|
* Return: an int specifying whether each APQN is reserved for the host (1) or
|
||||||
|
* not (0)
|
||||||
|
*/
|
||||||
int ap_apqn_in_matrix_owned_by_def_drv(unsigned long *apm,
|
int ap_apqn_in_matrix_owned_by_def_drv(unsigned long *apm,
|
||||||
unsigned long *aqm)
|
unsigned long *aqm)
|
||||||
{
|
{
|
||||||
int card, queue, rc = 0;
|
int card, queue, rc = 0;
|
||||||
|
|
||||||
mutex_lock(&ap_perms_mutex);
|
|
||||||
|
|
||||||
for (card = 0; !rc && card < AP_DEVICES; card++)
|
for (card = 0; !rc && card < AP_DEVICES; card++)
|
||||||
if (test_bit_inv(card, apm) &&
|
if (test_bit_inv(card, apm) &&
|
||||||
test_bit_inv(card, ap_perms.apm))
|
test_bit_inv(card, ap_perms.apm))
|
||||||
@ -872,8 +889,6 @@ int ap_apqn_in_matrix_owned_by_def_drv(unsigned long *apm,
|
|||||||
test_bit_inv(queue, ap_perms.aqm))
|
test_bit_inv(queue, ap_perms.aqm))
|
||||||
rc = 1;
|
rc = 1;
|
||||||
|
|
||||||
mutex_unlock(&ap_perms_mutex);
|
|
||||||
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(ap_apqn_in_matrix_owned_by_def_drv);
|
EXPORT_SYMBOL(ap_apqn_in_matrix_owned_by_def_drv);
|
||||||
|
@ -2115,5 +2115,5 @@ static void __exit pkey_exit(void)
|
|||||||
pkey_debug_exit();
|
pkey_debug_exit();
|
||||||
}
|
}
|
||||||
|
|
||||||
module_cpu_feature_match(MSA, pkey_init);
|
module_cpu_feature_match(S390_CPU_FEATURE_MSA, pkey_init);
|
||||||
module_exit(pkey_exit);
|
module_exit(pkey_exit);
|
||||||
|
@ -18,9 +18,6 @@
|
|||||||
|
|
||||||
#define VFIO_AP_ROOT_NAME "vfio_ap"
|
#define VFIO_AP_ROOT_NAME "vfio_ap"
|
||||||
#define VFIO_AP_DEV_NAME "matrix"
|
#define VFIO_AP_DEV_NAME "matrix"
|
||||||
#define AP_QUEUE_ASSIGNED "assigned"
|
|
||||||
#define AP_QUEUE_UNASSIGNED "unassigned"
|
|
||||||
#define AP_QUEUE_IN_USE "in use"
|
|
||||||
|
|
||||||
MODULE_AUTHOR("IBM Corporation");
|
MODULE_AUTHOR("IBM Corporation");
|
||||||
MODULE_DESCRIPTION("VFIO AP device driver, Copyright IBM Corp. 2018");
|
MODULE_DESCRIPTION("VFIO AP device driver, Copyright IBM Corp. 2018");
|
||||||
@ -46,120 +43,12 @@ static struct ap_device_id ap_queue_ids[] = {
|
|||||||
{ /* end of sibling */ },
|
{ /* end of sibling */ },
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct ap_matrix_mdev *vfio_ap_mdev_for_queue(struct vfio_ap_queue *q)
|
|
||||||
{
|
|
||||||
struct ap_matrix_mdev *matrix_mdev;
|
|
||||||
unsigned long apid = AP_QID_CARD(q->apqn);
|
|
||||||
unsigned long apqi = AP_QID_QUEUE(q->apqn);
|
|
||||||
|
|
||||||
list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) {
|
|
||||||
if (test_bit_inv(apid, matrix_mdev->matrix.apm) &&
|
|
||||||
test_bit_inv(apqi, matrix_mdev->matrix.aqm))
|
|
||||||
return matrix_mdev;
|
|
||||||
}
|
|
||||||
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
static ssize_t status_show(struct device *dev,
|
|
||||||
struct device_attribute *attr,
|
|
||||||
char *buf)
|
|
||||||
{
|
|
||||||
ssize_t nchars = 0;
|
|
||||||
struct vfio_ap_queue *q;
|
|
||||||
struct ap_matrix_mdev *matrix_mdev;
|
|
||||||
struct ap_device *apdev = to_ap_dev(dev);
|
|
||||||
|
|
||||||
mutex_lock(&matrix_dev->lock);
|
|
||||||
q = dev_get_drvdata(&apdev->device);
|
|
||||||
matrix_mdev = vfio_ap_mdev_for_queue(q);
|
|
||||||
|
|
||||||
if (matrix_mdev) {
|
|
||||||
if (matrix_mdev->kvm)
|
|
||||||
nchars = scnprintf(buf, PAGE_SIZE, "%s\n",
|
|
||||||
AP_QUEUE_IN_USE);
|
|
||||||
else
|
|
||||||
nchars = scnprintf(buf, PAGE_SIZE, "%s\n",
|
|
||||||
AP_QUEUE_ASSIGNED);
|
|
||||||
} else {
|
|
||||||
nchars = scnprintf(buf, PAGE_SIZE, "%s\n",
|
|
||||||
AP_QUEUE_UNASSIGNED);
|
|
||||||
}
|
|
||||||
|
|
||||||
mutex_unlock(&matrix_dev->lock);
|
|
||||||
|
|
||||||
return nchars;
|
|
||||||
}
|
|
||||||
|
|
||||||
static DEVICE_ATTR_RO(status);
|
|
||||||
|
|
||||||
static struct attribute *vfio_queue_attrs[] = {
|
|
||||||
&dev_attr_status.attr,
|
|
||||||
NULL,
|
|
||||||
};
|
|
||||||
|
|
||||||
static const struct attribute_group vfio_queue_attr_group = {
|
|
||||||
.attrs = vfio_queue_attrs,
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* vfio_ap_queue_dev_probe: Allocate a vfio_ap_queue structure and associate it
|
|
||||||
* with the device as driver_data.
|
|
||||||
*
|
|
||||||
* @apdev: the AP device being probed
|
|
||||||
*
|
|
||||||
* Return: returns 0 if the probe succeeded; otherwise, returns an error if
|
|
||||||
* storage could not be allocated for a vfio_ap_queue object or the
|
|
||||||
* sysfs 'status' attribute could not be created for the queue device.
|
|
||||||
*/
|
|
||||||
static int vfio_ap_queue_dev_probe(struct ap_device *apdev)
|
|
||||||
{
|
|
||||||
int ret;
|
|
||||||
struct vfio_ap_queue *q;
|
|
||||||
|
|
||||||
q = kzalloc(sizeof(*q), GFP_KERNEL);
|
|
||||||
if (!q)
|
|
||||||
return -ENOMEM;
|
|
||||||
|
|
||||||
mutex_lock(&matrix_dev->lock);
|
|
||||||
dev_set_drvdata(&apdev->device, q);
|
|
||||||
q->apqn = to_ap_queue(&apdev->device)->qid;
|
|
||||||
q->saved_isc = VFIO_AP_ISC_INVALID;
|
|
||||||
|
|
||||||
ret = sysfs_create_group(&apdev->device.kobj, &vfio_queue_attr_group);
|
|
||||||
if (ret) {
|
|
||||||
dev_set_drvdata(&apdev->device, NULL);
|
|
||||||
kfree(q);
|
|
||||||
}
|
|
||||||
|
|
||||||
mutex_unlock(&matrix_dev->lock);
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* vfio_ap_queue_dev_remove: Free the associated vfio_ap_queue structure.
|
|
||||||
*
|
|
||||||
* @apdev: the AP device being removed
|
|
||||||
*
|
|
||||||
* Takes the matrix lock to avoid actions on this device while doing the remove.
|
|
||||||
*/
|
|
||||||
static void vfio_ap_queue_dev_remove(struct ap_device *apdev)
|
|
||||||
{
|
|
||||||
struct vfio_ap_queue *q;
|
|
||||||
|
|
||||||
mutex_lock(&matrix_dev->lock);
|
|
||||||
sysfs_remove_group(&apdev->device.kobj, &vfio_queue_attr_group);
|
|
||||||
q = dev_get_drvdata(&apdev->device);
|
|
||||||
vfio_ap_mdev_reset_queue(q, 1);
|
|
||||||
dev_set_drvdata(&apdev->device, NULL);
|
|
||||||
kfree(q);
|
|
||||||
mutex_unlock(&matrix_dev->lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
static struct ap_driver vfio_ap_drv = {
|
static struct ap_driver vfio_ap_drv = {
|
||||||
.probe = vfio_ap_queue_dev_probe,
|
.probe = vfio_ap_mdev_probe_queue,
|
||||||
.remove = vfio_ap_queue_dev_remove,
|
.remove = vfio_ap_mdev_remove_queue,
|
||||||
|
.in_use = vfio_ap_mdev_resource_in_use,
|
||||||
|
.on_config_changed = vfio_ap_on_cfg_changed,
|
||||||
|
.on_scan_complete = vfio_ap_on_scan_complete,
|
||||||
.ids = ap_queue_ids,
|
.ids = ap_queue_ids,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -212,8 +101,9 @@ static int vfio_ap_matrix_dev_create(void)
|
|||||||
goto matrix_alloc_err;
|
goto matrix_alloc_err;
|
||||||
}
|
}
|
||||||
|
|
||||||
mutex_init(&matrix_dev->lock);
|
mutex_init(&matrix_dev->mdevs_lock);
|
||||||
INIT_LIST_HEAD(&matrix_dev->mdev_list);
|
INIT_LIST_HEAD(&matrix_dev->mdev_list);
|
||||||
|
mutex_init(&matrix_dev->guests_lock);
|
||||||
|
|
||||||
dev_set_name(&matrix_dev->device, "%s", VFIO_AP_DEV_NAME);
|
dev_set_name(&matrix_dev->device, "%s", VFIO_AP_DEV_NAME);
|
||||||
matrix_dev->device.parent = root_device;
|
matrix_dev->device.parent = root_device;
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -19,6 +19,7 @@
|
|||||||
#include <linux/mutex.h>
|
#include <linux/mutex.h>
|
||||||
#include <linux/kvm_host.h>
|
#include <linux/kvm_host.h>
|
||||||
#include <linux/vfio.h>
|
#include <linux/vfio.h>
|
||||||
|
#include <linux/hashtable.h>
|
||||||
|
|
||||||
#include "ap_bus.h"
|
#include "ap_bus.h"
|
||||||
|
|
||||||
@ -32,20 +33,26 @@
|
|||||||
* @available_instances: number of mediated matrix devices that can be created
|
* @available_instances: number of mediated matrix devices that can be created
|
||||||
* @info: the struct containing the output from the PQAP(QCI) instruction
|
* @info: the struct containing the output from the PQAP(QCI) instruction
|
||||||
* @mdev_list: the list of mediated matrix devices created
|
* @mdev_list: the list of mediated matrix devices created
|
||||||
* @lock: mutex for locking the AP matrix device. This lock will be
|
* @mdevs_lock: mutex for locking the AP matrix device. This lock will be
|
||||||
* taken every time we fiddle with state managed by the vfio_ap
|
* taken every time we fiddle with state managed by the vfio_ap
|
||||||
* driver, be it using @mdev_list or writing the state of a
|
* driver, be it using @mdev_list or writing the state of a
|
||||||
* single ap_matrix_mdev device. It's quite coarse but we don't
|
* single ap_matrix_mdev device. It's quite coarse but we don't
|
||||||
* expect much contention.
|
* expect much contention.
|
||||||
* @vfio_ap_drv: the vfio_ap device driver
|
* @vfio_ap_drv: the vfio_ap device driver
|
||||||
|
* @guests_lock: mutex for controlling access to a guest that is using AP
|
||||||
|
* devices passed through by the vfio_ap device driver. This lock
|
||||||
|
* will be taken when the AP devices are plugged into or unplugged
|
||||||
|
* from a guest, and when an ap_matrix_mdev device is added to or
|
||||||
|
* removed from @mdev_list or the list is iterated.
|
||||||
*/
|
*/
|
||||||
struct ap_matrix_dev {
|
struct ap_matrix_dev {
|
||||||
struct device device;
|
struct device device;
|
||||||
atomic_t available_instances;
|
atomic_t available_instances;
|
||||||
struct ap_config_info info;
|
struct ap_config_info info;
|
||||||
struct list_head mdev_list;
|
struct list_head mdev_list;
|
||||||
struct mutex lock;
|
struct mutex mdevs_lock; /* serializes access to each ap_matrix_mdev */
|
||||||
struct ap_driver *vfio_ap_drv;
|
struct ap_driver *vfio_ap_drv;
|
||||||
|
struct mutex guests_lock; /* serializes access to each KVM guest */
|
||||||
};
|
};
|
||||||
|
|
||||||
extern struct ap_matrix_dev *matrix_dev;
|
extern struct ap_matrix_dev *matrix_dev;
|
||||||
@ -74,6 +81,15 @@ struct ap_matrix {
|
|||||||
DECLARE_BITMAP(adm, 256);
|
DECLARE_BITMAP(adm, 256);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct ap_queue_table - a table of queue objects.
|
||||||
|
*
|
||||||
|
* @queues: a hashtable of queues (struct vfio_ap_queue).
|
||||||
|
*/
|
||||||
|
struct ap_queue_table {
|
||||||
|
DECLARE_HASHTABLE(queues, 8);
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* struct ap_matrix_mdev - Contains the data associated with a matrix mediated
|
* struct ap_matrix_mdev - Contains the data associated with a matrix mediated
|
||||||
* device.
|
* device.
|
||||||
@ -81,18 +97,29 @@ struct ap_matrix {
|
|||||||
* @node: allows the ap_matrix_mdev struct to be added to a list
|
* @node: allows the ap_matrix_mdev struct to be added to a list
|
||||||
* @matrix: the adapters, usage domains and control domains assigned to the
|
* @matrix: the adapters, usage domains and control domains assigned to the
|
||||||
* mediated matrix device.
|
* mediated matrix device.
|
||||||
|
* @shadow_apcb: the shadow copy of the APCB field of the KVM guest's CRYCB
|
||||||
* @kvm: the struct holding guest's state
|
* @kvm: the struct holding guest's state
|
||||||
* @pqap_hook: the function pointer to the interception handler for the
|
* @pqap_hook: the function pointer to the interception handler for the
|
||||||
* PQAP(AQIC) instruction.
|
* PQAP(AQIC) instruction.
|
||||||
* @mdev: the mediated device
|
* @mdev: the mediated device
|
||||||
|
* @qtable: table of queues (struct vfio_ap_queue) assigned to the mdev
|
||||||
|
* @apm_add: bitmap of APIDs added to the host's AP configuration
|
||||||
|
* @aqm_add: bitmap of APQIs added to the host's AP configuration
|
||||||
|
* @adm_add: bitmap of control domain numbers added to the host's AP
|
||||||
|
* configuration
|
||||||
*/
|
*/
|
||||||
struct ap_matrix_mdev {
|
struct ap_matrix_mdev {
|
||||||
struct vfio_device vdev;
|
struct vfio_device vdev;
|
||||||
struct list_head node;
|
struct list_head node;
|
||||||
struct ap_matrix matrix;
|
struct ap_matrix matrix;
|
||||||
|
struct ap_matrix shadow_apcb;
|
||||||
struct kvm *kvm;
|
struct kvm *kvm;
|
||||||
crypto_hook pqap_hook;
|
crypto_hook pqap_hook;
|
||||||
struct mdev_device *mdev;
|
struct mdev_device *mdev;
|
||||||
|
struct ap_queue_table qtable;
|
||||||
|
DECLARE_BITMAP(apm_add, AP_DEVICES);
|
||||||
|
DECLARE_BITMAP(aqm_add, AP_DOMAINS);
|
||||||
|
DECLARE_BITMAP(adm_add, AP_DOMAINS);
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -102,6 +129,8 @@ struct ap_matrix_mdev {
|
|||||||
* @saved_iova: the notification indicator byte (nib) address
|
* @saved_iova: the notification indicator byte (nib) address
|
||||||
* @apqn: the APQN of the AP queue device
|
* @apqn: the APQN of the AP queue device
|
||||||
* @saved_isc: the guest ISC registered with the GIB interface
|
* @saved_isc: the guest ISC registered with the GIB interface
|
||||||
|
* @mdev_qnode: allows the vfio_ap_queue struct to be added to a hashtable
|
||||||
|
* @reset_rc: the status response code from the last reset of the queue
|
||||||
*/
|
*/
|
||||||
struct vfio_ap_queue {
|
struct vfio_ap_queue {
|
||||||
struct ap_matrix_mdev *matrix_mdev;
|
struct ap_matrix_mdev *matrix_mdev;
|
||||||
@ -109,11 +138,21 @@ struct vfio_ap_queue {
|
|||||||
int apqn;
|
int apqn;
|
||||||
#define VFIO_AP_ISC_INVALID 0xff
|
#define VFIO_AP_ISC_INVALID 0xff
|
||||||
unsigned char saved_isc;
|
unsigned char saved_isc;
|
||||||
|
struct hlist_node mdev_qnode;
|
||||||
|
unsigned int reset_rc;
|
||||||
};
|
};
|
||||||
|
|
||||||
int vfio_ap_mdev_register(void);
|
int vfio_ap_mdev_register(void);
|
||||||
void vfio_ap_mdev_unregister(void);
|
void vfio_ap_mdev_unregister(void);
|
||||||
int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q,
|
|
||||||
unsigned int retry);
|
int vfio_ap_mdev_probe_queue(struct ap_device *queue);
|
||||||
|
void vfio_ap_mdev_remove_queue(struct ap_device *queue);
|
||||||
|
|
||||||
|
int vfio_ap_mdev_resource_in_use(unsigned long *apm, unsigned long *aqm);
|
||||||
|
|
||||||
|
void vfio_ap_on_cfg_changed(struct ap_config_info *new_config_info,
|
||||||
|
struct ap_config_info *old_config_info);
|
||||||
|
void vfio_ap_on_scan_complete(struct ap_config_info *new_config_info,
|
||||||
|
struct ap_config_info *old_config_info);
|
||||||
|
|
||||||
#endif /* _VFIO_AP_PRIVATE_H_ */
|
#endif /* _VFIO_AP_PRIVATE_H_ */
|
||||||
|
@ -420,6 +420,7 @@ typedef struct elf64_shdr {
|
|||||||
#define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */
|
#define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */
|
||||||
#define NT_S390_GS_BC 0x30c /* s390 guarded storage broadcast control block */
|
#define NT_S390_GS_BC 0x30c /* s390 guarded storage broadcast control block */
|
||||||
#define NT_S390_RI_CB 0x30d /* s390 runtime instrumentation */
|
#define NT_S390_RI_CB 0x30d /* s390 runtime instrumentation */
|
||||||
|
#define NT_S390_PV_CPU_DATA 0x30e /* s390 protvirt cpu dump data */
|
||||||
#define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */
|
#define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */
|
||||||
#define NT_ARM_TLS 0x401 /* ARM TLS register */
|
#define NT_ARM_TLS 0x401 /* ARM TLS register */
|
||||||
#define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */
|
#define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */
|
||||||
|
@ -252,29 +252,26 @@ static int __init chacha_s390_test_init(void)
|
|||||||
memset(plain, 'a', data_size);
|
memset(plain, 'a', data_size);
|
||||||
get_random_bytes(plain, (data_size > 256 ? 256 : data_size));
|
get_random_bytes(plain, (data_size > 256 ? 256 : data_size));
|
||||||
|
|
||||||
cipher_generic = vmalloc(data_size);
|
cipher_generic = vzalloc(data_size);
|
||||||
if (!cipher_generic) {
|
if (!cipher_generic) {
|
||||||
pr_info("could not allocate cipher_generic buffer\n");
|
pr_info("could not allocate cipher_generic buffer\n");
|
||||||
ret = -2;
|
ret = -2;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
memset(cipher_generic, 0, data_size);
|
|
||||||
|
|
||||||
cipher_s390 = vmalloc(data_size);
|
cipher_s390 = vzalloc(data_size);
|
||||||
if (!cipher_s390) {
|
if (!cipher_s390) {
|
||||||
pr_info("could not allocate cipher_s390 buffer\n");
|
pr_info("could not allocate cipher_s390 buffer\n");
|
||||||
ret = -2;
|
ret = -2;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
memset(cipher_s390, 0, data_size);
|
|
||||||
|
|
||||||
revert = vmalloc(data_size);
|
revert = vzalloc(data_size);
|
||||||
if (!revert) {
|
if (!revert) {
|
||||||
pr_info("could not allocate revert buffer\n");
|
pr_info("could not allocate revert buffer\n");
|
||||||
ret = -2;
|
ret = -2;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
memset(revert, 0, data_size);
|
|
||||||
|
|
||||||
if (debug)
|
if (debug)
|
||||||
print_hex_dump(KERN_INFO, "src: ", DUMP_PREFIX_OFFSET,
|
print_hex_dump(KERN_INFO, "src: ", DUMP_PREFIX_OFFSET,
|
||||||
|
Loading…
Reference in New Issue
Block a user