From a084c44eaa6a618415e1bcb076c7e1c088bcd54c Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Tue, 26 Oct 2021 16:58:31 -0400 Subject: [PATCH 01/69] s390-vfio-ap: introduces s390 kernel debug feature for vfio_ap device driver Sets up an s390dbf debug log for the vfio_ap device driver for logging events occurring during the lifetime of the driver. Signed-off-by: Tony Krowiak Reviewed-by: Harald Freudenberger Reviewed-by: Matthew Rosato Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/vfio_ap_debug.h | 32 +++++++++++++++++++++++++++++ drivers/s390/crypto/vfio_ap_drv.c | 21 +++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 drivers/s390/crypto/vfio_ap_debug.h diff --git a/drivers/s390/crypto/vfio_ap_debug.h b/drivers/s390/crypto/vfio_ap_debug.h new file mode 100644 index 000000000000..180156121421 --- /dev/null +++ b/drivers/s390/crypto/vfio_ap_debug.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 2022 + * + * Author(s): Tony Krowiak + */ +#ifndef VFIO_AP_DEBUG_H +#define VFIO_AP_DEBUG_H + +#include + +#define DBF_ERR 3 /* error conditions */ +#define DBF_WARN 4 /* warning conditions */ +#define DBF_INFO 5 /* informational */ +#define DBF_DEBUG 6 /* for debugging only */ + +#define DBF_MAX_SPRINTF_ARGS 10 + +#define VFIO_AP_DBF(...) \ + debug_sprintf_event(vfio_ap_dbf_info, ##__VA_ARGS__) +#define VFIO_AP_DBF_ERR(...) \ + debug_sprintf_event(vfio_ap_dbf_info, DBF_ERR, ##__VA_ARGS__) +#define VFIO_AP_DBF_WARN(...) \ + debug_sprintf_event(vfio_ap_dbf_info, DBF_WARN, ##__VA_ARGS__) +#define VFIO_AP_DBF_INFO(...) \ + debug_sprintf_event(vfio_ap_dbf_info, DBF_INFO, ##__VA_ARGS__) +#define VFIO_AP_DBF_DBG(...) \ + debug_sprintf_event(vfio_ap_dbf_info, DBF_DEBUG, ##__VA_ARGS__) + +extern debug_info_t *vfio_ap_dbf_info; + +#endif /* VFIO_AP_DEBUG_H */ diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c index e043ae236630..82b6ffee06c5 100644 --- a/drivers/s390/crypto/vfio_ap_drv.c +++ b/drivers/s390/crypto/vfio_ap_drv.c @@ -14,6 +14,7 @@ #include #include #include "vfio_ap_private.h" +#include "vfio_ap_debug.h" #define VFIO_AP_ROOT_NAME "vfio_ap" #define VFIO_AP_DEV_NAME "matrix" @@ -26,6 +27,7 @@ MODULE_DESCRIPTION("VFIO AP device driver, Copyright IBM Corp. 2018"); MODULE_LICENSE("GPL v2"); struct ap_matrix_dev *matrix_dev; +debug_info_t *vfio_ap_dbf_info; /* Only type 10 adapters (CEX4 and later) are supported * by the AP matrix device driver @@ -250,10 +252,28 @@ static void vfio_ap_matrix_dev_destroy(void) root_device_unregister(root_device); } +static int __init vfio_ap_dbf_info_init(void) +{ + vfio_ap_dbf_info = debug_register("vfio_ap", 1, 1, + DBF_MAX_SPRINTF_ARGS * sizeof(long)); + + if (!vfio_ap_dbf_info) + return -ENOENT; + + debug_register_view(vfio_ap_dbf_info, &debug_sprintf_view); + debug_set_level(vfio_ap_dbf_info, DBF_WARN); + + return 0; +} + static int __init vfio_ap_init(void) { int ret; + ret = vfio_ap_dbf_info_init(); + if (ret) + return ret; + /* If there are no AP instructions, there is nothing to pass through. 
*/ if (!ap_instructions_available()) return -ENODEV; @@ -284,6 +304,7 @@ static void __exit vfio_ap_exit(void) vfio_ap_mdev_unregister(); ap_driver_unregister(&vfio_ap_drv); vfio_ap_matrix_dev_destroy(); + debug_unregister(vfio_ap_dbf_info); } module_init(vfio_ap_init); From d5c49db21b743658db03d75cbf410596c2c90371 Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Tue, 11 Jan 2022 10:19:16 -0500 Subject: [PATCH 02/69] MAINTAINERS: update file path for S390 VFIO AP DRIVER Changed the MAINTAINERS file to include the new drivers/s390/crypto/vfio_ap_debug.h file path. Signed-off-by: Tony Krowiak Acked-by: Matthew Rosato Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- MAINTAINERS | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 69a2935daf6c..5e4d1be85c22 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16893,9 +16893,7 @@ L: linux-s390@vger.kernel.org S: Supported W: http://www.ibm.com/developerworks/linux/linux390/ F: Documentation/s390/vfio-ap.rst -F: drivers/s390/crypto/vfio_ap_drv.c -F: drivers/s390/crypto/vfio_ap_ops.c -F: drivers/s390/crypto/vfio_ap_private.h +F: drivers/s390/crypto/vfio_ap* S390 VFIO-CCW DRIVER M: Eric Farman From 68f554b7d250b632d5fa17063f84c618c48b32a8 Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Thu, 4 Nov 2021 16:41:37 -0400 Subject: [PATCH 03/69] s390/vfio-ap: add s390dbf logging to the handle_pqap function This patch adds s390dbf logging to the function that handles interception of the PQAP(AQIC) instruction. Several items of data are validated before ultimately calling the functions that execute the PQAP(AQIC) instruction on behalf of the guest to which the queue for which interrupts are being enabled or disabled is attached. Currently, the handle_pqap function sets status response code 01 (queue not available) in the status word that is normally returned from the PQAP(AQIC) instruction under the following conditions: * Set when the function pointer to the handler is not set in the kvm_s390_crypto object (i.e., the PQAP hook is not registered). * Set when the KVM pointer is not set in the ap_matrix_mdev object (i.e., the matrix mdev is not passed through to a guest). * Set when the queue for which interrupts are being enabled or disabled is either not bound to the vfio_ap device driver or not assigned to the matrix mdev. Setting the response code returned to userspace without also logging a message in the kernel makes it impossible to determine whether the response was due to an error detected by the vfio_ap device driver or because the response code was returned by the firmware in response to the PQAP(AQIC) instruction, so this patch logs a message to the s390dbf log for the vfio_ap device driver for each of the situations described above. 
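For readers unfamiliar with the s390 debug feature, the pattern these patches rely on boils down to the following minimal sketch. The "demo" names are hypothetical; the driver itself registers "vfio_ap" in vfio_ap_dbf_info_init() above, and the resulting messages are read from debugfs (typically /sys/kernel/debug/s390dbf/<name>/sprintf).

#include <linux/module.h>
#include <asm/debug.h>

static debug_info_t *demo_dbf;

static int __init demo_init(void)
{
	/* 1 page per area, 1 area, room for 10 long-sized sprintf arguments */
	demo_dbf = debug_register("demo", 1, 1, 10 * sizeof(long));
	if (!demo_dbf)
		return -ENOENT;
	debug_register_view(demo_dbf, &debug_sprintf_view);
	/* record events with level <= 4, i.e. errors and warnings only */
	debug_set_level(demo_dbf, 4);
	/* "%s" is safe here because __func__ stays valid for the module's lifetime */
	debug_sprintf_event(demo_dbf, 4, "%s: loaded\n", __func__);
	return 0;
}

static void __exit demo_exit(void)
{
	debug_unregister(demo_dbf);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");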
Signed-off-by: Tony Krowiak Reviewed-by: Matthew Rosato Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/vfio_ap_ops.c | 78 +++++++++++++++++++++++++++---- 1 file changed, 70 insertions(+), 8 deletions(-) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index abc0b9b88386..537d7dd158a7 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -16,10 +16,12 @@ #include #include #include +#include #include #include #include "vfio_ap_private.h" +#include "vfio_ap_debug.h" #define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough" #define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device" @@ -256,6 +258,48 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, return status; } +/** + * vfio_ap_le_guid_to_be_uuid - convert a little endian guid array into an array + * of big endian elements that can be passed by + * value to an s390dbf sprintf event function to + * format a UUID string. + * + * @guid: the object containing the little endian guid + * @uuid: a six-element array of long values that can be passed by value as + * arguments for a formatting string specifying a UUID. + * + * The S390 Debug Feature (s390dbf) allows the use of "%s" in the sprintf + * event functions if the memory for the passed string is available as long as + * the debug feature exists. Since a mediated device can be removed at any + * time, it's name can not be used because %s passes the reference to the string + * in memory and the reference will go stale once the device is removed . + * + * The s390dbf string formatting function allows a maximum of 9 arguments for a + * message to be displayed in the 'sprintf' view. In order to use the bytes + * comprising the mediated device's UUID to display the mediated device name, + * they will have to be converted into an array whose elements can be passed by + * value to sprintf. For example: + * + * guid array: { 83, 78, 17, 62, bb, f1, f0, 47, 91, 4d, 32, a2, 2e, 3a, 88, 04 } + * mdev name: 62177883-f1bb-47f0-914d-32a22e3a8804 + * array returned: { 62177883, f1bb, 47f0, 914d, 32a2, 2e3a8804 } + * formatting string: "%08lx-%04lx-%04lx-%04lx-%02lx%04lx" + */ +static void vfio_ap_le_guid_to_be_uuid(guid_t *guid, unsigned long *uuid) +{ + /* + * The input guid is ordered in little endian, so it needs to be + * reordered for displaying a UUID as a string. This specifies the + * guid indices in proper order. 
+ */ + uuid[0] = le32_to_cpup((__le32 *)guid); + uuid[1] = le16_to_cpup((__le16 *)&guid->b[4]); + uuid[2] = le16_to_cpup((__le16 *)&guid->b[6]); + uuid[3] = *((__u16 *)&guid->b[8]); + uuid[4] = *((__u16 *)&guid->b[10]); + uuid[5] = *((__u32 *)&guid->b[12]); +} + /** * handle_pqap - PQAP instruction callback * @@ -281,30 +325,48 @@ static int handle_pqap(struct kvm_vcpu *vcpu) { uint64_t status; uint16_t apqn; + unsigned long uuid[6]; struct vfio_ap_queue *q; struct ap_queue_status qstatus = { .response_code = AP_RESPONSE_Q_NOT_AVAIL, }; struct ap_matrix_mdev *matrix_mdev; - /* If we do not use the AIV facility just go to userland */ - if (!(vcpu->arch.sie_block->eca & ECA_AIV)) - return -EOPNOTSUPP; - apqn = vcpu->run->s.regs.gprs[0] & 0xffff; - mutex_lock(&matrix_dev->lock); - if (!vcpu->kvm->arch.crypto.pqap_hook) + /* If we do not use the AIV facility just go to userland */ + if (!(vcpu->arch.sie_block->eca & ECA_AIV)) { + VFIO_AP_DBF_WARN("%s: AIV facility not installed: apqn=0x%04x, eca=0x%04x\n", + __func__, apqn, vcpu->arch.sie_block->eca); + + return -EOPNOTSUPP; + } + + mutex_lock(&matrix_dev->lock); + if (!vcpu->kvm->arch.crypto.pqap_hook) { + VFIO_AP_DBF_WARN("%s: PQAP(AQIC) hook not registered with the vfio_ap driver: apqn=0x%04x\n", + __func__, apqn); goto out_unlock; + } + matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook, struct ap_matrix_mdev, pqap_hook); /* If the there is no guest using the mdev, there is nothing to do */ - if (!matrix_mdev->kvm) + if (!matrix_mdev->kvm) { + vfio_ap_le_guid_to_be_uuid(&matrix_mdev->mdev->uuid, uuid); + VFIO_AP_DBF_WARN("%s: mdev %08lx-%04lx-%04lx-%04lx-%04lx%08lx not in use: apqn=0x%04x\n", + __func__, uuid[0], uuid[1], uuid[2], + uuid[3], uuid[4], uuid[5], apqn); goto out_unlock; + } q = vfio_ap_get_queue(matrix_mdev, apqn); - if (!q) + if (!q) { + VFIO_AP_DBF_WARN("%s: Queue %02x.%04x not bound to the vfio_ap driver\n", + __func__, AP_QID_CARD(apqn), + AP_QID_QUEUE(apqn)); goto out_unlock; + } status = vcpu->run->s.regs.gprs[1]; From 783f0a3ccd79477b159c6ef2104f9cff765215d9 Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Tue, 4 Jan 2022 15:44:13 -0500 Subject: [PATCH 04/69] s390/vfio-ap: add s390dbf logging to the vfio_ap_irq_enable function This patch adds s390dbf logging to the function that executes the PQAP(AQIC) instruction on behalf of the guest to which the queue for which interrupts are being enabled or disabled is attached. Currently, the vfio_ap_irq_enable function sets status response code 06 (notification indicator byte address (nib) invalid) in the status word when the vfio_pin_pages function - called to pin the page containing the nib - returns an error or a different number of pages pinned than requested. Setting the response code returned to userspace without also logging a message in the kernel makes it impossible to determine whether the response was due to an error detected by the vfio_ap device driver or because the response code was returned by the firmware in response to the PQAP(AQIC) instruction. In addition to logging a warning for the situation above, this patch adds the following: * A function to validate the nib address invoked prior to calling the vfio_pin_pages function. This allows for logging a message informing the reader of the reason the page containing the nib can not be pinned if the nib address is not valid. Response code 06 (invalid nib address) will be set in the status word returned to the guest from the instruction. 
* Checks the return value from the kvm_s390_gisc_register and logs a message informing the reader of the failure. Status response code 08 (invalid gisa) will be set in the status word returned to the guest from the PQAP(AQIC) instruction. * Checks the status response code returned from execution of the PQAP(AQIC) instruction and if it indicates an error, logs a message informing the reader. Signed-off-by: Tony Krowiak Reviewed-by: Matthew Rosato Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.h | 1 + drivers/s390/crypto/vfio_ap_ops.c | 77 ++++++++++++++++++++++++++++--- 2 files changed, 72 insertions(+), 6 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 95b577754b35..714c7583af5e 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -47,6 +47,7 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr) #define AP_RESPONSE_BUSY 0x05 #define AP_RESPONSE_INVALID_ADDRESS 0x06 #define AP_RESPONSE_OTHERWISE_CHANGED 0x07 +#define AP_RESPONSE_INVALID_GISA 0x08 #define AP_RESPONSE_Q_FULL 0x10 #define AP_RESPONSE_NO_PENDING_REPLY 0x10 #define AP_RESPONSE_INDEX_TOO_BIG 0x11 diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 537d7dd158a7..7dc26365e29a 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -185,12 +185,44 @@ end_free: return status; } +/** + * vfio_ap_validate_nib - validate a notification indicator byte (nib) address. + * + * @vcpu: the object representing the vcpu executing the PQAP(AQIC) instruction. + * @nib: the location for storing the nib address. + * @g_pfn: the location for storing the page frame number of the page containing + * the nib. + * + * When the PQAP(AQIC) instruction is executed, general register 2 contains the + * address of the notification indicator byte (nib) used for IRQ notification. + * This function parses the nib from gr2 and calculates the page frame + * number for the guest of the page containing the nib. The values are + * stored in @nib and @g_pfn respectively. + * + * The g_pfn of the nib is then validated to ensure the nib address is valid. + * + * Return: returns zero if the nib address is a valid; otherwise, returns + * -EINVAL. + */ +static int vfio_ap_validate_nib(struct kvm_vcpu *vcpu, unsigned long *nib, + unsigned long *g_pfn) +{ + *nib = vcpu->run->s.regs.gprs[2]; + *g_pfn = *nib >> PAGE_SHIFT; + + if (kvm_is_error_hva(gfn_to_hva(vcpu->kvm, *g_pfn))) + return -EINVAL; + + return 0; +} + /** * vfio_ap_irq_enable - Enable Interruption for a APQN * * @q: the vfio_ap_queue holding AQIC parameters * @isc: the guest ISC to register with the GIB interface - * @nib: the notification indicator byte to pin. + * @vcpu: the vcpu object containing the registers specifying the parameters + * passed to the PQAP(AQIC) instruction. 
* * Pin the NIB saved in *q * Register the guest ISC to GIB interface and retrieve the @@ -206,22 +238,36 @@ end_free: */ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, int isc, - unsigned long nib) + struct kvm_vcpu *vcpu) { + unsigned long nib; struct ap_qirq_ctrl aqic_gisa = {}; struct ap_queue_status status = {}; struct kvm_s390_gisa *gisa; + int nisc; struct kvm *kvm; unsigned long h_nib, g_pfn, h_pfn; int ret; - g_pfn = nib >> PAGE_SHIFT; + /* Verify that the notification indicator byte address is valid */ + if (vfio_ap_validate_nib(vcpu, &nib, &g_pfn)) { + VFIO_AP_DBF_WARN("%s: invalid NIB address: nib=%#lx, g_pfn=%#lx, apqn=%#04x\n", + __func__, nib, g_pfn, q->apqn); + + status.response_code = AP_RESPONSE_INVALID_ADDRESS; + return status; + } + ret = vfio_pin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1, IOMMU_READ | IOMMU_WRITE, &h_pfn); switch (ret) { case 1: break; default: + VFIO_AP_DBF_WARN("%s: vfio_pin_pages failed: rc=%d," + "nib=%#lx, g_pfn=%#lx, apqn=%#04x\n", + __func__, ret, nib, g_pfn, q->apqn); + status.response_code = AP_RESPONSE_INVALID_ADDRESS; return status; } @@ -231,7 +277,17 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, h_nib = (h_pfn << PAGE_SHIFT) | (nib & ~PAGE_MASK); aqic_gisa.gisc = isc; - aqic_gisa.isc = kvm_s390_gisc_register(kvm, isc); + + nisc = kvm_s390_gisc_register(kvm, isc); + if (nisc < 0) { + VFIO_AP_DBF_WARN("%s: gisc registration failed: nisc=%d, isc=%d, apqn=%#04x\n", + __func__, nisc, isc, q->apqn); + + status.response_code = AP_RESPONSE_INVALID_GISA; + return status; + } + + aqic_gisa.isc = nisc; aqic_gisa.ir = 1; aqic_gisa.gisa = (uint64_t)gisa >> 4; @@ -255,6 +311,16 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, break; } + if (status.response_code != AP_RESPONSE_NORMAL) { + VFIO_AP_DBF_WARN("%s: PQAP(AQIC) failed with status=%#02x: " + "zone=%#x, ir=%#x, gisc=%#x, f=%#x," + "gisa=%#x, isc=%#x, apqn=%#04x\n", + __func__, status.response_code, + aqic_gisa.zone, aqic_gisa.ir, aqic_gisa.gisc, + aqic_gisa.gf, aqic_gisa.gisa, aqic_gisa.isc, + q->apqn); + } + return status; } @@ -372,8 +438,7 @@ static int handle_pqap(struct kvm_vcpu *vcpu) /* If IR bit(16) is set we enable the interrupt */ if ((status >> (63 - 16)) & 0x01) - qstatus = vfio_ap_irq_enable(q, status & 0x07, - vcpu->run->s.regs.gprs[2]); + qstatus = vfio_ap_irq_enable(q, status & 0x07, vcpu); else qstatus = vfio_ap_irq_disable(q); From f36e7c9845d998d1e4100b46cec9c678bff69a24 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 28 Jan 2022 12:10:57 +0100 Subject: [PATCH 05/69] s390: remove invalid email address of Heiko Carstens Remove my old invalid email address which can be found in a couple of files. Instead of updating it, just remove my contact data completely from source files. We have git and other tools which allow to figure out who is responsible for what with recent contact data. 
Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/boot/head.S | 1 - arch/s390/include/asm/crw.h | 1 - arch/s390/include/asm/nmi.h | 1 - arch/s390/include/asm/sclp.h | 1 - arch/s390/include/asm/smp.h | 1 - arch/s390/kernel/base.S | 3 +-- arch/s390/kernel/cache.c | 1 - arch/s390/kernel/early.c | 1 - arch/s390/kernel/entry.S | 1 - arch/s390/kernel/ftrace.c | 3 +-- arch/s390/kernel/head64.S | 1 - arch/s390/kernel/ipl.c | 1 - arch/s390/kernel/machine_kexec.c | 1 - arch/s390/kernel/mcount.S | 2 -- arch/s390/kernel/nmi.c | 1 - arch/s390/kernel/relocate_kernel.S | 3 +-- arch/s390/kernel/smp.c | 1 - arch/s390/kernel/stacktrace.c | 1 - arch/s390/kernel/topology.c | 1 - arch/s390/kvm/kvm-s390.c | 1 - arch/s390/lib/delay.c | 1 - arch/s390/mm/maccess.c | 2 -- arch/s390/mm/vmem.c | 1 - drivers/s390/char/sclp_cmd.c | 3 +-- drivers/s390/char/sclp_config.c | 1 - drivers/s390/cio/crw.c | 1 - tools/perf/arch/s390/util/dwarf-regs.c | 3 +-- 27 files changed, 5 insertions(+), 34 deletions(-) diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index 3a252d140c55..666692429db0 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -5,7 +5,6 @@ * Author(s): Hartmut Penner * Martin Schwidefsky * Rob van der Heij - * Heiko Carstens * * There are 5 different IPL methods * 1) load the image directly into ram at address 0 and do an PSW restart diff --git a/arch/s390/include/asm/crw.h b/arch/s390/include/asm/crw.h index c6ebfd31f1db..97456d98fe76 100644 --- a/arch/s390/include/asm/crw.h +++ b/arch/s390/include/asm/crw.h @@ -5,7 +5,6 @@ * Author(s): Ingo Adlung , * Martin Schwidefsky , * Cornelia Huck , - * Heiko Carstens , */ #ifndef _ASM_S390_CRW_H diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index 55c9051dddfd..292083083830 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -6,7 +6,6 @@ * Author(s): Ingo Adlung , * Martin Schwidefsky , * Cornelia Huck , - * Heiko Carstens , */ #ifndef _ASM_S390_NMI_H diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index c68ea35de498..947ef51526a0 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright IBM Corp. 2007 - * Author(s): Heiko Carstens */ #ifndef _ASM_S390_SCLP_H diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index f16f4d054ae2..7f5d4763357b 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -3,7 +3,6 @@ * Copyright IBM Corp. 1999, 2012 * Author(s): Denis Joseph Barrow, * Martin Schwidefsky , - * Heiko Carstens , */ #ifndef __ASM_SMP_H #define __ASM_SMP_H diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index d255c69c1779..f7fe4033df36 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -3,8 +3,7 @@ * arch/s390/kernel/base.S * * Copyright IBM Corp. 2006, 2007 - * Author(s): Heiko Carstens - * Michael Holzheu + * Author(s): Michael Holzheu */ #include diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c index d66825e53fce..8a9c3bf69f48 100644 --- a/arch/s390/kernel/cache.c +++ b/arch/s390/kernel/cache.c @@ -3,7 +3,6 @@ * Extract CPU cache information and expose them via sysfs. * * Copyright IBM Corp. 2012 - * Author(s): Heiko Carstens */ #include diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 3cdf68c53614..b8cfac4918d9 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -2,7 +2,6 @@ /* * Copyright IBM Corp. 
2007, 2009 * Author(s): Hongjie Yang , - * Heiko Carstens */ #define KMSG_COMPONENT "setup" diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 01bae1d51113..dc7347e43ec2 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -6,7 +6,6 @@ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Hartmut Penner (hp@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), - * Heiko Carstens */ #include diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 21d62d8b6b9a..61f72c7fb0da 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -4,8 +4,7 @@ * * Copyright IBM Corp. 2009,2014 * - * Author(s): Heiko Carstens , - * Martin Schwidefsky + * Author(s): Martin Schwidefsky */ #include diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 42f9a325a257..d7b8b6ad574d 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -5,7 +5,6 @@ * Author(s): Hartmut Penner * Martin Schwidefsky * Rob van der Heij - * Heiko Carstens * */ diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 5ad1dde23dc5..a93142785bbc 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -4,7 +4,6 @@ * * Copyright IBM Corp. 2005, 2012 * Author(s): Michael Holzheu - * Heiko Carstens * Volker Sameske */ diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index a16467b3825e..088d57a3083f 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -3,7 +3,6 @@ * Copyright IBM Corp. 2005, 2011 * * Author(s): Rolf Adelsberger, - * Heiko Carstens * Michael Holzheu */ diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 39bcc0e39a10..a06eb86c178f 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -2,8 +2,6 @@ /* * Copyright IBM Corp. 2008, 2009 * - * Author(s): Heiko Carstens , - * */ #include diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 651a51914e34..fc60e29b8690 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -6,7 +6,6 @@ * Author(s): Ingo Adlung , * Martin Schwidefsky , * Cornelia Huck , - * Heiko Carstens , */ #include diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S index fe396673e8a6..9438368c3632 100644 --- a/arch/s390/kernel/relocate_kernel.S +++ b/arch/s390/kernel/relocate_kernel.S @@ -2,8 +2,7 @@ /* * Copyright IBM Corp. 2005 * - * Author(s): Rolf Adelsberger, - * Heiko Carstens + * Author(s): Rolf Adelsberger * */ diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 2bad902d8437..b214af5164bd 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -5,7 +5,6 @@ * Copyright IBM Corp. 1999, 2012 * Author(s): Denis Joseph Barrow, * Martin Schwidefsky , - * Heiko Carstens , * * based on other smp stuff by * (c) 1995 Alan Cox, CymruNET Ltd diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index b7bb1981e9ee..7ee455e8e3d5 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -3,7 +3,6 @@ * Stack trace management functions * * Copyright IBM Corp. 2006 - * Author(s): Heiko Carstens */ #include diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 58f8291950cb..c6eecd4a5302 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 
2007, 2011 - * Author(s): Heiko Carstens */ #define KMSG_COMPONENT "cpu" diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 577f1ead6a51..fd65ab42443f 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -6,7 +6,6 @@ * * Author(s): Carsten Otte * Christian Borntraeger - * Heiko Carstens * Christian Ehrhardt * Jason J. Herne */ diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index bccbf394ae7e..f7f5adea8940 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -4,7 +4,6 @@ * * Copyright IBM Corp. 1999, 2008 * Author(s): Martin Schwidefsky , - * Heiko Carstens , */ #include diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 9663ce3625bc..c1ed1f51c25d 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -4,8 +4,6 @@ * * Copyright IBM Corp. 2009, 2015 * - * Author(s): Heiko Carstens , - * */ #include diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 7d9705eeb02f..5410775639c5 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2006 - * Author(s): Heiko Carstens */ #include diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index 998933e83610..15971997cfe2 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -2,8 +2,7 @@ /* * Copyright IBM Corp. 2007,2012 * - * Author(s): Heiko Carstens , - * Peter Oberparleiter + * Author(s): Peter Oberparleiter */ #define KMSG_COMPONENT "sclp_cmd" diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c index c365110f2dae..10383e936461 100644 --- a/drivers/s390/char/sclp_config.c +++ b/drivers/s390/char/sclp_config.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007 - * Author(s): Heiko Carstens */ #define KMSG_COMPONENT "sclp_config" diff --git a/drivers/s390/cio/crw.c b/drivers/s390/cio/crw.c index fc285ca41141..7b02a6349c4d 100644 --- a/drivers/s390/cio/crw.c +++ b/drivers/s390/cio/crw.c @@ -6,7 +6,6 @@ * Author(s): Ingo Adlung , * Martin Schwidefsky , * Cornelia Huck , - * Heiko Carstens , */ #include diff --git a/tools/perf/arch/s390/util/dwarf-regs.c b/tools/perf/arch/s390/util/dwarf-regs.c index a8ace5cc6301..dfddb3099bfa 100644 --- a/tools/perf/arch/s390/util/dwarf-regs.c +++ b/tools/perf/arch/s390/util/dwarf-regs.c @@ -3,8 +3,7 @@ * Mapping of DWARF debug register numbers into register names. * * Copyright IBM Corp. 2010, 2017 - * Author(s): Heiko Carstens , - * Hendrik Brueckner + * Author(s): Hendrik Brueckner * */ From 98c0d24d1e7576a853b0812d95e599ba1a909e21 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Sun, 23 Jan 2022 20:20:09 +0100 Subject: [PATCH 06/69] s390/ftrace: verify opcode before applying patch commit 72b3942a173c ("scripts: ftrace - move the sort-processing in ftrace_init") had the unexpected side effect that wrong code locations were patched. To prevent this from happening again, verify the opcode before patching it. 
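The expected halfwords used below come straight from the z/Architecture encoding of the two branch-relative forms being toggled: byte 0 is the opcode and the high nibble of byte 1 is the condition mask (0xf branches always, 0x0 never). A stand-alone sketch, not kernel code, showing how those constants are composed:

#include <assert.h>
#include <stdint.h>

/* first halfword of "brc mask,target" (RI format, opcode 0xa7.4) */
static uint16_t brc_halfword(uint8_t mask)
{
	return 0xa700 | (uint16_t)(mask << 4) | 0x4;
}

/* first halfword of "brcl mask,target" (RIL format, opcode 0xc0.4) */
static uint16_t brcl_halfword(uint8_t mask)
{
	return 0xc000 | (uint16_t)(mask << 4) | 0x4;
}

int main(void)
{
	assert(brcl_halfword(0x0) == 0xc004);	/* brcl 0,...  */
	assert(brcl_halfword(0xf) == 0xc0f4);	/* brcl 15,... */
	assert(brc_halfword(0x0)  == 0xa704);	/* brc 0,...   */
	assert(brc_halfword(0xf)  == 0xa7f4);	/* brc 15,...  */
	return 0;
}

ftrace_patch_branch_mask() in the diff below reads that first halfword with get_kernel_nofault(), refuses to patch if it does not match the expected value, and otherwise rewrites only byte 1, which carries the mask.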
Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/ftrace.c | 43 ++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 61f72c7fb0da..1db9cc795034 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -164,31 +164,32 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, return 0; } -static void brcl_disable(void *brcl) +static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable) { - u8 op = 0x04; /* set mask field to zero */ + u16 old; + u8 op; - s390_kernel_write((char *)brcl + 1, &op, sizeof(op)); + if (get_kernel_nofault(old, addr)) + return -EFAULT; + if (old != expected) + return -EINVAL; + /* set mask field to all ones or zeroes */ + op = enable ? 0xf4 : 0x04; + s390_kernel_write((char *)addr + 1, &op, sizeof(op)); + return 0; } int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - brcl_disable((void *)rec->ip); - return 0; -} - -static void brcl_enable(void *brcl) -{ - u8 op = 0xf4; /* set mask field to all ones */ - - s390_kernel_write((char *)brcl + 1, &op, sizeof(op)); + /* Expect brcl 0xf,... */ + return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false); } int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - brcl_enable((void *)rec->ip); - return 0; + /* Expect brcl 0x0,... */ + return ftrace_patch_branch_mask((void *)rec->ip, 0xc004, true); } int ftrace_update_ftrace_func(ftrace_func_t func) @@ -261,14 +262,24 @@ NOKPROBE_SYMBOL(prepare_ftrace_return); */ int ftrace_enable_ftrace_graph_caller(void) { - brcl_disable(ftrace_graph_caller); + int rc; + + /* Expect brc 0xf,... */ + rc = ftrace_patch_branch_mask(ftrace_graph_caller, 0xa7f4, false); + if (rc) + return rc; text_poke_sync_lock(); return 0; } int ftrace_disable_ftrace_graph_caller(void) { - brcl_enable(ftrace_graph_caller); + int rc; + + /* Expect brc 0x0,... */ + rc = ftrace_patch_branch_mask(ftrace_graph_caller, 0xa704, true); + if (rc) + return rc; text_poke_sync_lock(); return 0; } From 1f231e295024d88950c7e4b91f91a47bbeff1637 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Fri, 21 Jan 2022 10:44:25 +0100 Subject: [PATCH 07/69] s390/maccess: fix absolute lowcore virtual vs physical address confusion Due to historical reasons memcpy_absolute() and friend functions misuse the notion of physical vs virtual addresses difference. Note: this does not fix a bug currently, since virtual and physical addresses are identical. 
Reviewed-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- arch/s390/mm/maccess.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index c1ed1f51c25d..d4d311cb2bb5 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -199,15 +199,15 @@ out: /* * Check if physical address is within prefix or zero page */ -static int is_swapped(unsigned long addr) +static int is_swapped(phys_addr_t addr) { - unsigned long lc; + phys_addr_t lc; int cpu; if (addr < sizeof(struct lowcore)) return 1; for_each_online_cpu(cpu) { - lc = (unsigned long) lowcore_ptr[cpu]; + lc = virt_to_phys(lowcore_ptr[cpu]); if (addr > lc + sizeof(struct lowcore) - 1 || addr < lc) continue; return 1; @@ -223,7 +223,8 @@ static int is_swapped(unsigned long addr) */ void *xlate_dev_mem_ptr(phys_addr_t addr) { - void *bounce = (void *) addr; + void *ptr = phys_to_virt(addr); + void *bounce = ptr; unsigned long size; cpus_read_lock(); @@ -232,7 +233,7 @@ void *xlate_dev_mem_ptr(phys_addr_t addr) size = PAGE_SIZE - (addr & ~PAGE_MASK); bounce = (void *) __get_free_page(GFP_ATOMIC); if (bounce) - memcpy_absolute(bounce, (void *) addr, size); + memcpy_absolute(bounce, ptr, size); } preempt_enable(); cpus_read_unlock(); @@ -242,8 +243,8 @@ void *xlate_dev_mem_ptr(phys_addr_t addr) /* * Free converted buffer for /dev/mem access (if necessary) */ -void unxlate_dev_mem_ptr(phys_addr_t addr, void *buf) +void unxlate_dev_mem_ptr(phys_addr_t addr, void *ptr) { - if ((void *) addr != buf) - free_page((unsigned long) buf); + if (addr != virt_to_phys(ptr)) + free_page((unsigned long)ptr); } From 628c66942e233d73def54fa458641cfc96be6660 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Wed, 26 Jan 2022 13:47:58 +0100 Subject: [PATCH 08/69] s390/sclp_sdias: fix sclp_sdias_copy() virtual vs physical address confusion Due to historical reasons sclp_sdias_copy() misuses the notion of physical vs virtual addresses difference. Note: this does not fix a bug currently, since virtual and physical addresses are identical. Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- drivers/s390/char/sclp_sdias.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/char/sclp_sdias.c b/drivers/s390/char/sclp_sdias.c index 215d4b4a5ff5..e915a343fcf5 100644 --- a/drivers/s390/char/sclp_sdias.c +++ b/drivers/s390/char/sclp_sdias.c @@ -184,7 +184,7 @@ int sclp_sdias_copy(void *dest, int start_blk, int nr_blks) sccb->evbuf.asa_size = SDIAS_ASA_SIZE_64; sccb->evbuf.event_status = 0; sccb->evbuf.blk_cnt = nr_blks; - sccb->evbuf.asa = (unsigned long)dest; + sccb->evbuf.asa = __pa(dest); sccb->evbuf.fbn = start_blk; sccb->evbuf.lbn = 0; sccb->evbuf.dbs = 1; From 9de209c7d584d6e06ad92f120d83d4f27c200497 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Wed, 26 Jan 2022 13:47:59 +0100 Subject: [PATCH 09/69] s390/dump: fix os_info virtual vs physical address confusion Due to historical reasons os_info handling functions misuse the notion of physical vs virtual addresses difference. Note: this does not fix a bug currently, since virtual and physical addresses are identical. 
Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- arch/s390/kernel/os_info.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c index 4bef35b79b93..fa2be71b2c6b 100644 --- a/arch/s390/kernel/os_info.c +++ b/arch/s390/kernel/os_info.c @@ -45,7 +45,7 @@ void os_info_crashkernel_add(unsigned long base, unsigned long size) */ void os_info_entry_add(int nr, void *ptr, u64 size) { - os_info.entry[nr].addr = (u64)(unsigned long)ptr; + os_info.entry[nr].addr = __pa(ptr); os_info.entry[nr].size = size; os_info.entry[nr].csum = (__force u32)csum_partial(ptr, size, 0); os_info.csum = os_info_csum(&os_info); @@ -62,7 +62,7 @@ void __init os_info_init(void) os_info.version_minor = OS_INFO_VERSION_MINOR; os_info.magic = OS_INFO_MAGIC; os_info.csum = os_info_csum(&os_info); - mem_assign_absolute(S390_lowcore.os_info, (unsigned long) ptr); + mem_assign_absolute(S390_lowcore.os_info, __pa(ptr)); } #ifdef CONFIG_CRASH_DUMP From ba2d394c60ad4b5d70cd449b9e6f6380c2e36a50 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 3 Feb 2022 10:56:07 +0100 Subject: [PATCH 10/69] s390/lgr: use simple assignment instead of memcpy It is quite pointless to use memcpy to copy two bytes, besides that this construct will also partially remove type and size sanity checks. Therefore simply use an assignment. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/lgr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c index 3b895971c3d0..6652e54cf3db 100644 --- a/arch/s390/kernel/lgr.c +++ b/arch/s390/kernel/lgr.c @@ -88,8 +88,7 @@ static void lgr_stsi_2_2_2(struct lgr_info *lgr_info) if (stsi(si, 2, 2, 2)) return; cpascii(lgr_info->name, si->name, sizeof(si->name)); - memcpy(&lgr_info->lpar_number, &si->lpar_number, - sizeof(lgr_info->lpar_number)); + lgr_info->lpar_number = si->lpar_number; } /* From dc306186a130c6d9feb0aabc1c71b8ed1674a3bf Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Sat, 29 Jan 2022 08:38:56 +0100 Subject: [PATCH 11/69] s390/dump: fix old lowcore virtual vs physical address confusion Virtual addresses of vmcore_info and os_info members are wrongly passed to copy_oldmem_kernel(), while the function expects physical address of the source. Instead, __pa() macro should have been applied. Yet, use of __pa() macro could be somehow confusing, since copy_oldmem_kernel() may treat the source as an offset, not as a direct physical address (that depens from the oldmem availability and location). Fix the virtual vs physical address confusion and make the way the old lowcore is read consistent across all sources. 
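The subtle point is which lowcore is being addressed. A short sketch, mirroring get_vmcoreinfo_old() in the diff below:

static void *read_old_vmcoreinfo_pointer(void)
{
	void *addr;

	/*
	 * &S390_lowcore.vmcore_info would name the field in the *running*
	 * kernel's lowcore; __LC_VMCORE_INFO (generated via asm-offsets.c
	 * below) is the architectural byte offset of that field, which is
	 * what copy_oldmem_kernel() needs to read the old, dumped kernel's
	 * lowcore by physical location.
	 */
	if (copy_oldmem_kernel(&addr, (void *)__LC_VMCORE_INFO, sizeof(addr)))
		return NULL;
	return addr;
}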
Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- arch/s390/kernel/asm-offsets.c | 2 ++ arch/s390/kernel/crash_dump.c | 2 +- arch/s390/kernel/os_info.c | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 8e00bb228662..a496b08ea5d1 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -131,6 +131,8 @@ int main(void) OFFSET(__LC_LAST_BREAK, lowcore, last_break); /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ OFFSET(__LC_DUMP_REIPL, lowcore, ipib); + OFFSET(__LC_VMCORE_INFO, lowcore, vmcore_info); + OFFSET(__LC_OS_INFO, lowcore, os_info); /* hardware defined lowcore locations 0x1000 - 0x18ff */ OFFSET(__LC_MCESAD, lowcore, mcesad); OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2); diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index af8202121642..a62bee83a88b 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -432,7 +432,7 @@ static void *get_vmcoreinfo_old(unsigned long *size) Elf64_Nhdr note; void *addr; - if (copy_oldmem_kernel(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) + if (copy_oldmem_kernel(&addr, (void *)__LC_VMCORE_INFO, sizeof(addr))) return NULL; memset(nt_name, 0, sizeof(nt_name)); if (copy_oldmem_kernel(¬e, addr, sizeof(note))) diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c index fa2be71b2c6b..a2b08d3f53ec 100644 --- a/arch/s390/kernel/os_info.c +++ b/arch/s390/kernel/os_info.c @@ -15,6 +15,7 @@ #include #include #include +#include /* * OS info structure has to be page aligned @@ -123,7 +124,7 @@ static void os_info_old_init(void) return; if (!oldmem_data.start) goto fail; - if (copy_oldmem_kernel(&addr, &S390_lowcore.os_info, sizeof(addr))) + if (copy_oldmem_kernel(&addr, (void *)__LC_OS_INFO, sizeof(addr))) goto fail; if (addr == 0 || addr % PAGE_SIZE) goto fail; From 303fd988ed644c7daa260410f3ac99266573557d Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Sat, 29 Jan 2022 09:24:50 +0100 Subject: [PATCH 12/69] s390/maccess: fix semantics of memcpy_real() and its callers There is a confusion with regard to the source address of memcpy_real() and calling functions. While the declared type for a source assumes a virtual address, in fact it always called with physical address of the source. This confusion led to bugs in copy_oldmem_kernel() and copy_oldmem_user() functions, where __pa() macro applied mistakenly to physical addresses. It does not lead to a real issue, since virtual and physical addresses are currently the same. Fix both the bugs and memcpy_real() prototype by making type of source address consistent to the function name and the way it actually used. 
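The resulting contracts, with the declarations as they appear in the diff below and the previously implicit semantics spelled out as comments:

int memcpy_real(void *dest, unsigned long src, size_t count);
	/* dest: kernel virtual address, src: physical address */
int copy_oldmem_kernel(void *dst, unsigned long src, size_t count);
	/* src: physical address (or oldmem/HSA offset) in the dumped system */
int copy_to_user_real(void __user *dest, unsigned long src, unsigned long count);
	/* dest: user-space virtual address, src: physical address */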
Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/os_info.h | 2 +- arch/s390/include/asm/processor.h | 2 +- arch/s390/include/asm/uaccess.h | 2 +- arch/s390/kernel/crash_dump.c | 58 +++++++++++++++---------------- arch/s390/kernel/os_info.c | 7 ++-- arch/s390/kernel/smp.c | 2 +- arch/s390/mm/maccess.c | 4 +-- drivers/s390/char/zcore.c | 3 +- 8 files changed, 38 insertions(+), 42 deletions(-) diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h index 3c89279d2a4b..147a8d547ef9 100644 --- a/arch/s390/include/asm/os_info.h +++ b/arch/s390/include/asm/os_info.h @@ -39,7 +39,7 @@ u32 os_info_csum(struct os_info *os_info); #ifdef CONFIG_CRASH_DUMP void *os_info_old_entry(int nr, unsigned long *size); -int copy_oldmem_kernel(void *dst, void *src, size_t count); +int copy_oldmem_kernel(void *dst, unsigned long src, size_t count); #else static inline void *os_info_old_entry(int nr, unsigned long *size) { diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 5581b64a4236..8fd9772c7370 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -317,7 +317,7 @@ extern void (*s390_base_pgm_handler_fn)(void); #define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL -extern int memcpy_real(void *, void *, size_t); +extern int memcpy_real(void *, unsigned long, size_t); extern void memcpy_absolute(void *, void *, size_t); #define mem_assign_absolute(dest, val) do { \ diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index d74e26b48604..f14f4ade15a9 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -279,7 +279,7 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo return __clear_user(to, n); } -int copy_to_user_real(void __user *dest, void *src, unsigned long count); +int copy_to_user_real(void __user *dest, unsigned long src, unsigned long count); void *s390_kernel_write(void *dst, const void *src, size_t size); #define HAVE_GET_KERNEL_NOFAULT diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index a62bee83a88b..69819b765250 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -132,28 +132,27 @@ static inline void *load_real_addr(void *addr) /* * Copy memory of the old, dumped system to a kernel space virtual address */ -int copy_oldmem_kernel(void *dst, void *src, size_t count) +int copy_oldmem_kernel(void *dst, unsigned long src, size_t count) { - unsigned long from, len; + unsigned long len; void *ra; int rc; while (count) { - from = __pa(src); - if (!oldmem_data.start && from < sclp.hsa_size) { + if (!oldmem_data.start && src < sclp.hsa_size) { /* Copy from zfcp/nvme dump HSA area */ - len = min(count, sclp.hsa_size - from); - rc = memcpy_hsa_kernel(dst, from, len); + len = min(count, sclp.hsa_size - src); + rc = memcpy_hsa_kernel(dst, src, len); if (rc) return rc; } else { /* Check for swapped kdump oldmem areas */ - if (oldmem_data.start && from - oldmem_data.start < oldmem_data.size) { - from -= oldmem_data.start; - len = min(count, oldmem_data.size - from); - } else if (oldmem_data.start && from < oldmem_data.size) { - len = min(count, oldmem_data.size - from); - from += oldmem_data.start; + if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) { + src -= oldmem_data.start; + len = min(count, oldmem_data.size - src); + } else if (oldmem_data.start && src < oldmem_data.size) { + len = 
min(count, oldmem_data.size - src); + src += oldmem_data.start; } else { len = count; } @@ -163,7 +162,7 @@ int copy_oldmem_kernel(void *dst, void *src, size_t count) } else { ra = dst; } - if (memcpy_real(ra, (void *) from, len)) + if (memcpy_real(ra, src, len)) return -EFAULT; } dst += len; @@ -176,31 +175,30 @@ int copy_oldmem_kernel(void *dst, void *src, size_t count) /* * Copy memory of the old, dumped system to a user space virtual address */ -static int copy_oldmem_user(void __user *dst, void *src, size_t count) +static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count) { - unsigned long from, len; + unsigned long len; int rc; while (count) { - from = __pa(src); - if (!oldmem_data.start && from < sclp.hsa_size) { + if (!oldmem_data.start && src < sclp.hsa_size) { /* Copy from zfcp/nvme dump HSA area */ - len = min(count, sclp.hsa_size - from); - rc = memcpy_hsa_user(dst, from, len); + len = min(count, sclp.hsa_size - src); + rc = memcpy_hsa_user(dst, src, len); if (rc) return rc; } else { /* Check for swapped kdump oldmem areas */ - if (oldmem_data.start && from - oldmem_data.start < oldmem_data.size) { - from -= oldmem_data.start; - len = min(count, oldmem_data.size - from); - } else if (oldmem_data.start && from < oldmem_data.size) { - len = min(count, oldmem_data.size - from); - from += oldmem_data.start; + if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) { + src -= oldmem_data.start; + len = min(count, oldmem_data.size - src); + } else if (oldmem_data.start && src < oldmem_data.size) { + len = min(count, oldmem_data.size - src); + src += oldmem_data.start; } else { len = count; } - rc = copy_to_user_real(dst, (void *) from, count); + rc = copy_to_user_real(dst, src, count); if (rc) return rc; } @@ -217,12 +215,12 @@ static int copy_oldmem_user(void __user *dst, void *src, size_t count) ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, unsigned long offset, int userbuf) { - void *src; + unsigned long src; int rc; if (!csize) return 0; - src = (void *) (pfn << PAGE_SHIFT) + offset; + src = pfn_to_phys(pfn) + offset; if (userbuf) rc = copy_oldmem_user((void __force __user *) buf, src, csize); else @@ -429,10 +427,10 @@ static void *nt_prpsinfo(void *ptr) static void *get_vmcoreinfo_old(unsigned long *size) { char nt_name[11], *vmcoreinfo; + unsigned long addr; Elf64_Nhdr note; - void *addr; - if (copy_oldmem_kernel(&addr, (void *)__LC_VMCORE_INFO, sizeof(addr))) + if (copy_oldmem_kernel(&addr, __LC_VMCORE_INFO, sizeof(addr))) return NULL; memset(nt_name, 0, sizeof(nt_name)); if (copy_oldmem_kernel(¬e, addr, sizeof(note))) diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c index a2b08d3f53ec..6b5b64e67eee 100644 --- a/arch/s390/kernel/os_info.c +++ b/arch/s390/kernel/os_info.c @@ -91,7 +91,7 @@ static void os_info_old_alloc(int nr, int align) goto fail; } buf_align = PTR_ALIGN(buf, align); - if (copy_oldmem_kernel(buf_align, (void *) addr, size)) { + if (copy_oldmem_kernel(buf_align, addr, size)) { msg = "copy failed"; goto fail_free; } @@ -124,15 +124,14 @@ static void os_info_old_init(void) return; if (!oldmem_data.start) goto fail; - if (copy_oldmem_kernel(&addr, (void *)__LC_OS_INFO, sizeof(addr))) + if (copy_oldmem_kernel(&addr, __LC_OS_INFO, sizeof(addr))) goto fail; if (addr == 0 || addr % PAGE_SIZE) goto fail; os_info_old = kzalloc(sizeof(*os_info_old), GFP_KERNEL); if (!os_info_old) goto fail; - if (copy_oldmem_kernel(os_info_old, (void *) addr, - sizeof(*os_info_old))) + if 
(copy_oldmem_kernel(os_info_old, addr, sizeof(*os_info_old))) goto fail_free; if (os_info_old->magic != OS_INFO_MAGIC) goto fail_free; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index b214af5164bd..4f0e9f412f27 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -670,7 +670,7 @@ static __init void smp_save_cpu_regs(struct save_area *sa, u16 addr, bool is_boot_cpu, void *regs) { if (is_boot_cpu) - copy_oldmem_kernel(regs, (void *) __LC_FPREGS_SAVE_AREA, 512); + copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512); else __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(regs)); save_area_add_regs(sa, regs); diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index d4d311cb2bb5..4cc5020f4e18 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -121,7 +121,7 @@ static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest, /* * Copy memory in real mode (kernel to kernel) */ -int memcpy_real(void *dest, void *src, size_t count) +int memcpy_real(void *dest, unsigned long src, size_t count) { unsigned long _dest = (unsigned long)dest; unsigned long _src = (unsigned long)src; @@ -173,7 +173,7 @@ void memcpy_absolute(void *dest, void *src, size_t count) /* * Copy memory from kernel (real) to user (virtual) */ -int copy_to_user_real(void __user *dest, void *src, unsigned long count) +int copy_to_user_real(void __user *dest, unsigned long src, unsigned long count) { int offs = 0, size, rc; char *buf; diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 3ba2d934a3e8..516783ba950f 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -229,8 +229,7 @@ static int __init zcore_reipl_init(void) rc = memcpy_hsa_kernel(zcore_ipl_block, ipib_info.ipib, PAGE_SIZE); else - rc = memcpy_real(zcore_ipl_block, (void *) ipib_info.ipib, - PAGE_SIZE); + rc = memcpy_real(zcore_ipl_block, ipib_info.ipib, PAGE_SIZE); if (rc || (__force u32)csum_partial(zcore_ipl_block, zcore_ipl_block->hdr.len, 0) != ipib_info.checksum) { TRACE("Checksum does not match\n"); From f413f685c6c094a7b968f66ca2e8512720807203 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 7 Feb 2022 14:02:18 +0100 Subject: [PATCH 13/69] s390/mm: use CRST_ALLOC_ORDER instead of number Use CRST_ALLOC_ORDER to make it more obvious what the order means, and also to be consistent with other code, e.g. the vmemmap code. 
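For context on where the literal 2 came from, a stand-alone sketch with local SKETCH_* names, assuming the usual s390 layout of 2048 eight-byte entries per region/segment (crst) table and a 4 KiB base page:

#include <assert.h>

#define SKETCH_PAGE_SIZE	4096UL
#define SKETCH_CRST_TABLE_SIZE	(2048UL * 8UL)	/* 16 KiB */
#define SKETCH_CRST_ALLOC_ORDER	2		/* 16 KiB = 4 pages = order 2 */

int main(void)
{
	assert(SKETCH_CRST_TABLE_SIZE == (SKETCH_PAGE_SIZE << SKETCH_CRST_ALLOC_ORDER));
	return 0;
}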
Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/pgalloc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index fd35c1a0213b..2de48b2c1b04 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -53,17 +53,17 @@ __initcall(page_table_register_sysctl); unsigned long *crst_table_alloc(struct mm_struct *mm) { - struct page *page = alloc_pages(GFP_KERNEL, 2); + struct page *page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER); if (!page) return NULL; - arch_set_page_dat(page, 2); + arch_set_page_dat(page, CRST_ALLOC_ORDER); return (unsigned long *) page_to_virt(page); } void crst_table_free(struct mm_struct *mm, unsigned long *table) { - free_pages((unsigned long) table, 2); + free_pages((unsigned long)table, CRST_ALLOC_ORDER); } static void __crst_table_upgrade(void *arg) @@ -403,7 +403,7 @@ void __tlb_remove_table(void *_table) switch (half) { case 0x00U: /* pmd, pud, or p4d */ - free_pages((unsigned long) table, 2); + free_pages((unsigned long)table, CRST_ALLOC_ORDER); return; case 0x01U: /* lower 2K of a 4K page table */ case 0x02U: /* higher 2K of a 4K page table */ From 42b01a553a56d9bc7c75b700fd274f1ec4a3763f Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sat, 29 Jan 2022 00:34:13 +0100 Subject: [PATCH 14/69] s390: always use the packed stack layout -mpacked-stack option has been supported by both minimum gcc and clang versions for a while. With commit e2bc3e91d91e ("scripts/min-tool-version.sh: Raise minimum clang version to 13.0.0 for s390") minimum clang version now also supports a combination of flags -mpacked-stack -mbackchain -pg -mfentry and fulfills all requirements to always enable the packed stack layout. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 16 +--------------- arch/s390/Makefile | 9 ++------- arch/s390/include/asm/stacktrace.h | 10 +--------- arch/s390/kernel/mcount.S | 6 +----- arch/s390/net/bpf_jit_comp.c | 1 - 5 files changed, 5 insertions(+), 37 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index be9f39fd06df..a492376d6e3f 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -155,7 +155,7 @@ config S390 select HAVE_DYNAMIC_FTRACE_WITH_ARGS select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS select HAVE_DYNAMIC_FTRACE_WITH_REGS - select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES + select HAVE_EBPF_JIT if HAVE_MARCH_Z196_FEATURES select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_FAST_GUP select HAVE_FENTRY @@ -656,20 +656,6 @@ config MAX_PHYSMEM_BITS Increasing the number of bits also increases the kernel image size. By default 46 bits (64TB) are supported. -config PACK_STACK - def_bool y - prompt "Pack kernel stack" - help - This option enables the compiler option -mkernel-backchain if it - is available. If the option is available the compiler supports - the new stack layout which dramatically reduces the minimum stack - frame size. With an old compiler a non-leaf function needs a - minimum of 96 bytes on 31 bit and 160 bytes on 64 bit. With - -mkernel-backchain the minimum size drops to 16 byte on 31 bit - and 24 byte on 64 bit. - - Say Y if you are unsure. 
- config CHECK_STACK def_bool y depends on !VMAP_STACK diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 609e3697324b..2edf25b44c4b 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -21,7 +21,7 @@ endif aflags_dwarf := -Wa,-gdwarf-2 KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__ KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf)) -KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 +KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbackchain KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables @@ -68,11 +68,6 @@ cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include # cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls -ifneq ($(call cc-option,-mpacked-stack -mbackchain -msoft-float),) -cflags-$(CONFIG_PACK_STACK) += -mpacked-stack -D__PACK_STACK -aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK -endif - KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y) KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y) @@ -111,7 +106,7 @@ endif # Test CFI features of binutils cfi := $(call as-instr,.cfi_startproc\n.cfi_val_offset 15$(comma)-160\n.cfi_endproc,-DCONFIG_AS_CFI_VAL_OFFSET=1) -KBUILD_CFLAGS += -mbackchain -msoft-float $(cflags-y) +KBUILD_CFLAGS += -mpacked-stack -mbackchain -msoft-float $(cflags-y) KBUILD_CFLAGS += -pipe -Wno-sign-compare KBUILD_CFLAGS += -fno-asynchronous-unwind-tables $(cfi) KBUILD_AFLAGS += $(aflags-y) $(cfi) diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index dd00d98804ec..275f4258fbd5 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -36,22 +36,14 @@ static inline bool on_stack(struct stack_info *info, /* * Stack layout of a C stack frame. + * Kernel uses the packed stack layout (-mpacked-stack). */ -#ifndef __PACK_STACK -struct stack_frame { - unsigned long back_chain; - unsigned long empty1[5]; - unsigned long gprs[10]; - unsigned int empty2[8]; -}; -#else struct stack_frame { unsigned long empty1[5]; unsigned int empty2[8]; unsigned long gprs[10]; unsigned long back_chain; }; -#endif /* * Unlike current_stack_pointer() which simply returns current value of %r15 diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 9d90b8bc6692..b88205224f3c 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -26,12 +26,8 @@ ENDPROC(ftrace_stub) #define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) #define STACK_PTREGS_ORIG_GPR2 (STACK_PTREGS + __PT_ORIG_GPR2) #define STACK_PTREGS_FLAGS (STACK_PTREGS + __PT_FLAGS) -#ifdef __PACK_STACK -/* allocate just enough for r14, r15 and backchain */ +/* packed stack: allocate just enough for r14, r15 and backchain */ #define TRACED_FUNC_FRAME_SIZE 24 -#else -#define TRACED_FUNC_FRAME_SIZE STACK_FRAME_OVERHEAD -#endif .macro ftrace_regs_entry, allregs=0 stg %r14,(__SF_GPRS+8*8)(%r15) # save traced function caller diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 9ff2bd83aad7..df5d4da06643 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -7,7 +7,6 @@ * - HAVE_MARCH_Z196_FEATURES: laal, laalg * - HAVE_MARCH_Z10_FEATURES: msfi, cgrj, clgrj * - HAVE_MARCH_Z9_109_FEATURES: alfi, llilf, clfi, oilf, nilf - * - PACK_STACK * - 64BIT * * Copyright IBM Corp. 
2012,2015 From 81eac9079663bba7020b58c896baca839a6af8f0 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 1 Feb 2022 19:54:22 +0100 Subject: [PATCH 15/69] s390/test_unwind: show tests as skipped if unsupported Signed-off-by: Vasily Gorbik --- arch/s390/lib/test_unwind.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index bc7973359ae2..653bf170ee50 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -136,7 +136,6 @@ static __always_inline unsigned long get_psw_addr(void) return psw_addr; } -#ifdef CONFIG_KPROBES static int pgm_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct unwindme *u = unwindme; @@ -145,7 +144,6 @@ static int pgm_pre_handler(struct kprobe *p, struct pt_regs *regs) (u->flags & UWM_SP) ? u->sp : 0); return 0; } -#endif /* This function may or may not appear in the backtrace. */ static noinline int unwindme_func4(struct unwindme *u) @@ -157,11 +155,13 @@ static noinline int unwindme_func4(struct unwindme *u) wait_event(u->task_wq, kthread_should_park()); kthread_parkme(); return 0; -#ifdef CONFIG_KPROBES } else if (u->flags & UWM_PGM) { struct kprobe kp; int ret; + if (!IS_ENABLED(CONFIG_KPROBES)) + kunit_skip(current_test, "requires CONFIG_KPROBES"); + unwindme = u; memset(&kp, 0, sizeof(kp)); kp.symbol_name = "do_report_trap"; @@ -185,7 +185,6 @@ static noinline int unwindme_func4(struct unwindme *u) unregister_kprobe(&kp); unwindme = NULL; return u->ret; -#endif } else { struct pt_regs regs; @@ -327,7 +326,6 @@ static const struct test_params param_list[] = { .name = "UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS"}, {.flags = UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK, .name = "UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK"}, - #ifdef CONFIG_KPROBES {.flags = UWM_PGM, .name = "UWM_PGM"}, {.flags = UWM_PGM | UWM_SP, .name = "UWM_PGM | UWM_SP"}, @@ -335,7 +333,6 @@ static const struct test_params param_list[] = { .name = "UWM_PGM | UWM_REGS"}, {.flags = UWM_PGM | UWM_SP | UWM_REGS, .name = "UWM_PGM | UWM_SP | UWM_REGS"}, - #endif }; /* From 93bd3232448f699f47d06590aeb56edfebab4495 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 31 Jan 2022 19:00:56 +0100 Subject: [PATCH 16/69] s390/test_unwind: minor cleanup - make current_test static - use current_test consistently - add TEST_WITH_FLAGS macro to contract parametrized tests definition Signed-off-by: Vasily Gorbik --- arch/s390/lib/test_unwind.c | 72 +++++++++++++++---------------------- 1 file changed, 28 insertions(+), 44 deletions(-) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index 653bf170ee50..744b47692399 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -16,7 +16,7 @@ #include #include -struct kunit *current_test; +static struct kunit *current_test; #define BT_BUF_SIZE (PAGE_SIZE * 4) @@ -254,7 +254,7 @@ static int test_unwind_irq(struct unwindme *u) } /* Spawns a task and passes it to test_unwind(). 
*/ -static int test_unwind_task(struct kunit *test, struct unwindme *u) +static int test_unwind_task(struct unwindme *u) { struct task_struct *task; int ret; @@ -269,7 +269,7 @@ static int test_unwind_task(struct kunit *test, struct unwindme *u) */ task = kthread_run(unwindme_func1, u, "%s", __func__); if (IS_ERR(task)) { - kunit_err(test, "kthread_run() failed\n"); + kunit_err(current_test, "kthread_run() failed\n"); return PTR_ERR(task); } /* @@ -292,47 +292,31 @@ struct test_params { /* * Create required parameter list for tests */ +#define TEST_WITH_FLAGS(f) { .flags = f, .name = #f } static const struct test_params param_list[] = { - {.flags = UWM_DEFAULT, .name = "UWM_DEFAULT"}, - {.flags = UWM_SP, .name = "UWM_SP"}, - {.flags = UWM_REGS, .name = "UWM_REGS"}, - {.flags = UWM_SWITCH_STACK, - .name = "UWM_SWITCH_STACK"}, - {.flags = UWM_SP | UWM_REGS, - .name = "UWM_SP | UWM_REGS"}, - {.flags = UWM_CALLER | UWM_SP, - .name = "WM_CALLER | UWM_SP"}, - {.flags = UWM_CALLER | UWM_SP | UWM_REGS, - .name = "UWM_CALLER | UWM_SP | UWM_REGS"}, - {.flags = UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK, - .name = "UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK"}, - {.flags = UWM_THREAD, .name = "UWM_THREAD"}, - {.flags = UWM_THREAD | UWM_SP, - .name = "UWM_THREAD | UWM_SP"}, - {.flags = UWM_THREAD | UWM_CALLER | UWM_SP, - .name = "UWM_THREAD | UWM_CALLER | UWM_SP"}, - {.flags = UWM_IRQ, .name = "UWM_IRQ"}, - {.flags = UWM_IRQ | UWM_SWITCH_STACK, - .name = "UWM_IRQ | UWM_SWITCH_STACK"}, - {.flags = UWM_IRQ | UWM_SP, - .name = "UWM_IRQ | UWM_SP"}, - {.flags = UWM_IRQ | UWM_REGS, - .name = "UWM_IRQ | UWM_REGS"}, - {.flags = UWM_IRQ | UWM_SP | UWM_REGS, - .name = "UWM_IRQ | UWM_SP | UWM_REGS"}, - {.flags = UWM_IRQ | UWM_CALLER | UWM_SP, - .name = "UWM_IRQ | UWM_CALLER | UWM_SP"}, - {.flags = UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS, - .name = "UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS"}, - {.flags = UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK, - .name = "UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK"}, - {.flags = UWM_PGM, .name = "UWM_PGM"}, - {.flags = UWM_PGM | UWM_SP, - .name = "UWM_PGM | UWM_SP"}, - {.flags = UWM_PGM | UWM_REGS, - .name = "UWM_PGM | UWM_REGS"}, - {.flags = UWM_PGM | UWM_SP | UWM_REGS, - .name = "UWM_PGM | UWM_SP | UWM_REGS"}, + TEST_WITH_FLAGS(UWM_DEFAULT), + TEST_WITH_FLAGS(UWM_SP), + TEST_WITH_FLAGS(UWM_REGS), + TEST_WITH_FLAGS(UWM_SWITCH_STACK), + TEST_WITH_FLAGS(UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_CALLER | UWM_SP), + TEST_WITH_FLAGS(UWM_CALLER | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK), + TEST_WITH_FLAGS(UWM_THREAD), + TEST_WITH_FLAGS(UWM_THREAD | UWM_SP), + TEST_WITH_FLAGS(UWM_THREAD | UWM_CALLER | UWM_SP), + TEST_WITH_FLAGS(UWM_IRQ), + TEST_WITH_FLAGS(UWM_IRQ | UWM_SWITCH_STACK), + TEST_WITH_FLAGS(UWM_IRQ | UWM_SP), + TEST_WITH_FLAGS(UWM_IRQ | UWM_REGS), + TEST_WITH_FLAGS(UWM_IRQ | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_IRQ | UWM_CALLER | UWM_SP), + TEST_WITH_FLAGS(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK), + TEST_WITH_FLAGS(UWM_PGM), + TEST_WITH_FLAGS(UWM_PGM | UWM_SP), + TEST_WITH_FLAGS(UWM_PGM | UWM_REGS), + TEST_WITH_FLAGS(UWM_PGM | UWM_SP | UWM_REGS), }; /* @@ -357,7 +341,7 @@ static void test_unwind_flags(struct kunit *test) params = (const struct test_params *)test->param_value; u.flags = params->flags; if (u.flags & UWM_THREAD) - KUNIT_EXPECT_EQ(test, 0, test_unwind_task(test, &u)); + 
KUNIT_EXPECT_EQ(test, 0, test_unwind_task(&u)); else if (u.flags & UWM_IRQ) KUNIT_EXPECT_EQ(test, 0, test_unwind_irq(&u)); else From 8a0c9705502701cc6a5d87d70bc6b631ec939265 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 1 Feb 2022 21:04:16 +0100 Subject: [PATCH 17/69] s390/test_unwind: add "backtrace" module parameter By default no backtraces are printed when a test succeeds, but sometimes it is useful to spot issues automated test doesn't cover. Add "backtrace" module parameter to force it. Signed-off-by: Vasily Gorbik --- arch/s390/lib/test_unwind.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index 744b47692399..8be9ca263127 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -20,6 +20,10 @@ static struct kunit *current_test; #define BT_BUF_SIZE (PAGE_SIZE * 4) +static bool force_bt; +module_param_named(backtrace, force_bt, bool, 0444); +MODULE_PARM_DESC(backtrace, "print backtraces for all tests"); + /* * To avoid printk line limit split backtrace by lines */ @@ -98,7 +102,7 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs, kunit_err(current_test, "Maximum number of frames exceeded\n"); ret = -EINVAL; } - if (ret) + if (ret || force_bt) print_backtrace(bt); kfree(bt); return ret; From 829ec7491c401e4a3068955a438578d2c2ae8acc Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 31 Jan 2022 19:06:52 +0100 Subject: [PATCH 18/69] s390/test_unwind: add ftrace test Signed-off-by: Vasily Gorbik --- arch/s390/lib/test_unwind.c | 59 +++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index 8be9ca263127..2224694a0888 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -129,6 +130,7 @@ static struct unwindme *unwindme; #define UWM_SWITCH_STACK 0x10 /* Use call_on_stack. */ #define UWM_IRQ 0x20 /* Unwind from irq context. */ #define UWM_PGM 0x40 /* Unwind from program check handler. */ +#define UWM_FTRACE 0x80 /* Unwind from ftrace handler. */ static __always_inline unsigned long get_psw_addr(void) { @@ -149,6 +151,57 @@ static int pgm_pre_handler(struct kprobe *p, struct pt_regs *regs) return 0; } +static void notrace __used test_unwind_ftrace_handler(unsigned long ip, + unsigned long parent_ip, + struct ftrace_ops *fops, + struct ftrace_regs *fregs) +{ + struct unwindme *u = (struct unwindme *)fregs->regs.gprs[2]; + + u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? &fregs->regs : NULL, + (u->flags & UWM_SP) ? 
u->sp : 0); +} + +static noinline int test_unwind_ftraced_func(struct unwindme *u) +{ + return READ_ONCE(u)->ret; +} + +static int test_unwind_ftrace(struct unwindme *u) +{ + struct ftrace_ops *fops; + int ret; + +#ifndef CONFIG_DYNAMIC_FTRACE + kunit_skip(current_test, "requires CONFIG_DYNAMIC_FTRACE"); + fops = NULL; /* used */ +#else + fops = kunit_kzalloc(current_test, sizeof(*fops), GFP_KERNEL); + fops->func = test_unwind_ftrace_handler; + fops->flags = FTRACE_OPS_FL_DYNAMIC | + FTRACE_OPS_FL_RECURSION | + FTRACE_OPS_FL_SAVE_REGS | + FTRACE_OPS_FL_PERMANENT; +#endif + + ret = ftrace_set_filter_ip(fops, (unsigned long)test_unwind_ftraced_func, 0, 0); + if (ret) { + kunit_err(current_test, "failed to set ftrace filter (%d)\n", ret); + return -1; + } + + ret = register_ftrace_function(fops); + if (!ret) { + ret = test_unwind_ftraced_func(u); + unregister_ftrace_function(fops); + } else { + kunit_err(current_test, "failed to register ftrace handler (%d)\n", ret); + } + + ftrace_set_filter_ip(fops, (unsigned long)test_unwind_ftraced_func, 1, 0); + return ret; +} + /* This function may or may not appear in the backtrace. */ static noinline int unwindme_func4(struct unwindme *u) { @@ -189,6 +242,8 @@ static noinline int unwindme_func4(struct unwindme *u) unregister_kprobe(&kp); unwindme = NULL; return u->ret; + } else if (u->flags & UWM_FTRACE) { + return test_unwind_ftrace(u); } else { struct pt_regs regs; @@ -321,6 +376,10 @@ static const struct test_params param_list[] = { TEST_WITH_FLAGS(UWM_PGM | UWM_SP), TEST_WITH_FLAGS(UWM_PGM | UWM_REGS), TEST_WITH_FLAGS(UWM_PGM | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_FTRACE), + TEST_WITH_FLAGS(UWM_FTRACE | UWM_SP), + TEST_WITH_FLAGS(UWM_FTRACE | UWM_REGS), + TEST_WITH_FLAGS(UWM_FTRACE | UWM_SP | UWM_REGS), }; /* From 9ba142f472c1e4ec514f3bad1134b6b6ad8f6c13 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 3 Feb 2022 00:49:41 +0100 Subject: [PATCH 19/69] s390/test_unwind: fix and extend kprobes test Running kprobe test on a kernel built with clang 14 didn't actually trigger pgm_pre_handler() and no unwinder code was called. Even though do_report_trap() is a global symbol, clang inlined it in several local functions including illegal_op() handler, so that kprobbing a global symbol didn't have a desired effect. To achieve the same test result (unwinding from a program check handler) introduce a local function and probe an instruction in the middle, so that kprobe doesn't take KPROBE_ON_FTRACE path. While at it, add another test for KPROBE_ON_FTRACE. Signed-off-by: Vasily Gorbik --- arch/s390/lib/test_unwind.c | 83 ++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 33 deletions(-) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index 2224694a0888..b209014ce426 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -129,8 +129,9 @@ static struct unwindme *unwindme; #define UWM_CALLER 0x8 /* Unwind starting from caller. */ #define UWM_SWITCH_STACK 0x10 /* Use call_on_stack. */ #define UWM_IRQ 0x20 /* Unwind from irq context. */ -#define UWM_PGM 0x40 /* Unwind from program check handler. */ -#define UWM_FTRACE 0x80 /* Unwind from ftrace handler. */ +#define UWM_PGM 0x40 /* Unwind from program check handler */ +#define UWM_KPROBE_ON_FTRACE 0x80 /* Unwind from kprobe handler called via ftrace. */ +#define UWM_FTRACE 0x100 /* Unwind from ftrace handler. 
*/ static __always_inline unsigned long get_psw_addr(void) { @@ -142,7 +143,7 @@ static __always_inline unsigned long get_psw_addr(void) return psw_addr; } -static int pgm_pre_handler(struct kprobe *p, struct pt_regs *regs) +static int kprobe_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct unwindme *u = unwindme; @@ -151,6 +152,46 @@ static int pgm_pre_handler(struct kprobe *p, struct pt_regs *regs) return 0; } +extern const char test_unwind_kprobed_insn[]; + +static noinline void test_unwind_kprobed_func(void) +{ + asm volatile( + " nopr %%r7\n" + "test_unwind_kprobed_insn:\n" + " nopr %%r7\n" + :); +} + +static int test_unwind_kprobe(struct unwindme *u) +{ + struct kprobe kp; + int ret; + + if (!IS_ENABLED(CONFIG_KPROBES)) + kunit_skip(current_test, "requires CONFIG_KPROBES"); + if (!IS_ENABLED(CONFIG_KPROBES_ON_FTRACE) && u->flags & UWM_KPROBE_ON_FTRACE) + kunit_skip(current_test, "requires CONFIG_KPROBES_ON_FTRACE"); + + u->ret = -1; /* make sure kprobe is called */ + unwindme = u; + memset(&kp, 0, sizeof(kp)); + kp.pre_handler = kprobe_pre_handler; + kp.addr = u->flags & UWM_KPROBE_ON_FTRACE ? + (kprobe_opcode_t *)test_unwind_kprobed_func : + (kprobe_opcode_t *)test_unwind_kprobed_insn; + ret = register_kprobe(&kp); + if (ret < 0) { + kunit_err(current_test, "register_kprobe failed %d\n", ret); + return -EINVAL; + } + + test_unwind_kprobed_func(); + unregister_kprobe(&kp); + unwindme = NULL; + return u->ret; +} + static void notrace __used test_unwind_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *fops, @@ -212,36 +253,8 @@ static noinline int unwindme_func4(struct unwindme *u) wait_event(u->task_wq, kthread_should_park()); kthread_parkme(); return 0; - } else if (u->flags & UWM_PGM) { - struct kprobe kp; - int ret; - - if (!IS_ENABLED(CONFIG_KPROBES)) - kunit_skip(current_test, "requires CONFIG_KPROBES"); - - unwindme = u; - memset(&kp, 0, sizeof(kp)); - kp.symbol_name = "do_report_trap"; - kp.pre_handler = pgm_pre_handler; - ret = register_kprobe(&kp); - if (ret < 0) { - kunit_err(current_test, "register_kprobe failed %d\n", ret); - return -EINVAL; - } - - /* - * Trigger operation exception; use insn notation to bypass - * llvm's integrated assembler sanity checks. - */ - asm volatile( - " .insn e,0x0000\n" /* illegal opcode */ - "0: nopr %%r7\n" - EX_TABLE(0b, 0b) - :); - - unregister_kprobe(&kp); - unwindme = NULL; - return u->ret; + } else if (u->flags & (UWM_PGM | UWM_KPROBE_ON_FTRACE)) { + return test_unwind_kprobe(u); } else if (u->flags & UWM_FTRACE) { return test_unwind_ftrace(u); } else { @@ -376,6 +389,10 @@ static const struct test_params param_list[] = { TEST_WITH_FLAGS(UWM_PGM | UWM_SP), TEST_WITH_FLAGS(UWM_PGM | UWM_REGS), TEST_WITH_FLAGS(UWM_PGM | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_KPROBE_ON_FTRACE), + TEST_WITH_FLAGS(UWM_KPROBE_ON_FTRACE | UWM_SP), + TEST_WITH_FLAGS(UWM_KPROBE_ON_FTRACE | UWM_REGS), + TEST_WITH_FLAGS(UWM_KPROBE_ON_FTRACE | UWM_SP | UWM_REGS), TEST_WITH_FLAGS(UWM_FTRACE), TEST_WITH_FLAGS(UWM_FTRACE | UWM_SP), TEST_WITH_FLAGS(UWM_FTRACE | UWM_REGS), From 4f8206b882868b62dc15ddfc0c17bb031876afb5 Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Fri, 1 Feb 2019 16:21:11 -0500 Subject: [PATCH 20/69] s390/ap: driver callback to indicate resource in use Introduces a new driver callback to prevent a root user from re-assigning the APQN of a queue that is in use by a non-default host device driver to a default host device driver and vice versa. 
The callback will be invoked whenever a change to the AP bus's sysfs apmask or aqmask attributes would result in one or more APQNs being re-assigned. If the callback responds in the affirmative for any driver queried, the change to the apmask or aqmask will be rejected with a device busy error. For this patch, only non-default drivers will be queried. Currently, there is only one non-default driver, the vfio_ap device driver. The vfio_ap device driver facilitates pass-through of an AP queue to a guest. The idea here is that a guest may be administered by a different sysadmin than the host and we don't want AP resources to unexpectedly disappear from a guest's AP configuration (i.e., adapters and domains assigned to the matrix mdev). This will enforce the proper procedure for removing AP resources intended for guest usage which is to first unassign them from the matrix mdev, then unbind them from the vfio_ap device driver. Signed-off-by: Tony Krowiak Reviewed-by: Harald Freudenberger Reviewed-by: Halil Pasic Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.c | 145 ++++++++++++++++++++++++++++++++--- drivers/s390/crypto/ap_bus.h | 4 + 2 files changed, 139 insertions(+), 10 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 1986243f9cd3..d71d2d2c341f 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "ap_bus.h" #include "ap_debug.h" @@ -1067,6 +1068,23 @@ static int modify_bitmap(const char *str, unsigned long *bitmap, int bits) return 0; } +static int ap_parse_bitmap_str(const char *str, unsigned long *bitmap, int bits, + unsigned long *newmap) +{ + unsigned long size; + int rc; + + size = BITS_TO_LONGS(bits) * sizeof(unsigned long); + if (*str == '+' || *str == '-') { + memcpy(newmap, bitmap, size); + rc = modify_bitmap(str, newmap, bits); + } else { + memset(newmap, 0, size); + rc = hex2bitmap(str, newmap, bits); + } + return rc; +} + int ap_parse_mask_str(const char *str, unsigned long *bitmap, int bits, struct mutex *lock) @@ -1086,14 +1104,7 @@ int ap_parse_mask_str(const char *str, kfree(newmap); return -ERESTARTSYS; } - - if (*str == '+' || *str == '-') { - memcpy(newmap, bitmap, size); - rc = modify_bitmap(str, newmap, bits); - } else { - memset(newmap, 0, size); - rc = hex2bitmap(str, newmap, bits); - } + rc = ap_parse_bitmap_str(str, bitmap, bits, newmap); if (rc == 0) memcpy(bitmap, newmap, size); mutex_unlock(lock); @@ -1286,12 +1297,69 @@ static ssize_t apmask_show(struct bus_type *bus, char *buf) return rc; } +static int __verify_card_reservations(struct device_driver *drv, void *data) +{ + int rc = 0; + struct ap_driver *ap_drv = to_ap_drv(drv); + unsigned long *newapm = (unsigned long *)data; + + /* + * increase the driver's module refcounter to be sure it is not + * going away when we invoke the callback function. 
+ */ + if (!try_module_get(drv->owner)) + return 0; + + if (ap_drv->in_use) { + rc = ap_drv->in_use(newapm, ap_perms.aqm); + if (rc) + rc = -EBUSY; + } + + /* release the driver's module */ + module_put(drv->owner); + + return rc; +} + +static int apmask_commit(unsigned long *newapm) +{ + int rc; + unsigned long reserved[BITS_TO_LONGS(AP_DEVICES)]; + + /* + * Check if any bits in the apmask have been set which will + * result in queues being removed from non-default drivers + */ + if (bitmap_andnot(reserved, newapm, ap_perms.apm, AP_DEVICES)) { + rc = bus_for_each_drv(&ap_bus_type, NULL, reserved, + __verify_card_reservations); + if (rc) + return rc; + } + + memcpy(ap_perms.apm, newapm, APMASKSIZE); + + return 0; +} + static ssize_t apmask_store(struct bus_type *bus, const char *buf, size_t count) { int rc; + DECLARE_BITMAP(newapm, AP_DEVICES); - rc = ap_parse_mask_str(buf, ap_perms.apm, AP_DEVICES, &ap_perms_mutex); + if (mutex_lock_interruptible(&ap_perms_mutex)) + return -ERESTARTSYS; + + rc = ap_parse_bitmap_str(buf, ap_perms.apm, AP_DEVICES, newapm); + if (rc) + goto done; + + rc = apmask_commit(newapm); + +done: + mutex_unlock(&ap_perms_mutex); if (rc) return rc; @@ -1317,12 +1385,69 @@ static ssize_t aqmask_show(struct bus_type *bus, char *buf) return rc; } +static int __verify_queue_reservations(struct device_driver *drv, void *data) +{ + int rc = 0; + struct ap_driver *ap_drv = to_ap_drv(drv); + unsigned long *newaqm = (unsigned long *)data; + + /* + * increase the driver's module refcounter to be sure it is not + * going away when we invoke the callback function. + */ + if (!try_module_get(drv->owner)) + return 0; + + if (ap_drv->in_use) { + rc = ap_drv->in_use(ap_perms.apm, newaqm); + if (rc) + return -EBUSY; + } + + /* release the driver's module */ + module_put(drv->owner); + + return rc; +} + +static int aqmask_commit(unsigned long *newaqm) +{ + int rc; + unsigned long reserved[BITS_TO_LONGS(AP_DOMAINS)]; + + /* + * Check if any bits in the aqmask have been set which will + * result in queues being removed from non-default drivers + */ + if (bitmap_andnot(reserved, newaqm, ap_perms.aqm, AP_DOMAINS)) { + rc = bus_for_each_drv(&ap_bus_type, NULL, reserved, + __verify_queue_reservations); + if (rc) + return rc; + } + + memcpy(ap_perms.aqm, newaqm, AQMASKSIZE); + + return 0; +} + static ssize_t aqmask_store(struct bus_type *bus, const char *buf, size_t count) { int rc; + DECLARE_BITMAP(newaqm, AP_DOMAINS); - rc = ap_parse_mask_str(buf, ap_perms.aqm, AP_DOMAINS, &ap_perms_mutex); + if (mutex_lock_interruptible(&ap_perms_mutex)) + return -ERESTARTSYS; + + rc = ap_parse_bitmap_str(buf, ap_perms.aqm, AP_DOMAINS, newaqm); + if (rc) + goto done; + + rc = aqmask_commit(newaqm); + +done: + mutex_unlock(&ap_perms_mutex); if (rc) return rc; diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 714c7583af5e..7dd992bad949 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -143,6 +143,7 @@ struct ap_driver { int (*probe)(struct ap_device *); void (*remove)(struct ap_device *); + int (*in_use)(unsigned long *apm, unsigned long *aqm); }; #define to_ap_drv(x) container_of((x), struct ap_driver, driver) @@ -290,6 +291,9 @@ void ap_queue_init_state(struct ap_queue *aq); struct ap_card *ap_card_create(int id, int queue_depth, int raw_type, int comp_type, unsigned int functions, int ml); +#define APMASKSIZE (BITS_TO_LONGS(AP_DEVICES) * sizeof(unsigned long)) +#define AQMASKSIZE (BITS_TO_LONGS(AP_DOMAINS) * sizeof(unsigned long)) + struct ap_perms 
{ unsigned long ioctlm[BITS_TO_LONGS(AP_IOCTLS)]; unsigned long apm[BITS_TO_LONGS(AP_DEVICES)]; From 283915850a4455e8af40ce5b8d291dc79638cdae Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Fri, 1 Oct 2021 13:39:13 -0400 Subject: [PATCH 21/69] s390/ap: notify drivers on config changed and scan complete callbacks This patch introduces an extension to the ap bus to notify device drivers when the host AP configuration changes - i.e., adapters, domains or control domains are added or removed. When an adapter or domain is added to the host's AP configuration, the AP bus will create the associated queue devices in the linux sysfs device model. Each new type 10 (i.e., CEX4) or newer queue device with an APQN that is not reserved for the default device driver will get bound to the vfio_ap device driver. Likewise, when an adapter or domain is removed from the host's AP configuration, the AP bus will remove the associated queue devices from the sysfs device model. Each of the queues that is bound to the vfio_ap device driver will get unbound. With the introduction of hot plug support, binding or unbinding of a queue device will result in plugging or unplugging one or more queues from a guest that is using the queue. If there are multiple changes to the host's AP configuration, it could result in the probe and remove callbacks getting invoked multiple times. Each time queues are plugged into or unplugged from a guest, the guest's VCPUs must be taken out of SIE. If this occurs multiple times due to changes in the host's AP configuration, that can have an undesirable negative effect on the guest's performance. To alleviate this problem, this patch introduces two new callbacks: one to notify the vfio_ap device driver when the AP bus scan routine detects a change to the host's AP configuration; and, one to notify the driver when the AP bus is done scanning. This will allow the vfio_ap driver to do bulk processing of all affected adapters, domains and control domains for affected guests rather than plugging or unplugging them one at a time when the probe or remove callback is invoked. The two new callbacks are: void (*on_config_changed)(struct ap_config_info *new_config_info, struct ap_config_info *old_config_info); This callback is invoked at the start of the AP bus scan function when it determines that the host AP configuration information has changed since the previous scan. This is done by storing an old and current QCI info struct and comparing them. If there is any difference, the callback is invoked. void (*on_scan_complete)(struct ap_config_info *new_config_info, struct ap_config_info *old_config_info); The on_scan_complete callback is invoked after the ap bus scan is completed if the host AP configuration data has changed. Signed-off-by: Tony Krowiak Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.c | 81 +++++++++++++++++++++++++++++++++++- drivers/s390/crypto/ap_bus.h | 12 ++++++ 2 files changed, 91 insertions(+), 2 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index d71d2d2c341f..f5fae8b62bdf 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -92,6 +92,7 @@ static atomic64_t ap_bindings_complete_count = ATOMIC64_INIT(0); static DECLARE_COMPLETION(ap_init_apqn_bindings_complete); static struct ap_config_info *ap_qci_info; +static struct ap_config_info *ap_qci_info_old; /* * AP bus related debug feature things.
@@ -229,9 +230,14 @@ static void __init ap_init_qci_info(void) ap_qci_info = kzalloc(sizeof(*ap_qci_info), GFP_KERNEL); if (!ap_qci_info) return; + ap_qci_info_old = kzalloc(sizeof(*ap_qci_info_old), GFP_KERNEL); + if (!ap_qci_info_old) + return; if (ap_fetch_qci_info(ap_qci_info) != 0) { kfree(ap_qci_info); + kfree(ap_qci_info_old); ap_qci_info = NULL; + ap_qci_info_old = NULL; return; } AP_DBF_INFO("%s successful fetched initial qci info\n", __func__); @@ -248,6 +254,8 @@ static void __init ap_init_qci_info(void) __func__, ap_max_domain_id); } } + + memcpy(ap_qci_info_old, ap_qci_info, sizeof(*ap_qci_info)); } /* @@ -1630,6 +1638,49 @@ static int __match_queue_device_with_queue_id(struct device *dev, const void *da && AP_QID_QUEUE(to_ap_queue(dev)->qid) == (int)(long) data; } +/* Helper function for notify_config_changed */ +static int __drv_notify_config_changed(struct device_driver *drv, void *data) +{ + struct ap_driver *ap_drv = to_ap_drv(drv); + + if (try_module_get(drv->owner)) { + if (ap_drv->on_config_changed) + ap_drv->on_config_changed(ap_qci_info, ap_qci_info_old); + module_put(drv->owner); + } + + return 0; +} + +/* Notify all drivers about an qci config change */ +static inline void notify_config_changed(void) +{ + bus_for_each_drv(&ap_bus_type, NULL, NULL, + __drv_notify_config_changed); +} + +/* Helper function for notify_scan_complete */ +static int __drv_notify_scan_complete(struct device_driver *drv, void *data) +{ + struct ap_driver *ap_drv = to_ap_drv(drv); + + if (try_module_get(drv->owner)) { + if (ap_drv->on_scan_complete) + ap_drv->on_scan_complete(ap_qci_info, + ap_qci_info_old); + module_put(drv->owner); + } + + return 0; +} + +/* Notify all drivers about bus scan complete */ +static inline void notify_scan_complete(void) +{ + bus_for_each_drv(&ap_bus_type, NULL, NULL, + __drv_notify_scan_complete); +} + /* * Helper function for ap_scan_bus(). * Remove card device and associated queue devices. @@ -1917,6 +1968,25 @@ static inline void ap_scan_adapter(int ap) put_device(&ac->ap_dev.device); } +/** + * ap_get_configuration - get the host AP configuration + * + * Stores the host AP configuration information returned from the previous call + * to Query Configuration Information (QCI), then retrieves and stores the + * current AP configuration returned from QCI. + * + * Return: true if the host AP configuration changed between calls to QCI; + * otherwise, return false. 
+ */ +static bool ap_get_configuration(void) +{ + memcpy(ap_qci_info_old, ap_qci_info, sizeof(*ap_qci_info)); + ap_fetch_qci_info(ap_qci_info); + + return memcmp(ap_qci_info, ap_qci_info_old, + sizeof(struct ap_config_info)) != 0; +} + /** * ap_scan_bus(): Scan the AP bus for new devices * Runs periodically, workqueue timer (ap_config_time) @@ -1924,9 +1994,12 @@ static inline void ap_scan_adapter(int ap) */ static void ap_scan_bus(struct work_struct *unused) { - int ap; + int ap, config_changed = 0; - ap_fetch_qci_info(ap_qci_info); + /* config change notify */ + config_changed = ap_get_configuration(); + if (config_changed) + notify_config_changed(); ap_select_domain(); AP_DBF_DBG("%s running\n", __func__); @@ -1935,6 +2008,10 @@ static void ap_scan_bus(struct work_struct *unused) for (ap = 0; ap <= ap_max_adapter_id; ap++) ap_scan_adapter(ap); + /* scan complete notify */ + if (config_changed) + notify_scan_complete(); + /* check if there is at least one queue available with default domain */ if (ap_domain_index >= 0) { struct device *dev = diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 7dd992bad949..dc6f563e8787 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -144,6 +144,18 @@ struct ap_driver { int (*probe)(struct ap_device *); void (*remove)(struct ap_device *); int (*in_use)(unsigned long *apm, unsigned long *aqm); + /* + * Called at the start of the ap bus scan function when + * the crypto config information (qci) has changed. + */ + void (*on_config_changed)(struct ap_config_info *new_config_info, + struct ap_config_info *old_config_info); + /* + * Called at the end of the ap bus scan function when + * the crypto config information (qci) has changed. + */ + void (*on_scan_complete)(struct ap_config_info *new_config_info, + struct ap_config_info *old_config_info); }; #define to_ap_drv(x) container_of((x), struct ap_driver, driver) From 8944d05f9bbf910c8b241e29a3de114900e31e42 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 16 Feb 2022 12:30:34 +0100 Subject: [PATCH 22/69] s390/ap: enable sysfs attribute scans to force AP bus rescan This patch switches the sysfs attribute /sys/bus/ap/scans from read-only to read-write. If there is something written to this attribute, an AP bus rescan is forced. If an AP bus scan is triggered this way a debug feature entry line reports this in /sys/kernel/debug/s390dbf/ap/sprintf. 
Signed-off-by: Harald Freudenberger Reviewed-by: Jakob Naucke Reviewed-by: Juergen Christ Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index f5fae8b62bdf..555cc3394fe3 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1472,7 +1472,17 @@ static ssize_t scans_show(struct bus_type *bus, char *buf) atomic64_read(&ap_scan_bus_count)); } -static BUS_ATTR_RO(scans); +static ssize_t scans_store(struct bus_type *bus, const char *buf, + size_t count) +{ + AP_DBF_INFO("%s force AP bus rescan\n", __func__); + + ap_bus_force_rescan(); + + return count; +} + +static BUS_ATTR_RW(scans); static ssize_t bindings_show(struct bus_type *bus, char *buf) { From 4851d2262236c27cafbecb0fd2440db1f1e726d3 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Tue, 15 Feb 2022 14:10:48 +0100 Subject: [PATCH 23/69] s390/smp: sort out physical vs virtual pointers usage With commit 5789284710aa ("s390/smp: reallocate IPL CPU lowcore") virtual addresses are wrongly passed to memblock_free_late() and SPX instructions on IPL CPU reinitialization. Note: this does not fix a bug currently, since virtual and physical addresses are identical. Fixes: 5789284710aa ("s390/smp: reallocate IPL CPU lowcore") Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- arch/s390/kernel/smp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 4f0e9f412f27..368b58e4c2e7 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1252,7 +1252,7 @@ static __always_inline void set_new_lowcore(struct lowcore *lc) src.odd = sizeof(S390_lowcore); dst.even = (unsigned long) lc; dst.odd = sizeof(*lc); - pfx = (unsigned long) lc; + pfx = __pa(lc); asm volatile( " mvcl %[dst],%[src]\n" @@ -1292,8 +1292,8 @@ static int __init smp_reinit_ipl_cpu(void) local_irq_restore(flags); free_pages(lc_ipl->async_stack - STACK_INIT_OFFSET, THREAD_SIZE_ORDER); - memblock_free_late(lc_ipl->mcck_stack - STACK_INIT_OFFSET, THREAD_SIZE); - memblock_free_late((unsigned long) lc_ipl, sizeof(*lc_ipl)); + memblock_free_late(__pa(lc_ipl->mcck_stack - STACK_INIT_OFFSET), THREAD_SIZE); + memblock_free_late(__pa(lc_ipl), sizeof(*lc_ipl)); return 0; } From 96f6641a6a284102fe9f52c9789e98f0a18ece11 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 17 Feb 2022 15:46:01 +0100 Subject: [PATCH 24/69] s390/ptrace: remove opencoded offsetof Remove opencoded offsetof and use offsetof instead. The generated code is identical before/after this change. Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/ptrace.c | 164 ++++++++++++++++++-------------------- 1 file changed, 76 insertions(+), 88 deletions(-) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 0ea3d02b378d..ed3439515bb2 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -147,38 +147,36 @@ void ptrace_disable(struct task_struct *task) static inline unsigned long __peek_user_per(struct task_struct *child, addr_t addr) { - struct per_struct_kernel *dummy = NULL; - - if (addr == (addr_t) &dummy->cr9) + if (addr == offsetof(struct per_struct_kernel, cr9)) /* Control bits of the active per set. */ return test_thread_flag(TIF_SINGLE_STEP) ? 
PER_EVENT_IFETCH : child->thread.per_user.control; - else if (addr == (addr_t) &dummy->cr10) + else if (addr == offsetof(struct per_struct_kernel, cr10)) /* Start address of the active per set. */ return test_thread_flag(TIF_SINGLE_STEP) ? 0 : child->thread.per_user.start; - else if (addr == (addr_t) &dummy->cr11) + else if (addr == offsetof(struct per_struct_kernel, cr11)) /* End address of the active per set. */ return test_thread_flag(TIF_SINGLE_STEP) ? -1UL : child->thread.per_user.end; - else if (addr == (addr_t) &dummy->bits) + else if (addr == offsetof(struct per_struct_kernel, bits)) /* Single-step bit. */ return test_thread_flag(TIF_SINGLE_STEP) ? (1UL << (BITS_PER_LONG - 1)) : 0; - else if (addr == (addr_t) &dummy->starting_addr) + else if (addr == offsetof(struct per_struct_kernel, starting_addr)) /* Start address of the user specified per set. */ return child->thread.per_user.start; - else if (addr == (addr_t) &dummy->ending_addr) + else if (addr == offsetof(struct per_struct_kernel, ending_addr)) /* End address of the user specified per set. */ return child->thread.per_user.end; - else if (addr == (addr_t) &dummy->perc_atmid) + else if (addr == offsetof(struct per_struct_kernel, perc_atmid)) /* PER code, ATMID and AI of the last PER trap */ return (unsigned long) child->thread.per_event.cause << (BITS_PER_LONG - 16); - else if (addr == (addr_t) &dummy->address) + else if (addr == offsetof(struct per_struct_kernel, address)) /* Address of the last PER trap */ return child->thread.per_event.address; - else if (addr == (addr_t) &dummy->access_id) + else if (addr == offsetof(struct per_struct_kernel, access_id)) /* Access id of the last PER trap */ return (unsigned long) child->thread.per_event.paid << (BITS_PER_LONG - 8); @@ -196,61 +194,60 @@ static inline unsigned long __peek_user_per(struct task_struct *child, */ static unsigned long __peek_user(struct task_struct *child, addr_t addr) { - struct user *dummy = NULL; addr_t offset, tmp; - if (addr < (addr_t) &dummy->regs.acrs) { + if (addr < offsetof(struct user, regs.acrs)) { /* * psw and gprs are stored on the stack */ tmp = *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr); - if (addr == (addr_t) &dummy->regs.psw.mask) { + if (addr == offsetof(struct user, regs.psw.mask)) { /* Return a clean psw mask. */ tmp &= PSW_MASK_USER | PSW_MASK_RI; tmp |= PSW_USER_BITS; } - } else if (addr < (addr_t) &dummy->regs.orig_gpr2) { + } else if (addr < offsetof(struct user, regs.orig_gpr2)) { /* * access registers are stored in the thread structure */ - offset = addr - (addr_t) &dummy->regs.acrs; + offset = addr - offsetof(struct user, regs.acrs); /* * Very special case: old & broken 64 bit gdb reading * from acrs[15]. Result is a 64 bit value. Read the * 32 bit acrs[15] value and shift it by 32. Sick... */ - if (addr == (addr_t) &dummy->regs.acrs[15]) + if (addr == offsetof(struct user, regs.acrs[15])) tmp = ((unsigned long) child->thread.acrs[15]) << 32; else tmp = *(addr_t *)((addr_t) &child->thread.acrs + offset); - } else if (addr == (addr_t) &dummy->regs.orig_gpr2) { + } else if (addr == offsetof(struct user, regs.orig_gpr2)) { /* * orig_gpr2 is stored on the kernel stack */ tmp = (addr_t) task_pt_regs(child)->orig_gpr2; - } else if (addr < (addr_t) &dummy->regs.fp_regs) { + } else if (addr < offsetof(struct user, regs.fp_regs)) { /* * prevent reads of padding hole between * orig_gpr2 and fp_regs on s390. 
*/ tmp = 0; - } else if (addr == (addr_t) &dummy->regs.fp_regs.fpc) { + } else if (addr == offsetof(struct user, regs.fp_regs.fpc)) { /* * floating point control reg. is in the thread structure */ tmp = child->thread.fpu.fpc; tmp <<= BITS_PER_LONG - 32; - } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) { + } else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) { /* * floating point regs. are either in child->thread.fpu * or the child->thread.fpu.vxrs array */ - offset = addr - (addr_t) &dummy->regs.fp_regs.fprs; + offset = addr - offsetof(struct user, regs.fp_regs.fprs); if (MACHINE_HAS_VX) tmp = *(addr_t *) ((addr_t) child->thread.fpu.vxrs + 2*offset); @@ -258,11 +255,11 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr) tmp = *(addr_t *) ((addr_t) child->thread.fpu.fprs + offset); - } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) { + } else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) { /* * Handle access to the per_info structure. */ - addr -= (addr_t) &dummy->regs.per_info; + addr -= offsetof(struct user, regs.per_info); tmp = __peek_user_per(child, addr); } else @@ -281,8 +278,8 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data) * an alignment of 4. Programmers from hell... */ mask = __ADDR_MASK; - if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs && - addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2) + if (addr >= offsetof(struct user, regs.acrs) && + addr < offsetof(struct user, regs.orig_gpr2)) mask = 3; if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK) return -EIO; @@ -294,8 +291,6 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data) static inline void __poke_user_per(struct task_struct *child, addr_t addr, addr_t data) { - struct per_struct_kernel *dummy = NULL; - /* * There are only three fields in the per_info struct that the * debugger user can write to. @@ -308,14 +303,14 @@ static inline void __poke_user_per(struct task_struct *child, * addresses are used only if single stepping is not in effect. * Writes to any other field in per_info are ignored. */ - if (addr == (addr_t) &dummy->cr9) + if (addr == offsetof(struct per_struct_kernel, cr9)) /* PER event mask of the user specified per set. */ child->thread.per_user.control = data & (PER_EVENT_MASK | PER_CONTROL_MASK); - else if (addr == (addr_t) &dummy->starting_addr) + else if (addr == offsetof(struct per_struct_kernel, starting_addr)) /* Starting address of the user specified per set. */ child->thread.per_user.start = data; - else if (addr == (addr_t) &dummy->ending_addr) + else if (addr == offsetof(struct per_struct_kernel, ending_addr)) /* Ending address of the user specified per set. */ child->thread.per_user.end = data; } @@ -328,16 +323,15 @@ static inline void __poke_user_per(struct task_struct *child, */ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) { - struct user *dummy = NULL; addr_t offset; - if (addr < (addr_t) &dummy->regs.acrs) { + if (addr < offsetof(struct user, regs.acrs)) { struct pt_regs *regs = task_pt_regs(child); /* * psw and gprs are stored on the stack */ - if (addr == (addr_t) &dummy->regs.psw.mask) { + if (addr == offsetof(struct user, regs.psw.mask)) { unsigned long mask = PSW_MASK_USER; mask |= is_ri_task(child) ? 
PSW_MASK_RI : 0; @@ -359,36 +353,36 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) regs->int_code = 0x20000 | (data & 0xffff); } *(addr_t *)((addr_t) ®s->psw + addr) = data; - } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { + } else if (addr < offsetof(struct user, regs.orig_gpr2)) { /* * access registers are stored in the thread structure */ - offset = addr - (addr_t) &dummy->regs.acrs; + offset = addr - offsetof(struct user, regs.acrs); /* * Very special case: old & broken 64 bit gdb writing * to acrs[15] with a 64 bit value. Ignore the lower * half of the value and write the upper 32 bit to * acrs[15]. Sick... */ - if (addr == (addr_t) &dummy->regs.acrs[15]) + if (addr == offsetof(struct user, regs.acrs[15])) child->thread.acrs[15] = (unsigned int) (data >> 32); else *(addr_t *)((addr_t) &child->thread.acrs + offset) = data; - } else if (addr == (addr_t) &dummy->regs.orig_gpr2) { + } else if (addr == offsetof(struct user, regs.orig_gpr2)) { /* * orig_gpr2 is stored on the kernel stack */ task_pt_regs(child)->orig_gpr2 = data; - } else if (addr < (addr_t) &dummy->regs.fp_regs) { + } else if (addr < offsetof(struct user, regs.fp_regs)) { /* * prevent writes of padding hole between * orig_gpr2 and fp_regs on s390. */ return 0; - } else if (addr == (addr_t) &dummy->regs.fp_regs.fpc) { + } else if (addr == offsetof(struct user, regs.fp_regs.fpc)) { /* * floating point control reg. is in the thread structure */ @@ -397,12 +391,12 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) return -EINVAL; child->thread.fpu.fpc = data >> (BITS_PER_LONG - 32); - } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) { + } else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) { /* * floating point regs. are either in child->thread.fpu * or the child->thread.fpu.vxrs array */ - offset = addr - (addr_t) &dummy->regs.fp_regs.fprs; + offset = addr - offsetof(struct user, regs.fp_regs.fprs); if (MACHINE_HAS_VX) *(addr_t *)((addr_t) child->thread.fpu.vxrs + 2*offset) = data; @@ -410,11 +404,11 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) *(addr_t *)((addr_t) child->thread.fpu.fprs + offset) = data; - } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) { + } else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) { /* * Handle access to the per_info structure. */ - addr -= (addr_t) &dummy->regs.per_info; + addr -= offsetof(struct user, regs.per_info); __poke_user_per(child, addr, data); } @@ -431,8 +425,8 @@ static int poke_user(struct task_struct *child, addr_t addr, addr_t data) * an alignment of 4. Programmers from hell indeed... */ mask = __ADDR_MASK; - if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs && - addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2) + if (addr >= offsetof(struct user, regs.acrs) && + addr < offsetof(struct user, regs.orig_gpr2)) mask = 3; if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK) return -EIO; @@ -540,37 +534,35 @@ long arch_ptrace(struct task_struct *child, long request, static inline __u32 __peek_user_per_compat(struct task_struct *child, addr_t addr) { - struct compat_per_struct_kernel *dummy32 = NULL; - - if (addr == (addr_t) &dummy32->cr9) + if (addr == offsetof(struct compat_per_struct_kernel, cr9)) /* Control bits of the active per set. */ return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? 
PER_EVENT_IFETCH : child->thread.per_user.control; - else if (addr == (addr_t) &dummy32->cr10) + else if (addr == offsetof(struct compat_per_struct_kernel, cr10)) /* Start address of the active per set. */ return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? 0 : child->thread.per_user.start; - else if (addr == (addr_t) &dummy32->cr11) + else if (addr == offsetof(struct compat_per_struct_kernel, cr11)) /* End address of the active per set. */ return test_thread_flag(TIF_SINGLE_STEP) ? PSW32_ADDR_INSN : child->thread.per_user.end; - else if (addr == (addr_t) &dummy32->bits) + else if (addr == offsetof(struct compat_per_struct_kernel, bits)) /* Single-step bit. */ return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? 0x80000000 : 0; - else if (addr == (addr_t) &dummy32->starting_addr) + else if (addr == offsetof(struct compat_per_struct_kernel, starting_addr)) /* Start address of the user specified per set. */ return (__u32) child->thread.per_user.start; - else if (addr == (addr_t) &dummy32->ending_addr) + else if (addr == offsetof(struct compat_per_struct_kernel, ending_addr)) /* End address of the user specified per set. */ return (__u32) child->thread.per_user.end; - else if (addr == (addr_t) &dummy32->perc_atmid) + else if (addr == offsetof(struct compat_per_struct_kernel, perc_atmid)) /* PER code, ATMID and AI of the last PER trap */ return (__u32) child->thread.per_event.cause << 16; - else if (addr == (addr_t) &dummy32->address) + else if (addr == offsetof(struct compat_per_struct_kernel, address)) /* Address of the last PER trap */ return (__u32) child->thread.per_event.address; - else if (addr == (addr_t) &dummy32->access_id) + else if (addr == offsetof(struct compat_per_struct_kernel, access_id)) /* Access id of the last PER trap */ return (__u32) child->thread.per_event.paid << 24; return 0; @@ -581,21 +573,20 @@ static inline __u32 __peek_user_per_compat(struct task_struct *child, */ static u32 __peek_user_compat(struct task_struct *child, addr_t addr) { - struct compat_user *dummy32 = NULL; addr_t offset; __u32 tmp; - if (addr < (addr_t) &dummy32->regs.acrs) { + if (addr < offsetof(struct compat_user, regs.acrs)) { struct pt_regs *regs = task_pt_regs(child); /* * psw and gprs are stored on the stack */ - if (addr == (addr_t) &dummy32->regs.psw.mask) { + if (addr == offsetof(struct compat_user, regs.psw.mask)) { /* Fake a 31 bit psw mask. */ tmp = (__u32)(regs->psw.mask >> 32); tmp &= PSW32_MASK_USER | PSW32_MASK_RI; tmp |= PSW32_USER_BITS; - } else if (addr == (addr_t) &dummy32->regs.psw.addr) { + } else if (addr == offsetof(struct compat_user, regs.psw.addr)) { /* Fake a 31 bit psw address. 
*/ tmp = (__u32) regs->psw.addr | (__u32)(regs->psw.mask & PSW_MASK_BA); @@ -603,38 +594,38 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr) /* gpr 0-15 */ tmp = *(__u32 *)((addr_t) ®s->psw + addr*2 + 4); } - } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { + } else if (addr < offsetof(struct compat_user, regs.orig_gpr2)) { /* * access registers are stored in the thread structure */ - offset = addr - (addr_t) &dummy32->regs.acrs; + offset = addr - offsetof(struct compat_user, regs.acrs); tmp = *(__u32*)((addr_t) &child->thread.acrs + offset); - } else if (addr == (addr_t) (&dummy32->regs.orig_gpr2)) { + } else if (addr == offsetof(struct compat_user, regs.orig_gpr2)) { /* * orig_gpr2 is stored on the kernel stack */ tmp = *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4); - } else if (addr < (addr_t) &dummy32->regs.fp_regs) { + } else if (addr < offsetof(struct compat_user, regs.fp_regs)) { /* * prevent reads of padding hole between * orig_gpr2 and fp_regs on s390. */ tmp = 0; - } else if (addr == (addr_t) &dummy32->regs.fp_regs.fpc) { + } else if (addr == offsetof(struct compat_user, regs.fp_regs.fpc)) { /* * floating point control reg. is in the thread structure */ tmp = child->thread.fpu.fpc; - } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) { + } else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) { /* * floating point regs. are either in child->thread.fpu * or the child->thread.fpu.vxrs array */ - offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs; + offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs); if (MACHINE_HAS_VX) tmp = *(__u32 *) ((addr_t) child->thread.fpu.vxrs + 2*offset); @@ -642,11 +633,11 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr) tmp = *(__u32 *) ((addr_t) child->thread.fpu.fprs + offset); - } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) { + } else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) { /* * Handle access to the per_info structure. */ - addr -= (addr_t) &dummy32->regs.per_info; + addr -= offsetof(struct compat_user, regs.per_info); tmp = __peek_user_per_compat(child, addr); } else @@ -673,16 +664,14 @@ static int peek_user_compat(struct task_struct *child, static inline void __poke_user_per_compat(struct task_struct *child, addr_t addr, __u32 data) { - struct compat_per_struct_kernel *dummy32 = NULL; - - if (addr == (addr_t) &dummy32->cr9) + if (addr == offsetof(struct compat_per_struct_kernel, cr9)) /* PER event mask of the user specified per set. */ child->thread.per_user.control = data & (PER_EVENT_MASK | PER_CONTROL_MASK); - else if (addr == (addr_t) &dummy32->starting_addr) + else if (addr == offsetof(struct compat_per_struct_kernel, starting_addr)) /* Starting address of the user specified per set. */ child->thread.per_user.start = data; - else if (addr == (addr_t) &dummy32->ending_addr) + else if (addr == offsetof(struct compat_per_struct_kernel, ending_addr)) /* Ending address of the user specified per set. 
*/ child->thread.per_user.end = data; } @@ -693,16 +682,15 @@ static inline void __poke_user_per_compat(struct task_struct *child, static int __poke_user_compat(struct task_struct *child, addr_t addr, addr_t data) { - struct compat_user *dummy32 = NULL; __u32 tmp = (__u32) data; addr_t offset; - if (addr < (addr_t) &dummy32->regs.acrs) { + if (addr < offsetof(struct compat_user, regs.acrs)) { struct pt_regs *regs = task_pt_regs(child); /* * psw, gprs, acrs and orig_gpr2 are stored on the stack */ - if (addr == (addr_t) &dummy32->regs.psw.mask) { + if (addr == offsetof(struct compat_user, regs.psw.mask)) { __u32 mask = PSW32_MASK_USER; mask |= is_ri_task(child) ? PSW32_MASK_RI : 0; @@ -716,7 +704,7 @@ static int __poke_user_compat(struct task_struct *child, regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | (regs->psw.mask & PSW_MASK_BA) | (__u64)(tmp & mask) << 32; - } else if (addr == (addr_t) &dummy32->regs.psw.addr) { + } else if (addr == offsetof(struct compat_user, regs.psw.addr)) { /* Build a 64 bit psw address from 31 bit address. */ regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN; /* Transfer 31 bit amode bit to psw mask. */ @@ -732,27 +720,27 @@ static int __poke_user_compat(struct task_struct *child, /* gpr 0-15 */ *(__u32*)((addr_t) ®s->psw + addr*2 + 4) = tmp; } - } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { + } else if (addr < offsetof(struct compat_user, regs.orig_gpr2)) { /* * access registers are stored in the thread structure */ - offset = addr - (addr_t) &dummy32->regs.acrs; + offset = addr - offsetof(struct compat_user, regs.acrs); *(__u32*)((addr_t) &child->thread.acrs + offset) = tmp; - } else if (addr == (addr_t) (&dummy32->regs.orig_gpr2)) { + } else if (addr == offsetof(struct compat_user, regs.orig_gpr2)) { /* * orig_gpr2 is stored on the kernel stack */ *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4) = tmp; - } else if (addr < (addr_t) &dummy32->regs.fp_regs) { + } else if (addr < offsetof(struct compat_user, regs.fp_regs)) { /* * prevent writess of padding hole between * orig_gpr2 and fp_regs on s390. */ return 0; - } else if (addr == (addr_t) &dummy32->regs.fp_regs.fpc) { + } else if (addr == offsetof(struct compat_user, regs.fp_regs.fpc)) { /* * floating point control reg. is in the thread structure */ @@ -760,12 +748,12 @@ static int __poke_user_compat(struct task_struct *child, return -EINVAL; child->thread.fpu.fpc = data; - } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) { + } else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) { /* * floating point regs. are either in child->thread.fpu * or the child->thread.fpu.vxrs array */ - offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs; + offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs); if (MACHINE_HAS_VX) *(__u32 *)((addr_t) child->thread.fpu.vxrs + 2*offset) = tmp; @@ -773,11 +761,11 @@ static int __poke_user_compat(struct task_struct *child, *(__u32 *)((addr_t) child->thread.fpu.fprs + offset) = tmp; - } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) { + } else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) { /* * Handle access to the per_info structure. 
*/ - addr -= (addr_t) &dummy32->regs.per_info; + addr -= offsetof(struct compat_user, regs.per_info); __poke_user_per_compat(child, addr, data); } From 1a5e3f262e0310365cf7b5c8b8fc3a6e94a19cb7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 22 Feb 2022 15:27:52 +0100 Subject: [PATCH 25/69] s390/ftrace: make use of epsw to get psw mask Finally use epsw to create a complete psw mask within pt_regs. Without this only some bits are correct, while other bits are (incorrectly) always zero. The epsw instruction is quite heavyweight; however, given that this only affects ftrace_regs_caller, this seems to be the right thing, so we finally get a complete psw mask for ftrace kprobed functions. Reviewed-by: Sven Schnelle Acked-by: Ilya Leoshkevich Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/mcount.S | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index b88205224f3c..6ace43d7e8d2 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -33,9 +33,16 @@ ENDPROC(ftrace_stub) stg %r14,(__SF_GPRS+8*8)(%r15) # save traced function caller .if \allregs == 1 - lghi %r14,0 # save condition code - ipm %r14 # don't put any instructions - sllg %r14,%r14,16 # clobbering CC before this point + # save psw mask + # don't put any instructions clobbering CC before this point +#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + epsw %r1,%r14 + risbg %r14,%r1,0,31,32 +#else + epsw %r14,%r1 + sllg %r14,%r14,32 + lr %r14,%r1 +#endif .endif lgr %r1,%r15 @@ -51,7 +58,6 @@ ENDPROC(ftrace_stub) .if \allregs == 1 stg %r14,(STACK_PTREGS_PSW)(%r15) - stosm (STACK_PTREGS_PSW)(%r15),0 #ifdef CONFIG_HAVE_MARCH_Z10_FEATURES mvghi STACK_PTREGS_FLAGS(%r15),_PIF_FTRACE_FULL_REGS #else From f0003a9e4c18ae71308fb6b24de8248ac6180777 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 25 Feb 2022 09:41:24 +0100 Subject: [PATCH 26/69] s390/entry: remove unused expoline thunk Remove __s390_indirect_jump_r13use_r14 expoline thunk unused since commit fbbdfca5c553 ("s390/entry.S: factor out SIEEXIT macro"). Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index dc7347e43ec2..1f6df6d4a914 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -190,7 +190,6 @@ _LPP_OFFSET = __LC_LPP #endif GEN_BR_THUNK %r14 - GEN_BR_THUNK %r14,%r13 .section .kprobes.text, "ax" .Ldummy: From e2aaae2d3677563c1bb6cd15fbddd701381823be Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 10 Feb 2022 16:08:29 +0100 Subject: [PATCH 27/69] s390/mm: add set_pXd()/set_pte() helper functions Add set_pXd()/set_pte() helper functions which must be used to update page table entries. The new helpers use WRITE_ONCE() to make sure that a page table entry is written to only once. Without this the compiler could otherwise generate code which writes several times to a page table entry when updating its contents from invalid to valid, which could lead to surprising results especially for multithreaded processes...
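As a hedged illustration of the failure mode described above (a sketch in kernel context, not part of the patch; the caller, its arguments and the bit layout are hypothetical): building an entry through the live location lets the compiler legally emit more than one store, whereas funnelling the final value through the new WRITE_ONCE()-based helper publishes it with a single store.

/* Hypothetical caller, for illustration only. */
static void sketch_map_page(pte_t *ptep, unsigned long pfn, pgprot_t prot)
{
	pte_t pte;

	/*
	 * Risky pattern: updating the live entry in place; the compiler may
	 * split this into several partial stores, transiently exposing a
	 * half-built but seemingly valid entry to concurrent walkers:
	 *
	 *	pte_val(*ptep) = (pfn << PAGE_SHIFT) | pgprot_val(prot);
	 */

	/* Pattern the new helpers enforce: build locally, publish once. */
	pte = __pte((pfn << PAGE_SHIFT) | pgprot_val(prot));
	set_pte(ptep, pte);
}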
Reviewed-by: Claudio Imbrenda Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pgtable.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 008a6c856fa4..2aa8057c7ef8 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -881,6 +881,31 @@ static inline pgprot_t pte_pgprot(pte_t pte) * pgd/pmd/pte modification functions */ +static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) +{ + WRITE_ONCE(*pgdp, pgd); +} + +static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) +{ + WRITE_ONCE(*p4dp, p4d); +} + +static inline void set_pud(pud_t *pudp, pud_t pud) +{ + WRITE_ONCE(*pudp, pud); +} + +static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + WRITE_ONCE(*pmdp, pmd); +} + +static inline void set_pte(pte_t *ptep, pte_t pte) +{ + WRITE_ONCE(*ptep, pte); +} + static inline void pgd_clear(pgd_t *pgd) { if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1) From f29111f117aaafa2887e13923f6e88fc11f5e065 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 21 Feb 2022 21:18:29 +0100 Subject: [PATCH 28/69] s390/mm: add set_pte_bit()/clear_pte_bit() helper functions Add set_pte_bit()/clear_pte_bit() and set_pXd_bit()/clear_pXd_bit helper functions which are supposed to be used if bits within ptes/pXds are set/cleared. The only point of these helper functions is to get more readable code. This is quite similar to what arm64 has. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pgtable.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 2aa8057c7ef8..4bbb6d38365f 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -538,6 +538,36 @@ static inline int mm_alloc_pgste(struct mm_struct *mm) return 0; } +static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot) +{ + return __pte(pte_val(pte) & ~pgprot_val(prot)); +} + +static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot) +{ + return __pte(pte_val(pte) | pgprot_val(prot)); +} + +static inline pmd_t clear_pmd_bit(pmd_t pmd, pgprot_t prot) +{ + return __pmd(pmd_val(pmd) & ~pgprot_val(prot)); +} + +static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot) +{ + return __pmd(pmd_val(pmd) | pgprot_val(prot)); +} + +static inline pud_t clear_pud_bit(pud_t pud, pgprot_t prot) +{ + return __pud(pud_val(pud) & ~pgprot_val(prot)); +} + +static inline pud_t set_pud_bit(pud_t pud, pgprot_t prot) +{ + return __pud(pud_val(pud) | pgprot_val(prot)); +} + /* * In the case that a guest uses storage keys * faults should no longer be backed by zero pages From b8e3b37900a57f9e7b32a92cb16313b32c41c0db Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 21 Feb 2022 20:50:07 +0100 Subject: [PATCH 29/69] s390/mm: use set_pXd()/set_pte() helper functions everywhere Use the new set_pXd()/set_pte() helper functions at all places where page table entries are modified. 
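A short, hedged sketch of the idiom the conversion below follows (kernel context assumed; the function name and the chosen bits are made up for illustration): the *_bit() helpers from the previous patch transform a pte value, and the set_*() helpers publish the result with a single write.

/* Hypothetical illustration of combining the helpers. */
static void sketch_mkold_and_protect(pte_t *ptep)
{
	pte_t pte = *ptep;

	pte = clear_pte_bit(pte, __pgprot(_PAGE_YOUNG));	/* drop the young bit */
	pte = set_pte_bit(pte, __pgprot(_PAGE_PROTECT));	/* mark write-protected */
	set_pte(ptep, pte);					/* one WRITE_ONCE() store */
}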
Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/hugetlb.h | 4 ++-- arch/s390/include/asm/pgalloc.h | 8 ++++---- arch/s390/include/asm/pgtable.h | 18 +++++++++--------- arch/s390/mm/gmap.c | 12 ++++++------ arch/s390/mm/hugetlbpage.c | 2 +- arch/s390/mm/kasan_init.c | 8 ++++---- arch/s390/mm/pageattr.c | 17 +++++++++-------- arch/s390/mm/pgtable.c | 22 +++++++++++----------- arch/s390/mm/vmem.c | 10 +++++----- 9 files changed, 51 insertions(+), 50 deletions(-) diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 60f9241e5e4a..bea47e7cc6a0 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -45,9 +45,9 @@ static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long sz) { if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) - pte_val(*ptep) = _REGION3_ENTRY_EMPTY; + set_pte(ptep, __pte(_REGION3_ENTRY_EMPTY)); else - pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY; + set_pte(ptep, __pte(_SEGMENT_ENTRY_EMPTY)); } static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index f14a555eff74..17eb618f1348 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -103,17 +103,17 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) { - pgd_val(*pgd) = _REGION1_ENTRY | __pa(p4d); + set_pgd(pgd, __pgd(_REGION1_ENTRY | __pa(p4d))); } static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) { - p4d_val(*p4d) = _REGION2_ENTRY | __pa(pud); + set_p4d(p4d, __p4d(_REGION2_ENTRY | __pa(pud))); } static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { - pud_val(*pud) = _REGION3_ENTRY | __pa(pmd); + set_pud(pud, __pud(_REGION3_ENTRY | __pa(pmd))); } static inline pgd_t *pgd_alloc(struct mm_struct *mm) @@ -129,7 +129,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte) { - pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte); + set_pmd(pmd, __pmd(_SEGMENT_ENTRY | __pa(pte))); } #define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 4bbb6d38365f..2ea58513da97 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -939,29 +939,29 @@ static inline void set_pte(pte_t *ptep, pte_t pte) static inline void pgd_clear(pgd_t *pgd) { if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1) - pgd_val(*pgd) = _REGION1_ENTRY_EMPTY; + set_pgd(pgd, __pgd(_REGION1_ENTRY_EMPTY)); } static inline void p4d_clear(p4d_t *p4d) { if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) - p4d_val(*p4d) = _REGION2_ENTRY_EMPTY; + set_p4d(p4d, __p4d(_REGION2_ENTRY_EMPTY)); } static inline void pud_clear(pud_t *pud) { if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) - pud_val(*pud) = _REGION3_ENTRY_EMPTY; + set_pud(pud, __pud(_REGION3_ENTRY_EMPTY)); } static inline void pmd_clear(pmd_t *pmdp) { - pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; + set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); } static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_val(*ptep) = _PAGE_INVALID; + set_pte(ptep, __pte(_PAGE_INVALID)); } /* @@ 
-1169,7 +1169,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, if (full) { res = *ptep; - *ptep = __pte(_PAGE_INVALID); + set_pte(ptep, __pte(_PAGE_INVALID)); } else { res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID)); } @@ -1257,7 +1257,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, if (mm_has_pgste(mm)) ptep_set_pte_at(mm, addr, ptep, entry); else - *ptep = entry; + set_pte(ptep, entry); } /* @@ -1641,7 +1641,7 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, { if (!MACHINE_HAS_NX) pmd_val(entry) &= ~_SEGMENT_ENTRY_NOEXEC; - *pmdp = entry; + set_pmd(pmdp, entry); } static inline pmd_t pmd_mkhuge(pmd_t pmd) @@ -1666,7 +1666,7 @@ static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma, { if (full) { pmd_t pmd = *pmdp; - *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); + set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); return pmd; } return pmdp_xchg_lazy(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index dfee0ebb2fac..8cdc77fee41c 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -985,7 +985,7 @@ static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr, } if (bits & GMAP_NOTIFY_MPROT) - pmd_val(*pmdp) |= _SEGMENT_ENTRY_GMAP_IN; + set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN))); /* Shadow GMAP protection needs split PMDs */ if (bits & GMAP_NOTIFY_SHADOW) @@ -1151,7 +1151,7 @@ int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val) address = pte_val(pte) & PAGE_MASK; address += gaddr & ~PAGE_MASK; *val = *(unsigned long *) address; - pte_val(*ptep) |= _PAGE_YOUNG; + set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_YOUNG))); /* Do *NOT* clear the _PAGE_INVALID bit! 
*/ rc = 0; } @@ -2275,7 +2275,7 @@ EXPORT_SYMBOL_GPL(ptep_notify); static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp, unsigned long gaddr) { - pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_IN; + set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN))); gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1); } @@ -2302,7 +2302,7 @@ static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new, __pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL); else __pmdp_csp(pmdp); - *pmdp = new; + set_pmd(pmdp, new); } static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr, @@ -2324,7 +2324,7 @@ static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr, _SEGMENT_ENTRY_GMAP_UC)); if (purge) __pmdp_csp(pmdp); - pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; + set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); } spin_unlock(&gmap->guest_table_lock); } @@ -2447,7 +2447,7 @@ static bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp, return false; /* Clear UC indication and reset protection */ - pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_UC; + set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_UC))); gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0); return true; } diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 082793d497ec..082b72d29bb5 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -168,7 +168,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, rste |= _SEGMENT_ENTRY_LARGE; clear_huge_pte_skeys(mm, rste); - pte_val(*ptep) = rste; + set_pte(ptep, __pte(rste)); } pte_t huge_ptep_get(pte_t *ptep) diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index 483b9dbe0970..9f988d4582ed 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -175,7 +175,7 @@ static void __init kasan_early_pgtable_populate(unsigned long address, page = kasan_early_alloc_segment(); memset(page, 0, _SEGMENT_SIZE); } - pmd_val(*pm_dir) = __pa(page) | sgt_prot; + set_pmd(pm_dir, __pmd(__pa(page) | sgt_prot)); address = (address + PMD_SIZE) & PMD_MASK; continue; } @@ -194,16 +194,16 @@ static void __init kasan_early_pgtable_populate(unsigned long address, switch (mode) { case POPULATE_ONE2ONE: page = (void *)address; - pte_val(*pt_dir) = __pa(page) | pgt_prot; + set_pte(pt_dir, __pte(__pa(page) | pgt_prot)); break; case POPULATE_MAP: page = kasan_early_alloc_pages(0); memset(page, 0, PAGE_SIZE); - pte_val(*pt_dir) = __pa(page) | pgt_prot; + set_pte(pt_dir, __pte(__pa(page) | pgt_prot)); break; case POPULATE_ZERO_SHADOW: page = kasan_early_shadow_page; - pte_val(*pt_dir) = __pa(page) | pgt_prot_zero; + set_pte(pt_dir, __pte(__pa(page) | pgt_prot_zero)); break; case POPULATE_SHALLOW: /* should never happen */ diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 654019181a37..2959bfa0eb54 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -127,7 +127,7 @@ static int split_pmd_page(pmd_t *pmdp, unsigned long addr) prot &= ~_PAGE_NOEXEC; ptep = pt_dir; for (i = 0; i < PTRS_PER_PTE; i++) { - pte_val(*ptep) = pte_addr | prot; + set_pte(ptep, __pte(pte_addr | prot)); pte_addr += PAGE_SIZE; ptep++; } @@ -208,7 +208,7 @@ static int split_pud_page(pud_t *pudp, unsigned long addr) prot &= ~_SEGMENT_ENTRY_NOEXEC; pmdp = pm_dir; for (i = 0; i < PTRS_PER_PMD; i++) { - pmd_val(*pmdp) = pmd_addr | prot; + set_pmd(pmdp, __pmd(pmd_addr | prot)); pmd_addr += PMD_SIZE; pmdp++; } @@ -347,23 +347,24 @@ static void ipte_range(pte_t *pte, unsigned long address, 
int nr) void __kernel_map_pages(struct page *page, int numpages, int enable) { unsigned long address; + pte_t *ptep, pte; int nr, i, j; - pte_t *pte; for (i = 0; i < numpages;) { address = (unsigned long)page_to_virt(page + i); - pte = virt_to_kpte(address); - nr = (unsigned long)pte >> ilog2(sizeof(long)); + ptep = virt_to_kpte(address); + nr = (unsigned long)ptep >> ilog2(sizeof(long)); nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1)); nr = min(numpages - i, nr); if (enable) { for (j = 0; j < nr; j++) { - pte_val(*pte) &= ~_PAGE_INVALID; + pte = clear_pte_bit(*ptep, __pgprot(_PAGE_INVALID)); + set_pte(ptep, pte); address += PAGE_SIZE; - pte++; + ptep++; } } else { - ipte_range(pte, address, nr); + ipte_range(ptep, address, nr); } i += nr; } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index c16232cd0ec5..26f8a53f0ae1 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -115,7 +115,7 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm, atomic_inc(&mm->context.flush_count); if (cpumask_equal(&mm->context.cpu_attach_mask, cpumask_of(smp_processor_id()))) { - pte_val(*ptep) |= _PAGE_INVALID; + set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_INVALID))); mm->context.flush_mm = 1; } else ptep_ipte_global(mm, addr, ptep, nodat); @@ -232,7 +232,7 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) pgste_val(pgste) |= PGSTE_UC_BIT; } #endif - *ptep = entry; + set_pte(ptep, entry); return pgste; } @@ -280,7 +280,7 @@ static inline pte_t ptep_xchg_commit(struct mm_struct *mm, pgste = pgste_set_pte(ptep, pgste, new); pgste_set_unlock(ptep, pgste); } else { - *ptep = new; + set_pte(ptep, new); } return old; } @@ -352,7 +352,7 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pgste = pgste_set_pte(ptep, pgste, pte); pgste_set_unlock(ptep, pgste); } else { - *ptep = pte; + set_pte(ptep, pte); } preempt_enable(); } @@ -417,7 +417,7 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, atomic_inc(&mm->context.flush_count); if (cpumask_equal(&mm->context.cpu_attach_mask, cpumask_of(smp_processor_id()))) { - pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; + set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_INVALID))); mm->context.flush_mm = 1; if (mm_has_pgste(mm)) gmap_pmdp_invalidate(mm, addr); @@ -469,7 +469,7 @@ pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr, preempt_disable(); old = pmdp_flush_direct(mm, addr, pmdp); - *pmdp = new; + set_pmd(pmdp, new); preempt_enable(); return old; } @@ -482,7 +482,7 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr, preempt_disable(); old = pmdp_flush_lazy(mm, addr, pmdp); - *pmdp = new; + set_pmd(pmdp, new); preempt_enable(); return old; } @@ -539,7 +539,7 @@ pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr, preempt_disable(); old = pudp_flush_direct(mm, addr, pudp); - *pudp = new; + set_pud(pudp, new); preempt_enable(); return old; } @@ -579,9 +579,9 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) list_del(lh); } ptep = (pte_t *) pgtable; - pte_val(*ptep) = _PAGE_INVALID; + set_pte(ptep, __pte(_PAGE_INVALID)); ptep++; - pte_val(*ptep) = _PAGE_INVALID; + set_pte(ptep, __pte(_PAGE_INVALID)); return pgtable; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -776,7 +776,7 @@ bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr, pte_val(pte) |= _PAGE_PROTECT; else pte_val(pte) |= _PAGE_INVALID; - *ptep = pte; + set_pte(ptep, pte); } pgste_set_unlock(ptep, pgste); return dirty; diff 
--git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 5410775639c5..72c525ce7221 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -174,9 +174,9 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr, if (!new_page) goto out; - pte_val(*pte) = __pa(new_page) | prot; + set_pte(pte, __pte(__pa(new_page) | prot)); } else { - pte_val(*pte) = __pa(addr) | prot; + set_pte(pte, __pte(__pa(addr) | prot)); } } else { continue; @@ -242,7 +242,7 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, IS_ALIGNED(next, PMD_SIZE) && MACHINE_HAS_EDAT1 && addr && direct && !debug_pagealloc_enabled()) { - pmd_val(*pmd) = __pa(addr) | prot; + set_pmd(pmd, __pmd(__pa(addr) | prot)); pages++; continue; } else if (!direct && MACHINE_HAS_EDAT1) { @@ -257,7 +257,7 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, */ new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE); if (new_page) { - pmd_val(*pmd) = __pa(new_page) | prot; + set_pmd(pmd, __pmd(__pa(new_page) | prot)); if (!IS_ALIGNED(addr, PMD_SIZE) || !IS_ALIGNED(next, PMD_SIZE)) { vmemmap_use_new_sub_pmd(addr, next); @@ -338,7 +338,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, IS_ALIGNED(next, PUD_SIZE) && MACHINE_HAS_EDAT2 && addr && direct && !debug_pagealloc_enabled()) { - pud_val(*pud) = __pa(addr) | prot; + set_pud(pud, __pud(__pa(addr) | prot)); pages++; continue; } From 4a366f519a3c8883a6bc24efa710bc0e75bc3558 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 21 Feb 2022 21:24:01 +0100 Subject: [PATCH 30/69] s390/mm,pgtable: don't use pte_val()/pXd_val() as lvalue Convert pgtable code so pte_val()/pXd_val() aren't used as lvalue anymore. This allows in later step to convert pte_val()/pXd_val() to functions, which in turn makes it impossible to use these macros to modify page table entries like they have been used before. Therefore a construct like this: pte_val(*pte) = __pa(addr) | prot; which would directly write into a page table, isn't possible anymore with the last step of this series. 
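As a sketch (not part of the diff), the two recurring conversion patterns in
this patch are:

    /* setting a bit: */
    pte_val(pte) |= _PAGE_DIRTY;                       /* old */
    pte = set_pte_bit(pte, __pgprot(_PAGE_DIRTY));     /* new */

    /* clearing a bit: */
    pte_val(pte) &= ~_PAGE_PROTECT;                    /* old */
    pte = clear_pte_bit(pte, __pgprot(_PAGE_PROTECT)); /* new */

The same scheme applies to pmds and puds via set_pmd_bit()/clear_pmd_bit()
and set_pud_bit()/clear_pud_bit().
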
Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pgtable.h | 144 +++++++++++++++----------------- arch/s390/mm/pgtable.c | 22 ++--- 2 files changed, 77 insertions(+), 89 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 2ea58513da97..a3f26e3aa8b5 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -834,15 +834,13 @@ static inline int pte_soft_dirty(pte_t pte) static inline pte_t pte_mksoft_dirty(pte_t pte) { - pte_val(pte) |= _PAGE_SOFT_DIRTY; - return pte; + return set_pte_bit(pte, __pgprot(_PAGE_SOFT_DIRTY)); } #define pte_swp_mksoft_dirty pte_mksoft_dirty static inline pte_t pte_clear_soft_dirty(pte_t pte) { - pte_val(pte) &= ~_PAGE_SOFT_DIRTY; - return pte; + return clear_pte_bit(pte, __pgprot(_PAGE_SOFT_DIRTY)); } #define pte_swp_clear_soft_dirty pte_clear_soft_dirty @@ -853,14 +851,12 @@ static inline int pmd_soft_dirty(pmd_t pmd) static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) { - pmd_val(pmd) |= _SEGMENT_ENTRY_SOFT_DIRTY; - return pmd; + return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_SOFT_DIRTY)); } static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) { - pmd_val(pmd) &= ~_SEGMENT_ENTRY_SOFT_DIRTY; - return pmd; + return clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_SOFT_DIRTY)); } /* @@ -970,79 +966,74 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt */ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - pte_val(pte) &= _PAGE_CHG_MASK; - pte_val(pte) |= pgprot_val(newprot); + pte = clear_pte_bit(pte, __pgprot(~_PAGE_CHG_MASK)); + pte = set_pte_bit(pte, newprot); /* * newprot for PAGE_NONE, PAGE_RO, PAGE_RX, PAGE_RW and PAGE_RWX * has the invalid bit set, clear it again for readable, young pages */ if ((pte_val(pte) & _PAGE_YOUNG) && (pte_val(pte) & _PAGE_READ)) - pte_val(pte) &= ~_PAGE_INVALID; + pte = clear_pte_bit(pte, __pgprot(_PAGE_INVALID)); /* * newprot for PAGE_RO, PAGE_RX, PAGE_RW and PAGE_RWX has the page * protection bit set, clear it again for writable, dirty pages */ if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE)) - pte_val(pte) &= ~_PAGE_PROTECT; + pte = clear_pte_bit(pte, __pgprot(_PAGE_PROTECT)); return pte; } static inline pte_t pte_wrprotect(pte_t pte) { - pte_val(pte) &= ~_PAGE_WRITE; - pte_val(pte) |= _PAGE_PROTECT; - return pte; + pte = clear_pte_bit(pte, __pgprot(_PAGE_WRITE)); + return set_pte_bit(pte, __pgprot(_PAGE_PROTECT)); } static inline pte_t pte_mkwrite(pte_t pte) { - pte_val(pte) |= _PAGE_WRITE; + pte = set_pte_bit(pte, __pgprot(_PAGE_WRITE)); if (pte_val(pte) & _PAGE_DIRTY) - pte_val(pte) &= ~_PAGE_PROTECT; + pte = clear_pte_bit(pte, __pgprot(_PAGE_PROTECT)); return pte; } static inline pte_t pte_mkclean(pte_t pte) { - pte_val(pte) &= ~_PAGE_DIRTY; - pte_val(pte) |= _PAGE_PROTECT; - return pte; + pte = clear_pte_bit(pte, __pgprot(_PAGE_DIRTY)); + return set_pte_bit(pte, __pgprot(_PAGE_PROTECT)); } static inline pte_t pte_mkdirty(pte_t pte) { - pte_val(pte) |= _PAGE_DIRTY | _PAGE_SOFT_DIRTY; + pte = set_pte_bit(pte, __pgprot(_PAGE_DIRTY | _PAGE_SOFT_DIRTY)); if (pte_val(pte) & _PAGE_WRITE) - pte_val(pte) &= ~_PAGE_PROTECT; + pte = clear_pte_bit(pte, __pgprot(_PAGE_PROTECT)); return pte; } static inline pte_t pte_mkold(pte_t pte) { - pte_val(pte) &= ~_PAGE_YOUNG; - pte_val(pte) |= _PAGE_INVALID; - return pte; + pte = clear_pte_bit(pte, __pgprot(_PAGE_YOUNG)); + return set_pte_bit(pte, __pgprot(_PAGE_INVALID)); } static inline pte_t 
pte_mkyoung(pte_t pte) { - pte_val(pte) |= _PAGE_YOUNG; + pte = set_pte_bit(pte, __pgprot(_PAGE_YOUNG)); if (pte_val(pte) & _PAGE_READ) - pte_val(pte) &= ~_PAGE_INVALID; + pte = clear_pte_bit(pte, __pgprot(_PAGE_INVALID)); return pte; } static inline pte_t pte_mkspecial(pte_t pte) { - pte_val(pte) |= _PAGE_SPECIAL; - return pte; + return set_pte_bit(pte, __pgprot(_PAGE_SPECIAL)); } #ifdef CONFIG_HUGETLB_PAGE static inline pte_t pte_mkhuge(pte_t pte) { - pte_val(pte) |= _PAGE_LARGE; - return pte; + return set_pte_bit(pte, __pgprot(_PAGE_LARGE)); } #endif @@ -1253,7 +1244,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { if (pte_present(entry)) - pte_val(entry) &= ~_PAGE_UNUSED; + entry = clear_pte_bit(entry, __pgprot(_PAGE_UNUSED)); if (mm_has_pgste(mm)) ptep_set_pte_at(mm, addr, ptep, entry); else @@ -1268,9 +1259,9 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) { pte_t __pte; - pte_val(__pte) = physpage | pgprot_val(pgprot); + __pte = __pte(physpage | pgprot_val(pgprot)); if (!MACHINE_HAS_NX) - pte_val(__pte) &= ~_PAGE_NOEXEC; + __pte = clear_pte_bit(__pte, __pgprot(_PAGE_NOEXEC)); return pte_mkyoung(__pte); } @@ -1410,61 +1401,57 @@ static inline bool gup_fast_permitted(unsigned long start, unsigned long end) static inline pmd_t pmd_wrprotect(pmd_t pmd) { - pmd_val(pmd) &= ~_SEGMENT_ENTRY_WRITE; - pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; - return pmd; + pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_WRITE)); + return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT)); } static inline pmd_t pmd_mkwrite(pmd_t pmd) { - pmd_val(pmd) |= _SEGMENT_ENTRY_WRITE; + pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_WRITE)); if (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) - pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; + pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT)); return pmd; } static inline pmd_t pmd_mkclean(pmd_t pmd) { - pmd_val(pmd) &= ~_SEGMENT_ENTRY_DIRTY; - pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; - return pmd; + pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_DIRTY)); + return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT)); } static inline pmd_t pmd_mkdirty(pmd_t pmd) { - pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_SOFT_DIRTY; + pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_SOFT_DIRTY)); if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) - pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; + pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT)); return pmd; } static inline pud_t pud_wrprotect(pud_t pud) { - pud_val(pud) &= ~_REGION3_ENTRY_WRITE; - pud_val(pud) |= _REGION_ENTRY_PROTECT; - return pud; + pud = clear_pud_bit(pud, __pgprot(_REGION3_ENTRY_WRITE)); + return set_pud_bit(pud, __pgprot(_REGION_ENTRY_PROTECT)); } static inline pud_t pud_mkwrite(pud_t pud) { - pud_val(pud) |= _REGION3_ENTRY_WRITE; + pud = set_pud_bit(pud, __pgprot(_REGION3_ENTRY_WRITE)); if (pud_val(pud) & _REGION3_ENTRY_DIRTY) - pud_val(pud) &= ~_REGION_ENTRY_PROTECT; + pud = clear_pud_bit(pud, __pgprot(_REGION_ENTRY_PROTECT)); return pud; } static inline pud_t pud_mkclean(pud_t pud) { - pud_val(pud) &= ~_REGION3_ENTRY_DIRTY; - pud_val(pud) |= _REGION_ENTRY_PROTECT; - return pud; + pud = clear_pud_bit(pud, __pgprot(_REGION3_ENTRY_DIRTY)); + return set_pud_bit(pud, __pgprot(_REGION_ENTRY_PROTECT)); } static inline pud_t pud_mkdirty(pud_t pud) { - pud_val(pud) |= _REGION3_ENTRY_DIRTY | _REGION3_ENTRY_SOFT_DIRTY; + pud = set_pud_bit(pud, __pgprot(_REGION3_ENTRY_DIRTY | _REGION3_ENTRY_SOFT_DIRTY)); if (pud_val(pud) 
& _REGION3_ENTRY_WRITE) - pud_val(pud) &= ~_REGION_ENTRY_PROTECT; + pud = clear_pud_bit(pud, __pgprot(_REGION_ENTRY_PROTECT)); return pud; } @@ -1488,37 +1475,39 @@ static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) static inline pmd_t pmd_mkyoung(pmd_t pmd) { - pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; + pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_YOUNG)); if (pmd_val(pmd) & _SEGMENT_ENTRY_READ) - pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID; + pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_INVALID)); return pmd; } static inline pmd_t pmd_mkold(pmd_t pmd) { - pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG; - pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; - return pmd; + pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_YOUNG)); + return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_INVALID)); } static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { - pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE | - _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG | - _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SOFT_DIRTY; - pmd_val(pmd) |= massage_pgprot_pmd(newprot); + unsigned long mask; + + mask = _SEGMENT_ENTRY_ORIGIN_LARGE; + mask |= _SEGMENT_ENTRY_DIRTY; + mask |= _SEGMENT_ENTRY_YOUNG; + mask |= _SEGMENT_ENTRY_LARGE; + mask |= _SEGMENT_ENTRY_SOFT_DIRTY; + pmd = __pmd(pmd_val(pmd) & mask); + pmd = set_pmd_bit(pmd, __pgprot(massage_pgprot_pmd(newprot))); if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)) - pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT)); if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG)) - pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; + pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_INVALID)); return pmd; } static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) { - pmd_t __pmd; - pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot); - return __pmd; + return __pmd(physpage + massage_pgprot_pmd(pgprot)); } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */ @@ -1640,16 +1629,15 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t entry) { if (!MACHINE_HAS_NX) - pmd_val(entry) &= ~_SEGMENT_ENTRY_NOEXEC; + entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC)); set_pmd(pmdp, entry); } static inline pmd_t pmd_mkhuge(pmd_t pmd) { - pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; - pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; - pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; - return pmd; + pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_LARGE)); + pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_YOUNG)); + return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT)); } #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR @@ -1745,12 +1733,12 @@ static inline int has_transparent_hugepage(void) static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) { - pte_t pte; + unsigned long pteval; - pte_val(pte) = _PAGE_INVALID | _PAGE_PROTECT; - pte_val(pte) |= (offset & __SWP_OFFSET_MASK) << __SWP_OFFSET_SHIFT; - pte_val(pte) |= (type & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT; - return pte; + pteval = _PAGE_INVALID | _PAGE_PROTECT; + pteval |= (offset & __SWP_OFFSET_MASK) << __SWP_OFFSET_SHIFT; + pteval |= (type & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT; + return __pte(pteval); } static inline unsigned long __swp_type(swp_entry_t entry) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 26f8a53f0ae1..697df02362af 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -224,8 +224,8 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) * Without enhanced suppression-on-protection 
force * the dirty bit on for all writable ptes. */ - pte_val(entry) |= _PAGE_DIRTY; - pte_val(entry) &= ~_PAGE_PROTECT; + entry = set_pte_bit(entry, __pgprot(_PAGE_DIRTY)); + entry = clear_pte_bit(entry, __pgprot(_PAGE_PROTECT)); } if (!(pte_val(entry) & _PAGE_PROTECT)) /* This pte allows write access, set user-dirty */ @@ -275,7 +275,7 @@ static inline pte_t ptep_xchg_commit(struct mm_struct *mm, pgste = pgste_update_all(old, pgste, mm); if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) - pte_val(old) |= _PAGE_UNUSED; + old = set_pte_bit(old, __pgprot(_PAGE_UNUSED)); } pgste = pgste_set_pte(ptep, pgste, new); pgste_set_unlock(ptep, pgste); @@ -345,7 +345,7 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, struct mm_struct *mm = vma->vm_mm; if (!MACHINE_HAS_NX) - pte_val(pte) &= ~_PAGE_NOEXEC; + pte = clear_pte_bit(pte, __pgprot(_PAGE_NOEXEC)); if (mm_has_pgste(mm)) { pgste = pgste_get(ptep); pgste_set_key(ptep, pgste, pte, mm); @@ -646,12 +646,12 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr, if (prot == PROT_NONE && !pte_i) { ptep_flush_direct(mm, addr, ptep, nodat); pgste = pgste_update_all(entry, pgste, mm); - pte_val(entry) |= _PAGE_INVALID; + entry = set_pte_bit(entry, __pgprot(_PAGE_INVALID)); } if (prot == PROT_READ && !pte_p) { ptep_flush_direct(mm, addr, ptep, nodat); - pte_val(entry) &= ~_PAGE_INVALID; - pte_val(entry) |= _PAGE_PROTECT; + entry = clear_pte_bit(entry, __pgprot(_PAGE_INVALID)); + entry = set_pte_bit(entry, __pgprot(_PAGE_PROTECT)); } pgste_val(pgste) |= bit; pgste = pgste_set_pte(ptep, pgste, entry); @@ -675,8 +675,8 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr, !(pte_val(pte) & _PAGE_PROTECT))) { pgste_val(spgste) |= PGSTE_VSIE_BIT; tpgste = pgste_get_lock(tptep); - pte_val(tpte) = (pte_val(spte) & PAGE_MASK) | - (pte_val(pte) & _PAGE_PROTECT); + tpte = __pte((pte_val(spte) & PAGE_MASK) | + (pte_val(pte) & _PAGE_PROTECT)); /* don't touch the storage key - it belongs to parent pgste */ tpgste = pgste_set_pte(tptep, tpgste, tpte); pgste_set_unlock(tptep, tpgste); @@ -773,9 +773,9 @@ bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr, nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT); ptep_ipte_global(mm, addr, ptep, nodat); if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) - pte_val(pte) |= _PAGE_PROTECT; + pte = set_pte_bit(pte, __pgprot(_PAGE_PROTECT)); else - pte_val(pte) |= _PAGE_INVALID; + pte = set_pte_bit(pte, __pgprot(_PAGE_INVALID)); set_pte(ptep, pte); } pgste_set_unlock(ptep, pgste); From 869a9dbc10d25c5544169209fd4284f1f570c233 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 21 Feb 2022 21:24:35 +0100 Subject: [PATCH 31/69] s390/mm,pageattr: don't use pte_val()/pXd_val() as lvalue Convert pgtable code so pte_val()/pXd_val() aren't used as lvalue anymore. This allows in later step to convert pte_val()/pXd_val() to functions, which in turn makes it impossible to use these macros to modify page table entries like they have been used before. Therefore a construct like this: pte_val(*pte) = __pa(addr) | prot; which would directly write into a page table, isn't possible anymore with the last step of this series. 
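The pageattr conversion follows the same scheme, sketched here for reference
only (not part of the diff): the attribute bits are flipped on a local copy
and the result is written back in one go through the existing pgt_set()
helper:

    new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
    pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
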
Reviewed-by: Claudio Imbrenda Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/pageattr.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 2959bfa0eb54..85195c18b2e8 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -98,9 +98,9 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end, else if (flags & SET_MEMORY_RW) new = pte_mkwrite(pte_mkdirty(new)); if (flags & SET_MEMORY_NX) - pte_val(new) |= _PAGE_NOEXEC; + new = set_pte_bit(new, __pgprot(_PAGE_NOEXEC)); else if (flags & SET_MEMORY_X) - pte_val(new) &= ~_PAGE_NOEXEC; + new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC)); pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE); ptep++; addr += PAGE_SIZE; @@ -131,7 +131,7 @@ static int split_pmd_page(pmd_t *pmdp, unsigned long addr) pte_addr += PAGE_SIZE; ptep++; } - pmd_val(new) = __pa(pt_dir) | _SEGMENT_ENTRY; + new = __pmd(__pa(pt_dir) | _SEGMENT_ENTRY); pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT); update_page_count(PG_DIRECT_MAP_4K, PTRS_PER_PTE); update_page_count(PG_DIRECT_MAP_1M, -1); @@ -148,9 +148,9 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr, else if (flags & SET_MEMORY_RW) new = pmd_mkwrite(pmd_mkdirty(new)); if (flags & SET_MEMORY_NX) - pmd_val(new) |= _SEGMENT_ENTRY_NOEXEC; + new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC)); else if (flags & SET_MEMORY_X) - pmd_val(new) &= ~_SEGMENT_ENTRY_NOEXEC; + new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC)); pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT); } @@ -212,7 +212,7 @@ static int split_pud_page(pud_t *pudp, unsigned long addr) pmd_addr += PMD_SIZE; pmdp++; } - pud_val(new) = __pa(pm_dir) | _REGION3_ENTRY; + new = __pud(__pa(pm_dir) | _REGION3_ENTRY); pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); update_page_count(PG_DIRECT_MAP_1M, PTRS_PER_PMD); update_page_count(PG_DIRECT_MAP_2G, -1); @@ -229,9 +229,9 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr, else if (flags & SET_MEMORY_RW) new = pud_mkwrite(pud_mkdirty(new)); if (flags & SET_MEMORY_NX) - pud_val(new) |= _REGION_ENTRY_NOEXEC; + new = set_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC)); else if (flags & SET_MEMORY_X) - pud_val(new) &= ~_REGION_ENTRY_NOEXEC; + new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC)); pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); } From 933b7253adf61eb4b7be2e7b1ec64217a5d8434c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 21 Feb 2022 21:24:51 +0100 Subject: [PATCH 32/69] s390/mm,hugetlb: don't use pte_val()/pXd_val() as lvalue Convert pgtable code so pte_val()/pXd_val() aren't used as lvalue anymore. This allows in later step to convert pte_val()/pXd_val() to functions, which in turn makes it impossible to use these macros to modify page table entries like they have been used before. Therefore a construct like this: pte_val(*pte) = __pa(addr) | prot; which would directly write into a page table, isn't possible anymore with the last step of this series. 
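Where an entry is assembled from several bits, as in __rste_to_pte(), the
conversion collects them in a plain unsigned long and wraps the result with
__pte() exactly once; roughly (sketch with the bit handling simplified):

    unsigned long pteval = _PAGE_LARGE | _PAGE_PRESENT;

    if (rste & _SEGMENT_ENTRY_WRITE)
            pteval |= _PAGE_WRITE;
    /* ... remaining bits ... */
    return __pte(pteval);
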
Reviewed-by: Claudio Imbrenda Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/hugetlbpage.c | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 082b72d29bb5..10e51ef9c79a 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -73,8 +73,8 @@ static inline unsigned long __pte_to_rste(pte_t pte) static inline pte_t __rste_to_pte(unsigned long rste) { + unsigned long pteval; int present; - pte_t pte; if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) present = pud_present(__pud(rste)); @@ -102,29 +102,21 @@ static inline pte_t __rste_to_pte(unsigned long rste) * u unused, l large */ if (present) { - pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE; - pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT; - pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_READ, - _PAGE_READ); - pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_WRITE, - _PAGE_WRITE); - pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_INVALID, - _PAGE_INVALID); - pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_PROTECT, - _PAGE_PROTECT); - pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_DIRTY, - _PAGE_DIRTY); - pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_YOUNG, - _PAGE_YOUNG); + pteval = rste & _SEGMENT_ENTRY_ORIGIN_LARGE; + pteval |= _PAGE_LARGE | _PAGE_PRESENT; + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_READ, _PAGE_READ); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_WRITE, _PAGE_WRITE); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_INVALID, _PAGE_INVALID); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_PROTECT, _PAGE_PROTECT); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_DIRTY, _PAGE_DIRTY); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_YOUNG, _PAGE_YOUNG); #ifdef CONFIG_MEM_SOFT_DIRTY - pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, - _PAGE_SOFT_DIRTY); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, _PAGE_SOFT_DIRTY); #endif - pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, - _PAGE_NOEXEC); + pteval |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, _PAGE_NOEXEC); } else - pte_val(pte) = _PAGE_INVALID; - return pte; + pteval = _PAGE_INVALID; + return __pte(pteval); } static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste) From e1fc74ff23463f308709b824eed0d73e0a345f4e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 21 Feb 2022 21:25:09 +0100 Subject: [PATCH 33/69] s390/mm,gmap: don't use pte_val()/pXd_val() as lvalue Convert pgtable code so pte_val()/pXd_val() aren't used as lvalue anymore. This allows in later step to convert pte_val()/pXd_val() to functions, which in turn makes it impossible to use these macros to modify page table entries like they have been used before. Therefore a construct like this: pte_val(*pte) = __pa(addr) | prot; which would directly write into a page table, isn't possible anymore with the last step of this series. 
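In the gmap code the pattern is the same, shown here only as a sketch (not
part of the diff): the bits are adjusted on the local pmd value and the
result is handed to gmap_pmdp_xchg(), which performs the actual table update:

    new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
    new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_PROTECT));
    gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
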
Reviewed-by: Claudio Imbrenda Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/gmap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 8cdc77fee41c..a57224a4c141 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -974,13 +974,13 @@ static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr, return -EAGAIN; if (prot == PROT_NONE && !pmd_i) { - pmd_val(new) |= _SEGMENT_ENTRY_INVALID; + new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID)); gmap_pmdp_xchg(gmap, pmdp, new, gaddr); } if (prot == PROT_READ && !pmd_p) { - pmd_val(new) &= ~_SEGMENT_ENTRY_INVALID; - pmd_val(new) |= _SEGMENT_ENTRY_PROTECT; + new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID)); + new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_PROTECT)); gmap_pmdp_xchg(gmap, pmdp, new, gaddr); } @@ -2294,7 +2294,7 @@ static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new, { gaddr &= HPAGE_MASK; pmdp_notify_gmap(gmap, pmdp, gaddr); - pmd_val(new) &= ~_SEGMENT_ENTRY_GMAP_IN; + new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_GMAP_IN)); if (MACHINE_HAS_TLB_GUEST) __pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_GLOBAL); From 966ffbd8615a5c1b0bf474314bbd9ea8eb6ef423 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 21 Feb 2022 21:25:19 +0100 Subject: [PATCH 34/69] s390/mm: convert pte_val()/pXd_val() into functions Disallow constructs like this: pte_val(*pte) = __pa(addr) | prot; which would directly write into a page table. Users are supposed to use the set_pte()/set_pXd() primitives, which guarantee block concurrent (aka atomic) writes. Reviewed-by: Claudio Imbrenda Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/page.h | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index d98d17a36c7b..5a831a6b8fc9 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -90,11 +90,31 @@ typedef pte_t *pgtable_t; #define pgprot_val(x) ((x).pgprot) #define pgste_val(x) ((x).pgste) -#define pte_val(x) ((x).pte) -#define pmd_val(x) ((x).pmd) -#define pud_val(x) ((x).pud) -#define p4d_val(x) ((x).p4d) -#define pgd_val(x) ((x).pgd) + +static inline unsigned long pte_val(pte_t pte) +{ + return pte.pte; +} + +static inline unsigned long pmd_val(pmd_t pmd) +{ + return pmd.pmd; +} + +static inline unsigned long pud_val(pud_t pud) +{ + return pud.pud; +} + +static inline unsigned long p4d_val(p4d_t p4d) +{ + return p4d.p4d; +} + +static inline unsigned long pgd_val(pgd_t pgd) +{ + return pgd.pgd; +} #define __pgste(x) ((pgste_t) { (x) } ) #define __pte(x) ((pte_t) { (x) } ) From d64e5e9120a6afc8ebb9e9b46c1302f13b16b68d Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Thu, 11 Nov 2021 14:31:46 +0100 Subject: [PATCH 35/69] s390/ap/zcrypt: debug feature improvements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds some debug feature improvements related to some failures happened in the past. With CEX8 the max request and response sizes have been extended but the user space applications did not rework their code and thus ran into receive buffer issues. 
This ffdc patch here helps with additional checks and debug feature messages in debugging and pointing to the root cause of some failures related to wrong buffer sizes. Signed-off-by: Harald Freudenberger Reviewed-by: Jürgen Christ Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_queue.c | 3 ++- drivers/s390/crypto/zcrypt_api.c | 19 +++++++++++++++ drivers/s390/crypto/zcrypt_msgtype50.c | 8 +++++++ drivers/s390/crypto/zcrypt_msgtype6.c | 32 +++++++++++++++++++++++--- 4 files changed, 58 insertions(+), 4 deletions(-) diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 1901449768dd..921a82a41e66 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -455,7 +455,7 @@ static ap_func_t *ap_jumptable[NR_AP_SM_STATES][NR_AP_SM_EVENTS] = { enum ap_sm_wait ap_sm_event(struct ap_queue *aq, enum ap_sm_event event) { - if (aq->dev_state > AP_DEV_STATE_UNINITIATED) + if (aq->config && aq->dev_state > AP_DEV_STATE_UNINITIATED) return ap_jumptable[aq->sm_state][event](aq); else return AP_SM_WAIT_NONE; @@ -915,6 +915,7 @@ void ap_queue_init_state(struct ap_queue *aq) spin_lock_bh(&aq->lock); aq->dev_state = AP_DEV_STATE_OPERATING; aq->sm_state = AP_SM_STATE_RESET_START; + aq->last_err_rc = 0; ap_wait(ap_sm_event(aq, AP_SM_EVENT_POLL)); spin_unlock_bh(&aq->lock); } diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 9811ab81f3c4..00f6859c4e72 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -714,6 +714,8 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms, spin_unlock(&zcrypt_list_lock); if (!pref_zq) { + ZCRYPT_DBF_DBG("%s no matching queue found => ENODEV\n", + __func__); rc = -ENODEV; goto out; } @@ -822,6 +824,8 @@ static long zcrypt_rsa_crt(struct ap_perms *perms, spin_unlock(&zcrypt_list_lock); if (!pref_zq) { + ZCRYPT_DBF_DBG("%s no matching queue found => ENODEV\n", + __func__); rc = -ENODEV; goto out; } @@ -940,6 +944,8 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms, spin_unlock(&zcrypt_list_lock); if (!pref_zq) { + ZCRYPT_DBF_DBG("%s no match for address %02x.%04x => ENODEV\n", + __func__, xcRB->user_defined, *domain); rc = -ENODEV; goto out; } @@ -1112,6 +1118,17 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, spin_unlock(&zcrypt_list_lock); if (!pref_zq) { + if (targets && target_num == 1) { + ZCRYPT_DBF_DBG("%s no match for address %02x.%04x => ENODEV\n", + __func__, (int) targets->ap_id, + (int) targets->dom_id); + } else if (targets) { + ZCRYPT_DBF_DBG("%s no match for %d target addrs => ENODEV\n", + __func__, (int) target_num); + } else { + ZCRYPT_DBF_DBG("%s no match for address ff.ffff => ENODEV\n", + __func__); + } rc = -ENODEV; goto out_free; } @@ -1188,6 +1205,8 @@ static long zcrypt_rng(char *buffer) spin_unlock(&zcrypt_list_lock); if (!pref_zq) { + ZCRYPT_DBF_DBG("%s no matching queue found => ENODEV\n", + __func__); rc = -ENODEV; goto out; } diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c index f42e8c511184..b6dcdd2a66d4 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.c +++ b/drivers/s390/crypto/zcrypt_msgtype50.c @@ -497,6 +497,10 @@ static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq, ap_cancel_message(zq->queue, ap_msg); out: ap_msg->private = NULL; + if (rc) + ZCRYPT_DBF_DBG("%s send me cprb at dev=%02x.%04x rc=%d\n", + __func__, AP_QID_CARD(zq->queue->qid), + AP_QID_QUEUE(zq->queue->qid), rc); return rc; } @@ -542,6 +546,10 @@ 
static long zcrypt_cex2a_modexpo_crt(struct zcrypt_queue *zq, ap_cancel_message(zq->queue, ap_msg); out: ap_msg->private = NULL; + if (rc) + ZCRYPT_DBF_DBG("%s send crt cprb at dev=%02x.%04x rc=%d\n", + __func__, AP_QID_CARD(zq->queue->qid), + AP_QID_QUEUE(zq->queue->qid), rc); return rc; } diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index 8582dd0d6969..df283729191a 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -714,17 +714,31 @@ static int convert_type86_xcrb(bool userspace, struct zcrypt_queue *zq, char *data = reply->msg; /* Copy CPRB to user */ + if (xcRB->reply_control_blk_length < msg->fmt2.count1) { + ZCRYPT_DBF_DBG("%s reply_control_blk_length %u < required %u => EMSGSIZE\n", + __func__, xcRB->reply_control_blk_length, + msg->fmt2.count1); + return -EMSGSIZE; + } if (z_copy_to_user(userspace, xcRB->reply_control_blk_addr, data + msg->fmt2.offset1, msg->fmt2.count1)) return -EFAULT; xcRB->reply_control_blk_length = msg->fmt2.count1; /* Copy data buffer to user */ - if (msg->fmt2.count2) + if (msg->fmt2.count2) { + if (xcRB->reply_data_length < msg->fmt2.count2) { + ZCRYPT_DBF_DBG("%s reply_data_length %u < required %u => EMSGSIZE\n", + __func__, xcRB->reply_data_length, + msg->fmt2.count2); + return -EMSGSIZE; + } if (z_copy_to_user(userspace, xcRB->reply_data_addr, data + msg->fmt2.offset2, msg->fmt2.count2)) return -EFAULT; + } xcRB->reply_data_length = msg->fmt2.count2; + return 0; } @@ -744,8 +758,12 @@ static int convert_type86_ep11_xcrb(bool userspace, struct zcrypt_queue *zq, struct type86_fmt2_msg *msg = reply->msg; char *data = reply->msg; - if (xcRB->resp_len < msg->fmt2.count1) - return -EINVAL; + if (xcRB->resp_len < msg->fmt2.count1) { + ZCRYPT_DBF_DBG("%s resp_len %u < required %u => EMSGSIZE\n", + __func__, (unsigned int)xcRB->resp_len, + msg->fmt2.count1); + return -EMSGSIZE; + } /* Copy response CPRB to user */ if (z_copy_to_user(userspace, (char __force __user *)xcRB->resp, @@ -1167,6 +1185,10 @@ static long zcrypt_msgtype6_send_cprb(bool userspace, struct zcrypt_queue *zq, /* Signal pending. */ ap_cancel_message(zq->queue, ap_msg); out: + if (rc) + ZCRYPT_DBF_DBG("%s send cprb at dev=%02x.%04x rc=%d\n", + __func__, AP_QID_CARD(zq->queue->qid), + AP_QID_QUEUE(zq->queue->qid), rc); return rc; } @@ -1272,6 +1294,10 @@ static long zcrypt_msgtype6_send_ep11_cprb(bool userspace, struct zcrypt_queue * /* Signal pending. */ ap_cancel_message(zq->queue, ap_msg); out: + if (rc) + ZCRYPT_DBF_DBG("%s send cprb at dev=%02x.%04x rc=%d\n", + __func__, AP_QID_CARD(zq->queue->qid), + AP_QID_QUEUE(zq->queue->qid), rc); return rc; } From 985214af939b9935dac94aa6fb56c85039fb77e8 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Tue, 16 Nov 2021 14:54:19 +0100 Subject: [PATCH 36/69] s390/zcrypt: CEX8S exploitation support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds CEX8 exploitation support for the AP bus code, the zcrypt device driver zoo and the vfio device driver. 
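For each of the affected drivers this mostly amounts to adding the new device
type to its ap_device_id match table, along the lines of this sketch taken
from the existing table entries:

    { .dev_type = AP_DEVICE_TYPE_CEX8,
      .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },

plus, for zcrypt_cex4, new CEX8 type strings and speed rating tables.
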
Signed-off-by: Harald Freudenberger Reviewed-by: Jürgen Christ Signed-off-by: Vasily Gorbik --- arch/s390/include/uapi/asm/zcrypt.h | 2 +- drivers/s390/crypto/ap_bus.c | 12 ++--- drivers/s390/crypto/ap_bus.h | 1 + drivers/s390/crypto/vfio_ap_drv.c | 2 + drivers/s390/crypto/zcrypt_cex4.c | 75 ++++++++++++++++++++--------- 5 files changed, 63 insertions(+), 29 deletions(-) diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h index 22fd202856bc..2f04a5499d74 100644 --- a/arch/s390/include/uapi/asm/zcrypt.h +++ b/arch/s390/include/uapi/asm/zcrypt.h @@ -288,7 +288,7 @@ struct zcrypt_device_matrix_ext { * 0x08: CEX3A * 0x0a: CEX4 * 0x0b: CEX5 - * 0x0c: CEX6 and CEX7 + * 0x0c: CEX6, CEX7 or CEX8 * 0x0d: device is disabled * * ZCRYPT_QDEPTH_MASK diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 555cc3394fe3..0f8f4230dbc2 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1589,24 +1589,24 @@ static int ap_get_compatible_type(ap_qid_t qid, int rawtype, unsigned int func) AP_QID_QUEUE(qid), rawtype); return 0; } - /* up to CEX7 known and fully supported */ - if (rawtype <= AP_DEVICE_TYPE_CEX7) + /* up to CEX8 known and fully supported */ + if (rawtype <= AP_DEVICE_TYPE_CEX8) return rawtype; /* - * unknown new type > CEX7, check for compatibility + * unknown new type > CEX8, check for compatibility * to the highest known and supported type which is - * currently CEX7 with the help of the QACT function. + * currently CEX8 with the help of the QACT function. */ if (ap_qact_available()) { struct ap_queue_status status; union ap_qact_ap_info apinfo = {0}; apinfo.mode = (func >> 26) & 0x07; - apinfo.cat = AP_DEVICE_TYPE_CEX7; + apinfo.cat = AP_DEVICE_TYPE_CEX8; status = ap_qact(qid, 0, &apinfo); if (status.response_code == AP_RESPONSE_NORMAL && apinfo.cat >= AP_DEVICE_TYPE_CEX2A - && apinfo.cat <= AP_DEVICE_TYPE_CEX7) + && apinfo.cat <= AP_DEVICE_TYPE_CEX8) comp_type = apinfo.cat; } if (!comp_type) diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index dc6f563e8787..5700d4024681 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -70,6 +70,7 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr) #define AP_DEVICE_TYPE_CEX5 11 #define AP_DEVICE_TYPE_CEX6 12 #define AP_DEVICE_TYPE_CEX7 13 +#define AP_DEVICE_TYPE_CEX8 14 /* * Known function facilities diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c index 82b6ffee06c5..29ebd54f8919 100644 --- a/drivers/s390/crypto/vfio_ap_drv.c +++ b/drivers/s390/crypto/vfio_ap_drv.c @@ -41,6 +41,8 @@ static struct ap_device_id ap_queue_ids[] = { .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, { .dev_type = AP_DEVICE_TYPE_CEX7, .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, + { .dev_type = AP_DEVICE_TYPE_CEX8, + .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, { /* end of sibling */ }, }; diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c index 06024bbe9a58..fe5664c7589e 100644 --- a/drivers/s390/crypto/zcrypt_cex4.c +++ b/drivers/s390/crypto/zcrypt_cex4.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright IBM Corp. 2012, 2019 + * Copyright IBM Corp. 2012, 2022 * Author(s): Holger Dengler */ @@ -36,8 +36,8 @@ #define CEX4_CLEANUP_TIME (900*HZ) MODULE_AUTHOR("IBM Corporation"); -MODULE_DESCRIPTION("CEX4/CEX5/CEX6/CEX7 Cryptographic Card device driver, " \ - "Copyright IBM Corp. 
2019"); +MODULE_DESCRIPTION("CEX[45678] Cryptographic Card device driver, " \ + "Copyright IBM Corp. 2022"); MODULE_LICENSE("GPL"); static struct ap_device_id zcrypt_cex4_card_ids[] = { @@ -49,6 +49,8 @@ static struct ap_device_id zcrypt_cex4_card_ids[] = { .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE }, { .dev_type = AP_DEVICE_TYPE_CEX7, .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE }, + { .dev_type = AP_DEVICE_TYPE_CEX8, + .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE }, { /* end of list */ }, }; @@ -63,6 +65,8 @@ static struct ap_device_id zcrypt_cex4_queue_ids[] = { .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, { .dev_type = AP_DEVICE_TYPE_CEX7, .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, + { .dev_type = AP_DEVICE_TYPE_CEX8, + .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE }, { /* end of list */ }, }; @@ -395,7 +399,7 @@ static const struct attribute_group ep11_queue_attr_grp = { }; /* - * Probe function for CEX4/CEX5/CEX6/CEX7 card device. It always + * Probe function for CEX[45678] card device. It always * accepts the AP device since the bus_match already checked * the hardware type. * @ap_dev: pointer to the AP device. @@ -414,6 +418,8 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) 6, 9, 20, 17, 65, 438, 0, 0}; static const int CEX7A_SPEED_IDX[NUM_OPS] = { 6, 8, 17, 15, 54, 362, 0, 0}; + static const int CEX8A_SPEED_IDX[NUM_OPS] = { + 6, 8, 17, 15, 54, 362, 0, 0}; static const int CEX4C_SPEED_IDX[NUM_OPS] = { 59, 69, 308, 83, 278, 2204, 209, 40}; @@ -423,6 +429,8 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) 16, 20, 32, 27, 77, 455, 24, 9}; static const int CEX7C_SPEED_IDX[NUM_OPS] = { 14, 16, 26, 23, 64, 376, 23, 8}; + static const int CEX8C_SPEED_IDX[NUM_OPS] = { + 14, 16, 26, 23, 64, 376, 23, 8}; static const int CEX4P_SPEED_IDX[NUM_OPS] = { 0, 0, 0, 0, 0, 0, 0, 50}; @@ -432,6 +440,8 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) 0, 0, 0, 0, 0, 0, 0, 9}; static const int CEX7P_SPEED_IDX[NUM_OPS] = { 0, 0, 0, 0, 0, 0, 0, 8}; + static const int CEX8P_SPEED_IDX[NUM_OPS] = { + 0, 0, 0, 0, 0, 0, 0, 8}; struct ap_card *ac = to_ap_card(&ap_dev->device); struct zcrypt_card *zc; @@ -455,13 +465,20 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) zc->type_string = "CEX6A"; zc->user_space_type = ZCRYPT_CEX6; zc->speed_rating = CEX6A_SPEED_IDX; - } else { + } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX7) { zc->type_string = "CEX7A"; + zc->speed_rating = CEX7A_SPEED_IDX; + /* wrong user space type, just for compatibility + * with the ZCRYPT_STATUS_MASK ioctl. + */ + zc->user_space_type = ZCRYPT_CEX6; + } else { + zc->type_string = "CEX8A"; + zc->speed_rating = CEX8A_SPEED_IDX; /* wrong user space type, just for compatibility * with the ZCRYPT_STATUS_MASK ioctl. 
*/ zc->user_space_type = ZCRYPT_CEX6; - zc->speed_rating = CEX7A_SPEED_IDX; } zc->min_mod_size = CEX4A_MIN_MOD_SIZE; if (ap_test_bit(&ac->functions, AP_FUNC_MEX4K) && @@ -477,32 +494,39 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) } else if (ap_test_bit(&ac->functions, AP_FUNC_COPRO)) { if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX4) { zc->type_string = "CEX4C"; - /* wrong user space type, must be CEX4 + zc->speed_rating = CEX4C_SPEED_IDX; + /* wrong user space type, must be CEX3C * just keep it for cca compatibility */ zc->user_space_type = ZCRYPT_CEX3C; - zc->speed_rating = CEX4C_SPEED_IDX; } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX5) { zc->type_string = "CEX5C"; - /* wrong user space type, must be CEX5 + zc->speed_rating = CEX5C_SPEED_IDX; + /* wrong user space type, must be CEX3C * just keep it for cca compatibility */ zc->user_space_type = ZCRYPT_CEX3C; - zc->speed_rating = CEX5C_SPEED_IDX; } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX6) { zc->type_string = "CEX6C"; - /* wrong user space type, must be CEX6 - * just keep it for cca compatibility - */ - zc->user_space_type = ZCRYPT_CEX3C; zc->speed_rating = CEX6C_SPEED_IDX; - } else { - zc->type_string = "CEX7C"; - /* wrong user space type, must be CEX7 + /* wrong user space type, must be CEX3C * just keep it for cca compatibility */ zc->user_space_type = ZCRYPT_CEX3C; + } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX7) { + zc->type_string = "CEX7C"; zc->speed_rating = CEX7C_SPEED_IDX; + /* wrong user space type, must be CEX3C + * just keep it for cca compatibility + */ + zc->user_space_type = ZCRYPT_CEX3C; + } else { + zc->type_string = "CEX8C"; + zc->speed_rating = CEX8C_SPEED_IDX; + /* wrong user space type, must be CEX3C + * just keep it for cca compatibility + */ + zc->user_space_type = ZCRYPT_CEX3C; } zc->min_mod_size = CEX4C_MIN_MOD_SIZE; zc->max_mod_size = CEX4C_MAX_MOD_SIZE; @@ -520,13 +544,20 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) zc->type_string = "CEX6P"; zc->user_space_type = ZCRYPT_CEX6; zc->speed_rating = CEX6P_SPEED_IDX; - } else { + } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX7) { zc->type_string = "CEX7P"; + zc->speed_rating = CEX7P_SPEED_IDX; + /* wrong user space type, just for compatibility + * with the ZCRYPT_STATUS_MASK ioctl. + */ + zc->user_space_type = ZCRYPT_CEX6; + } else { + zc->type_string = "CEX8P"; + zc->speed_rating = CEX8P_SPEED_IDX; /* wrong user space type, just for compatibility * with the ZCRYPT_STATUS_MASK ioctl. */ zc->user_space_type = ZCRYPT_CEX6; - zc->speed_rating = CEX7P_SPEED_IDX; } zc->min_mod_size = CEX4C_MIN_MOD_SIZE; zc->max_mod_size = CEX4C_MAX_MOD_SIZE; @@ -563,7 +594,7 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) } /* - * This is called to remove the CEX4/CEX5/CEX6/CEX7 card driver + * This is called to remove the CEX[45678] card driver * information if an AP card device is removed. */ static void zcrypt_cex4_card_remove(struct ap_device *ap_dev) @@ -587,7 +618,7 @@ static struct ap_driver zcrypt_cex4_card_driver = { }; /* - * Probe function for CEX4/CEX5/CEX6/CEX7 queue device. It always + * Probe function for CEX[45678] queue device. It always * accepts the AP device since the bus_match already checked * the hardware type. * @ap_dev: pointer to the AP device. 
@@ -653,7 +684,7 @@ static int zcrypt_cex4_queue_probe(struct ap_device *ap_dev) } /* - * This is called to remove the CEX4/CEX5/CEX6/CEX7 queue driver + * This is called to remove the CEX[45678] queue driver * information if an AP queue device is removed. */ static void zcrypt_cex4_queue_remove(struct ap_device *ap_dev) From a7e701dba1234adbfbacad5ce19656c5606728da Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 17 Nov 2021 15:38:39 +0100 Subject: [PATCH 37/69] s390/zcrypt: handle checkstopped cards with new state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A crypto card may be in checkstopped state. With this patch this is handled as a new state in the ap card and ap queue structs. There is also a new card sysfs attribute /sys/devices/ap/cardxx/chkstop and a new queue sysfs attribute /sys/devices/ap/cardxx/xx.yyyy/chkstop displaying the checkstop state of the card or queue. Please note that the queue's checkstop state is only a copy of the card's checkstop state but makes maintenance much easier. The checkstop state expressed here is the result of an RC 0x04 (CHECKSTOP) during an AP command, mostly the PQAP(TAPQ) command which is 'testing' the queue. Signed-off-by: Harald Freudenberger Reviewed-by: Jürgen Christ Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.c | 78 ++++++++++++++++++++++++++------ drivers/s390/crypto/ap_bus.h | 2 + drivers/s390/crypto/ap_card.c | 11 +++++ drivers/s390/crypto/ap_queue.c | 18 +++++++- drivers/s390/crypto/zcrypt_api.c | 26 +++++------ 5 files changed, 105 insertions(+), 30 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 0f8f4230dbc2..fdf16cb70881 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -323,7 +323,7 @@ EXPORT_SYMBOL(ap_test_config_ctrl_domain); * false otherwise. */ static bool ap_queue_info(ap_qid_t qid, int *q_type, unsigned int *q_fac, - int *q_depth, int *q_ml, bool *q_decfg) + int *q_depth, int *q_ml, bool *q_decfg, bool *q_cstop) { struct ap_queue_status status; union { @@ -366,6 +366,7 @@ static bool ap_queue_info(ap_qid_t qid, int *q_type, unsigned int *q_fac, *q_depth = tapq_info.tapq_gr2.qd; *q_ml = tapq_info.tapq_gr2.ml; *q_decfg = status.response_code == AP_RESPONSE_DECONFIGURED; + *q_cstop = status.response_code == AP_RESPONSE_CHECKSTOPPED; switch (*q_type) { /* For CEX2 and CEX3 the available functions * are not reflected by the facilities bits. 
@@ -1710,7 +1711,7 @@ static inline void ap_scan_rm_card_dev_and_queue_devs(struct ap_card *ac) */ static inline void ap_scan_domains(struct ap_card *ac) { - bool decfg; + bool decfg, chkstop; ap_qid_t qid; unsigned int func; struct device *dev; @@ -1739,7 +1740,8 @@ static inline void ap_scan_domains(struct ap_card *ac) continue; } /* domain is valid, get info from this APQN */ - if (!ap_queue_info(qid, &type, &func, &depth, &ml, &decfg)) { + if (!ap_queue_info(qid, &type, &func, &depth, + &ml, &decfg, &chkstop)) { if (aq) { AP_DBF_INFO("%s(%d,%d) queue_info() failed, rm queue dev\n", __func__, ac->id, dom); @@ -1758,6 +1760,7 @@ static inline void ap_scan_domains(struct ap_card *ac) } aq->card = ac; aq->config = !decfg; + aq->chkstop = chkstop; dev = &aq->ap_dev.device; dev->bus = &ap_bus_type; dev->parent = &ac->ap_dev.device; @@ -1774,13 +1777,43 @@ static inline void ap_scan_domains(struct ap_card *ac) if (decfg) AP_DBF_INFO("%s(%d,%d) new (decfg) queue dev created\n", __func__, ac->id, dom); + else if (chkstop) + AP_DBF_INFO("%s(%d,%d) new (chkstop) queue dev created\n", + __func__, ac->id, dom); else AP_DBF_INFO("%s(%d,%d) new queue dev created\n", __func__, ac->id, dom); goto put_dev_and_continue; } - /* Check config state on the already existing queue device */ + /* handle state changes on already existing queue device */ spin_lock_bh(&aq->lock); + /* checkstop state */ + if (chkstop && !aq->chkstop) { + /* checkstop on */ + aq->chkstop = true; + if (aq->dev_state > AP_DEV_STATE_UNINITIATED) { + aq->dev_state = AP_DEV_STATE_ERROR; + aq->last_err_rc = AP_RESPONSE_CHECKSTOPPED; + } + spin_unlock_bh(&aq->lock); + AP_DBF_DBG("%s(%d,%d) queue dev checkstop on\n", + __func__, ac->id, dom); + /* 'receive' pending messages with -EAGAIN */ + ap_flush_queue(aq); + goto put_dev_and_continue; + } else if (!chkstop && aq->chkstop) { + /* checkstop off */ + aq->chkstop = false; + if (aq->dev_state > AP_DEV_STATE_UNINITIATED) { + aq->dev_state = AP_DEV_STATE_OPERATING; + aq->sm_state = AP_SM_STATE_RESET_START; + } + spin_unlock_bh(&aq->lock); + AP_DBF_DBG("%s(%d,%d) queue dev checkstop off\n", + __func__, ac->id, dom); + goto put_dev_and_continue; + } + /* config state change */ if (decfg && aq->config) { /* config off this queue device */ aq->config = false; @@ -1789,14 +1822,13 @@ static inline void ap_scan_domains(struct ap_card *ac) aq->last_err_rc = AP_RESPONSE_DECONFIGURED; } spin_unlock_bh(&aq->lock); - AP_DBF_INFO("%s(%d,%d) queue dev config off\n", - __func__, ac->id, dom); + AP_DBF_DBG("%s(%d,%d) queue dev config off\n", + __func__, ac->id, dom); ap_send_config_uevent(&aq->ap_dev, aq->config); /* 'receive' pending messages with -EAGAIN */ ap_flush_queue(aq); goto put_dev_and_continue; - } - if (!decfg && !aq->config) { + } else if (!decfg && !aq->config) { /* config on this queue device */ aq->config = true; if (aq->dev_state > AP_DEV_STATE_UNINITIATED) { @@ -1804,8 +1836,8 @@ static inline void ap_scan_domains(struct ap_card *ac) aq->sm_state = AP_SM_STATE_RESET_START; } spin_unlock_bh(&aq->lock); - AP_DBF_INFO("%s(%d,%d) queue dev config on\n", - __func__, ac->id, dom); + AP_DBF_DBG("%s(%d,%d) queue dev config on\n", + __func__, ac->id, dom); ap_send_config_uevent(&aq->ap_dev, aq->config); goto put_dev_and_continue; } @@ -1832,7 +1864,7 @@ put_dev_and_continue: */ static inline void ap_scan_adapter(int ap) { - bool decfg; + bool decfg, chkstop; ap_qid_t qid; unsigned int func; struct device *dev; @@ -1866,8 +1898,8 @@ static inline void ap_scan_adapter(int ap) for (dom = 0; dom <= 
ap_max_domain_id; dom++) if (ap_test_config_usage_domain(dom)) { qid = AP_MKQID(ap, dom); - if (ap_queue_info(qid, &type, &func, - &depth, &ml, &decfg)) + if (ap_queue_info(qid, &type, &func, &depth, + &ml, &decfg, &chkstop)) break; } if (dom > ap_max_domain_id) { @@ -1912,13 +1944,25 @@ static inline void ap_scan_adapter(int ap) put_device(dev); ac = NULL; } else { + /* handle checkstop state change */ + if (chkstop && !ac->chkstop) { + /* checkstop on */ + ac->chkstop = true; + AP_DBF_INFO("%s(%d) card dev checkstop on\n", + __func__, ap); + } else if (!chkstop && ac->chkstop) { + /* checkstop off */ + ac->chkstop = false; + AP_DBF_INFO("%s(%d) card dev checkstop off\n", + __func__, ap); + } + /* handle config state change */ if (decfg && ac->config) { ac->config = false; AP_DBF_INFO("%s(%d) card dev config off\n", __func__, ap); ap_send_config_uevent(&ac->ap_dev, ac->config); - } - if (!decfg && !ac->config) { + } else if (!decfg && !ac->config) { ac->config = true; AP_DBF_INFO("%s(%d) card dev config on\n", __func__, ap); @@ -1942,6 +1986,7 @@ static inline void ap_scan_adapter(int ap) return; } ac->config = !decfg; + ac->chkstop = chkstop; dev = &ac->ap_dev.device; dev->bus = &ap_bus_type; dev->parent = ap_root_device; @@ -1966,6 +2011,9 @@ static inline void ap_scan_adapter(int ap) if (decfg) AP_DBF_INFO("%s(%d) new (decfg) card dev type=%d func=0x%08x created\n", __func__, ap, type, func); + else if (chkstop) + AP_DBF_INFO("%s(%d) new (chkstop) card dev type=%d func=0x%08x created\n", + __func__, ap, type, func); else AP_DBF_INFO("%s(%d) new card dev type=%d func=0x%08x created\n", __func__, ap, type, func); diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 5700d4024681..703d6782ce65 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -179,6 +179,7 @@ struct ap_card { int id; /* AP card number. */ unsigned int maxmsgsize; /* AP msg limit for this card */ bool config; /* configured state */ + bool chkstop; /* checkstop state */ atomic64_t total_request_count; /* # requests ever for this AP device.*/ }; @@ -191,6 +192,7 @@ struct ap_queue { spinlock_t lock; /* Per device lock. */ enum ap_dev_state dev_state; /* queue device state */ bool config; /* configured state */ + bool chkstop; /* checkstop state */ ap_qid_t qid; /* AP queue id. */ bool interrupt; /* indicate if interrupts are enabled */ int queue_count; /* # messages currently on AP queue. */ diff --git a/drivers/s390/crypto/ap_card.c b/drivers/s390/crypto/ap_card.c index 196325a66662..6b2170cf186e 100644 --- a/drivers/s390/crypto/ap_card.c +++ b/drivers/s390/crypto/ap_card.c @@ -174,6 +174,16 @@ static ssize_t config_store(struct device *dev, static DEVICE_ATTR_RW(config); +static ssize_t chkstop_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ap_card *ac = to_ap_card(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", ac->chkstop ? 
1 : 0); +} + +static DEVICE_ATTR_RO(chkstop); + static ssize_t max_msg_size_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -194,6 +204,7 @@ static struct attribute *ap_card_dev_attrs[] = { &dev_attr_pendingq_count.attr, &dev_attr_modalias.attr, &dev_attr_config.attr, + &dev_attr_chkstop.attr, &dev_attr_max_msg_size.attr, NULL }; diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 921a82a41e66..205045cd998d 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -455,7 +455,8 @@ static ap_func_t *ap_jumptable[NR_AP_SM_STATES][NR_AP_SM_EVENTS] = { enum ap_sm_wait ap_sm_event(struct ap_queue *aq, enum ap_sm_event event) { - if (aq->config && aq->dev_state > AP_DEV_STATE_UNINITIATED) + if (aq->config && !aq->chkstop && + aq->dev_state > AP_DEV_STATE_UNINITIATED) return ap_jumptable[aq->sm_state][event](aq); else return AP_SM_WAIT_NONE; @@ -615,6 +616,20 @@ static ssize_t config_show(struct device *dev, static DEVICE_ATTR_RO(config); +static ssize_t chkstop_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ap_queue *aq = to_ap_queue(dev); + int rc; + + spin_lock_bh(&aq->lock); + rc = scnprintf(buf, PAGE_SIZE, "%d\n", aq->chkstop ? 1 : 0); + spin_unlock_bh(&aq->lock); + return rc; +} + +static DEVICE_ATTR_RO(chkstop); + #ifdef CONFIG_ZCRYPT_DEBUG static ssize_t states_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -729,6 +744,7 @@ static struct attribute *ap_queue_dev_attrs[] = { &dev_attr_reset.attr, &dev_attr_interrupt.attr, &dev_attr_config.attr, + &dev_attr_chkstop.attr, #ifdef CONFIG_ZCRYPT_DEBUG &dev_attr_states.attr, &dev_attr_last_err_rc.attr, diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 00f6859c4e72..8b81e6385d55 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -671,7 +671,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms, spin_lock(&zcrypt_list_lock); for_each_zcrypt_card(zc) { /* Check for useable accelarator or CCA card */ - if (!zc->online || !zc->card->config || + if (!zc->online || !zc->card->config || zc->card->chkstop || !(zc->card->functions & 0x18000000)) continue; /* Check for size limits */ @@ -692,7 +692,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms, for_each_zcrypt_queue(zq, zc) { /* check if device is useable and eligible */ if (!zq->online || !zq->ops->rsa_modexpo || - !zq->queue->config) + !zq->queue->config || zq->queue->chkstop) continue; /* check if device node has admission for this queue */ if (!zcrypt_check_queue(perms, @@ -781,7 +781,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms, spin_lock(&zcrypt_list_lock); for_each_zcrypt_card(zc) { /* Check for useable accelarator or CCA card */ - if (!zc->online || !zc->card->config || + if (!zc->online || !zc->card->config || zc->card->chkstop || !(zc->card->functions & 0x18000000)) continue; /* Check for size limits */ @@ -802,7 +802,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms, for_each_zcrypt_queue(zq, zc) { /* check if device is useable and eligible */ if (!zq->online || !zq->ops->rsa_modexpo_crt || - !zq->queue->config) + !zq->queue->config || zq->queue->chkstop) continue; /* check if device node has admission for this queue */ if (!zcrypt_check_queue(perms, @@ -895,7 +895,7 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms, spin_lock(&zcrypt_list_lock); for_each_zcrypt_card(zc) { /* Check for useable CCA card */ - if (!zc->online || 
!zc->card->config || + if (!zc->online || !zc->card->config || zc->card->chkstop || !(zc->card->functions & 0x10000000)) continue; /* Check for user selected CCA card */ @@ -918,9 +918,8 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms, continue; for_each_zcrypt_queue(zq, zc) { /* check for device useable and eligible */ - if (!zq->online || - !zq->ops->send_cprb || - !zq->queue->config || + if (!zq->online || !zq->ops->send_cprb || + !zq->queue->config || zq->queue->chkstop || (tdom != AUTOSEL_DOM && tdom != AP_QID_QUEUE(zq->queue->qid))) continue; @@ -1068,7 +1067,7 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, spin_lock(&zcrypt_list_lock); for_each_zcrypt_card(zc) { /* Check for useable EP11 card */ - if (!zc->online || !zc->card->config || + if (!zc->online || !zc->card->config || zc->card->chkstop || !(zc->card->functions & 0x04000000)) continue; /* Check for user selected EP11 card */ @@ -1091,9 +1090,8 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, continue; for_each_zcrypt_queue(zq, zc) { /* check if device is useable and eligible */ - if (!zq->online || - !zq->ops->send_ep11_cprb || - !zq->queue->config || + if (!zq->online || !zq->ops->send_ep11_cprb || + !zq->queue->config || zq->queue->chkstop || (targets && !is_desired_ep11_queue(zq->queue->qid, target_num, targets))) @@ -1182,7 +1180,7 @@ static long zcrypt_rng(char *buffer) spin_lock(&zcrypt_list_lock); for_each_zcrypt_card(zc) { /* Check for useable CCA card */ - if (!zc->online || !zc->card->config || + if (!zc->online || !zc->card->config || zc->card->chkstop || !(zc->card->functions & 0x10000000)) continue; /* get weight index of the card device */ @@ -1192,7 +1190,7 @@ static long zcrypt_rng(char *buffer) for_each_zcrypt_queue(zq, zc) { /* check if device is useable and eligible */ if (!zq->online || !zq->ops->rng || - !zq->queue->config) + !zq->queue->config || zq->queue->chkstop) continue; if (!zcrypt_queue_compare(zq, pref_zq, wgt, pref_wgt)) continue; From 383366b58016361cc8a2e4c585b7d581eb76263a Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Tue, 23 Nov 2021 15:16:06 +0100 Subject: [PATCH 38/69] s390/zcrypt: Support CPRB minor version T7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a new CPRB minor version T7 to be supported with this patch. Together with this the functions which extract the CPRB data from userspace and prepare the AP message do now check the CPRB minor version and provide some info in the flag field of the ap message struct for further processing. The 3 functions doing this job have been renamed to prep_cca_ap_msg, prep_ep11_ap_msg and prep_rng_ap_msg to reflect their job better (old was get..fc). This patch also introduces two new flags to be used internal with the flag field of the struct ap_message: AP_MSG_FLAG_USAGE is set when prep_cca_ap_msg or prep_ep11_ap_msg come to the conclusion that this is a ordinary crypto load CPRB (which means T2 for CCA CPRBs and no admin bit for EP11 CPRBs). AP_MSG_FLAG_ADMIN is set when prep_cca_ap_msg or prep_ep11_ap_msg think, this is an administrative (control) crypto load CPRB (which means T3, T5, T6 or T7 for CCA CPRBs and admin bit set for EP11 CPRBs). 
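For illustration, a minimal standalone sketch of the CCA classification described above (user space, made-up helper name; the real code lives in zcrypt_msgtype6.c, also logs unknown minor versions and derives the flag from the admin bit for EP11 CPRBs):

#include <stdbool.h>

/* illustrative only: classify a CCA CPRB by its two-character minor version */
static bool cprb_minor_is_admin(const unsigned char func_id[2])
{
	if (func_id[0] != 'T')
		return false;	/* not a known CCA minor version */
	switch (func_id[1]) {
	case '3':
	case '5':
	case '6':
	case '7':
		return true;	/* admin (control) CPRB */
	default:
		return false;	/* "T2" (usage) or an unknown minor version */
	}
}
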
Signed-off-by: Harald Freudenberger Reviewed-by: Jürgen Christ Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.h | 4 +- drivers/s390/crypto/zcrypt_api.c | 6 +-- drivers/s390/crypto/zcrypt_msgtype50.c | 4 +- drivers/s390/crypto/zcrypt_msgtype50.h | 4 +- drivers/s390/crypto/zcrypt_msgtype6.c | 56 +++++++++++++++++++------- drivers/s390/crypto/zcrypt_msgtype6.h | 13 +++--- 6 files changed, 60 insertions(+), 27 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 703d6782ce65..8fd5a17bdf99 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -251,7 +251,9 @@ struct ap_message { struct ap_message *); }; -#define AP_MSG_FLAG_SPECIAL 1 /* flag msg as 'special' with NQAP */ +#define AP_MSG_FLAG_SPECIAL 0x0001 /* flag msg as 'special' with NQAP */ +#define AP_MSG_FLAG_USAGE 0x0002 /* CCA, EP11: usage (no admin) msg */ +#define AP_MSG_FLAG_ADMIN 0x0004 /* CCA, EP11: admin (=control) msg */ /** * ap_init_message() - Initialize ap_message. diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 8b81e6385d55..af3e83541886 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -876,7 +876,7 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms, } #endif - rc = get_cprb_fc(userspace, xcRB, &ap_msg, &func_code, &domain); + rc = prep_cca_ap_msg(userspace, xcRB, &ap_msg, &func_code, &domain); if (rc) goto out; @@ -1058,7 +1058,7 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, } } - rc = get_ep11cprb_fc(userspace, xcrb, &ap_msg, &func_code); + rc = prep_ep11_ap_msg(userspace, xcrb, &ap_msg, &func_code); if (rc) goto out_free; @@ -1171,7 +1171,7 @@ static long zcrypt_rng(char *buffer) trace_s390_zcrypt_req(buffer, TP_HWRNGCPRB); ap_init_message(&ap_msg); - rc = get_rng_fc(&ap_msg, &func_code, &domain); + rc = prep_rng_ap_msg(&ap_msg, &func_code, &domain); if (rc) goto out; diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c index b6dcdd2a66d4..259145aa393f 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.c +++ b/drivers/s390/crypto/zcrypt_msgtype50.c @@ -156,7 +156,7 @@ struct type80_hdr { unsigned char reserved3[8]; } __packed; -unsigned int get_rsa_modex_fc(struct ica_rsa_modexpo *mex, int *fcode) +int get_rsa_modex_fc(struct ica_rsa_modexpo *mex, int *fcode) { if (!mex->inputdatalength) @@ -172,7 +172,7 @@ unsigned int get_rsa_modex_fc(struct ica_rsa_modexpo *mex, int *fcode) return 0; } -unsigned int get_rsa_crt_fc(struct ica_rsa_modexpo_crt *crt, int *fcode) +int get_rsa_crt_fc(struct ica_rsa_modexpo_crt *crt, int *fcode) { if (!crt->inputdatalength) diff --git a/drivers/s390/crypto/zcrypt_msgtype50.h b/drivers/s390/crypto/zcrypt_msgtype50.h index 66bec4f45c56..eb49f06bed29 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.h +++ b/drivers/s390/crypto/zcrypt_msgtype50.h @@ -20,8 +20,8 @@ #define MSGTYPE_ADJUSTMENT 0x08 /* type04 extension (not needed in type50) */ -unsigned int get_rsa_modex_fc(struct ica_rsa_modexpo *, int *); -unsigned int get_rsa_crt_fc(struct ica_rsa_modexpo_crt *, int *); +int get_rsa_modex_fc(struct ica_rsa_modexpo *mex, int *fc); +int get_rsa_crt_fc(struct ica_rsa_modexpo_crt *crt, int *fc); void zcrypt_msgtype50_init(void); void zcrypt_msgtype50_exit(void); diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index df283729191a..98755b57104f 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ 
b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -472,6 +472,7 @@ static int XCRB_msg_to_type6CPRB_msgX(bool userspace, struct ap_message *ap_msg, *fcode = (msg->hdr.function_code[0] << 8) | msg->hdr.function_code[1]; *dom = (unsigned short *)&msg->cprbx.domain; + /* check subfunction, US and AU need special flag with NQAP */ if (memcmp(function_code, "US", 2) == 0 || memcmp(function_code, "AU", 2) == 0) ap_msg->flags |= AP_MSG_FLAG_SPECIAL; @@ -481,6 +482,23 @@ static int XCRB_msg_to_type6CPRB_msgX(bool userspace, struct ap_message *ap_msg, ap_msg->flags ^= AP_MSG_FLAG_SPECIAL; #endif + /* check CPRB minor version, set info bits in ap_message flag field */ + switch (*(unsigned short *)(&msg->cprbx.func_id[0])) { + case 0x5432: /* "T2" */ + ap_msg->flags |= AP_MSG_FLAG_USAGE; + break; + case 0x5433: /* "T3" */ + case 0x5435: /* "T5" */ + case 0x5436: /* "T6" */ + case 0x5437: /* "T7" */ + ap_msg->flags |= AP_MSG_FLAG_ADMIN; + break; + default: + ZCRYPT_DBF_DBG("%s unknown CPRB minor version '%c%c'\n", + __func__, msg->cprbx.func_id[0], + msg->cprbx.func_id[1]); + } + /* copy data block */ if (xcRB->request_data_length && z_copy_from_user(userspace, req_data, xcRB->request_data_address, @@ -568,6 +586,12 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(bool userspace, struct ap_message *ap ap_msg->flags ^= AP_MSG_FLAG_SPECIAL; #endif + /* set info bits in ap_message flag field */ + if (msg->cprbx.flags & 0x80) + ap_msg->flags |= AP_MSG_FLAG_ADMIN; + else + ap_msg->flags |= AP_MSG_FLAG_USAGE; + return 0; } @@ -1131,15 +1155,17 @@ out_free: } /* - * Fetch function code from cprb. - * Extracting the fc requires to copy the cprb from userspace. - * So this function allocates memory and needs an ap_msg prepared + * Prepare a CCA AP msg request. + * Prepare a CCA AP msg: fetch the required data from userspace, + * prepare the AP msg, fill some info into the ap_message struct, + * extract some data from the CPRB and give back to the caller. + * This function allocates memory and needs an ap_msg prepared * by the caller with ap_init_message(). Also the caller has to * make sure ap_release_message() is always called even on failure. */ -unsigned int get_cprb_fc(bool userspace, struct ica_xcRB *xcRB, - struct ap_message *ap_msg, - unsigned int *func_code, unsigned short **dom) +int prep_cca_ap_msg(bool userspace, struct ica_xcRB *xcRB, + struct ap_message *ap_msg, + unsigned int *func_code, unsigned short **dom) { struct response_type resp_type = { .type = CEXXC_RESPONSE_TYPE_XCRB, @@ -1193,15 +1219,17 @@ out: } /* - * Fetch function code from ep11 cprb. - * Extracting the fc requires to copy the ep11 cprb from userspace. - * So this function allocates memory and needs an ap_msg prepared + * Prepare an EP11 AP msg request. + * Prepare an EP11 AP msg: fetch the required data from userspace, + * prepare the AP msg, fill some info into the ap_message struct, + * extract some data from the CPRB and give back to the caller. + * This function allocates memory and needs an ap_msg prepared * by the caller with ap_init_message(). Also the caller has to * make sure ap_release_message() is always called even on failure. 
*/ -unsigned int get_ep11cprb_fc(bool userspace, struct ep11_urb *xcrb, - struct ap_message *ap_msg, - unsigned int *func_code) +int prep_ep11_ap_msg(bool userspace, struct ep11_urb *xcrb, + struct ap_message *ap_msg, + unsigned int *func_code) { struct response_type resp_type = { .type = CEXXC_RESPONSE_TYPE_EP11, @@ -1301,8 +1329,8 @@ out: return rc; } -unsigned int get_rng_fc(struct ap_message *ap_msg, int *func_code, - unsigned int *domain) +int prep_rng_ap_msg(struct ap_message *ap_msg, int *func_code, + unsigned int *domain) { struct response_type resp_type = { .type = CEXXC_RESPONSE_TYPE_XCRB, diff --git a/drivers/s390/crypto/zcrypt_msgtype6.h b/drivers/s390/crypto/zcrypt_msgtype6.h index 155c73514bac..ec960d01cca0 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.h +++ b/drivers/s390/crypto/zcrypt_msgtype6.h @@ -94,11 +94,14 @@ struct type86_fmt2_ext { unsigned int offset4; /* 0x00000000 */ } __packed; -unsigned int get_cprb_fc(bool userspace, struct ica_xcRB *, struct ap_message *, - unsigned int *, unsigned short **); -unsigned int get_ep11cprb_fc(bool userspace, struct ep11_urb *, struct ap_message *, - unsigned int *); -unsigned int get_rng_fc(struct ap_message *, int *, unsigned int *); +int prep_cca_ap_msg(bool userspace, struct ica_xcRB *xcrb, + struct ap_message *ap_msg, + unsigned int *fc, unsigned short **dom); +int prep_ep11_ap_msg(bool userspace, struct ep11_urb *xcrb, + struct ap_message *ap_msg, + unsigned int *fc); +int prep_rng_ap_msg(struct ap_message *ap_msg, + int *fc, unsigned int *dom); #define LOW 10 #define MEDIUM 100 From 252a1ff777639ad13978a614f2cde1f0c43a7c2f Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Tue, 23 Nov 2021 16:02:47 +0100 Subject: [PATCH 39/69] s390/zcrypt: change reply buffer size offering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of offering the user space given receive buffer size to the crypto card firmware as limit for the reply message offer the internal per queue reply buffer size. As the queue's reply buffer is always adjusted to the max message size possible for this card this may offer more buffer space. However, now it is important to check the user space reply buffer on pushing back the reply. If the reply does not fit into the user space provided buffer the ioctl will fail with errno EMSGSIZE. Signed-off-by: Harald Freudenberger Reviewed-by: Jürgen Christ Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/zcrypt_msgtype6.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index 98755b57104f..be3f46a014f4 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -1197,6 +1197,21 @@ static long zcrypt_msgtype6_send_cprb(bool userspace, struct zcrypt_queue *zq, { int rc; struct response_type *rtype = (struct response_type *)(ap_msg->private); + struct { + struct type6_hdr hdr; + struct CPRBX cprbx; + /* ... more data blocks ... */ + } __packed * msg = ap_msg->msg; + + /* + * Set the queue's reply buffer length minus 128 byte padding + * as reply limit for the card firmware. 
+ */ + msg->hdr.FromCardLen1 = min_t(unsigned int, msg->hdr.FromCardLen1, + zq->reply.bufsize - 128); + if (msg->hdr.FromCardLen2) + msg->hdr.FromCardLen2 = + zq->reply.bufsize - msg->hdr.FromCardLen1 - 128; init_completion(&rtype->work); rc = ap_queue_message(zq->queue, ap_msg); @@ -1277,7 +1292,6 @@ static long zcrypt_msgtype6_send_ep11_cprb(bool userspace, struct zcrypt_queue * unsigned int dom_val; /* domain id */ } __packed * payload_hdr = NULL; - /* * The target domain field within the cprb body/payload block will be * replaced by the usage domain for non-management commands only. @@ -1309,6 +1323,13 @@ static long zcrypt_msgtype6_send_ep11_cprb(bool userspace, struct zcrypt_queue * AP_QID_QUEUE(zq->queue->qid); } + /* + * Set the queue's reply buffer length minus the two prepend headers + * as reply limit for the card firmware. + */ + msg->hdr.FromCardLen1 = zq->reply.bufsize - + sizeof(struct type86_hdr) - sizeof(struct type86_fmt2_ext); + init_completion(&rtype->work); rc = ap_queue_message(zq->queue, ap_msg); if (rc) From 1024063effc3ba86d1fec0f2ee0a9259a1065ed5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Christ?= Date: Mon, 10 Jan 2022 13:33:30 +0100 Subject: [PATCH 40/69] s390/zcrypt: Provide target domain for EP11 cprbs to scheduling function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The scheduling function will get an extension which will process the target_id value from an EP11 cprb. This patch extracts the value during preparation of the ap message. Signed-off-by: Jürgen Christ Signed-off-by: Harald Freudenberger Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/zcrypt_api.c | 4 ++-- drivers/s390/crypto/zcrypt_msgtype6.c | 10 +++++++--- drivers/s390/crypto/zcrypt_msgtype6.h | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index af3e83541886..80e2a306709a 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -1021,7 +1021,7 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, struct ep11_target_dev *targets; unsigned short target_num; unsigned int wgt = 0, pref_wgt = 0; - unsigned int func_code; + unsigned int func_code, domain; struct ap_message ap_msg; int cpen, qpen, qid = 0, rc = -ENODEV; struct module *mod; @@ -1058,7 +1058,7 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, } } - rc = prep_ep11_ap_msg(userspace, xcrb, &ap_msg, &func_code); + rc = prep_ep11_ap_msg(userspace, xcrb, &ap_msg, &func_code, &domain); if (rc) goto out_free; diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index be3f46a014f4..57d885158cf0 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -510,7 +510,8 @@ static int XCRB_msg_to_type6CPRB_msgX(bool userspace, struct ap_message *ap_msg, static int xcrb_msg_to_type6_ep11cprb_msgx(bool userspace, struct ap_message *ap_msg, struct ep11_urb *xcRB, - unsigned int *fcode) + unsigned int *fcode, + unsigned int *domain) { unsigned int lfmt; static struct type6_hdr static_type6_ep11_hdr = { @@ -592,6 +593,8 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(bool userspace, struct ap_message *ap else ap_msg->flags |= AP_MSG_FLAG_USAGE; + *domain = msg->cprbx.target_id; + return 0; } @@ -1244,7 +1247,7 @@ out: */ int prep_ep11_ap_msg(bool userspace, struct ep11_urb *xcrb, struct ap_message *ap_msg, - unsigned int *func_code) + unsigned int 
*func_code, unsigned int *domain) { struct response_type resp_type = { .type = CEXXC_RESPONSE_TYPE_EP11, @@ -1260,7 +1263,8 @@ int prep_ep11_ap_msg(bool userspace, struct ep11_urb *xcrb, ap_msg->private = kmemdup(&resp_type, sizeof(resp_type), GFP_KERNEL); if (!ap_msg->private) return -ENOMEM; - return xcrb_msg_to_type6_ep11cprb_msgx(userspace, ap_msg, xcrb, func_code); + return xcrb_msg_to_type6_ep11cprb_msgx(userspace, ap_msg, xcrb, + func_code, domain); } /* diff --git a/drivers/s390/crypto/zcrypt_msgtype6.h b/drivers/s390/crypto/zcrypt_msgtype6.h index ec960d01cca0..9da4f4175c44 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.h +++ b/drivers/s390/crypto/zcrypt_msgtype6.h @@ -99,7 +99,7 @@ int prep_cca_ap_msg(bool userspace, struct ica_xcRB *xcrb, unsigned int *fc, unsigned short **dom); int prep_ep11_ap_msg(bool userspace, struct ep11_urb *xcrb, struct ap_message *ap_msg, - unsigned int *fc); + unsigned int *fc, unsigned int *dom); int prep_rng_ap_msg(struct ap_message *ap_msg, int *fc, unsigned int *dom); From 9d792ef17f18734bca823910b89254dec37b50c5 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Tue, 14 Dec 2021 15:54:16 +0100 Subject: [PATCH 41/69] s390/airq: use DMA memory for summary indicators Protected virtualization guests have to use shared pages for airq notifier bit vectors and summary bytes or bits, thus these need to be allocated as DMA coherent memory. Commit b50623e5db80 ("s390/airq: use DMA memory for adapter interrupts") took care of the notifier bit vectors, but omitted to take care of the summary bytes/bits. In practice this omission is not a big deal, because the summary ain't necessarily allocated here, but can be supplied by the driver. Currently all the I/O we have for SE guests is virtio-ccw, and virtio-ccw uses a self-allocated array of summary indicators. Let us cover all our bases nevertheless! Signed-off-by: Halil Pasic Reviewed-by: Harald Freudenberger Signed-off-by: Vasily Gorbik --- drivers/s390/cio/airq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/airq.c b/drivers/s390/cio/airq.c index e56535c99888..c0ed364bf446 100644 --- a/drivers/s390/cio/airq.c +++ b/drivers/s390/cio/airq.c @@ -44,7 +44,7 @@ int register_adapter_interrupt(struct airq_struct *airq) if (!airq->handler || airq->isc > MAX_ISC) return -EINVAL; if (!airq->lsi_ptr) { - airq->lsi_ptr = kzalloc(1, GFP_KERNEL); + airq->lsi_ptr = cio_dma_zalloc(1); if (!airq->lsi_ptr) return -ENOMEM; airq->flags |= AIRQ_PTR_ALLOCATED; @@ -79,7 +79,7 @@ void unregister_adapter_interrupt(struct airq_struct *airq) synchronize_rcu(); isc_unregister(airq->isc); if (airq->flags & AIRQ_PTR_ALLOCATED) { - kfree(airq->lsi_ptr); + cio_dma_free(airq->lsi_ptr, 1); airq->lsi_ptr = NULL; airq->flags &= ~AIRQ_PTR_ALLOCATED; } From a156f09c9063bc6c11502fc49a6d006489f25eb3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 28 Feb 2022 10:45:43 +0100 Subject: [PATCH 42/69] s390/extable: sort amode31 extable early The early program check handler is active before the amode31 extable is sorted. Therefore in case a program check happens early within the amode31 code the extable entry might not be found. Fix this by sorting the amode31 extable early. 
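The entry cannot be found because extable lookups are binary searches over a table sorted by fault address; a standalone sketch of that lookup contract (user space, simplified entry layout, not the kernel code):

#include <stdlib.h>

struct ex_entry {
	unsigned long insn;	/* faulting instruction address */
	unsigned long fixup;	/* address to continue at */
};

static int cmp_ex(const void *key, const void *elt)
{
	unsigned long addr = *(const unsigned long *)key;
	const struct ex_entry *e = elt;

	return addr < e->insn ? -1 : (addr > e->insn ? 1 : 0);
}

/* bsearch() silently misses entries if the table is not sorted by ->insn */
static const struct ex_entry *find_fixup(const struct ex_entry *tab,
					 size_t num, unsigned long addr)
{
	return bsearch(&addr, tab, num, sizeof(*tab), cmp_ex);
}
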
Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/early.c | 6 ++++++ arch/s390/kernel/traps.c | 1 - 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index b8cfac4918d9..3dae0c04d33b 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -293,6 +293,11 @@ static void __init check_image_bootable(void) disabled_wait(); } +static void __init sort_amode31_extable(void) +{ + sort_extable(__start_amode31_ex_table, __stop_amode31_ex_table); +} + void __init startup_init(void) { sclp_early_adjust_va(); @@ -301,6 +306,7 @@ void __init startup_init(void) time_early_init(); init_kernel_storage_key(); lockdep_off(); + sort_amode31_extable(); setup_lowcore_early(); setup_facility_list(); detect_machine_type(); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 2b780786fc68..c2b1ffe88acf 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -291,7 +291,6 @@ static void __init test_monitor_call(void) void __init trap_init(void) { - sort_extable(__start_amode31_ex_table, __stop_amode31_ex_table); local_mcck_enable(); test_monitor_call(); } From 1952954569d1907eaf6df4b15bb23969e57b6599 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 28 Feb 2022 10:53:34 +0100 Subject: [PATCH 43/69] s390/extable: search amode31 extable last It is very unlikely that an exception happens within the amode31 text section, therefore safe a couple of cycles for the common case, and search the amode31 extable last. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/fault.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index ff16ce0d04ee..1336541103e7 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -230,13 +230,13 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code) const struct exception_table_entry *s390_search_extables(unsigned long addr) { const struct exception_table_entry *fixup; + size_t num; - fixup = search_extable(__start_amode31_ex_table, - __stop_amode31_ex_table - __start_amode31_ex_table, - addr); - if (!fixup) - fixup = search_exception_tables(addr); - return fixup; + fixup = search_exception_tables(addr); + if (fixup) + return fixup; + num = __stop_amode31_ex_table - __start_amode31_ex_table; + return search_extable(__start_amode31_ex_table, num, addr); } static noinline void do_no_context(struct pt_regs *regs) From d09a307fde1c943d23ccb9fecc9a0e1a569732ad Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 28 Feb 2022 11:22:12 +0100 Subject: [PATCH 44/69] s390/extable: move EX_TABLE define to asm-extable.h Follow arm64 and riscv and move the EX_TABLE define to asm-extable.h which is a lot less generic than the current linkage.h. Also make sure that all files which contain EX_TABLE usages actually include the new header file. This should make sure that the files always compile and there won't be any random compile breakage due to other header file dependencies. 
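A hedged example of the usage pattern the new header serves; everything below except the EX_TABLE() macro itself (function name, operands) is made up for illustration:

#include <asm/asm-extable.h>

/* returns 0 on success, -1 if the load at label 0: faulted
 * (*val is undefined in the failure case) */
static inline int try_load_u64(unsigned long *val, const unsigned long *addr)
{
	int rc = -1;

	asm volatile(
		"0:	lg	%[val],0(%[addr])\n"
		"	lhi	%[rc],0\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: [rc] "+d" (rc), [val] "=d" (*val)
		: [addr] "a" (addr)
		: "cc", "memory");
	return rc;
}
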
Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/hypfs/hypfs_vm.c | 1 + arch/s390/include/asm/ap.h | 1 + arch/s390/include/asm/asm-extable.h | 21 +++++++++++++++++++++ arch/s390/include/asm/cpu_mf.h | 1 + arch/s390/include/asm/diag.h | 1 + arch/s390/include/asm/fpu/api.h | 1 + arch/s390/include/asm/futex.h | 1 + arch/s390/include/asm/linkage.h | 18 ------------------ arch/s390/include/asm/mmu.h | 1 + arch/s390/include/asm/uaccess.h | 1 + arch/s390/kernel/diag.c | 1 + arch/s390/kernel/early.c | 1 + arch/s390/kernel/entry.S | 1 + arch/s390/kernel/entry.h | 1 + arch/s390/kernel/ipl.c | 1 + arch/s390/kernel/sysinfo.c | 1 + arch/s390/kernel/text_amode31.S | 1 + arch/s390/kernel/traps.c | 1 + arch/s390/lib/uaccess.c | 1 + arch/s390/mm/fault.c | 1 + arch/s390/mm/maccess.c | 1 + arch/s390/mm/page-states.c | 1 + arch/s390/net/bpf_jit_comp.c | 1 + arch/s390/pci/pci_clp.c | 1 + arch/s390/pci/pci_insn.c | 1 + arch/s390/pci/pci_mmio.c | 1 + drivers/s390/block/dasd_diag.c | 2 +- drivers/s390/char/diag_ftp.c | 1 + drivers/s390/char/sclp.h | 1 + drivers/s390/cio/ioasm.c | 1 + 30 files changed, 49 insertions(+), 19 deletions(-) create mode 100644 arch/s390/include/asm/asm-extable.h diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index e8f15dbb89d0..3765c2d81df5 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index c0c8a1f6c35d..ae75da592ccb 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -13,6 +13,7 @@ #define _ASM_S390_AP_H_ #include +#include /** * The ap_qid_t identifier of an ap queue. diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h new file mode 100644 index 000000000000..620390f17f0c --- /dev/null +++ b/arch/s390/include/asm/asm-extable.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_EXTABLE_H +#define __ASM_EXTABLE_H + +#include +#include + +#define __EX_TABLE(_section, _fault, _target) \ + stringify_in_c(.section _section,"a";) \ + stringify_in_c(.align 8;) \ + stringify_in_c(.long (_fault) - .;) \ + stringify_in_c(.long (_target) - .;) \ + stringify_in_c(.quad 0;) \ + stringify_in_c(.previous) + +#define EX_TABLE(_fault, _target) \ + __EX_TABLE(__ex_table, _fault, _target) +#define EX_TABLE_AMODE31(_fault, _target) \ + __EX_TABLE(.amode31.ex_table, _fault, _target) + +#endif /* __ASM_EXTABLE_H */ diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index e3f12db46cfc..78bb336600bf 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -10,6 +10,7 @@ #define _ASM_S390_CPU_MF_H #include +#include #include asm(".include \"asm/cpu_mf-insn.h\"\n"); diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index bdcd64f0c1d7..56e99c286d12 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -11,6 +11,7 @@ #include #include +#include enum diag_stat_enum { DIAG_STAT_X008, diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h index a959b815a58b..b714ed0ef688 100644 --- a/arch/s390/include/asm/fpu/api.h +++ b/arch/s390/include/asm/fpu/api.h @@ -45,6 +45,7 @@ #define _ASM_S390_FPU_API_H #include +#include void save_fpu_regs(void); void load_fpu_regs(void); diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index 
c22debfcebf1..e08c882dccaa 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -4,6 +4,7 @@ #include #include +#include #include #include diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h index 1ffea75b8ebc..c76777b15fec 100644 --- a/arch/s390/include/asm/linkage.h +++ b/arch/s390/include/asm/linkage.h @@ -2,27 +2,9 @@ #ifndef __ASM_LINKAGE_H #define __ASM_LINKAGE_H -#include #include #define __ALIGN .align 16, 0x07 #define __ALIGN_STR __stringify(__ALIGN) -/* - * Helper macro for exception table entries - */ - -#define __EX_TABLE(_section, _fault, _target) \ - stringify_in_c(.section _section,"a";) \ - stringify_in_c(.align 8;) \ - stringify_in_c(.long (_fault) - .;) \ - stringify_in_c(.long (_target) - .;) \ - stringify_in_c(.quad 0;) \ - stringify_in_c(.previous) - -#define EX_TABLE(_fault, _target) \ - __EX_TABLE(__ex_table, _fault, _target) -#define EX_TABLE_AMODE31(_fault, _target) \ - __EX_TABLE(.amode31.ex_table, _fault, _target) - #endif diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index e12ff0f29d1a..82aae78e1315 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -4,6 +4,7 @@ #include #include +#include typedef struct { spinlock_t lock; diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index f14f4ade15a9..2c029ee89b7c 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -13,6 +13,7 @@ /* * User space memory access functions */ +#include #include #include #include diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index 76a656b2146f..a778714e4d8b 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 3dae0c04d33b..9d151808d03e 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 1f6df6d4a914..3781de26f207 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 6083090be1f4..56e5e3712fbb 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index a93142785bbc..28ae7df26c4a 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index ef3f2659876c..b5e364358ce4 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/text_amode31.S b/arch/s390/kernel/text_amode31.S index 868e4a604110..2c8b14cc5556 100644 --- a/arch/s390/kernel/text_amode31.S +++ b/arch/s390/kernel/text_amode31.S @@ -6,6 +6,7 @@ */ #include +#include #include #include diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index c2b1ffe88acf..309cb0503feb 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include "entry.h" diff 
--git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index 8a5d21461889..f846ebe038fa 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 1336541103e7..adaf5f5c9a45 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 4cc5020f4e18..421efa46946b 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index 18a6381097a9..d5ea09d78938 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index df5d4da06643..f884d1b9ca79 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index be077b39da33..63f3e057c168 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index 4dd58b196cea..1710d006ee93 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index c5b35ea129cf..080c88620723 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index db5987281010..e9edf3b6ed7c 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -19,7 +19,7 @@ #include #include #include - +#include #include #include #include diff --git a/drivers/s390/char/diag_ftp.c b/drivers/s390/char/diag_ftp.c index 6bf1058de873..36bbd6b6e210 100644 --- a/drivers/s390/char/diag_ftp.c +++ b/drivers/s390/char/diag_ftp.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index 8a30e77db469..86dd2cde0f78 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -11,6 +11,7 @@ #include #include +#include #include #include diff --git a/drivers/s390/cio/ioasm.c b/drivers/s390/cio/ioasm.c index 180913007824..acf1edd36549 100644 --- a/drivers/s390/cio/ioasm.c +++ b/drivers/s390/cio/ioasm.c @@ -5,6 +5,7 @@ #include +#include #include #include #include From 0741ec112ca67ea95dc8158a7bc7547ae36cbecc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 28 Feb 2022 11:37:52 +0100 Subject: [PATCH 45/69] s390/extable: move extable related functions to mm/extable.c Just like arm64, riscv, and x86 move extable related functions to mm/extable.c. This is currently only one function, but this will change with subsequent changes. 
Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/Makefile | 2 +- arch/s390/mm/extable.c | 16 ++++++++++++++++ arch/s390/mm/fault.c | 12 ------------ 3 files changed, 17 insertions(+), 13 deletions(-) create mode 100644 arch/s390/mm/extable.c diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index cd67e94c16aa..57e4f3a24829 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -4,7 +4,7 @@ # obj-y := init.o fault.o extmem.o mmap.o vmem.o maccess.o -obj-y += page-states.o pageattr.o pgtable.o pgalloc.o +obj-y += page-states.o pageattr.o pgtable.o pgalloc.o extable.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c new file mode 100644 index 000000000000..a4eb3d8aae7b --- /dev/null +++ b/arch/s390/mm/extable.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +const struct exception_table_entry *s390_search_extables(unsigned long addr) +{ + const struct exception_table_entry *fixup; + size_t num; + + fixup = search_exception_tables(addr); + if (fixup) + return fixup; + num = __stop_amode31_ex_table - __start_amode31_ex_table; + return search_extable(__start_amode31_ex_table, num, addr); +} diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index adaf5f5c9a45..caa4ab0ff80a 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -228,18 +228,6 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code) (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK)); } -const struct exception_table_entry *s390_search_extables(unsigned long addr) -{ - const struct exception_table_entry *fixup; - size_t num; - - fixup = search_exception_tables(addr); - if (fixup) - return fixup; - num = __stop_amode31_ex_table - __start_amode31_ex_table; - return search_extable(__start_amode31_ex_table, num, addr); -} - static noinline void do_no_context(struct pt_regs *regs) { const struct exception_table_entry *fixup; From cfa45c5e0d36b87f99e76f1060526eac032dd624 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 28 Feb 2022 13:31:33 +0100 Subject: [PATCH 46/69] s390/base: pass pt_regs to early program check handler Pass pt_regs to early program check handler like it is done for every other interrupt and exception handler. Also the passed pt_regs can be changed by the called function and the changes register contents and psw contents will be taken into account when returning. In addition the return psw will not be copied to the program check old psw in lowcore, but to the usual return psw location, like it is also done by the regular program check handler. This allows also to get rid of the code that disabled lowcore protection when changing the return address. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/processor.h | 2 +- arch/s390/kernel/base.S | 22 ++++++++++++++++------ arch/s390/kernel/early.c | 14 +++----------- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 8fd9772c7370..022cf0925e56 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -313,7 +313,7 @@ static __always_inline void __noreturn disabled_wait(void) * Basic Program Check Handler. 
*/ extern void s390_base_pgm_handler(void); -extern void (*s390_base_pgm_handler_fn)(void); +extern void (*s390_base_pgm_handler_fn)(struct pt_regs *regs); #define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index f7fe4033df36..172c23c8ca00 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -14,18 +14,28 @@ GEN_BR_THUNK %r9 GEN_BR_THUNK %r14 +__PT_R0 = __PT_GPRS +__PT_R8 = __PT_GPRS + 64 + ENTRY(s390_base_pgm_handler) - stmg %r0,%r15,__LC_SAVE_AREA_SYNC - basr %r13,0 -0: aghi %r15,-STACK_FRAME_OVERHEAD + stmg %r8,%r15,__LC_SAVE_AREA_SYNC + aghi %r15,-(STACK_FRAME_OVERHEAD+__PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_PSW(16,%r11),__LC_PGM_OLD_PSW + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + lgr %r2,%r11 larl %r1,s390_base_pgm_handler_fn lg %r9,0(%r1) ltgr %r9,%r9 jz 1f BASR_EX %r14,%r9 - lmg %r0,%r15,__LC_SAVE_AREA_SYNC - lpswe __LC_PGM_OLD_PSW -1: lpswe disabled_wait_psw-0b(%r13) + mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + lpswe __LC_RETURN_PSW +1: larl %r13,disabled_wait_psw + lpswe 0(%r13) ENDPROC(s390_base_pgm_handler) .align 8 diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 9d151808d03e..5715d1aab173 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -149,22 +149,14 @@ static __init void setup_topology(void) topology_max_mnest = max_mnest; } -static void early_pgm_check_handler(void) +static void early_pgm_check_handler(struct pt_regs *regs) { const struct exception_table_entry *fixup; - unsigned long cr0, cr0_new; - unsigned long addr; - addr = S390_lowcore.program_old_psw.addr; - fixup = s390_search_extables(addr); + fixup = s390_search_extables(regs->psw.addr); if (!fixup) disabled_wait(); - /* Disable low address protection before storing into lowcore. */ - __ctl_store(cr0, 0, 0); - cr0_new = cr0 & ~(1UL << 28); - __ctl_load(cr0_new, 0, 0); - S390_lowcore.program_old_psw.addr = extable_fixup(fixup); - __ctl_load(cr0, 0, 0); + regs->psw.addr = extable_fixup(fixup); } static noinline __init void setup_lowcore_early(void) From 46fee16f571250d6cef74af73ffd47512da981a2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 28 Feb 2022 14:29:25 +0100 Subject: [PATCH 47/69] s390/extable: add and use fixup_exception helper function Add and use fixup_exception helper function in order to remove the duplicated exception handler fixup code at several places. 
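The resulting call pattern at the previously duplicated sites boils down to the following minimal sketch (the caller name is made up; only fixup_exception() itself comes from this patch):

#include <linux/kernel.h>
#include <asm/extable.h>
#include <asm/ptrace.h>

/* mirrors the converted sites in traps.c, kprobes.c and fault.c */
static void example_kernel_fault(struct pt_regs *regs, const char *str)
{
	if (fixup_exception(regs))
		return;	/* psw.addr now points at the registered fixup */
	panic("%s at %lx, no fixup registered", str, regs->psw.addr);
}
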
Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/extable.h | 13 ++----------- arch/s390/kernel/early.c | 6 +----- arch/s390/kernel/kprobes.c | 5 +---- arch/s390/kernel/traps.c | 10 ++-------- arch/s390/mm/extable.c | 15 +++++++++++++++ arch/s390/mm/fault.c | 7 +------ 6 files changed, 22 insertions(+), 34 deletions(-) diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h index 8511f0e59290..d39d7159832a 100644 --- a/arch/s390/include/asm/extable.h +++ b/arch/s390/include/asm/extable.h @@ -49,17 +49,6 @@ ex_fixup_handler(const struct exception_table_entry *x) return (ex_handler_t)((unsigned long)&x->handler + x->handler); } -static inline bool ex_handle(const struct exception_table_entry *x, - struct pt_regs *regs) -{ - ex_handler_t handler = ex_fixup_handler(x); - - if (unlikely(handler)) - return handler(x, regs); - regs->psw.addr = extable_fixup(x); - return true; -} - #define ARCH_HAS_RELATIVE_EXTABLE static inline void swap_ex_entry_fixup(struct exception_table_entry *a, @@ -78,4 +67,6 @@ static inline void swap_ex_entry_fixup(struct exception_table_entry *a, } #define swap_ex_entry_fixup swap_ex_entry_fixup +bool fixup_exception(struct pt_regs *regs); + #endif diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 5715d1aab173..08cc86a0db90 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -151,12 +151,8 @@ static __init void setup_topology(void) static void early_pgm_check_handler(struct pt_regs *regs) { - const struct exception_table_entry *fixup; - - fixup = s390_search_extables(regs->psw.addr); - if (!fixup) + if (!fixup_exception(regs)) disabled_wait(); - regs->psw.addr = extable_fixup(fixup); } static noinline __init void setup_lowcore_early(void) diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index e27a7d3b0364..7e2910e4172b 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -465,7 +465,6 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); struct kprobe *p = kprobe_running(); - const struct exception_table_entry *entry; switch(kcb->kprobe_status) { case KPROBE_HIT_SS: @@ -487,10 +486,8 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr) * In case the user-specified fault handler returned * zero, try to fix up. */ - entry = s390_search_extables(regs->psw.addr); - if (entry && ex_handle(entry, regs)) + if (fixup_exception(regs)) return 1; - /* * fixup_exception() could not handle it, * Let do_page_fault() fix it. 
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 309cb0503feb..7f0fadd10d68 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -54,9 +54,7 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) force_sig_fault(si_signo, si_code, get_trap_ip(regs)); report_user_fault(regs, si_signo, 0); } else { - const struct exception_table_entry *fixup; - fixup = s390_search_extables(regs->psw.addr); - if (!fixup || !ex_handle(fixup, regs)) + if (!fixup_exception(regs)) die(regs, str); } } @@ -245,16 +243,12 @@ static void space_switch_exception(struct pt_regs *regs) static void monitor_event_exception(struct pt_regs *regs) { - const struct exception_table_entry *fixup; - if (user_mode(regs)) return; switch (report_bug(regs->psw.addr - (regs->int_code >> 16), regs)) { case BUG_TRAP_TYPE_NONE: - fixup = s390_search_extables(regs->psw.addr); - if (fixup) - ex_handle(fixup, regs); + fixup_exception(regs); break; case BUG_TRAP_TYPE_WARN: break; diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c index a4eb3d8aae7b..d6ca75570dcf 100644 --- a/arch/s390/mm/extable.c +++ b/arch/s390/mm/extable.c @@ -14,3 +14,18 @@ const struct exception_table_entry *s390_search_extables(unsigned long addr) num = __stop_amode31_ex_table - __start_amode31_ex_table; return search_extable(__start_amode31_ex_table, num, addr); } + +bool fixup_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *ex; + ex_handler_t handler; + + ex = s390_search_extables(instruction_pointer(regs)); + if (!ex) + return false; + handler = ex_fixup_handler(ex); + if (unlikely(handler)) + return handler(ex, regs); + regs->psw.addr = extable_fixup(ex); + return true; +} diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index caa4ab0ff80a..e173b6187ad5 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -230,13 +230,8 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code) static noinline void do_no_context(struct pt_regs *regs) { - const struct exception_table_entry *fixup; - - /* Are we prepared to handle this kernel fault? */ - fixup = s390_search_extables(regs->psw.addr); - if (fixup && ex_handle(fixup, regs)) + if (fixup_exception(regs)) return; - /* * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. From 3d66718cd62d45f3210f047248eab9e76d227e47 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 28 Feb 2022 14:52:42 +0100 Subject: [PATCH 48/69] s390/extable: convert to relative table with data Follow arm64, riscv, and x86 and change extable layout to common "relative table with data". This allows to get rid of s390 specific code in sorttable.c. The main difference to before is that extable entries do not contain a relative function pointer anymore. Instead data and type fields are added. The type field is used to indicate which exception handler needs to be called, while the data field is currently unused. 
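To make the "relative table with data" layout concrete, a standalone decode sketch (user space; it only restates the field semantics described above and in asm/extable.h, it is not additional kernel code):

#include <stdint.h>

struct rel_extable_entry {
	int32_t insn;	/* fault address, relative to the address of this field */
	int32_t fixup;	/* fixup target, relative to the address of this field */
	int16_t type;	/* selects the exception handler (fixup, bpf, ...) */
	int16_t data;	/* per-entry payload, currently unused */
};

static uintptr_t entry_fault_addr(const struct rel_extable_entry *e)
{
	return (uintptr_t)&e->insn + e->insn;
}

static uintptr_t entry_fixup_addr(const struct rel_extable_entry *e)
{
	return (uintptr_t)&e->fixup + e->fixup;
}
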
Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/asm-extable.h | 15 ++++++---- arch/s390/include/asm/extable.h | 36 ++++++++++++------------ arch/s390/mm/extable.c | 21 ++++++++++---- arch/s390/net/bpf_jit_comp.c | 5 ++-- scripts/sorttable.c | 43 +---------------------------- 5 files changed, 46 insertions(+), 74 deletions(-) diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h index 620390f17f0c..61484a5f1209 100644 --- a/arch/s390/include/asm/asm-extable.h +++ b/arch/s390/include/asm/asm-extable.h @@ -5,17 +5,22 @@ #include #include -#define __EX_TABLE(_section, _fault, _target) \ +#define EX_TYPE_NONE 0 +#define EX_TYPE_FIXUP 1 +#define EX_TYPE_BPF 2 + +#define __EX_TABLE(_section, _fault, _target, _type) \ stringify_in_c(.section _section,"a";) \ - stringify_in_c(.align 8;) \ + stringify_in_c(.align 4;) \ stringify_in_c(.long (_fault) - .;) \ stringify_in_c(.long (_target) - .;) \ - stringify_in_c(.quad 0;) \ + stringify_in_c(.short (_type);) \ + stringify_in_c(.short 0;) \ stringify_in_c(.previous) #define EX_TABLE(_fault, _target) \ - __EX_TABLE(__ex_table, _fault, _target) + __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_FIXUP) #define EX_TABLE_AMODE31(_fault, _target) \ - __EX_TABLE(.amode31.ex_table, _fault, _target) + __EX_TABLE(.amode31.ex_table, _fault, _target, EX_TYPE_FIXUP) #endif /* __ASM_EXTABLE_H */ diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h index d39d7159832a..af6ba52743e9 100644 --- a/arch/s390/include/asm/extable.h +++ b/arch/s390/include/asm/extable.h @@ -25,7 +25,7 @@ struct exception_table_entry { int insn, fixup; - long handler; + short type, data; }; extern struct exception_table_entry *__start_amode31_ex_table; @@ -38,17 +38,6 @@ static inline unsigned long extable_fixup(const struct exception_table_entry *x) return (unsigned long)&x->fixup + x->fixup; } -typedef bool (*ex_handler_t)(const struct exception_table_entry *, - struct pt_regs *); - -static inline ex_handler_t -ex_fixup_handler(const struct exception_table_entry *x) -{ - if (likely(!x->handler)) - return NULL; - return (ex_handler_t)((unsigned long)&x->handler + x->handler); -} - #define ARCH_HAS_RELATIVE_EXTABLE static inline void swap_ex_entry_fixup(struct exception_table_entry *a, @@ -58,15 +47,26 @@ static inline void swap_ex_entry_fixup(struct exception_table_entry *a, { a->fixup = b->fixup + delta; b->fixup = tmp.fixup - delta; - a->handler = b->handler; - if (a->handler) - a->handler += delta; - b->handler = tmp.handler; - if (b->handler) - b->handler -= delta; + a->type = b->type; + b->type = tmp.type; + a->data = b->data; + b->data = tmp.data; } #define swap_ex_entry_fixup swap_ex_entry_fixup +#ifdef CONFIG_BPF_JIT + +bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs); + +#else /* !CONFIG_BPF_JIT */ + +static inline bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs) +{ + return false; +} + +#endif /* CONFIG_BPF_JIT */ + bool fixup_exception(struct pt_regs *regs); #endif diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c index d6ca75570dcf..ac6b736ac883 100644 --- a/arch/s390/mm/extable.c +++ b/arch/s390/mm/extable.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include +#include #include const struct exception_table_entry *s390_search_extables(unsigned long addr) @@ -15,17 +17,24 @@ const struct exception_table_entry *s390_search_extables(unsigned long addr) return 
search_extable(__start_amode31_ex_table, num, addr); } +static bool ex_handler_fixup(const struct exception_table_entry *ex, struct pt_regs *regs) +{ + regs->psw.addr = extable_fixup(ex); + return true; +} + bool fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *ex; - ex_handler_t handler; ex = s390_search_extables(instruction_pointer(regs)); if (!ex) return false; - handler = ex_fixup_handler(ex); - if (unlikely(handler)) - return handler(ex, regs); - regs->psw.addr = extable_fixup(ex); - return true; + switch (ex->type) { + case EX_TYPE_FIXUP: + return ex_handler_fixup(ex, regs); + case EX_TYPE_BPF: + return ex_handler_bpf(ex, regs); + } + panic("invalid exception table entry"); } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index f884d1b9ca79..a1a3a10c514c 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -622,8 +622,7 @@ static int get_probe_mem_regno(const u8 *insn) return insn[1] >> 4; } -static bool ex_handler_bpf(const struct exception_table_entry *x, - struct pt_regs *regs) +bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs) { int regno; u8 *insn; @@ -678,7 +677,7 @@ static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp, /* JIT bug - landing pad and extable must be close. */ return -1; ex->fixup = delta; - ex->handler = (u8 *)ex_handler_bpf - (u8 *)&ex->handler; + ex->type = EX_TYPE_BPF; jit->excnt++; } return 0; diff --git a/scripts/sorttable.c b/scripts/sorttable.c index 3a8ea5ed553d..d00504c5f530 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -261,45 +261,6 @@ static void sort_relative_table_with_data(char *extab_image, int image_size) } } -static void s390_sort_relative_table(char *extab_image, int image_size) -{ - int i; - - for (i = 0; i < image_size; i += 16) { - char *loc = extab_image + i; - uint64_t handler; - - w(r((uint32_t *)loc) + i, (uint32_t *)loc); - w(r((uint32_t *)(loc + 4)) + (i + 4), (uint32_t *)(loc + 4)); - /* - * 0 is a special self-relative handler value, which means that - * handler should be ignored. It is safe, because it means that - * handler field points to itself, which should never happen. - * When creating extable-relative values, keep it as 0, since - * this should never occur either: it would mean that handler - * field points to the first extable entry. 
- */ - handler = r8((uint64_t *)(loc + 8)); - if (handler) - handler += i + 8; - w8(handler, (uint64_t *)(loc + 8)); - } - - qsort(extab_image, image_size / 16, 16, compare_relative_table); - - for (i = 0; i < image_size; i += 16) { - char *loc = extab_image + i; - uint64_t handler; - - w(r((uint32_t *)loc) - i, (uint32_t *)loc); - w(r((uint32_t *)(loc + 4)) - (i + 4), (uint32_t *)(loc + 4)); - handler = r8((uint64_t *)(loc + 8)); - if (handler) - handler -= i + 8; - w8(handler, (uint64_t *)(loc + 8)); - } -} - static int do_file(char const *const fname, void *addr) { int rc = -1; @@ -340,12 +301,10 @@ static int do_file(char const *const fname, void *addr) case EM_386: case EM_AARCH64: case EM_RISCV: + case EM_S390: case EM_X86_64: custom_sort = sort_relative_table_with_data; break; - case EM_S390: - custom_sort = s390_sort_relative_table; - break; case EM_PARISC: case EM_PPC: case EM_PPC64: From 484a8ed8b7d145ff38889e4598a4804e9d7e8ca6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 28 Feb 2022 15:02:46 +0100 Subject: [PATCH 49/69] s390/extable: add dedicated uaccess handler This is more or less a combination of commit 2e77a62cb3a6 ("arm64: extable: add a dedicated uaccess handler") and commit 4b5305decc84 ("x86/extable: Extend extable functionality"). To describe the problem that needs to solved let's cite the full arm64 commit message: ------ For inline assembly, we place exception fixups out-of-line in the `.fixup` section such that these are out of the way of the fast path. This has a few drawbacks: * Since the fixup code is anonymous, backtraces will symbolize fixups as offsets from the nearest prior symbol, currently `__entry_tramp_text_end`. This is confusing, and painful to debug without access to the relevant vmlinux. * Since the exception handler adjusts the PC to execute the fixup, and the fixup uses a direct branch back into the function it fixes, backtraces of fixups miss the original function. This is confusing, and violates requirements for RELIABLE_STACKTRACE (and therefore LIVEPATCH). * Inline assembly and associated fixups are generated from templates, and we have many copies of logically identical fixups which only differ in which specific registers are written to and which address is branched to at the end of the fixup. This is potentially wasteful of I-cache resources, and makes it hard to add additional logic to fixups without significant bloat. This patch address all three concerns for inline uaccess fixups by adding a dedicated exception handler which updates registers in exception context and subsequent returns back into the function which faulted, removing the need for fixups specialized to each faulting instruction. Other than backtracing, there should be no functional change as a result of this patch. 
------ Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/asm-extable.h | 27 +++++++++++++++++++++++++++ arch/s390/include/asm/uaccess.h | 24 ++++++------------------ arch/s390/mm/extable.c | 10 ++++++++++ 3 files changed, 43 insertions(+), 18 deletions(-) diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h index 61484a5f1209..fb62df5e16a2 100644 --- a/arch/s390/include/asm/asm-extable.h +++ b/arch/s390/include/asm/asm-extable.h @@ -8,6 +8,7 @@ #define EX_TYPE_NONE 0 #define EX_TYPE_FIXUP 1 #define EX_TYPE_BPF 2 +#define EX_TYPE_UACCESS 3 #define __EX_TABLE(_section, _fault, _target, _type) \ stringify_in_c(.section _section,"a";) \ @@ -18,9 +19,35 @@ stringify_in_c(.short 0;) \ stringify_in_c(.previous) +#define __EX_TABLE_UA(_section, _fault, _target, _type, _reg) \ + stringify_in_c(.section _section,"a";) \ + stringify_in_c(.align 4;) \ + stringify_in_c(.long (_fault) - .;) \ + stringify_in_c(.long (_target) - .;) \ + stringify_in_c(.short (_type);) \ + stringify_in_c(.macro extable_reg reg;) \ + stringify_in_c(.set found, 0;) \ + stringify_in_c(.set regnr, 0;) \ + stringify_in_c(.irp rs,r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,r13,r14,r15;) \ + stringify_in_c(.ifc "\reg", "%%\rs";) \ + stringify_in_c(.set found, 1;) \ + stringify_in_c(.short regnr;) \ + stringify_in_c(.endif;) \ + stringify_in_c(.set regnr, regnr+1;) \ + stringify_in_c(.endr;) \ + stringify_in_c(.ifne (found != 1);) \ + stringify_in_c(.error "extable_reg: bad register argument";) \ + stringify_in_c(.endif;) \ + stringify_in_c(.endm;) \ + stringify_in_c(extable_reg _reg;) \ + stringify_in_c(.purgem extable_reg;) \ + stringify_in_c(.previous) + #define EX_TABLE(_fault, _target) \ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_FIXUP) #define EX_TABLE_AMODE31(_fault, _target) \ __EX_TABLE(.amode31.ex_table, _fault, _target, EX_TYPE_FIXUP) +#define EX_TABLE_UA(_fault, _target, _reg) \ + __EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UACCESS, _reg) #endif /* __ASM_EXTABLE_H */ diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 2c029ee89b7c..44b18800721a 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -81,14 +81,10 @@ union oac { "0: mvcos %[_to],%[_from],%[_size]\n" \ "1: xr %[rc],%[rc]\n" \ "2:\n" \ - ".pushsection .fixup, \"ax\"\n" \ - "3: lhi %[rc],%[retval]\n" \ - " jg 2b\n" \ - ".popsection\n" \ - EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ + EX_TABLE_UA(0b,2b,%[rc]) EX_TABLE_UA(1b,2b,%[rc]) \ : [rc] "=&d" (__rc), [_to] "+Q" (*(to)) \ : [_size] "d" (size), [_from] "Q" (*(from)), \ - [retval] "K" (-EFAULT), [spec] "d" (oac_spec.val) \ + [spec] "d" (oac_spec.val) \ : "cc", "0"); \ __rc; \ }) @@ -295,13 +291,9 @@ int __noreturn __put_kernel_bad(void); "0: " insn " %2,%1\n" \ "1: xr %0,%0\n" \ "2:\n" \ - ".pushsection .fixup, \"ax\"\n" \ - "3: lhi %0,%3\n" \ - " jg 2b\n" \ - ".popsection\n" \ - EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ + EX_TABLE_UA(0b,2b,%0) EX_TABLE_UA(1b,2b,%0) \ : "=d" (__rc), "+Q" (*(to)) \ - : "d" (val), "K" (-EFAULT) \ + : "d" (val) \ : "cc"); \ __rc; \ }) @@ -342,13 +334,9 @@ int __noreturn __get_kernel_bad(void); "0: " insn " %1,%2\n" \ "1: xr %0,%0\n" \ "2:\n" \ - ".pushsection .fixup, \"ax\"\n" \ - "3: lhi %0,%3\n" \ - " jg 2b\n" \ - ".popsection\n" \ - EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ + EX_TABLE_UA(0b,2b,%0) EX_TABLE_UA(1b,2b,%0) \ : "=d" (__rc), "+d" (val) \ - : "Q" (*(from)), "K" (-EFAULT) \ + : "Q" (*(from)) \ : "cc"); \ __rc; \ }) diff 
--git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c index ac6b736ac883..8ac8ad2474a0 100644 --- a/arch/s390/mm/extable.c +++ b/arch/s390/mm/extable.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include #include @@ -23,6 +24,13 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex, struct pt_r return true; } +static bool ex_handler_uaccess(const struct exception_table_entry *ex, struct pt_regs *regs) +{ + regs->gprs[ex->data] = -EFAULT; + regs->psw.addr = extable_fixup(ex); + return true; +} + bool fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *ex; @@ -35,6 +43,8 @@ bool fixup_exception(struct pt_regs *regs) return ex_handler_fixup(ex, regs); case EX_TYPE_BPF: return ex_handler_bpf(ex, regs); + case EX_TYPE_UACCESS: + return ex_handler_uaccess(ex, regs); } panic("invalid exception table entry"); } From 7fc8c362e782042bf36ceeb9343f8217d3d7dbb9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 27 Feb 2022 21:32:54 +0100 Subject: [PATCH 50/69] s390/bpf: encode register within extable entry Instead of decoding the instruction that faulted to get the register which needs to be zeroed, simply encode its number into the extable entries during code generation. This allows to get rid of a bit of code, and is also what other architectures are doing. Acked-by: Alexander Gordeev Reviewed-by: Ilya Leoshkevich Tested-by: Ilya Leoshkevich Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/net/bpf_jit_comp.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index a1a3a10c514c..e1e57a30ac66 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -624,16 +624,8 @@ static int get_probe_mem_regno(const u8 *insn) bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs) { - int regno; - u8 *insn; - regs->psw.addr = extable_fixup(x); - insn = (u8 *)__rewind_psw(regs->psw, regs->int_code >> 16); - regno = get_probe_mem_regno(insn); - if (WARN_ON_ONCE(regno < 0)) - /* JIT bug - unexpected instruction. */ - return false; - regs->gprs[regno] = 0; + regs->gprs[x->data] = 0; return true; } @@ -641,16 +633,17 @@ static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp, int probe_prg, int nop_prg) { struct exception_table_entry *ex; + int reg, prg; s64 delta; u8 *insn; - int prg; int i; if (!fp->aux->extable) /* Do nothing during early JIT passes. */ return 0; insn = jit->prg_buf + probe_prg; - if (WARN_ON_ONCE(get_probe_mem_regno(insn) < 0)) + reg = get_probe_mem_regno(insn); + if (WARN_ON_ONCE(reg < 0)) /* JIT bug - unexpected probe instruction. */ return -1; if (WARN_ON_ONCE(probe_prg + insn_length(*insn) != nop_prg)) @@ -678,6 +671,7 @@ static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp, return -1; ex->fixup = delta; ex->type = EX_TYPE_BPF; + ex->data = reg; jit->excnt++; } return 0; From df5a95f481c4ef62199fb7e25263a035b4a337d2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 28 Feb 2022 18:36:46 +0100 Subject: [PATCH 51/69] s390: remove .fixup section The only user is gone. Remove the section. 
Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vmlinux.lds.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 42c43521878f..2e526f11b91e 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -49,7 +49,6 @@ SECTIONS SOFTIRQENTRY_TEXT FTRACE_HOTPATCH_TRAMPOLINES_TEXT *(.text.*_indirect_*) - *(.fixup) *(.gnu.warning) . = ALIGN(PAGE_SIZE); _etext = .; /* End of text section */ From 6b1ca46ab3aacc905eaaae0f52cb3311f76d7bd6 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 3 Mar 2022 16:38:34 +0100 Subject: [PATCH 52/69] s390/test_unwind: avoid build warning with W=1 Fix the following build warning with W=1 arch/s390/lib/test_unwind.c:172:21: warning: variable 'fops' set but not used [-Wunused-but-set-variable] struct ftrace_ops *fops; Reported-by: kernel test robot Signed-off-by: Vasily Gorbik --- arch/s390/lib/test_unwind.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index b209014ce426..07ef89eeb85a 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -210,19 +210,18 @@ static noinline int test_unwind_ftraced_func(struct unwindme *u) static int test_unwind_ftrace(struct unwindme *u) { - struct ftrace_ops *fops; int ret; +#ifdef CONFIG_DYNAMIC_FTRACE + struct ftrace_ops *fops; -#ifndef CONFIG_DYNAMIC_FTRACE - kunit_skip(current_test, "requires CONFIG_DYNAMIC_FTRACE"); - fops = NULL; /* used */ -#else fops = kunit_kzalloc(current_test, sizeof(*fops), GFP_KERNEL); fops->func = test_unwind_ftrace_handler; fops->flags = FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_RECURSION | FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_PERMANENT; +#else + kunit_skip(current_test, "requires CONFIG_DYNAMIC_FTRACE"); #endif ret = ftrace_set_filter_ip(fops, (unsigned long)test_unwind_ftraced_func, 0, 0); From 50b7c4688da9c3c9045c4db58e8a4ace58d28603 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 4 Mar 2022 15:14:06 +0100 Subject: [PATCH 53/69] s390/asm-offsets: remove unused defines Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/asm-offsets.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index a496b08ea5d1..c253d3faf443 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -50,9 +50,7 @@ int main(void) BLANK(); /* idle data offsets */ OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter); - OFFSET(__CLOCK_IDLE_EXIT, s390_idle_data, clock_idle_exit); OFFSET(__TIMER_IDLE_ENTER, s390_idle_data, timer_idle_enter); - OFFSET(__TIMER_IDLE_EXIT, s390_idle_data, timer_idle_exit); OFFSET(__MT_CYCLES_ENTER, s390_idle_data, mt_cycles_enter); BLANK(); /* hardware defined lowcore locations 0x000 - 0x1ff */ @@ -123,9 +121,6 @@ int main(void) OFFSET(__LC_USER_ASCE, lowcore, user_asce); OFFSET(__LC_LPP, lowcore, lpp); OFFSET(__LC_CURRENT_PID, lowcore, current_pid); - OFFSET(__LC_PERCPU_OFFSET, lowcore, percpu_offset); - OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags); - OFFSET(__LC_PREEMPT_COUNT, lowcore, preempt_count); OFFSET(__LC_GMAP, lowcore, gmap); OFFSET(__LC_BR_R1, lowcore, br_r1_trampoline); OFFSET(__LC_LAST_BREAK, lowcore, last_break); From 0ecf337fa2e4a6d1f35d7fbb9efb7ca0069a1683 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 4 Mar 2022 15:15:33 +0100 Subject: [PATCH 54/69] s390/signal: fix typo 
in comments Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/compat_signal.c | 2 +- arch/s390/kernel/signal.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index cca142fbb516..eee1ad3e1b29 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -89,7 +89,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) _sigregs32 user_sregs; int i; - /* Alwys make any pending restarted system call return -EINTR */ + /* Always make any pending restarted system call return -EINTR */ current->restart_block.fn = do_no_restart_syscall; if (__copy_from_user(&user_sregs, &sregs->regs, sizeof(user_sregs))) diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 307f5d99514d..5ff8d915ec7a 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -141,7 +141,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) { _sigregs user_sregs; - /* Alwys make any pending restarted system call return -EINTR */ + /* Always make any pending restarted system call return -EINTR */ current->restart_block.fn = do_no_restart_syscall; if (__copy_from_user(&user_sregs, sregs, sizeof(user_sregs))) From 52b739e2780c7a15e5be06f1691d92ba5f962f79 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 6 Mar 2022 11:15:27 +0100 Subject: [PATCH 55/69] s390/traps: get rid of magic cast for program interruption code Add a proper union in lowcore to reflect architecture and get rid of a "magic" cast in order to read the full program interruption code. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/lowcore.h | 9 +++++++-- arch/s390/kernel/traps.c | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 1262f5003acf..28a2c6ba795e 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -34,8 +34,13 @@ struct lowcore { __u32 ext_int_code_addr; }; __u32 svc_int_code; /* 0x0088 */ - __u16 pgm_ilc; /* 0x008c */ - __u16 pgm_code; /* 0x008e */ + union { + struct { + __u16 pgm_ilc; /* 0x008c */ + __u16 pgm_code; /* 0x008e */ + }; + __u32 pgm_int_code; + }; __u32 data_exc_code; /* 0x0090 */ __u16 mon_class_num; /* 0x0094 */ __u8 per_code; /* 0x0096 */ diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 7f0fadd10d68..a3c94dfcbe16 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -297,7 +297,7 @@ void noinstr __do_pgm_check(struct pt_regs *regs) unsigned int trapnr; irqentry_state_t state; - regs->int_code = *(u32 *)&S390_lowcore.pgm_ilc; + regs->int_code = S390_lowcore.pgm_int_code; regs->int_parm_long = S390_lowcore.trans_exc_code; state = irqentry_enter(regs); From 998e78004fe4dd9219b039efe763e19d10d9a6f9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 6 Mar 2022 10:59:05 +0100 Subject: [PATCH 56/69] s390/traps: get rid of magic cast for per code Add a proper union in lowcore to reflect architecture and get rid of a "magic" cast in order to read the full per code. 
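Illustration (not part of this patch): a minimal, self-contained sketch of the anonymous-union idiom both lowcore changes rely on. The type and field names below are invented and only mirror the per_code/per_atmid layout, so treat this as a sketch rather than kernel code.

#include <stdint.h>
#include <stdio.h>

/* Invented type; demonstrates a union over two adjacent fields. */
struct demo_lowcore {
	union {
		struct {
			uint8_t code;		/* stands in for per_code */
			uint8_t atmid;		/* stands in for per_atmid */
		};
		uint16_t code_combined;		/* stands in for per_code_combined */
	};
};

int main(void)
{
	struct demo_lowcore lc;

	lc.code = 0x12;
	lc.atmid = 0x34;
	/* before: value = *(uint16_t *)&lc.code;  after: plain member access */
	printf("combined: 0x%04x\n", (unsigned int)lc.code_combined);
	return 0;
}

The combined value is layout-dependent by design, exactly as it is for the real lowcore, which is defined by the architecture.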
Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/lowcore.h | 9 +++++++-- arch/s390/kernel/traps.c | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 28a2c6ba795e..9829d6b44a20 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -43,8 +43,13 @@ struct lowcore { }; __u32 data_exc_code; /* 0x0090 */ __u16 mon_class_num; /* 0x0094 */ - __u8 per_code; /* 0x0096 */ - __u8 per_atmid; /* 0x0097 */ + union { + struct { + __u8 per_code; /* 0x0096 */ + __u8 per_atmid; /* 0x0097 */ + }; + __u16 per_code_combined; + }; __u64 per_address; /* 0x0098 */ __u8 exc_access_id; /* 0x00a0 */ __u8 per_access_id; /* 0x00a1 */ diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index a3c94dfcbe16..674c65019434 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -322,7 +322,7 @@ void noinstr __do_pgm_check(struct pt_regs *regs) set_thread_flag(TIF_PER_TRAP); ev->address = S390_lowcore.per_address; - ev->cause = *(u16 *)&S390_lowcore.per_code; + ev->cause = S390_lowcore.per_code_combined; ev->paid = S390_lowcore.per_access_id; } else { /* PER event in kernel is kprobes */ From 7d8484c4151d94f3c1d257e65e0c1a2ec3c945e7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 6 Mar 2022 11:11:05 +0100 Subject: [PATCH 57/69] s390/irq: use assignment instead of cast Change struct ext_code to contain a union which allows to simply assign the int_code instead of using a cast. In order to keep the patch small the anonymous union is embedded within the existing struct instead of changing the struct ext_code to a union. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/irq.h | 9 +++++++-- arch/s390/kernel/irq.c | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index 9f75d67b8c20..89902f754740 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -81,8 +81,13 @@ static __always_inline void inc_irq_stat(enum interruption_class irq) } struct ext_code { - unsigned short subcode; - unsigned short code; + union { + struct { + unsigned short subcode; + unsigned short code; + }; + unsigned int int_code; + }; }; typedef void (*ext_int_handler_t)(struct ext_code, unsigned int, unsigned long); diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index cb7099682340..3033f616e256 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -342,7 +342,7 @@ static irqreturn_t do_ext_interrupt(int irq, void *dummy) struct ext_int_info *p; int index; - ext_code = *(struct ext_code *) ®s->int_code; + ext_code.int_code = regs->int_code; if (ext_code.code != EXT_IRQ_CLK_COMP) set_cpu_flag(CIF_NOHZ_DELAY); From 2268169c14e5f00377512932cf2b7bc78e4577ad Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 24 Feb 2022 16:43:23 +0100 Subject: [PATCH 58/69] s390: remove unused expoline to BC instructions This reverts commit 6deaa3bbca80 ("s390: extend expoline to BC instructions"). Expolines to BC instructions were added to be utilized by commit de5cb6eb514e ("s390: use expoline thunks in the BPF JIT"). But corresponding code has been removed by commit e1cf4befa297 ("bpf, s390x: remove ld_abs/ld_ind"). And compiler does not generate such expolines as well. 
Compared to regular expolines, expolines to BC instructions contain displacement and all possible variations cannot be generated in advance, making kpatch support more complicated. So, remove those to avoid future usages. Acked-by: Heiko Carstens Acked-by: Sumanth Korikkar Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/nospec-insn.h | 57 ----------------------------- arch/s390/kernel/nospec-branch.c | 25 +++---------- 2 files changed, 5 insertions(+), 77 deletions(-) diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h index 0033dcd663b1..c419c9e87265 100644 --- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -35,18 +35,10 @@ _LC_BR_R1 = __LC_BR_R1 __THUNK_PROLOG_NAME __s390_indirect_jump_r\r2\()use_r\r1 .endm - .macro __THUNK_PROLOG_BC d0,r1,r2 - __THUNK_PROLOG_NAME __s390_indirect_branch_\d0\()_\r2\()use_\r1 - .endm - .macro __THUNK_BR r1,r2 jg __s390_indirect_jump_r\r2\()use_r\r1 .endm - .macro __THUNK_BC d0,r1,r2 - jg __s390_indirect_branch_\d0\()_\r2\()use_\r1 - .endm - .macro __THUNK_BRASL r1,r2,r3 brasl \r1,__s390_indirect_jump_r\r3\()use_r\r2 .endm @@ -89,23 +81,6 @@ _LC_BR_R1 = __LC_BR_R1 .endif .endm - .macro __DECODE_DRR expand,disp,reg,ruse - .set __decode_fail,1 - .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 - .ifc \reg,%r\r1 - .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 - .ifc \ruse,%r\r2 - \expand \disp,\r1,\r2 - .set __decode_fail,0 - .endif - .endr - .endif - .endr - .if __decode_fail == 1 - .error "__DECODE_DRR failed" - .endif - .endm - .macro __THUNK_EX_BR reg,ruse # Be very careful when adding instructions to this macro! # The ALTERNATIVE replacement code has a .+10 which targets @@ -126,42 +101,17 @@ _LC_BR_R1 = __LC_BR_R1 555: br \reg .endm - .macro __THUNK_EX_BC disp,reg,ruse -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES - exrl 0,556f - j . -#else - larl \ruse,556f - ex 0,0(\ruse) - j . -#endif -556: b \disp(\reg) - .endm - .macro GEN_BR_THUNK reg,ruse=%r1 __DECODE_RR __THUNK_PROLOG_BR,\reg,\ruse __THUNK_EX_BR \reg,\ruse __THUNK_EPILOG .endm - .macro GEN_B_THUNK disp,reg,ruse=%r1 - __DECODE_DRR __THUNK_PROLOG_BC,\disp,\reg,\ruse - __THUNK_EX_BC \disp,\reg,\ruse - __THUNK_EPILOG - .endm - .macro BR_EX reg,ruse=%r1 557: __DECODE_RR __THUNK_BR,\reg,\ruse .pushsection .s390_indirect_branches,"a",@progbits .long 557b-. .popsection - .endm - - .macro B_EX disp,reg,ruse=%r1 -558: __DECODE_DRR __THUNK_BC,\disp,\reg,\ruse - .pushsection .s390_indirect_branches,"a",@progbits - .long 558b-. 
- .popsection .endm .macro BASR_EX rsave,rtarget,ruse=%r1 @@ -173,17 +123,10 @@ _LC_BR_R1 = __LC_BR_R1 #else .macro GEN_BR_THUNK reg,ruse=%r1 - .endm - - .macro GEN_B_THUNK disp,reg,ruse=%r1 .endm .macro BR_EX reg,ruse=%r1 br \reg - .endm - - .macro B_EX disp,reg,ruse=%r1 - b \disp(\reg) .endm .macro BASR_EX rsave,rtarget,ruse=%r1 diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 60e6fec27bba..c302e0a7d38f 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -105,6 +105,7 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end) s32 *epo; /* Second part of the instruction replace is always a nop */ + memcpy(insnbuf + 2, branch, sizeof(branch)); for (epo = start; epo < end; epo++) { instr = (u8 *) epo + *epo; if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x04) @@ -125,34 +126,18 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end) br = thunk + (*(int *)(thunk + 2)) * 2; else continue; - /* Check for unconditional branch 0x07f? or 0x47f???? */ - if ((br[0] & 0xbf) != 0x07 || (br[1] & 0xf0) != 0xf0) + if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0) continue; - - memcpy(insnbuf + 2, branch, sizeof(branch)); switch (type) { case BRCL_EXPOLINE: + /* brcl to thunk, replace with br + nop */ insnbuf[0] = br[0]; insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f); - if (br[0] == 0x47) { - /* brcl to b, replace with bc + nopr */ - insnbuf[2] = br[2]; - insnbuf[3] = br[3]; - } else { - /* brcl to br, replace with bcr + nop */ - } break; case BRASL_EXPOLINE: + /* brasl to thunk, replace with basr + nop */ + insnbuf[0] = 0x0d; insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f); - if (br[0] == 0x47) { - /* brasl to b, replace with bas + nopr */ - insnbuf[0] = 0x4d; - insnbuf[2] = br[2]; - insnbuf[3] = br[3]; - } else { - /* brasl to br, replace with basr + nop */ - insnbuf[0] = 0x0d; - } break; } From 6c2797cd51218a451a52edc2ca533f89d1a2af10 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 8 Mar 2022 10:49:58 +0100 Subject: [PATCH 59/69] s390/pci: make zpci_set_irq()/zpci_clear_irq() static Commit c1e18c17bda68 ("s390/pci: add zpci_set_irq()/zpci_clear_irq()") made zpci_set_irq()/zpci_clear_irq() non-static in preparation for using them in zpci_hot_reset_device(). The version of zpci_hot_reset_device() that was finally merged, however, exploits the fact that IRQs and DMA are implicitly disabled by clp_disable_fh(), so the call to zpci_clear_irq() was never added. There are no other calls outside pci_irq.c, so let's make both functions static.
Signed-off-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pci.h | 3 --- arch/s390/pci/pci_irq.c | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 90824be5ce9a..fdb9745ee998 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -283,9 +283,6 @@ int zpci_dma_exit_device(struct zpci_dev *zdev); int __init zpci_irq_init(void); void __init zpci_irq_exit(void); -int zpci_set_irq(struct zpci_dev *zdev); -int zpci_clear_irq(struct zpci_dev *zdev); - /* FMB */ int zpci_fmb_enable_device(struct zpci_dev *); int zpci_fmb_disable_device(struct zpci_dev *); diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 2b6062c486f5..500cd2dbdf53 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -99,7 +99,7 @@ static int zpci_clear_directed_irq(struct zpci_dev *zdev) } /* Register adapter interruptions */ -int zpci_set_irq(struct zpci_dev *zdev) +static int zpci_set_irq(struct zpci_dev *zdev) { int rc; @@ -115,7 +115,7 @@ int zpci_set_irq(struct zpci_dev *zdev) } /* Clear adapter interruptions */ -int zpci_clear_irq(struct zpci_dev *zdev) +static int zpci_clear_irq(struct zpci_dev *zdev) { int rc; From eed38cd2f46f6c4f97c730abd8f23a19db0afbc8 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 28 Feb 2022 13:15:59 +0100 Subject: [PATCH 60/69] s390/nospec: generate single register thunks if possible Currently, assembler-generated expoline thunks always have the form __s390_indirect_jump_rXuse_rX, even when the exrl instruction is available and no additional register is utilized. Generate __s390_indirect_jump_rX versions using a single register if the kernel is built for z10 or newer machines, which have the exrl instruction available. The generated thunks are identical to the ones generated by the compiler. This helps to reduce the number of thunks for newer machine generations. Acked-by: Sumanth Korikkar Acked-by: Ilya Leoshkevich Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/nospec-insn.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h index c419c9e87265..4397eae15e34 100644 --- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -31,6 +31,19 @@ _LC_BR_R1 = __LC_BR_R1 .popsection .endm +#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + .macro __THUNK_PROLOG_BR r1,r2 + __THUNK_PROLOG_NAME __s390_indirect_jump_r\r1 + .endm + + .macro __THUNK_BR r1,r2 + jg __s390_indirect_jump_r\r1 + .endm + + .macro __THUNK_BRASL r1,r2,r3 + brasl \r1,__s390_indirect_jump_r\r2 + .endm +#else .macro __THUNK_PROLOG_BR r1,r2 __THUNK_PROLOG_NAME __s390_indirect_jump_r\r2\()use_r\r1 .endm @@ -42,6 +55,7 @@ _LC_BR_R1 = __LC_BR_R1 .macro __THUNK_BRASL r1,r2,r3 brasl \r1,__s390_indirect_jump_r\r3\()use_r\r2 .endm +#endif .macro __DECODE_RR expand,reg,ruse .set __decode_fail,1 From 1d2ad084800edad81cdc955304272742b10721c7 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 6 Mar 2022 20:56:07 +0100 Subject: [PATCH 61/69] s390/nospec: add an option to use thunk-extern Currently, with the -mindirect-branch=thunk and -mfunction-return=thunk compiler options, expoline thunks are put into individual COMDAT group sections. s390 is the only architecture which has group sections, and this has implications for kpatch and objtool tooling support.
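Illustration (not compiler output and not part of this patch): roughly what one such per-object COMDAT thunk looks like for an indirect branch via %r1, following the naming and section conventions visible in nospec-insn.h; the exact sections and flags emitted by the compiler may differ.

	.section .text.__s390_indirect_jump_r1,"axG",@progbits,__s390_indirect_jump_r1,comdat
	.globl	__s390_indirect_jump_r1
	.hidden	__s390_indirect_jump_r1
	.type	__s390_indirect_jump_r1,@function
__s390_indirect_jump_r1:
	exrl	0,0f		# execute the out-of-line "br %r1"
	j	.		# stop speculative execution past the branch
0:	br	%r1

Every object file carrying such a group is what complicates the tooling; the alternative described below emits only calls like "brasl %r14,__s390_indirect_jump_r1" and takes the thunk bodies from a single place instead.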
Using -mindirect-branch=thunk-extern and -mfunction-return=thunk-extern is an alternative, which comes with a need to generate all required expoline thunks manually. Unfortunately, the modules area is too far away from the kernel image, and expolines from the kernel image cannot be used. But since all new distributions (except Debian) build kernels for machine generations newer than z10, where the "exrl" instruction is available, that leaves only 16 expoline thunks possible. Provide an option to build the kernel with -mindirect-branch=thunk-extern and -mfunction-return=thunk-extern for z10 or newer. This also requires explicitly postlinking expoline thunks into all modules. Currently modules already contain most expolines anyhow. Unfortunately, support for the -mindirect-branch=thunk-extern and -mfunction-return=thunk-extern options is broken in gcc <= 11.2. An additional compile test is required to verify proper gcc support. Acked-by: Ilya Leoshkevich Co-developed-by: Sumanth Korikkar Signed-off-by: Sumanth Korikkar Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 15 +++++++++++++++ arch/s390/Makefile | 14 +++++++++----- arch/s390/include/asm/nospec-insn.h | 10 ++++++++++ arch/s390/lib/Makefile | 1 + arch/s390/lib/expoline.S | 12 ++++++++++++ arch/s390/tools/gcc-thunk-extern.sh | 24 ++++++++++++++++++++++++ scripts/mod/modpost.c | 5 +++++ 7 files changed, 76 insertions(+), 5 deletions(-) create mode 100644 arch/s390/lib/expoline.S create mode 100755 arch/s390/tools/gcc-thunk-extern.sh diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a492376d6e3f..115b8cddefee 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -585,6 +585,7 @@ config KERNEL_NOBP config EXPOLINE def_bool n + depends on $(cc-option,-mindirect-branch=thunk) prompt "Avoid speculative indirect branches in the kernel" help Compile the kernel with the expoline compiler options to guard @@ -595,6 +596,20 @@ config EXPOLINE If unsure, say N. +config EXPOLINE_EXTERN + def_bool n + depends on EXPOLINE + depends on HAVE_MARCH_Z10_FEATURES + depends on CC_IS_GCC && GCC_VERSION >= 110200 + depends on $(success,$(srctree)/arch/s390/tools/gcc-thunk-extern.sh $(CC)) + prompt "Generate expolines as extern functions." + help + This option is required for some tooling like kpatch. The kernel is + compiled with -mindirect-branch=thunk-extern and requires a newer + compiler. + + If unsure, say N.
+ choice prompt "Expoline default" depends on EXPOLINE diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 2edf25b44c4b..bd75128b7d79 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -81,14 +81,18 @@ ifneq ($(call cc-option,-mstack-size=8192 -mstack-guard=128),) endif ifdef CONFIG_EXPOLINE - ifneq ($(call cc-option,$(CC_FLAGS_MARCH) -mindirect-branch=thunk),) + ifdef CONFIG_EXPOLINE_EXTERN + KBUILD_LDFLAGS_MODULE += arch/s390/lib/expoline.o + CC_FLAGS_EXPOLINE := -mindirect-branch=thunk-extern + CC_FLAGS_EXPOLINE += -mfunction-return=thunk-extern + else CC_FLAGS_EXPOLINE := -mindirect-branch=thunk CC_FLAGS_EXPOLINE += -mfunction-return=thunk - CC_FLAGS_EXPOLINE += -mindirect-branch-table - export CC_FLAGS_EXPOLINE - cflags-y += $(CC_FLAGS_EXPOLINE) -DCC_USING_EXPOLINE - aflags-y += -DCC_USING_EXPOLINE endif + CC_FLAGS_EXPOLINE += -mindirect-branch-table + export CC_FLAGS_EXPOLINE + cflags-y += $(CC_FLAGS_EXPOLINE) -DCC_USING_EXPOLINE + aflags-y += -DCC_USING_EXPOLINE endif ifdef CONFIG_FUNCTION_TRACER diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h index 4397eae15e34..bbb5c4d84db9 100644 --- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -18,7 +18,11 @@ _LC_BR_R1 = __LC_BR_R1 * the various thunks are merged into a single copy. */ .macro __THUNK_PROLOG_NAME name +#ifdef CONFIG_EXPOLINE_EXTERN + .pushsection .text,"ax",@progbits +#else .pushsection .text.\name,"axG",@progbits,\name,comdat +#endif .globl \name .hidden \name .type \name,@function @@ -115,7 +119,13 @@ _LC_BR_R1 = __LC_BR_R1 555: br \reg .endm +#ifdef CONFIG_EXPOLINE_EXTERN .macro GEN_BR_THUNK reg,ruse=%r1 + .endm + .macro GEN_BR_THUNK_EXTERN reg,ruse=%r1 +#else + .macro GEN_BR_THUNK reg,ruse=%r1 +#endif __DECODE_RR __THUNK_PROLOG_BR,\reg,\ruse __THUNK_EX_BR \reg,\ruse __THUNK_EPILOG diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 69feb8ed3312..5d415b3db6d1 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -7,6 +7,7 @@ lib-y += delay.o string.o uaccess.o find.o spinlock.o obj-y += mem.o xor.o lib-$(CONFIG_KPROBES) += probes.o lib-$(CONFIG_UPROBES) += probes.o +obj-$(CONFIG_EXPOLINE_EXTERN) += expoline.o obj-$(CONFIG_S390_KPROBES_SANITY_TEST) += test_kprobes_s390.o test_kprobes_s390-objs += test_kprobes_asm.o test_kprobes.o diff --git a/arch/s390/lib/expoline.S b/arch/s390/lib/expoline.S new file mode 100644 index 000000000000..92ed8409a7a4 --- /dev/null +++ b/arch/s390/lib/expoline.S @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include + +.macro GEN_ALL_BR_THUNK_EXTERN + .irp r1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + GEN_BR_THUNK_EXTERN %r\r1 + .endr +.endm + +GEN_ALL_BR_THUNK_EXTERN diff --git a/arch/s390/tools/gcc-thunk-extern.sh b/arch/s390/tools/gcc-thunk-extern.sh new file mode 100755 index 000000000000..20bcbf6dd7ab --- /dev/null +++ b/arch/s390/tools/gcc-thunk-extern.sh @@ -0,0 +1,24 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# Borrowed from gcc: gcc/testsuite/gcc.target/s390/nobp-section-type-conflict.c +# Checks that we don't get error: section type conflict with ‘put_page’. 
+ +cat << "END" | $@ -x c - -fno-PIE -march=z10 -mindirect-branch=thunk-extern -mfunction-return=thunk-extern -mindirect-branch-table -O2 -c -o /dev/null +int a; +int b (void); +void c (int); + +static void +put_page (void) +{ + if (b ()) + c (a); +} + +__attribute__ ((__section__ (".init.text"), __cold__)) void +d (void) +{ + put_page (); + put_page (); +} +END diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 6bfa33217914..dbc0aaf69e43 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -658,6 +658,11 @@ static int ignore_undef_symbol(struct elf_info *info, const char *symname) strstarts(symname, "_savevr_") || strcmp(symname, ".TOC.") == 0) return 1; + + if (info->hdr->e_machine == EM_S390) + /* Expoline thunks are linked on all kernel modules during final link of .ko */ + if (strstarts(symname, "__s390_indirect_jump_r")) + return 1; /* Do not ignore this symbol */ return 0; } From 602bf1687e6f475de2fe29bb1ed81d03bdc06b6d Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 6 Mar 2022 22:30:42 +0100 Subject: [PATCH 62/69] s390/nospec: align and size extern thunks Kernel has full control over how extern thunks generated by arch/s390/lib/expoline.S look like. Align them to 16 bytes like other symbols. Also set proper symbols size which is important for tooling. Acked-by: Ilya Leoshkevich Acked-by: Sumanth Korikkar Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/nospec-insn.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h index bbb5c4d84db9..c670c1dd1293 100644 --- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -20,6 +20,7 @@ _LC_BR_R1 = __LC_BR_R1 .macro __THUNK_PROLOG_NAME name #ifdef CONFIG_EXPOLINE_EXTERN .pushsection .text,"ax",@progbits + .align 16,0x07 #else .pushsection .text.\name,"axG",@progbits,\name,comdat #endif @@ -30,8 +31,11 @@ _LC_BR_R1 = __LC_BR_R1 CFI_STARTPROC .endm - .macro __THUNK_EPILOG + .macro __THUNK_EPILOG_NAME name CFI_ENDPROC +#ifdef CONFIG_EXPOLINE_EXTERN + .size \name, .-\name +#endif .popsection .endm @@ -40,6 +44,10 @@ _LC_BR_R1 = __LC_BR_R1 __THUNK_PROLOG_NAME __s390_indirect_jump_r\r1 .endm + .macro __THUNK_EPILOG_BR r1,r2 + __THUNK_EPILOG_NAME __s390_indirect_jump_r\r1 + .endm + .macro __THUNK_BR r1,r2 jg __s390_indirect_jump_r\r1 .endm @@ -52,6 +60,10 @@ _LC_BR_R1 = __LC_BR_R1 __THUNK_PROLOG_NAME __s390_indirect_jump_r\r2\()use_r\r1 .endm + .macro __THUNK_EPILOG_BR r1,r2 + __THUNK_EPILOG_NAME __s390_indirect_jump_r\r2\()use_r\r1 + .endm + .macro __THUNK_BR r1,r2 jg __s390_indirect_jump_r\r2\()use_r\r1 .endm @@ -128,7 +140,7 @@ _LC_BR_R1 = __LC_BR_R1 #endif __DECODE_RR __THUNK_PROLOG_BR,\reg,\ruse __THUNK_EX_BR \reg,\ruse - __THUNK_EPILOG + __DECODE_RR __THUNK_EPILOG_BR,\reg,\ruse .endm .macro BR_EX reg,ruse=%r1 From 432b1cc78e985d3c783f1accb2507fbf5a87583d Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Feb 2022 19:22:06 +0100 Subject: [PATCH 63/69] s390/uaccess: Add copy_from/to_user_key functions Add copy_from/to_user_key functions, which perform storage key checking. These functions can be used by KVM for emulating instructions that need to be key checked. These functions differ from their non _key counterparts in include/linux/uaccess.h only in the additional key argument and must be kept in sync with those. 
Since the existing uaccess implementation on s390 makes use of move instructions that support having an additional access key supplied, we can implement raw_copy_from/to_user_key by enhancing the existing implementation. Signed-off-by: Janis Schoetterl-Glausch Acked-by: Heiko Carstens Reviewed-by: Christian Borntraeger Acked-by: Janosch Frank Link: https://lore.kernel.org/r/20220211182215.2730017-2-scgl@linux.ibm.com Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/uaccess.h | 22 +++++++++ arch/s390/lib/uaccess.c | 81 +++++++++++++++++++++++++-------- 2 files changed, 85 insertions(+), 18 deletions(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 44b18800721a..70916e26777d 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -45,6 +45,28 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n); #define INLINE_COPY_TO_USER #endif +unsigned long __must_check +_copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key); + +static __always_inline unsigned long __must_check +copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key) +{ + if (likely(check_copy_size(to, n, false))) + n = _copy_from_user_key(to, from, n, key); + return n; +} + +unsigned long __must_check +_copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned long key); + +static __always_inline unsigned long __must_check +copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned long key) +{ + if (likely(check_copy_size(from, n, true))) + n = _copy_to_user_key(to, from, n, key); + return n; +} + int __put_user_bad(void) __attribute__((noreturn)); int __get_user_bad(void) __attribute__((noreturn)); diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index f846ebe038fa..fe7803c653a2 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -60,11 +60,13 @@ static inline int copy_with_mvcos(void) #endif static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr, - unsigned long size) + unsigned long size, unsigned long key) { unsigned long tmp1, tmp2; union oac spec = { + .oac2.key = key, .oac2.as = PSW_BITS_AS_SECONDARY, + .oac2.k = 1, .oac2.a = 1, }; @@ -95,19 +97,19 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr } static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, - unsigned long size) + unsigned long size, unsigned long key) { unsigned long tmp1, tmp2; tmp1 = -256UL; asm volatile( " sacf 0\n" - "0: mvcp 0(%0,%2),0(%1),%3\n" + "0: mvcp 0(%0,%2),0(%1),%[key]\n" "7: jz 5f\n" "1: algr %0,%3\n" " la %1,256(%1)\n" " la %2,256(%2)\n" - "2: mvcp 0(%0,%2),0(%1),%3\n" + "2: mvcp 0(%0,%2),0(%1),%[key]\n" "8: jnz 1b\n" " j 5f\n" "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ @@ -116,7 +118,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, " slgr %4,%1\n" " clgr %0,%4\n" /* copy crosses next page boundary? 
*/ " jnh 6f\n" - "4: mvcp 0(%4,%2),0(%1),%3\n" + "4: mvcp 0(%4,%2),0(%1),%[key]\n" "9: slgr %0,%4\n" " j 6f\n" "5: slgr %0,%0\n" @@ -124,24 +126,49 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b) EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : : "cc", "memory"); + : [key] "d" (key << 4) + : "cc", "memory"); return size; } +static unsigned long raw_copy_from_user_key(void *to, const void __user *from, + unsigned long n, unsigned long key) +{ + if (copy_with_mvcos()) + return copy_from_user_mvcos(to, from, n, key); + return copy_from_user_mvcp(to, from, n, key); +} + unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { - if (copy_with_mvcos()) - return copy_from_user_mvcos(to, from, n); - return copy_from_user_mvcp(to, from, n); + return raw_copy_from_user_key(to, from, n, 0); } EXPORT_SYMBOL(raw_copy_from_user); +unsigned long _copy_from_user_key(void *to, const void __user *from, + unsigned long n, unsigned long key) +{ + unsigned long res = n; + + might_fault(); + if (!should_fail_usercopy()) { + instrument_copy_from_user(to, from, n); + res = raw_copy_from_user_key(to, from, n, key); + } + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; +} +EXPORT_SYMBOL(_copy_from_user_key); + static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, - unsigned long size) + unsigned long size, unsigned long key) { unsigned long tmp1, tmp2; union oac spec = { + .oac1.key = key, .oac1.as = PSW_BITS_AS_SECONDARY, + .oac1.k = 1, .oac1.a = 1, }; @@ -172,19 +199,19 @@ static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, } static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, - unsigned long size) + unsigned long size, unsigned long key) { unsigned long tmp1, tmp2; tmp1 = -256UL; asm volatile( " sacf 0\n" - "0: mvcs 0(%0,%1),0(%2),%3\n" + "0: mvcs 0(%0,%1),0(%2),%[key]\n" "7: jz 5f\n" "1: algr %0,%3\n" " la %1,256(%1)\n" " la %2,256(%2)\n" - "2: mvcs 0(%0,%1),0(%2),%3\n" + "2: mvcs 0(%0,%1),0(%2),%[key]\n" "8: jnz 1b\n" " j 5f\n" "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ @@ -193,7 +220,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, " slgr %4,%1\n" " clgr %0,%4\n" /* copy crosses next page boundary? 
*/ " jnh 6f\n" - "4: mvcs 0(%4,%1),0(%2),%3\n" + "4: mvcs 0(%4,%1),0(%2),%[key]\n" "9: slgr %0,%4\n" " j 6f\n" "5: slgr %0,%0\n" @@ -201,18 +228,36 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b) EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : : "cc", "memory"); + : [key] "d" (key << 4) + : "cc", "memory"); return size; } +static unsigned long raw_copy_to_user_key(void __user *to, const void *from, + unsigned long n, unsigned long key) +{ + if (copy_with_mvcos()) + return copy_to_user_mvcos(to, from, n, key); + return copy_to_user_mvcs(to, from, n, key); +} + unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) { - if (copy_with_mvcos()) - return copy_to_user_mvcos(to, from, n); - return copy_to_user_mvcs(to, from, n); + return raw_copy_to_user_key(to, from, n, 0); } EXPORT_SYMBOL(raw_copy_to_user); +unsigned long _copy_to_user_key(void __user *to, const void *from, + unsigned long n, unsigned long key) +{ + might_fault(); + if (should_fail_usercopy()) + return n; + instrument_copy_to_user(to, from, n); + return raw_copy_to_user_key(to, from, n, key); +} +EXPORT_SYMBOL(_copy_to_user_key); + static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size) { unsigned long tmp1, tmp2; From 4efd417f298bc23bc8b6ac5db5ff79af5ec92ac5 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 24 Feb 2022 22:43:31 +0100 Subject: [PATCH 64/69] s390: raise minimum supported machine generation to z10 Machine generations up to z9 (released in May 2006) have been officially out of service for several years now (z9 end of service - January 31, 2019). No distributions build kernels supporting those old machine generations anymore, except Debian, which seems to pick the oldest supported generation. The team supporting Debian on s390 has been notified about the change. Raising minimum supported machine generation to z10 helps to reduce maintenance cost and effectively remove code, which is not getting enough testing coverage due to lack of older hardware and distributions support. Besides that this unblocks some optimization opportunities and allows to use wider instruction set in asm files for future features implementation. Due to this change spectre mitigation and usercopy implementations could be drastically simplified and many newer instructions could be converted from ".insn" encoding to instruction names. 
Acked-by: Ilya Leoshkevich Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 68 +---------- arch/s390/Makefile | 6 - arch/s390/include/asm/bitops.h | 12 -- arch/s390/include/asm/lowcore.h | 8 +- arch/s390/include/asm/nospec-insn.h | 34 ------ arch/s390/include/asm/timex.h | 4 - arch/s390/include/asm/uaccess.h | 18 --- arch/s390/kernel/asm-offsets.c | 1 - arch/s390/kernel/cache.c | 6 - arch/s390/kernel/ftrace.c | 17 +-- arch/s390/kernel/ftrace.h | 2 - arch/s390/kernel/mcount.S | 11 -- arch/s390/kernel/module.c | 12 +- arch/s390/kernel/nospec-branch.c | 6 - arch/s390/kernel/perf_cpum_cf.c | 11 +- arch/s390/kernel/processor.c | 22 +--- arch/s390/kernel/setup.c | 1 - arch/s390/kernel/smp.c | 1 - arch/s390/kernel/uprobes.c | 16 +-- arch/s390/lib/uaccess.c | 170 ++-------------------------- arch/s390/mm/vmem.c | 8 +- arch/s390/net/bpf_jit_comp.c | 31 ++--- arch/s390/tools/gen_facilities.c | 8 -- 23 files changed, 38 insertions(+), 435 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 115b8cddefee..fe8d89176feb 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -120,7 +120,6 @@ config S390 select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS2 - select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select DMA_OPS if PCI select DYNAMIC_FTRACE if FUNCTION_TRACER select GENERIC_ALLOCATOR @@ -230,20 +229,8 @@ source "kernel/livepatch/Kconfig" menu "Processor type and features" -config HAVE_MARCH_Z900_FEATURES - def_bool n - -config HAVE_MARCH_Z990_FEATURES - def_bool n - select HAVE_MARCH_Z900_FEATURES - -config HAVE_MARCH_Z9_109_FEATURES - def_bool n - select HAVE_MARCH_Z990_FEATURES - config HAVE_MARCH_Z10_FEATURES def_bool n - select HAVE_MARCH_Z9_109_FEATURES config HAVE_MARCH_Z196_FEATURES def_bool n @@ -269,41 +256,13 @@ choice prompt "Processor type" default MARCH_Z196 -config MARCH_Z900 - bool "IBM zSeries model z800 and z900" - select HAVE_MARCH_Z900_FEATURES - depends on $(cc-option,-march=z900) - help - Select this to enable optimizations for model z800/z900 (2064 and - 2066 series). This will enable some optimizations that are not - available on older ESA/390 (31 Bit) only CPUs. - -config MARCH_Z990 - bool "IBM zSeries model z890 and z990" - select HAVE_MARCH_Z990_FEATURES - depends on $(cc-option,-march=z990) - help - Select this to enable optimizations for model z890/z990 (2084 and - 2086 series). The kernel will be slightly faster but will not work - on older machines. - -config MARCH_Z9_109 - bool "IBM System z9" - select HAVE_MARCH_Z9_109_FEATURES - depends on $(cc-option,-march=z9-109) - help - Select this to enable optimizations for IBM System z9 (2094 and - 2096 series). The kernel will be slightly faster but will not work - on older machines. - config MARCH_Z10 bool "IBM System z10" select HAVE_MARCH_Z10_FEATURES depends on $(cc-option,-march=z10) help - Select this to enable optimizations for IBM System z10 (2097 and - 2098 series). The kernel will be slightly faster but will not work - on older machines. + Select this to enable optimizations for IBM System z10 (2097 and 2098 + series). This is the oldest machine generation currently supported. 
config MARCH_Z196 bool "IBM zEnterprise 114 and 196" @@ -352,15 +311,6 @@ config MARCH_Z15 endchoice -config MARCH_Z900_TUNE - def_bool TUNE_Z900 || MARCH_Z900 && TUNE_DEFAULT - -config MARCH_Z990_TUNE - def_bool TUNE_Z990 || MARCH_Z990 && TUNE_DEFAULT - -config MARCH_Z9_109_TUNE - def_bool TUNE_Z9_109 || MARCH_Z9_109 && TUNE_DEFAULT - config MARCH_Z10_TUNE def_bool TUNE_Z10 || MARCH_Z10 && TUNE_DEFAULT @@ -396,21 +346,8 @@ config TUNE_DEFAULT Tune the generated code for the target processor for which the kernel will be compiled. -config TUNE_Z900 - bool "IBM zSeries model z800 and z900" - depends on $(cc-option,-mtune=z900) - -config TUNE_Z990 - bool "IBM zSeries model z890 and z990" - depends on $(cc-option,-mtune=z990) - -config TUNE_Z9_109 - bool "IBM System z9" - depends on $(cc-option,-mtune=z9-109) - config TUNE_Z10 bool "IBM System z10" - depends on $(cc-option,-mtune=z10) config TUNE_Z196 bool "IBM zEnterprise 114 and 196" @@ -599,7 +536,6 @@ config EXPOLINE config EXPOLINE_EXTERN def_bool n depends on EXPOLINE - depends on HAVE_MARCH_Z10_FEATURES depends on CC_IS_GCC && GCC_VERSION >= 110200 depends on $(success,$(srctree)/arch/s390/tools/gcc-thunk-extern.sh $(CC)) prompt "Generate expolines as extern functions." diff --git a/arch/s390/Makefile b/arch/s390/Makefile index bd75128b7d79..7a65bca1e5af 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -36,9 +36,6 @@ CHECKFLAGS += -D__s390__ -D__s390x__ export LD_BFD -mflags-$(CONFIG_MARCH_Z900) := -march=z900 -mflags-$(CONFIG_MARCH_Z990) := -march=z990 -mflags-$(CONFIG_MARCH_Z9_109) := -march=z9-109 mflags-$(CONFIG_MARCH_Z10) := -march=z10 mflags-$(CONFIG_MARCH_Z196) := -march=z196 mflags-$(CONFIG_MARCH_ZEC12) := -march=zEC12 @@ -51,9 +48,6 @@ export CC_FLAGS_MARCH := $(mflags-y) aflags-y += $(mflags-y) cflags-y += $(mflags-y) -cflags-$(CONFIG_MARCH_Z900_TUNE) += -mtune=z900 -cflags-$(CONFIG_MARCH_Z990_TUNE) += -mtune=z990 -cflags-$(CONFIG_MARCH_Z9_109_TUNE) += -mtune=z9-109 cflags-$(CONFIG_MARCH_Z10_TUNE) += -mtune=z10 cflags-$(CONFIG_MARCH_Z196_TUNE) += -mtune=z196 cflags-$(CONFIG_MARCH_ZEC12_TUNE) += -mtune=zEC12 diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 1d40630128a5..191dc7898b0f 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -256,8 +256,6 @@ static inline bool test_bit_inv(unsigned long nr, return test_bit(nr ^ (BITS_PER_LONG - 1), ptr); } -#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES - /** * __flogr - find leftmost one * @word - The word to search @@ -376,16 +374,6 @@ static inline int fls(unsigned int word) return fls64(word); } -#else /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */ - -#include -#include -#include -#include -#include - -#endif /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */ - #include #include #include diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 9829d6b44a20..56002aeacabf 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -163,11 +163,9 @@ struct lowcore { __u64 gmap; /* 0x03d0 */ __u8 pad_0x03d8[0x0400-0x03d8]; /* 0x03d8 */ - /* br %r1 trampoline */ - __u16 br_r1_trampoline; /* 0x0400 */ - __u32 return_lpswe; /* 0x0402 */ - __u32 return_mcck_lpswe; /* 0x0406 */ - __u8 pad_0x040a[0x0e00-0x040a]; /* 0x040a */ + __u32 return_lpswe; /* 0x0400 */ + __u32 return_mcck_lpswe; /* 0x0404 */ + __u8 pad_0x040a[0x0e00-0x0408]; /* 0x0408 */ /* * 0xe00 contains the address of the IPL Parameter Information diff --git a/arch/s390/include/asm/nospec-insn.h 
b/arch/s390/include/asm/nospec-insn.h index c670c1dd1293..62a99095f380 100644 --- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -10,8 +10,6 @@ #ifdef CC_USING_EXPOLINE -_LC_BR_R1 = __LC_BR_R1 - /* * The expoline macros are used to create thunks in the same format * as gcc generates them. The 'comdat' section flag makes sure that @@ -39,7 +37,6 @@ _LC_BR_R1 = __LC_BR_R1 .popsection .endm -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES .macro __THUNK_PROLOG_BR r1,r2 __THUNK_PROLOG_NAME __s390_indirect_jump_r\r1 .endm @@ -55,23 +52,6 @@ _LC_BR_R1 = __LC_BR_R1 .macro __THUNK_BRASL r1,r2,r3 brasl \r1,__s390_indirect_jump_r\r2 .endm -#else - .macro __THUNK_PROLOG_BR r1,r2 - __THUNK_PROLOG_NAME __s390_indirect_jump_r\r2\()use_r\r1 - .endm - - .macro __THUNK_EPILOG_BR r1,r2 - __THUNK_EPILOG_NAME __s390_indirect_jump_r\r2\()use_r\r1 - .endm - - .macro __THUNK_BR r1,r2 - jg __s390_indirect_jump_r\r2\()use_r\r1 - .endm - - .macro __THUNK_BRASL r1,r2,r3 - brasl \r1,__s390_indirect_jump_r\r3\()use_r\r2 - .endm -#endif .macro __DECODE_RR expand,reg,ruse .set __decode_fail,1 @@ -112,22 +92,8 @@ _LC_BR_R1 = __LC_BR_R1 .endm .macro __THUNK_EX_BR reg,ruse - # Be very careful when adding instructions to this macro! - # The ALTERNATIVE replacement code has a .+10 which targets - # the "br \reg" after the code has been patched. -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES exrl 0,555f j . -#else - .ifc \reg,%r1 - ALTERNATIVE "ex %r0,_LC_BR_R1", ".insn ril,0xc60000000000,0,.+10", 35 - j . - .else - larl \ruse,555f - ex 0,0(\ruse) - j . - .endif -#endif 555: br \reg .endm diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 50d9b04ecbd1..ca9a8ab1261a 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -187,14 +187,10 @@ static inline unsigned long get_tod_clock(void) static inline unsigned long get_tod_clock_fast(void) { -#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES unsigned long clk; asm volatile("stckf %0" : "=Q" (clk) : : "cc"); return clk; -#else - return get_tod_clock(); -#endif } static inline cycles_t get_cycles(void) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 70916e26777d..e8b4b1a7ec82 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -92,8 +92,6 @@ union oac { }; }; -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES - #define __put_get_user_asm(to, from, size, oac_spec) \ ({ \ int __rc; \ @@ -187,22 +185,6 @@ static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsign return rc; } -#else /* CONFIG_HAVE_MARCH_Z10_FEATURES */ - -static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) -{ - size = raw_copy_to_user(ptr, x, size); - return size ? -EFAULT : 0; -} - -static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) -{ - size = raw_copy_from_user(x, ptr, size); - return size ? -EFAULT : 0; -} - -#endif /* CONFIG_HAVE_MARCH_Z10_FEATURES */ - /* * These are the main single-value transfer routines. They automatically * use the right size if we just have the right pointer type. 
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index c253d3faf443..7c74f0e17e5a 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -122,7 +122,6 @@ int main(void) OFFSET(__LC_LPP, lowcore, lpp); OFFSET(__LC_CURRENT_PID, lowcore, current_pid); OFFSET(__LC_GMAP, lowcore, gmap); - OFFSET(__LC_BR_R1, lowcore, br_r1_trampoline); OFFSET(__LC_LAST_BREAK, lowcore, last_break); /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ OFFSET(__LC_DUMP_REIPL, lowcore, ipib); diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c index 8a9c3bf69f48..7ee3651d00ab 100644 --- a/arch/s390/kernel/cache.c +++ b/arch/s390/kernel/cache.c @@ -70,8 +70,6 @@ void show_cacheinfo(struct seq_file *m) struct cacheinfo *cache; int idx; - if (!test_facility(34)) - return; this_cpu_ci = get_cpu_cacheinfo(cpumask_any(cpu_online_mask)); for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) { cache = this_cpu_ci->info_list + idx; @@ -131,8 +129,6 @@ int init_cache_level(unsigned int cpu) union cache_topology ct; enum cache_type ctype; - if (!test_facility(34)) - return -EOPNOTSUPP; if (!this_cpu_ci) return -EINVAL; ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0); @@ -156,8 +152,6 @@ int populate_cache_leaves(unsigned int cpu) union cache_topology ct; enum cache_type ctype; - if (!test_facility(34)) - return -EOPNOTSUPP; ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0); for (idx = 0, level = 0; level < this_cpu_ci->num_levels && idx < this_cpu_ci->num_leaves; idx++, level++) { diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index d8b96c5923c0..5e1f7bc00a25 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -58,15 +58,6 @@ asm( ); #ifdef CONFIG_EXPOLINE -asm( - " .align 16\n" - "ftrace_shared_hotpatch_trampoline_ex:\n" - " lmg %r0,%r1,2(%r1)\n" - " ex %r0," __stringify(__LC_BR_R1) "(%r0)\n" - " j .\n" - "ftrace_shared_hotpatch_trampoline_ex_end:\n" -); - asm( " .align 16\n" "ftrace_shared_hotpatch_trampoline_exrl:\n" @@ -90,12 +81,8 @@ static const char *ftrace_shared_hotpatch_trampoline(const char **end) tend = ftrace_shared_hotpatch_trampoline_br_end; #ifdef CONFIG_EXPOLINE if (!nospec_disable) { - tstart = ftrace_shared_hotpatch_trampoline_ex; - tend = ftrace_shared_hotpatch_trampoline_ex_end; - if (test_facility(35)) { /* exrl */ - tstart = ftrace_shared_hotpatch_trampoline_exrl; - tend = ftrace_shared_hotpatch_trampoline_exrl_end; - } + tstart = ftrace_shared_hotpatch_trampoline_exrl; + tend = ftrace_shared_hotpatch_trampoline_exrl_end; } #endif /* CONFIG_EXPOLINE */ if (end) diff --git a/arch/s390/kernel/ftrace.h b/arch/s390/kernel/ftrace.h index 69e416f4c6b0..7f75a9616406 100644 --- a/arch/s390/kernel/ftrace.h +++ b/arch/s390/kernel/ftrace.h @@ -16,8 +16,6 @@ extern struct ftrace_hotpatch_trampoline __ftrace_hotpatch_trampolines_start[]; extern struct ftrace_hotpatch_trampoline __ftrace_hotpatch_trampolines_end[]; extern const char ftrace_shared_hotpatch_trampoline_br[]; extern const char ftrace_shared_hotpatch_trampoline_br_end[]; -extern const char ftrace_shared_hotpatch_trampoline_ex[]; -extern const char ftrace_shared_hotpatch_trampoline_ex_end[]; extern const char ftrace_shared_hotpatch_trampoline_exrl[]; extern const char ftrace_shared_hotpatch_trampoline_exrl_end[]; extern const char ftrace_plt_template[]; diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 6ace43d7e8d2..1326927a17b5 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -35,14 +35,8 @@ 
ENDPROC(ftrace_stub) .if \allregs == 1 # save psw mask # don't put any instructions clobbering CC before this point -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES epsw %r1,%r14 risbg %r14,%r1,0,31,32 -#else - epsw %r14,%r1 - sllg %r14,%r14,32 - lr %r14,%r1 -#endif .endif lgr %r1,%r15 @@ -58,12 +52,7 @@ ENDPROC(ftrace_stub) .if \allregs == 1 stg %r14,(STACK_PTREGS_PSW)(%r15) -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES mvghi STACK_PTREGS_FLAGS(%r15),_PIF_FTRACE_FULL_REGS -#else - lghi %r14,_PIF_FTRACE_FULL_REGS - stg %r14,STACK_PTREGS_FLAGS(%r15) -#endif .else xc STACK_PTREGS_FLAGS(8,%r15),STACK_PTREGS_FLAGS(%r15) .endif diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index b032e556eeb7..c0dd72db77b8 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -517,15 +517,9 @@ int module_finalize(const Elf_Ehdr *hdr, ij = me->core_layout.base + me->arch.plt_offset + me->arch.plt_size - PLT_ENTRY_SIZE; - if (test_facility(35)) { - ij[0] = 0xc6000000; /* exrl %r0,.+10 */ - ij[1] = 0x0005a7f4; /* j . */ - ij[2] = 0x000007f1; /* br %r1 */ - } else { - ij[0] = 0x44000000 | (unsigned int) - offsetof(struct lowcore, br_r1_trampoline); - ij[1] = 0xa7f40000; /* j . */ - } + ij[0] = 0xc6000000; /* exrl %r0,.+10 */ + ij[1] = 0x0005a7f4; /* j . */ + ij[2] = 0x000007f1; /* br %r1 */ } secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index c302e0a7d38f..717bbcc056e5 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -118,12 +118,6 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end) if (thunk[0] == 0xc6 && thunk[1] == 0x00) /* exrl %r0, */ br = thunk + (*(int *)(thunk + 2)) * 2; - else if (thunk[0] == 0xc0 && (thunk[1] & 0x0f) == 0x00 && - thunk[6] == 0x44 && thunk[7] == 0x00 && - (thunk[8] & 0x0f) == 0x00 && thunk[9] == 0x00 && - (thunk[1] & 0xf0) == (thunk[8] & 0xf0)) - /* larl %rx, + ex %r0,0(%rx) */ - br = thunk + (*(int *)(thunk + 2)) * 2; else continue; if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0) diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index ee8707abdb6a..483ab5e10164 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -1451,6 +1451,8 @@ static size_t cfdiag_maxsize(struct cpumf_ctr_info *info) /* Get the CPU speed, try sampling facility first and CPU attributes second. */ static void cfdiag_get_cpu_speed(void) { + unsigned long mhz; + if (cpum_sf_avail()) { /* Sampling facility first */ struct hws_qsi_info_block si; @@ -1464,12 +1466,9 @@ static void cfdiag_get_cpu_speed(void) /* Fallback: CPU speed extract static part. Used in case * CPU Measurement Sampling Facility is turned off. 
*/ - if (test_facility(34)) { - unsigned long mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); - - if (mhz != -1UL) - cfdiag_cpu_speed = mhz & 0xffffffff; - } + mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); + if (mhz != -1UL) + cfdiag_cpu_speed = mhz & 0xffffffff; } static int cfset_init(void) diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index d9d4a806979e..7a74ea5f7531 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -172,8 +172,7 @@ static void show_cpu_summary(struct seq_file *m, void *v) static int __init setup_hwcaps(void) { /* instructions named N3, "backported" to esa-mode */ - if (test_facility(0)) - elf_hwcap |= HWCAP_ESAN3; + elf_hwcap |= HWCAP_ESAN3; /* z/Architecture mode active */ elf_hwcap |= HWCAP_ZARCH; @@ -191,8 +190,7 @@ static int __init setup_hwcaps(void) elf_hwcap |= HWCAP_LDISP; /* extended-immediate */ - if (test_facility(21)) - elf_hwcap |= HWCAP_EIMM; + elf_hwcap |= HWCAP_EIMM; /* extended-translation facility 3 enhancement */ if (test_facility(22) && test_facility(30)) @@ -262,21 +260,7 @@ static int __init setup_elf_platform(void) get_cpu_id(&cpu_id); add_device_randomness(&cpu_id, sizeof(cpu_id)); switch (cpu_id.machine) { - case 0x2064: - case 0x2066: - default: /* Use "z900" as default for 64 bit kernels. */ - strcpy(elf_platform, "z900"); - break; - case 0x2084: - case 0x2086: - strcpy(elf_platform, "z990"); - break; - case 0x2094: - case 0x2096: - strcpy(elf_platform, "z9-109"); - break; - case 0x2097: - case 0x2098: + default: /* Use "z10" as default. */ strcpy(elf_platform, "z10"); break; case 0x2817: diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 05327be3a982..84e23fcc1106 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -490,7 +490,6 @@ static void __init setup_lowcore_dat_off(void) lc->spinlock_lockval = arch_spin_lockval(0); lc->spinlock_index = 0; arch_spin_lock_setup(0); - lc->br_r1_trampoline = 0x07f1; /* br %r1 */ lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); lc->preempt_count = PREEMPT_DISABLED; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 368b58e4c2e7..127da1850b06 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -207,7 +207,6 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) lc->cpu_nr = cpu; lc->spinlock_lockval = arch_spin_lockval(cpu); lc->spinlock_index = 0; - lc->br_r1_trampoline = 0x07f1; /* br %r1 */ lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); lc->preempt_count = PREEMPT_DISABLED; diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c index bd3ef121c379..b88345ef8bd9 100644 --- a/arch/s390/kernel/uprobes.c +++ b/arch/s390/kernel/uprobes.c @@ -177,9 +177,7 @@ static void adjust_psw_addr(psw_t *psw, unsigned long len) __typeof__(*(ptr)) input; \ int __rc = 0; \ \ - if (!test_facility(34)) \ - __rc = EMU_ILLEGAL_OP; \ - else if ((u64 __force)ptr & mask) \ + if ((u64 __force)ptr & mask) \ __rc = EMU_SPECIFICATION; \ else if (get_user(input, ptr)) \ __rc = EMU_ADDRESSING; \ @@ -194,9 +192,7 @@ static void adjust_psw_addr(psw_t *psw, unsigned long len) __typeof__(ptr) __ptr = (ptr); \ int __rc = 0; \ \ - if (!test_facility(34)) \ - __rc = EMU_ILLEGAL_OP; \ - else if ((u64 __force)__ptr & mask) \ + if ((u64 __force)__ptr & mask) \ __rc = EMU_SPECIFICATION; \ else if (put_user(*(input), __ptr)) \ __rc = EMU_ADDRESSING; \ @@ -213,9 +209,7 @@ static void adjust_psw_addr(psw_t *psw, 
unsigned long len) __typeof__(*(ptr)) input; \ int __rc = 0; \ \ - if (!test_facility(34)) \ - __rc = EMU_ILLEGAL_OP; \ - else if ((u64 __force)ptr & mask) \ + if ((u64 __force)ptr & mask) \ __rc = EMU_SPECIFICATION; \ else if (get_user(input, ptr)) \ __rc = EMU_ADDRESSING; \ @@ -327,10 +321,6 @@ static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs) break; case 0xc6: switch (insn->opc1) { - case 0x02: /* pfdrl */ - if (!test_facility(34)) - rc = EMU_ILLEGAL_OP; - break; case 0x04: /* cghrl */ rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s64); break; diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index fe7803c653a2..a37f6fdc01e8 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -8,14 +8,10 @@ * Gerald Schaefer (gerald.schaefer@de.ibm.com) */ -#include #include #include -#include #include #include -#include -#include #ifdef CONFIG_DEBUG_ENTRY void debug_user_asce(int exit) @@ -35,32 +31,8 @@ void debug_user_asce(int exit) } #endif /*CONFIG_DEBUG_ENTRY */ -#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES -static DEFINE_STATIC_KEY_FALSE(have_mvcos); - -static int __init uaccess_init(void) -{ - if (test_facility(27)) - static_branch_enable(&have_mvcos); - return 0; -} -early_initcall(uaccess_init); - -static inline int copy_with_mvcos(void) -{ - if (static_branch_likely(&have_mvcos)) - return 1; - return 0; -} -#else -static inline int copy_with_mvcos(void) -{ - return 1; -} -#endif - -static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr, - unsigned long size, unsigned long key) +static unsigned long raw_copy_from_user_key(void *to, const void __user *from, + unsigned long size, unsigned long key) { unsigned long tmp1, tmp2; union oac spec = { @@ -90,55 +62,12 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr "4: slgr %0,%0\n" "5:\n" EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b) - : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) + : "+a" (size), "+a" (from), "+a" (to), "+a" (tmp1), "=a" (tmp2) : [spec] "d" (spec.val) : "cc", "memory", "0"); return size; } -static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, - unsigned long size, unsigned long key) -{ - unsigned long tmp1, tmp2; - - tmp1 = -256UL; - asm volatile( - " sacf 0\n" - "0: mvcp 0(%0,%2),0(%1),%[key]\n" - "7: jz 5f\n" - "1: algr %0,%3\n" - " la %1,256(%1)\n" - " la %2,256(%2)\n" - "2: mvcp 0(%0,%2),0(%1),%[key]\n" - "8: jnz 1b\n" - " j 5f\n" - "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ - " lghi %3,-4096\n" - " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */ - " slgr %4,%1\n" - " clgr %0,%4\n" /* copy crosses next page boundary? 
*/ - " jnh 6f\n" - "4: mvcp 0(%4,%2),0(%1),%[key]\n" - "9: slgr %0,%4\n" - " j 6f\n" - "5: slgr %0,%0\n" - "6: sacf 768\n" - EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b) - EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) - : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : [key] "d" (key << 4) - : "cc", "memory"); - return size; -} - -static unsigned long raw_copy_from_user_key(void *to, const void __user *from, - unsigned long n, unsigned long key) -{ - if (copy_with_mvcos()) - return copy_from_user_mvcos(to, from, n, key); - return copy_from_user_mvcp(to, from, n, key); -} - unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { return raw_copy_from_user_key(to, from, n, 0); @@ -161,8 +90,8 @@ unsigned long _copy_from_user_key(void *to, const void __user *from, } EXPORT_SYMBOL(_copy_from_user_key); -static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, - unsigned long size, unsigned long key) +static unsigned long raw_copy_to_user_key(void __user *to, const void *from, + unsigned long size, unsigned long key) { unsigned long tmp1, tmp2; union oac spec = { @@ -192,55 +121,12 @@ static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, "4: slgr %0,%0\n" "5:\n" EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b) - : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) + : "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2) : [spec] "d" (spec.val) : "cc", "memory", "0"); return size; } -static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, - unsigned long size, unsigned long key) -{ - unsigned long tmp1, tmp2; - - tmp1 = -256UL; - asm volatile( - " sacf 0\n" - "0: mvcs 0(%0,%1),0(%2),%[key]\n" - "7: jz 5f\n" - "1: algr %0,%3\n" - " la %1,256(%1)\n" - " la %2,256(%2)\n" - "2: mvcs 0(%0,%1),0(%2),%[key]\n" - "8: jnz 1b\n" - " j 5f\n" - "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ - " lghi %3,-4096\n" - " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */ - " slgr %4,%1\n" - " clgr %0,%4\n" /* copy crosses next page boundary? 
*/ - " jnh 6f\n" - "4: mvcs 0(%4,%1),0(%2),%[key]\n" - "9: slgr %0,%4\n" - " j 6f\n" - "5: slgr %0,%0\n" - "6: sacf 768\n" - EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b) - EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) - : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : [key] "d" (key << 4) - : "cc", "memory"); - return size; -} - -static unsigned long raw_copy_to_user_key(void __user *to, const void *from, - unsigned long n, unsigned long key) -{ - if (copy_with_mvcos()) - return copy_to_user_mvcos(to, from, n, key); - return copy_to_user_mvcs(to, from, n, key); -} - unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) { return raw_copy_to_user_key(to, from, n, 0); @@ -258,7 +144,7 @@ unsigned long _copy_to_user_key(void __user *to, const void *from, } EXPORT_SYMBOL(_copy_to_user_key); -static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size) +unsigned long __clear_user(void __user *to, unsigned long size) { unsigned long tmp1, tmp2; union oac spec = { @@ -290,46 +176,4 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size : "cc", "memory", "0"); return size; } - -static inline unsigned long clear_user_xc(void __user *to, unsigned long size) -{ - unsigned long tmp1, tmp2; - - asm volatile( - " sacf 256\n" - " aghi %0,-1\n" - " jo 5f\n" - " bras %3,3f\n" - " xc 0(1,%1),0(%1)\n" - "0: aghi %0,257\n" - " la %2,255(%1)\n" /* %2 = ptr + 255 */ - " srl %2,12\n" - " sll %2,12\n" /* %2 = (ptr + 255) & -4096 */ - " slgr %2,%1\n" - " clgr %0,%2\n" /* clear crosses next page boundary? */ - " jnh 5f\n" - " aghi %2,-1\n" - "1: ex %2,0(%3)\n" - " aghi %2,1\n" - " slgr %0,%2\n" - " j 5f\n" - "2: xc 0(256,%1),0(%1)\n" - " la %1,256(%1)\n" - "3: aghi %0,-256\n" - " jnm 2b\n" - "4: ex %0,0(%3)\n" - "5: slgr %0,%0\n" - "6: sacf 768\n" - EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) - : "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2) - : : "cc", "memory"); - return size; -} - -unsigned long __clear_user(void __user *to, unsigned long size) -{ - if (copy_with_mvcos()) - return clear_user_mvcos(to, size); - return clear_user_xc(to, size); -} EXPORT_SYMBOL(__clear_user); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 72c525ce7221..c2583f921ca8 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -584,13 +584,9 @@ void __init vmem_map_init(void) __set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT, SET_MEMORY_RO | SET_MEMORY_X); - if (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) { - /* - * Lowcore must be executable for LPSWE - * and expoline trampoline branch instructions. 
- */ + /* lowcore must be executable for LPSWE */ + if (!static_key_enabled(&cpu_has_bear)) set_memory_x(0, 1); - } pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index e1e57a30ac66..aede9a3ca3f7 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -570,15 +570,8 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) if (nospec_uses_trampoline()) { jit->r14_thunk_ip = jit->prg; /* Generate __s390_indirect_jump_r14 thunk */ - if (test_facility(35)) { - /* exrl %r0,.+10 */ - EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); - } else { - /* larl %r1,.+14 */ - EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14); - /* ex 0,0(%r1) */ - EMIT4_DISP(0x44000000, REG_0, REG_1, 0); - } + /* exrl %r0,.+10 */ + EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); /* j . */ EMIT4_PCREL(0xa7f40000, 0); } @@ -589,20 +582,12 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) (is_first_pass(jit) || (jit->seen & SEEN_FUNC))) { jit->r1_thunk_ip = jit->prg; /* Generate __s390_indirect_jump_r1 thunk */ - if (test_facility(35)) { - /* exrl %r0,.+10 */ - EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); - /* j . */ - EMIT4_PCREL(0xa7f40000, 0); - /* br %r1 */ - _EMIT2(0x07f1); - } else { - /* ex 0,S390_lowcore.br_r1_tampoline */ - EMIT4_DISP(0x44000000, REG_0, REG_0, - offsetof(struct lowcore, br_r1_trampoline)); - /* j . */ - EMIT4_PCREL(0xa7f40000, 0); - } + /* exrl %r0,.+10 */ + EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); + /* j . */ + EMIT4_PCREL(0xa7f40000, 0); + /* br %r1 */ + _EMIT2(0x07f1); } } diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c index 606324e56e4e..530dd941d140 100644 --- a/arch/s390/tools/gen_facilities.c +++ b/arch/s390/tools/gen_facilities.c @@ -27,24 +27,16 @@ static struct facility_def facility_defs[] = { */ .name = "FACILITIES_ALS", .bits = (int[]){ -#ifdef CONFIG_HAVE_MARCH_Z900_FEATURES 0, /* N3 instructions */ 1, /* z/Arch mode installed */ -#endif -#ifdef CONFIG_HAVE_MARCH_Z990_FEATURES 18, /* long displacement facility */ -#endif -#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES 21, /* extended-immediate facility */ 25, /* store clock fast */ -#endif -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES 27, /* mvcos */ 32, /* compare and swap and store */ 33, /* compare and swap and store 2 */ 34, /* general instructions extension */ 35, /* execute extensions */ -#endif #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES 45, /* fast-BCR, etc. */ #endif From bedc96698f482ac5904998b16ba7a800ec71e220 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 25 Feb 2022 10:18:14 +0100 Subject: [PATCH 65/69] s390/nospec: move to single register thunks Assembler generated expoline thunks were in a form __s390_indirect_jump_rXuse_rX when exrl instruction has not been available. Now with z10 as minimum supported machine generation there is no need for 2 register thunks, always generate __s390_indirect_jump_rX versions. 
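For illustration only, here is a rough sketch of what a single-register thunk now expands to, following the __THUNK_PROLOG_BR/__THUNK_EX_BR macros in the hunk below. The label name is a placeholder and the section/alignment directives emitted by __THUNK_PROLOG_NAME are omitted:

	/*
	 * Sketch of the expoline thunk generated for %r14 after this
	 * patch (illustrative label, not the real symbol).
	 */
	asm(
		"__s390_indirect_jump_r14_sketch:\n"
		"	exrl	0,0f\n"	/* execute the br at 0: out of line */
		"	j	.\n"	/* speculation trap, never reached architecturally */
		"0:	br	%r14\n"	/* the actual indirect branch via %r14 */
	);

The second register that the old __s390_indirect_jump_rXuse_rX variants needed as a scratch base for the larl/ex sequence is gone, which is what allows the macros below to drop their ruse argument.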
Acked-by: Heiko Carstens Acked-by: Sumanth Korikkar Acked-by: Ilya Leoshkevich Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/nospec-insn.h | 74 +++++++++++++---------------- 1 file changed, 33 insertions(+), 41 deletions(-) diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h index 62a99095f380..2cfcd5ac3a8b 100644 --- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -37,28 +37,41 @@ .popsection .endm - .macro __THUNK_PROLOG_BR r1,r2 + .macro __THUNK_PROLOG_BR r1 __THUNK_PROLOG_NAME __s390_indirect_jump_r\r1 .endm - .macro __THUNK_EPILOG_BR r1,r2 + .macro __THUNK_EPILOG_BR r1 __THUNK_EPILOG_NAME __s390_indirect_jump_r\r1 .endm - .macro __THUNK_BR r1,r2 + .macro __THUNK_BR r1 jg __s390_indirect_jump_r\r1 .endm - .macro __THUNK_BRASL r1,r2,r3 + .macro __THUNK_BRASL r1,r2 brasl \r1,__s390_indirect_jump_r\r2 .endm - .macro __DECODE_RR expand,reg,ruse + .macro __DECODE_R expand,reg .set __decode_fail,1 .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 .ifc \reg,%r\r1 + \expand \r1 + .set __decode_fail,0 + .endif + .endr + .if __decode_fail == 1 + .error "__DECODE_R failed" + .endif + .endm + + .macro __DECODE_RR expand,rsave,rtarget + .set __decode_fail,1 + .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \rsave,%r\r1 .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 - .ifc \ruse,%r\r2 + .ifc \rtarget,%r\r2 \expand \r1,\r2 .set __decode_fail,0 .endif @@ -70,68 +83,47 @@ .endif .endm - .macro __DECODE_RRR expand,rsave,rtarget,ruse - .set __decode_fail,1 - .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 - .ifc \rsave,%r\r1 - .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 - .ifc \rtarget,%r\r2 - .irp r3,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 - .ifc \ruse,%r\r3 - \expand \r1,\r2,\r3 - .set __decode_fail,0 - .endif - .endr - .endif - .endr - .endif - .endr - .if __decode_fail == 1 - .error "__DECODE_RRR failed" - .endif - .endm - - .macro __THUNK_EX_BR reg,ruse + .macro __THUNK_EX_BR reg exrl 0,555f j . 555: br \reg .endm #ifdef CONFIG_EXPOLINE_EXTERN - .macro GEN_BR_THUNK reg,ruse=%r1 + .macro GEN_BR_THUNK reg .endm - .macro GEN_BR_THUNK_EXTERN reg,ruse=%r1 + .macro GEN_BR_THUNK_EXTERN reg #else - .macro GEN_BR_THUNK reg,ruse=%r1 + .macro GEN_BR_THUNK reg #endif - __DECODE_RR __THUNK_PROLOG_BR,\reg,\ruse - __THUNK_EX_BR \reg,\ruse - __DECODE_RR __THUNK_EPILOG_BR,\reg,\ruse + __DECODE_R __THUNK_PROLOG_BR,\reg + __THUNK_EX_BR \reg + __DECODE_R __THUNK_EPILOG_BR,\reg .endm - .macro BR_EX reg,ruse=%r1 -557: __DECODE_RR __THUNK_BR,\reg,\ruse + .macro BR_EX reg +557: __DECODE_R __THUNK_BR,\reg .pushsection .s390_indirect_branches,"a",@progbits .long 557b-. .popsection .endm - .macro BASR_EX rsave,rtarget,ruse=%r1 -559: __DECODE_RRR __THUNK_BRASL,\rsave,\rtarget,\ruse + .macro BASR_EX rsave,rtarget +559: __DECODE_RR __THUNK_BRASL,\rsave,\rtarget .pushsection .s390_indirect_branches,"a",@progbits .long 559b-. .popsection .endm #else - .macro GEN_BR_THUNK reg,ruse=%r1 + .macro GEN_BR_THUNK reg .endm - .macro BR_EX reg,ruse=%r1 + .macro BR_EX reg br \reg .endm - .macro BASR_EX rsave,rtarget,ruse=%r1 + .macro BASR_EX rsave,rtarget basr \rsave,\rtarget .endm #endif /* CC_USING_EXPOLINE */ From 10bc15ba3a853723267016274b58be6c9a6e64da Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 25 Feb 2022 10:38:23 +0100 Subject: [PATCH 66/69] s390: assume stckf is always present With z10 as minimum supported machine generation the store-clock-fast facility (25) is always present and checked in als code. Drop alternatives and always use stckf. 
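As a minimal sketch of the pattern the code switches to here (simplified, with a placeholder function name; not the exact in-tree timex helper), reading the TOD clock no longer needs an alternative and becomes a plain stckf:

	static inline unsigned long tod_clock_fast(void)
	{
		unsigned long clk;

		/* STORE CLOCK FAST: stores an 8-byte TOD value, clobbers CC */
		asm volatile("stckf %0" : "=Q" (clk) : : "cc");
		return clk;
	}

The same reasoning removes the STCK macro from entry.S below: the stckf mnemonic is used directly instead of patching between the two ".insn s" encodings at runtime.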
Acked-by: Ilya Leoshkevich Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 11 +++-------- arch/s390/kernel/vtime.c | 13 ++++++------- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 3781de26f207..8f15e418968a 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -98,11 +98,6 @@ _LPP_OFFSET = __LC_LPP #endif .endm - .macro STCK savearea - ALTERNATIVE ".insn s,0xb2050000,\savearea", \ - ".insn s,0xb27c0000,\savearea", 25 - .endm - /* * The TSTMSK macro generates a test-under-mask instruction by * calculating the memory offset for the specified mask value. @@ -442,7 +437,7 @@ ENDPROC(pgm_check_handler) */ .macro INT_HANDLER name,lc_old_psw,handler ENTRY(\name) - STCK __LC_INT_CLOCK + stckf __LC_INT_CLOCK stpt __LC_SYS_ENTER_TIMER STBEAR __LC_LAST_BREAK BPOFF @@ -514,7 +509,7 @@ ENTRY(psw_idle) .Lpsw_idle_stcctm: oi __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT BPON - STCK __CLOCK_IDLE_ENTER(%r2) + stckf __CLOCK_IDLE_ENTER(%r2) stpt __TIMER_IDLE_ENTER(%r2) lpswe __SF_EMPTY(%r15) .globl psw_idle_exit @@ -526,7 +521,7 @@ ENDPROC(psw_idle) * Machine check handler routines */ ENTRY(mcck_int_handler) - STCK __LC_MCCK_CLOCK + stckf __LC_MCCK_CLOCK BPOFF la %r1,4095 # validate r1 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # validate cpu timer diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index f216a1b2f825..9436f3053b88 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -128,13 +128,12 @@ static int do_account_vtime(struct task_struct *tsk) timer = S390_lowcore.last_update_timer; clock = S390_lowcore.last_update_clock; - /* Use STORE CLOCK by default, STORE CLOCK FAST if available. */ - alternative_io("stpt %0\n .insn s,0xb2050000,%1\n", - "stpt %0\n .insn s,0xb27c0000,%1\n", - 25, - ASM_OUTPUT2("=Q" (S390_lowcore.last_update_timer), - "=Q" (S390_lowcore.last_update_clock)), - ASM_NO_INPUT_CLOBBER("cc")); + asm volatile( + " stpt %0\n" /* Store current cpu timer value */ + " stckf %1" /* Store current tod clock value */ + : "=Q" (S390_lowcore.last_update_timer), + "=Q" (S390_lowcore.last_update_clock) + : : "cc"); clock = S390_lowcore.last_update_clock - clock; timer -= S390_lowcore.last_update_timer; From 731efc9613ee073c8944d0d56616d421cf906b0b Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 25 Feb 2022 10:39:02 +0100 Subject: [PATCH 67/69] s390: convert ".insn" encoding to instruction names With z10 as minimum supported machine generation many ".insn" encodings could be now converted to instruction names. There are couple of exceptions - stfle is used from the als code built for z900 and cannot be converted - few ".insn" directives encode unsupported instruction formats The generated code is identical before/after this change. 
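As an example of the purely mechanical nature of this conversion (taken from the processor.h hunk below; the two wrapper names are placeholders used only to show both spellings side by side):

	/* Before: ECAG emitted via a raw ".insn" encoding. */
	static inline unsigned long ecag_via_insn(unsigned int asi, unsigned char parm)
	{
		unsigned long val;

		asm volatile(".insn rsy,0xeb000000004c,%0,0,0(%1)"	/* ecag */
			     : "=d" (val) : "a" (asi << 8 | parm));
		return val;
	}

	/* After: the same instruction written with its mnemonic. */
	static inline unsigned long ecag_via_mnemonic(unsigned int asi, unsigned char parm)
	{
		unsigned long val;

		asm volatile("ecag %0,0,0(%1)" : "=d" (val) : "a" (asi << 8 | parm));
		return val;
	}

Both variants assemble to identical machine code; only the source readability changes.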
Acked-by: Ilya Leoshkevich Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/crypto/chacha-s390.S | 20 ++++++++++---------- arch/s390/include/asm/cpu_mf.h | 12 ++++++------ arch/s390/include/asm/pgtable.h | 16 ++++++++-------- arch/s390/include/asm/processor.h | 3 +-- arch/s390/include/asm/timex.h | 2 +- arch/s390/include/asm/tlbflush.h | 4 +--- arch/s390/kernel/entry.S | 4 ++-- arch/s390/kernel/ftrace.c | 2 +- arch/s390/lib/uaccess.c | 12 ++++++------ arch/s390/mm/gmap.c | 2 +- 10 files changed, 37 insertions(+), 40 deletions(-) diff --git a/arch/s390/crypto/chacha-s390.S b/arch/s390/crypto/chacha-s390.S index badf5c49717d..9b033622191c 100644 --- a/arch/s390/crypto/chacha-s390.S +++ b/arch/s390/crypto/chacha-s390.S @@ -312,7 +312,7 @@ ENTRY(chacha20_vx_4x) VPERM XC0,XC0,XC0,BEPERM VPERM XD0,XD0,XD0,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_4x VLM XT0,XT3,0,INP,0 @@ -339,7 +339,7 @@ ENTRY(chacha20_vx_4x) VPERM XC0,XC0,XC0,BEPERM VPERM XD0,XD0,XD0,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_4x VLM XT0,XT3,0,INP,0 @@ -366,7 +366,7 @@ ENTRY(chacha20_vx_4x) VPERM XC0,XC0,XC0,BEPERM VPERM XD0,XD0,XD0,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_4x VLM XT0,XT3,0,INP,0 @@ -472,7 +472,7 @@ ENDPROC(chacha20_vx_4x) #define T3 %v30 ENTRY(chacha20_vx) - .insn rilu,0xc20e00000000,LEN,256 # clgfi LEN,256 + clgfi LEN,256 jle chacha20_vx_4x stmg %r6,%r7,6*8(SP) @@ -725,7 +725,7 @@ ENTRY(chacha20_vx) VPERM C0,C0,C0,BEPERM VPERM D0,D0,D0,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_vx VAF D2,D2,T2 # +K[3]+2 @@ -754,7 +754,7 @@ ENTRY(chacha20_vx) VPERM C0,C1,C1,BEPERM VPERM D0,D1,D1,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_vx VLM A1,D1,0,INP,0 @@ -780,7 +780,7 @@ ENTRY(chacha20_vx) VPERM C0,C2,C2,BEPERM VPERM D0,D2,D2,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_vx VLM A1,D1,0,INP,0 @@ -807,7 +807,7 @@ ENTRY(chacha20_vx) VPERM C0,C3,C3,BEPERM VPERM D0,D3,D3,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_vx VAF D3,D2,T1 # K[3]+4 @@ -837,7 +837,7 @@ ENTRY(chacha20_vx) VPERM C0,C4,C4,BEPERM VPERM D0,D4,D4,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_vx VLM A1,D1,0,INP,0 @@ -864,7 +864,7 @@ ENTRY(chacha20_vx) VPERM C0,C5,C5,BEPERM VPERM D0,D5,D5,BEPERM - .insn rilu,0xc20e00000000,LEN,0x40 # clgfi LEN,0x40 + clgfi LEN,0x40 jl .Ltail_vx VLM A1,D1,0,INP,0 diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 78bb336600bf..feaba12dbecb 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -160,7 +160,7 @@ struct hws_trailer_entry { /* Load program parameter */ static inline void lpp(void *pp) { - asm volatile(".insn s,0xb2800000,0(%0)\n":: "a" (pp) : "memory"); + asm volatile("lpp 0(%0)\n" :: "a" (pp) : "memory"); } /* Query counter information */ @@ -169,7 +169,7 @@ static inline int qctri(struct cpumf_ctr_info *info) int rc = -EINVAL; asm volatile ( - "0: .insn s,0xb28e0000,%1\n" + "0: qctri %1\n" "1: lhi %0,0\n" "2:\n" EX_TABLE(1b, 2b) @@ -183,7 +183,7 @@ static inline int lcctl(u64 ctl) int cc; asm volatile ( - " .insn s,0xb2840000,%1\n" + " lcctl %1\n" " ipm %0\n" " srl %0,28\n" : "=d" (cc) : "Q" (ctl) : "cc"); @@ -197,7 +197,7 @@ static inline int __ecctr(u64 ctr, u64 *content) int cc; 
asm volatile ( - " .insn rre,0xb2e40000,%0,%2\n" + " ecctr %0,%2\n" " ipm %1\n" " srl %1,28\n" : "=d" (_content), "=d" (cc) : "d" (ctr) : "cc"); @@ -247,7 +247,7 @@ static inline int qsi(struct hws_qsi_info_block *info) int cc = 1; asm volatile( - "0: .insn s,0xb2860000,%1\n" + "0: qsi %1\n" "1: lhi %0,0\n" "2:\n" EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) @@ -262,7 +262,7 @@ static inline int lsctl(struct hws_lsctl_request_block *req) cc = 1; asm volatile( - "0: .insn s,0xb2870000,0(%1)\n" + "0: lsctl 0(%1)\n" "1: ipm %0\n" " srl %0,28\n" "2:\n" diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index a3f26e3aa8b5..9df679152620 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -600,7 +600,7 @@ static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new unsigned long address = (unsigned long)ptr | 1; asm volatile( - " .insn rre,0xb98a0000,%[r1],%[address]" + " cspg %[r1],%[address]" : [r1] "+&d" (r1.pair), "+m" (*ptr) : [address] "d" (address) : "cc"); @@ -1052,7 +1052,7 @@ static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep, if (__builtin_constant_p(opt) && opt == 0) { /* Invalidation + TLB flush for the pte */ asm volatile( - " .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]" + " ipte %[r1],%[r2],0,%[m4]" : "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address), [m4] "i" (local)); return; @@ -1061,7 +1061,7 @@ static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep, /* Invalidate ptes with options + TLB flush of the ptes */ opt = opt | (asce & _ASCE_ORIGIN); asm volatile( - " .insn rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]" + " ipte %[r1],%[r2],%[r3],%[m4]" : [r2] "+a" (address), [r3] "+a" (opt) : [r1] "a" (pto), [m4] "i" (local) : "memory"); } @@ -1074,7 +1074,7 @@ static __always_inline void __ptep_ipte_range(unsigned long address, int nr, /* Invalidate a range of ptes + TLB flush of the ptes */ do { asm volatile( - " .insn rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]" + " ipte %[r1],%[r2],%[r3],%[m4]" : [r2] "+a" (address), [r3] "+a" (nr) : [r1] "a" (pto), [m4] "i" (local) : "memory"); } while (nr != 255); @@ -1535,7 +1535,7 @@ static __always_inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp, if (__builtin_constant_p(opt) && opt == 0) { /* flush without guest asce */ asm volatile( - " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]" + " idte %[r1],0,%[r2],%[m4]" : "+m" (*pmdp) : [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK)), [m4] "i" (local) @@ -1543,7 +1543,7 @@ static __always_inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp, } else { /* flush with guest asce */ asm volatile( - " .insn rrf,0xb98e0000,%[r1],%[r2],%[r3],%[m4]" + " idte %[r1],%[r3],%[r2],%[m4]" : "+m" (*pmdp) : [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK) | opt), [r3] "a" (asce), [m4] "i" (local) @@ -1562,7 +1562,7 @@ static __always_inline void __pudp_idte(unsigned long addr, pud_t *pudp, if (__builtin_constant_p(opt) && opt == 0) { /* flush without guest asce */ asm volatile( - " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]" + " idte %[r1],0,%[r2],%[m4]" : "+m" (*pudp) : [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK)), [m4] "i" (local) @@ -1570,7 +1570,7 @@ static __always_inline void __pudp_idte(unsigned long addr, pud_t *pudp, } else { /* flush with guest asce */ asm volatile( - " .insn rrf,0xb98e0000,%[r1],%[r2],%[r3],%[m4]" + " idte %[r1],%[r3],%[r2],%[m4]" : "+m" (*pudp) : [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK) | opt), [r3] "a" (asce), [m4] "i" (local) diff --git a/arch/s390/include/asm/processor.h 
b/arch/s390/include/asm/processor.h index 022cf0925e56..84ec63145325 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -225,8 +225,7 @@ static inline unsigned long __ecag(unsigned int asi, unsigned char parm) { unsigned long val; - asm volatile(".insn rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */ - : "=d" (val) : "a" (asi << 8 | parm)); + asm volatile("ecag %0,0,0(%1)" : "=d" (val) : "a" (asi << 8 | parm)); return val; } diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index ca9a8ab1261a..2cfce42aa7fc 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -148,7 +148,7 @@ struct ptff_qui { asm volatile( \ " lgr 0,%[reg0]\n" \ " lgr 1,%[reg1]\n" \ - " .insn e,0x0104\n" \ + " ptff\n" \ " ipm %[rc]\n" \ " srl %[rc],28\n" \ : [rc] "=&d" (rc), "+m" (*(struct addrtype *)reg1) \ diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 6448bb5be10c..a6e2cd89b609 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -25,9 +25,7 @@ static inline void __tlb_flush_idte(unsigned long asce) if (MACHINE_HAS_TLB_GUEST) opt |= IDTE_GUEST_ASCE; /* Global TLB flush for the mm */ - asm volatile( - " .insn rrf,0xb98e0000,0,%0,%1,0" - : : "a" (opt), "a" (asce) : "cc"); + asm volatile("idte 0,%1,%0" : : "a" (opt), "a" (asce) : "cc"); } /* diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 8f15e418968a..a601a518b569 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -226,7 +226,7 @@ ENTRY(__switch_to) aghi %r3,__TASK_pid mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task - ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40 + ALTERNATIVE "", "lpp _LPP_OFFSET", 40 BR_EX %r14 ENDPROC(__switch_to) @@ -648,7 +648,7 @@ ENTRY(mcck_int_handler) ENDPROC(mcck_int_handler) ENTRY(restart_int_handler) - ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40 + ALTERNATIVE "", "lpp _LPP_OFFSET", 40 stg %r15,__LC_SAVE_AREA_RESTART TSTMSK __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4 jz 0f diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 5e1f7bc00a25..1852d46babb1 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -62,7 +62,7 @@ asm( " .align 16\n" "ftrace_shared_hotpatch_trampoline_exrl:\n" " lmg %r0,%r1,2(%r1)\n" - " .insn ril,0xc60000000000,%r0,0f\n" /* exrl */ + " exrl %r0,0f\n" " j .\n" "0: br %r1\n" "ftrace_shared_hotpatch_trampoline_exrl_end:\n" diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index a37f6fdc01e8..d7b3b193d108 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -45,7 +45,7 @@ static unsigned long raw_copy_from_user_key(void *to, const void __user *from, tmp1 = -4096UL; asm volatile( " lr 0,%[spec]\n" - "0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n" + "0: mvcos 0(%2),0(%1),%0\n" "6: jz 4f\n" "1: algr %0,%3\n" " slgr %1,%3\n" @@ -56,7 +56,7 @@ static unsigned long raw_copy_from_user_key(void *to, const void __user *from, " slgr %4,%1\n" " clgr %0,%4\n" /* copy crosses next page boundary? 
*/ " jnh 5f\n" - "3: .insn ss,0xc80000000000,0(%4,%2),0(%1),0\n" + "3: mvcos 0(%2),0(%1),%4\n" "7: slgr %0,%4\n" " j 5f\n" "4: slgr %0,%0\n" @@ -104,7 +104,7 @@ static unsigned long raw_copy_to_user_key(void __user *to, const void *from, tmp1 = -4096UL; asm volatile( " lr 0,%[spec]\n" - "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n" + "0: mvcos 0(%1),0(%2),%0\n" "6: jz 4f\n" "1: algr %0,%3\n" " slgr %1,%3\n" @@ -115,7 +115,7 @@ static unsigned long raw_copy_to_user_key(void __user *to, const void *from, " slgr %4,%1\n" " clgr %0,%4\n" /* copy crosses next page boundary? */ " jnh 5f\n" - "3: .insn ss,0xc80000000000,0(%4,%1),0(%2),0\n" + "3: mvcos 0(%1),0(%2),%4\n" "7: slgr %0,%4\n" " j 5f\n" "4: slgr %0,%0\n" @@ -155,7 +155,7 @@ unsigned long __clear_user(void __user *to, unsigned long size) tmp1 = -4096UL; asm volatile( " lr 0,%[spec]\n" - "0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n" + "0: mvcos 0(%1),0(%4),%0\n" " jz 4f\n" "1: algr %0,%2\n" " slgr %1,%2\n" @@ -165,7 +165,7 @@ unsigned long __clear_user(void __user *to, unsigned long size) " slgr %3,%1\n" " clgr %0,%3\n" /* copy crosses next page boundary? */ " jnh 5f\n" - "3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n" + "3: mvcos 0(%1),0(%4),%3\n" " slgr %0,%3\n" " j 5f\n" "4: slgr %0,%0\n" diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index a57224a4c141..af03cacf34ec 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -1278,7 +1278,7 @@ static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr, static inline void gmap_idte_one(unsigned long asce, unsigned long vaddr) { asm volatile( - " .insn rrf,0xb98e0000,%0,%1,0,0" + " idte %0,0,%1" : : "a" (asce), "a" (vaddr) : "cc", "memory"); } From 63bf38ff5bc3d8cee6c6a089657876d0b669cae1 Mon Sep 17 00:00:00 2001 From: Tobias Huschle Date: Mon, 21 Feb 2022 12:55:52 +0100 Subject: [PATCH 68/69] s390/kprobes: Avoid additional kprobe in kretprobe handling So far, s390 registered a krobe on __kretprobe_trampoline which is called everytime a kretprobe fires. This kprobe would then determine the correct return address and adjust the psw accordingly, such that the kretprobe would branch to the appropriate address after completion. Some other archs handle kretprobes without such an additional kprobe. This approach is adopted to s390 with this patch. Furthermore, the __kretprobe_trampoline now uses an assembler function to correctly gather the register and psw content to be passed to the registered kretprobe handler as struct pt_regs. After completion, the register content and the psw are set based on the contents of said pt_regs struct. Note that a change to the psw address in struct pt_regs will not have an impact, as the probe will still return to the original return address of the probed function. The return address is now recovered by using the appropriate function arch_kretprobe_fixup_return. The no longer needed kprobe is removed. 
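For context, a minimal sketch of a kretprobe user that exercises this path (illustration only: the handler name and the probed symbol "kernel_clone" are arbitrary choices, not part of this patch). When the probed function returns, control now flows through the assembler __kretprobe_trampoline added below, which builds the pt_regs passed to the handler:

	#include <linux/kprobes.h>
	#include <linux/module.h>

	/* Runs when the probed function returns; on s390 %r2 holds its return value. */
	static int demo_ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
	{
		pr_info("probed function returned 0x%lx\n", regs->gprs[2]);
		return 0;
	}

	static struct kretprobe demo_kretprobe = {
		.handler	= demo_ret_handler,
		.maxactive	= 1,
		.kp.symbol_name	= "kernel_clone",	/* arbitrary example target */
	};

	static int __init demo_init(void)
	{
		return register_kretprobe(&demo_kretprobe);
	}

	static void __exit demo_exit(void)
	{
		unregister_kretprobe(&demo_kretprobe);
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");

Unlike before, reaching the return address no longer hits a breakpoint registered on __kretprobe_trampoline; the trampoline itself calls trampoline_probe_handler() and then restores the registers and psw from the pt_regs it constructed.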
Reviewed-by: Vasily Gorbik Signed-off-by: Tobias Huschle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/kprobes.h | 1 + arch/s390/kernel/Makefile | 4 ++- arch/s390/kernel/kprobes.c | 38 +++++++++--------------- arch/s390/kernel/mcount.S | 51 +++++++++++++++++++++++++++------ 4 files changed, 60 insertions(+), 34 deletions(-) diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 5eb722c984e4..598095f4b924 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -71,6 +71,7 @@ struct kprobe_ctlblk { void arch_remove_kprobe(struct kprobe *p); void __kretprobe_trampoline(void); +void trampoline_probe_handler(struct pt_regs *regs); int kprobe_fault_handler(struct pt_regs *regs, int trapnr); int kprobe_exceptions_notify(struct notifier_block *self, diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index be8007f367aa..c8d1b6aa823e 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -57,7 +57,9 @@ obj-$(CONFIG_COMPAT) += $(compat-obj-y) obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_KPROBES) += kprobes_insn_page.o -obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o +obj-$(CONFIG_KPROBES) += mcount.o +obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o +obj-$(CONFIG_FUNCTION_TRACER) += mcount.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UPROBES) += uprobes.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 7e2910e4172b..e32c14fd1282 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -372,33 +372,26 @@ static int kprobe_handler(struct pt_regs *regs) } NOKPROBE_SYMBOL(kprobe_handler); -/* - * Function return probe trampoline: - * - init_kprobes() establishes a probepoint here - * - When the probed function returns, this probe - * causes the handlers to fire - */ -static void __used kretprobe_trampoline_holder(void) +void arch_kretprobe_fixup_return(struct pt_regs *regs, + kprobe_opcode_t *correct_ret_addr) { - asm volatile(".global __kretprobe_trampoline\n" - "__kretprobe_trampoline: bcr 0,0\n"); + /* Replace fake return address with real one. */ + regs->gprs[14] = (unsigned long)correct_ret_addr; } +NOKPROBE_SYMBOL(arch_kretprobe_fixup_return); /* - * Called when the probe at kretprobe trampoline is hit + * Called from __kretprobe_trampoline */ -static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +void trampoline_probe_handler(struct pt_regs *regs) { - regs->psw.addr = __kretprobe_trampoline_handler(regs, NULL); - /* - * By returning a non-zero value, we are telling - * kprobe_handler() that we don't want the post_handler - * to run (and have re-enabled preemption) - */ - return 1; + kretprobe_trampoline_handler(regs, NULL); } NOKPROBE_SYMBOL(trampoline_probe_handler); +/* assembler function that handles the kretprobes must not be probed itself */ +NOKPROBE_SYMBOL(__kretprobe_trampoline); + /* * Called after single-stepping. 
p->addr is the address of the * instruction whose first byte has been replaced by the "breakpoint" @@ -551,18 +544,13 @@ int kprobe_exceptions_notify(struct notifier_block *self, } NOKPROBE_SYMBOL(kprobe_exceptions_notify); -static struct kprobe trampoline = { - .addr = (kprobe_opcode_t *) &__kretprobe_trampoline, - .pre_handler = trampoline_probe_handler -}; - int __init arch_init_kprobes(void) { - return register_kprobe(&trampoline); + return 0; } int arch_trampoline_kprobe(struct kprobe *p) { - return p->addr == (kprobe_opcode_t *) &__kretprobe_trampoline; + return 0; } NOKPROBE_SYMBOL(arch_trampoline_kprobe); diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 1326927a17b5..4786bfe02144 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -11,14 +11,6 @@ #include #include - GEN_BR_THUNK %r1 - GEN_BR_THUNK %r14 - - .section .kprobes.text, "ax" - -ENTRY(ftrace_stub) - BR_EX %r14 -ENDPROC(ftrace_stub) #define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE) #define STACK_PTREGS (STACK_FRAME_OVERHEAD) @@ -29,6 +21,17 @@ ENDPROC(ftrace_stub) /* packed stack: allocate just enough for r14, r15 and backchain */ #define TRACED_FUNC_FRAME_SIZE 24 +#ifdef CONFIG_FUNCTION_TRACER + + GEN_BR_THUNK %r1 + GEN_BR_THUNK %r14 + + .section .kprobes.text, "ax" + +ENTRY(ftrace_stub) + BR_EX %r14 +ENDPROC(ftrace_stub) + .macro ftrace_regs_entry, allregs=0 stg %r14,(__SF_GPRS+8*8)(%r15) # save traced function caller @@ -130,3 +133,35 @@ SYM_FUNC_START(return_to_handler) SYM_FUNC_END(return_to_handler) #endif +#endif /* CONFIG_FUNCTION_TRACER */ + +#ifdef CONFIG_KPROBES + +SYM_FUNC_START(__kretprobe_trampoline) + + stg %r14,(__SF_GPRS+8*8)(%r15) + lay %r15,-STACK_FRAME_SIZE(%r15) + stmg %r0,%r14,STACK_PTREGS_GPRS(%r15) + + # store original stack pointer in backchain and pt_regs + lay %r7,STACK_FRAME_SIZE(%r15) + stg %r7,__SF_BACKCHAIN(%r15) + stg %r7,STACK_PTREGS_GPRS+(15*8)(%r15) + + # store full psw + epsw %r2,%r3 + risbg %r3,%r2,0,31,32 + stg %r3,STACK_PTREGS_PSW(%r15) + larl %r1,__kretprobe_trampoline + stg %r1,STACK_PTREGS_PSW+8(%r15) + + lay %r2,STACK_PTREGS(%r15) + brasl %r14,trampoline_probe_handler + + mvc __SF_EMPTY(16,%r7),STACK_PTREGS_PSW(%r15) + lmg %r0,%r15,STACK_PTREGS_GPRS(%r15) + lpswe __SF_EMPTY(%r15) + +SYM_FUNC_END(__kretprobe_trampoline) + +#endif /* CONFIG_KPROBES */ From c65f677b62d6180cc174e06f953f7fe860adf6d1 Mon Sep 17 00:00:00 2001 From: Tobias Huschle Date: Thu, 24 Feb 2022 16:14:21 +0100 Subject: [PATCH 69/69] s390/test_unwind: add kretprobe tests Add tests to verify that s390 kretprobes maintain a correct stack chain and ensure their proper function. Reviewed-by: Vasily Gorbik Signed-off-by: Tobias Huschle Signed-off-by: Vasily Gorbik --- arch/s390/lib/test_unwind.c | 56 +++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index 07ef89eeb85a..c01f02887de4 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -132,6 +132,7 @@ static struct unwindme *unwindme; #define UWM_PGM 0x40 /* Unwind from program check handler */ #define UWM_KPROBE_ON_FTRACE 0x80 /* Unwind from kprobe handler called via ftrace. */ #define UWM_FTRACE 0x100 /* Unwind from ftrace handler. */ +#define UWM_KRETPROBE 0x200 /* Unwind kretprobe handlers. 
*/ static __always_inline unsigned long get_psw_addr(void) { @@ -143,6 +144,55 @@ static __always_inline unsigned long get_psw_addr(void) return psw_addr; } +static int kretprobe_ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs) +{ + struct unwindme *u = unwindme; + + u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? regs : NULL, + (u->flags & UWM_SP) ? u->sp : 0); + + return 0; +} + +static noinline notrace void test_unwind_kretprobed_func(void) +{ + asm volatile(" nop\n"); +} + +static noinline void test_unwind_kretprobed_func_caller(void) +{ + test_unwind_kretprobed_func(); +} + +static int test_unwind_kretprobe(struct unwindme *u) +{ + int ret; + struct kretprobe my_kretprobe; + + if (!IS_ENABLED(CONFIG_KPROBES)) + kunit_skip(current_test, "requires CONFIG_KPROBES"); + + u->ret = -1; /* make sure kprobe is called */ + unwindme = u; + + memset(&my_kretprobe, 0, sizeof(my_kretprobe)); + my_kretprobe.handler = kretprobe_ret_handler; + my_kretprobe.maxactive = 1; + my_kretprobe.kp.addr = (kprobe_opcode_t *)test_unwind_kretprobed_func; + + ret = register_kretprobe(&my_kretprobe); + + if (ret < 0) { + kunit_err(current_test, "register_kretprobe failed %d\n", ret); + return -EINVAL; + } + + test_unwind_kretprobed_func_caller(); + unregister_kretprobe(&my_kretprobe); + unwindme = NULL; + return u->ret; +} + static int kprobe_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct unwindme *u = unwindme; @@ -254,6 +304,8 @@ static noinline int unwindme_func4(struct unwindme *u) return 0; } else if (u->flags & (UWM_PGM | UWM_KPROBE_ON_FTRACE)) { return test_unwind_kprobe(u); + } else if (u->flags & (UWM_KRETPROBE)) { + return test_unwind_kretprobe(u); } else if (u->flags & UWM_FTRACE) { return test_unwind_ftrace(u); } else { @@ -396,6 +448,10 @@ static const struct test_params param_list[] = { TEST_WITH_FLAGS(UWM_FTRACE | UWM_SP), TEST_WITH_FLAGS(UWM_FTRACE | UWM_REGS), TEST_WITH_FLAGS(UWM_FTRACE | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_KRETPROBE), + TEST_WITH_FLAGS(UWM_KRETPROBE | UWM_SP), + TEST_WITH_FLAGS(UWM_KRETPROBE | UWM_REGS), + TEST_WITH_FLAGS(UWM_KRETPROBE | UWM_SP | UWM_REGS), }; /*