From 36d014d37d59065087e51b8381e37993f1ca99bc Mon Sep 17 00:00:00 2001 From: Fabiano Rosas <farosas@linux.ibm.com> Date: Tue, 25 Jan 2022 18:56:51 -0300 Subject: [PATCH 001/179] KVM: PPC: Book3S HV: Stop returning internal values to userspace Our kvm_arch_vcpu_ioctl_run currently returns the RESUME_HOST values to userspace, against the API of the KVM_RUN ioctl which returns 0 on success. Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com> Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220125215655.1026224-2-farosas@linux.ibm.com --- arch/powerpc/kvm/powerpc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2ad0ccd202d5..50414fb2a5ea 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1841,6 +1841,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) #ifdef CONFIG_ALTIVEC out: #endif + + /* + * We're already returning to userspace, don't pass the + * RESUME_HOST flags along. + */ + if (r > 0) + r = 0; + vcpu_put(vcpu); return r; } From b99234b918c6e36b9aa0a5b2981e86b6bd11f8e2 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas <farosas@linux.ibm.com> Date: Tue, 25 Jan 2022 18:56:52 -0300 Subject: [PATCH 002/179] KVM: PPC: Fix vmx/vsx mixup in mmio emulation The MMIO emulation code for vector instructions is duplicated between VSX and VMX. When emulating VMX we should check the VMX copy size instead of the VSX one. Fixes: acc9eb9305fe ("KVM: PPC: Reimplement LOAD_VMX/STORE_VMX instruction ...") Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com> Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220125215655.1026224-3-farosas@linux.ibm.com --- arch/powerpc/kvm/powerpc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 50414fb2a5ea..c2bd29e90314 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1499,7 +1499,7 @@ int kvmppc_handle_vmx_load(struct kvm_vcpu *vcpu, { enum emulation_result emulated = EMULATE_DONE; - if (vcpu->arch.mmio_vsx_copy_nums > 2) + if (vcpu->arch.mmio_vmx_copy_nums > 2) return EMULATE_FAIL; while (vcpu->arch.mmio_vmx_copy_nums) { @@ -1596,7 +1596,7 @@ int kvmppc_handle_vmx_store(struct kvm_vcpu *vcpu, unsigned int index = rs & KVM_MMIO_REG_MASK; enum emulation_result emulated = EMULATE_DONE; - if (vcpu->arch.mmio_vsx_copy_nums > 2) + if (vcpu->arch.mmio_vmx_copy_nums > 2) return EMULATE_FAIL; vcpu->arch.io_gpr = rs; From 3f831504482ab0d0d53d1966987959d1485345cc Mon Sep 17 00:00:00 2001 From: Fabiano Rosas <farosas@linux.ibm.com> Date: Tue, 25 Jan 2022 18:56:53 -0300 Subject: [PATCH 003/179] KVM: PPC: mmio: Reject instructions that access more than mmio.data size The MMIO interface between the kernel and userspace uses a structure that supports a maximum of 8-bytes of data. Instructions that access more than that need to be emulated in parts. We currently don't have generic support for splitting the emulation in parts and each set of instructions needs to be explicitly included. There's already an error message being printed when a load or store exceeds the mmio.data buffer but we don't fail the emulation until later at kvmppc_complete_mmio_load and even then we allow userspace to make a partial copy of the data, which ends up overwriting some fields of the mmio structure. 
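For reference, the structure in question is the mmio member of struct kvm_run in include/uapi/linux/kvm.h (quoted abridged below); its data buffer is fixed at 8 bytes, so any wider access has to be either split into multiple exits or rejected:

		/* KVM_EXIT_MMIO */
		struct {
			__u64 phys_addr;
			__u8  data[8];
			__u32 len;
			__u8  is_write;
		} mmio;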
This patch makes the emulation fail earlier at kvmppc_handle_load|store, which will send a Program interrupt to the guest. This is better than allowing the guest to proceed with partial data. Note that this was caught in a somewhat artificial scenario using quadword instructions (lq/stq), there's no account of an actual guest in the wild running instructions that are not properly emulated. (While here, remove the "bad MMIO" messages. The caller already has an error message.) Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com> Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru> Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220125215655.1026224-4-farosas@linux.ibm.com --- arch/powerpc/kvm/powerpc.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index c2bd29e90314..27fb2b70f631 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1114,10 +1114,8 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu) struct kvm_run *run = vcpu->run; u64 gpr; - if (run->mmio.len > sizeof(gpr)) { - printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); + if (run->mmio.len > sizeof(gpr)) return; - } if (!vcpu->arch.mmio_host_swabbed) { switch (run->mmio.len) { @@ -1236,10 +1234,8 @@ static int __kvmppc_handle_load(struct kvm_vcpu *vcpu, host_swabbed = !is_default_endian; } - if (bytes > sizeof(run->mmio.data)) { - printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, - run->mmio.len); - } + if (bytes > sizeof(run->mmio.data)) + return EMULATE_FAIL; run->mmio.phys_addr = vcpu->arch.paddr_accessed; run->mmio.len = bytes; @@ -1325,10 +1321,8 @@ int kvmppc_handle_store(struct kvm_vcpu *vcpu, host_swabbed = !is_default_endian; } - if (bytes > sizeof(run->mmio.data)) { - printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, - run->mmio.len); - } + if (bytes > sizeof(run->mmio.data)) + return EMULATE_FAIL; run->mmio.phys_addr = vcpu->arch.paddr_accessed; run->mmio.len = bytes; From 349fbfe9b918e6dea00734f07c0fbaf4c2e2df5e Mon Sep 17 00:00:00 2001 From: Fabiano Rosas <farosas@linux.ibm.com> Date: Tue, 25 Jan 2022 18:56:54 -0300 Subject: [PATCH 004/179] KVM: PPC: mmio: Return to guest after emulation failure If MMIO emulation fails we don't want to crash the whole guest by returning to userspace. The original commit bbf45ba57eae ("KVM: ppc: PowerPC 440 KVM implementation") added a todo: /* XXX Deliver Program interrupt to guest. */ and later the commit d69614a295ae ("KVM: PPC: Separate loadstore emulation from priv emulation") added the Program interrupt injection but in another file, so I'm assuming it was missed that this block needed to be altered. Also change the message to a ratelimited one since we're letting the guest run and it could flood the host logs. 
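For context, the RESUME_* values come from arch/powerpc/include/asm/kvm_asm.h (abridged here, as of this series): RESUME_GUEST re-enters the guest, while anything with RESUME_FLAG_HOST set bubbles up towards the KVM_RUN exit path, which is also why the first patch in this series clamps r > 0 to 0 before returning to userspace:

	#define RESUME_FLAG_NV   (1<<0)  /* Reload guest nonvolatile state? */
	#define RESUME_FLAG_HOST (1<<1)  /* Resume host? */

	#define RESUME_GUEST     0
	#define RESUME_GUEST_NV  RESUME_FLAG_NV
	#define RESUME_HOST      RESUME_FLAG_HOST
	#define RESUME_HOST_NV   (RESUME_FLAG_HOST|RESUME_FLAG_NV)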
Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com> Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220125215655.1026224-5-farosas@linux.ibm.com --- arch/powerpc/kvm/powerpc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 27fb2b70f631..acb0d2a4bdb9 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -307,9 +307,9 @@ int kvmppc_emulate_mmio(struct kvm_vcpu *vcpu) u32 last_inst; kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); - /* XXX Deliver Program interrupt to guest. */ - pr_emerg("%s: emulation failed (%08x)\n", __func__, last_inst); - r = RESUME_HOST; + kvm_debug_ratelimited("Guest access to device memory using unsupported instruction (opcode: %#08x)\n", + last_inst); + r = RESUME_GUEST; break; } default: From c1c8a66367a35aabbad9bd629b105b9fb49f2c1f Mon Sep 17 00:00:00 2001 From: Fabiano Rosas <farosas@linux.ibm.com> Date: Tue, 25 Jan 2022 18:56:55 -0300 Subject: [PATCH 005/179] KVM: PPC: Book3s: mmio: Deliver DSI after emulation failure MMIO emulation can fail if the guest uses an instruction that we are not prepared to emulate. Since these instructions can be and most likely are valid ones, this is (slightly) closer to an access fault than to an illegal instruction, so deliver a Data Storage interrupt instead of a Program interrupt. BookE ignores bad faults, so it will keep using a Program interrupt because a DSI would cause a fault loop in the guest. Suggested-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com> Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220125215655.1026224-6-farosas@linux.ibm.com --- arch/powerpc/kvm/emulate_loadstore.c | 10 +++------- arch/powerpc/kvm/powerpc.c | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 48272a9b9c30..cfc9114b87d0 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -73,7 +73,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) { u32 inst; enum emulation_result emulated = EMULATE_FAIL; - int advance = 1; struct instruction_op op; /* this default type might be overwritten by subcategories */ @@ -98,6 +97,8 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) int type = op.type & INSTR_TYPE_MASK; int size = GETSIZE(op.type); + vcpu->mmio_is_write = OP_IS_STORE(type); + switch (type) { case LOAD: { int instr_byte_swap = op.type & BYTEREV; @@ -355,15 +356,10 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) } } - if (emulated == EMULATE_FAIL) { - advance = 0; - kvmppc_core_queue_program(vcpu, 0); - } - trace_kvm_ppc_instr(inst, kvmppc_get_pc(vcpu), emulated); /* Advance past emulated instruction. 
*/ - if (advance) + if (emulated != EMULATE_FAIL) kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4); return emulated; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index acb0d2a4bdb9..82d889db2b6b 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -309,6 +309,28 @@ int kvmppc_emulate_mmio(struct kvm_vcpu *vcpu) kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); kvm_debug_ratelimited("Guest access to device memory using unsupported instruction (opcode: %#08x)\n", last_inst); + + /* + * Injecting a Data Storage here is a bit more + * accurate since the instruction that caused the + * access could still be a valid one. + */ + if (!IS_ENABLED(CONFIG_BOOKE)) { + ulong dsisr = DSISR_BADACCESS; + + if (vcpu->mmio_is_write) + dsisr |= DSISR_ISSTORE; + + kvmppc_core_queue_data_storage(vcpu, vcpu->arch.vaddr_accessed, dsisr); + } else { + /* + * BookE does not send a SIGBUS on a bad + * fault, so use a Program interrupt instead + * to avoid a fault loop. + */ + kvmppc_core_queue_program(vcpu, 0); + } + r = RESUME_GUEST; break; } From 279d1a72c0f8021520f68ddb0a1346ff9ba1ea8c Mon Sep 17 00:00:00 2001 From: Sachin Sant <sachinp@linux.ibm.com> Date: Thu, 6 Jan 2022 16:33:53 +0530 Subject: [PATCH 006/179] powerpc/xive: Export XIVE IPI information for online-only processors. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cédric pointed out that XIVE IPI information exported via sysfs (debug/powerpc/xive) display empty lines for processors which are not online. Switch to using for_each_online_cpu() so that information is displayed for online-only processors. Reported-by: Cédric Le Goater <clg@kaod.org> Signed-off-by: Sachin Sant <sachinp@linux.ibm.com> Reviewed-by: Cédric Le Goater <clg@kaod.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/164146703333.19039.10920919226094771665.sendpatchset@MacBook-Pro.local --- arch/powerpc/sysdev/xive/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 1ca5564bda9d..32863b4daf72 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1791,7 +1791,7 @@ static int xive_ipi_debug_show(struct seq_file *m, void *private) if (xive_ops->debug_show) xive_ops->debug_show(m, private); - for_each_possible_cpu(cpu) + for_each_online_cpu(cpu) xive_debug_show_ipi(m, cpu); return 0; } From b2a6f6043577e09d51a4b5577fff9fc9f5b14b1c Mon Sep 17 00:00:00 2001 From: Michal Suchanek <msuchanek@suse.de> Date: Wed, 27 Nov 2019 23:09:59 +0100 Subject: [PATCH 007/179] powerpc: add link stack flush mitigation status in debugfs. The link stack flush status is not visible in debugfs. It can be enabled even when count cache flush is disabled. Add separate file for its status. 
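With this in place the state can be read back at runtime from /sys/kernel/debug/powerpc/link_stack_flush (0 or 1). Note that, as the diff below shows, the new attribute reuses count_cache_flush_set() as its write handler, so writing either file re-runs the same common setup logic.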
Signed-off-by: Michal Suchanek <msuchanek@suse.de> [mpe: Update for change to link_stack_flush_type] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20191127220959.6208-1-msuchanek@suse.de --- arch/powerpc/kernel/security.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index e159d4093d98..d96fd14bd7c9 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -747,14 +747,29 @@ static int count_cache_flush_get(void *data, u64 *val) return 0; } +static int link_stack_flush_get(void *data, u64 *val) +{ + if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) + *val = 0; + else + *val = 1; + + return 0; +} + DEFINE_DEBUGFS_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get, count_cache_flush_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_link_stack_flush, link_stack_flush_get, + count_cache_flush_set, "%llu\n"); static __init int count_cache_flush_debugfs_init(void) { debugfs_create_file_unsafe("count_cache_flush", 0600, arch_debugfs_dir, NULL, &fops_count_cache_flush); + debugfs_create_file_unsafe("link_stack_flush", 0600, + arch_debugfs_dir, NULL, + &fops_link_stack_flush); return 0; } device_initcall(count_cache_flush_debugfs_init); From f529edd1b69ddf832c3257dcd34e15100038d6b7 Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz <tobias@waldekranz.com> Date: Wed, 12 Jan 2022 12:24:59 +0100 Subject: [PATCH 008/179] powerpc/e500/qemu-e500: allow core to idle without waiting This means an idle guest won't needlessly consume an entire core on the host, waiting for work to show up. Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com> Signed-off-by: Joachim Wiberg <troglobit@gmail.com> Acked-by: Scott Wood <oss@buserror.net> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220112112459.1033754-1-troglobit@gmail.com --- arch/powerpc/platforms/85xx/qemu_e500.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c index a4127b0b161f..4c4d577effd9 100644 --- a/arch/powerpc/platforms/85xx/qemu_e500.c +++ b/arch/powerpc/platforms/85xx/qemu_e500.c @@ -67,4 +67,9 @@ define_machine(qemu_e500) { .get_irq = mpic_get_coreint_irq, .calibrate_decr = generic_calibrate_decr, .progress = udbg_progress, +#ifdef CONFIG_PPC64 + .power_save = book3e_idle, +#else + .power_save = e500_idle, +#endif }; From 17846485dff91acce1ad47b508b633dffc32e838 Mon Sep 17 00:00:00 2001 From: Maxim Kiselev <bigunclemax@gmail.com> Date: Thu, 30 Dec 2021 18:11:21 +0300 Subject: [PATCH 009/179] powerpc: dts: t104xrdb: fix phy type for FMAN 4/5 T1040RDB has two RTL8211E-VB phys which requires setting of internal delays for correct work. Changing the phy-connection-type property to `rgmii-id` will fix this issue. 
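For background, the four RGMII variants in include/linux/phy.h (names below are the real enum values, annotations abridged) differ only in which side inserts the roughly 2 ns clock skew; "rgmii-id" asks the PHY to add it on both paths, which is what the RTL8211E needs here:

	PHY_INTERFACE_MODE_RGMII,	/* delays added by MAC or board */
	PHY_INTERFACE_MODE_RGMII_ID,	/* PHY adds RX and TX delays */
	PHY_INTERFACE_MODE_RGMII_RXID,	/* PHY adds RX delay only */
	PHY_INTERFACE_MODE_RGMII_TXID,	/* PHY adds TX delay only */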
Signed-off-by: Maxim Kiselev <bigunclemax@gmail.com> Reviewed-by: Maxim Kochetkov <fido_max@inbox.ru> Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20211230151123.1258321-1-bigunclemax@gmail.com --- arch/powerpc/boot/dts/fsl/t104xrdb.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi index 099a598c74c0..bfe1ed5be337 100644 --- a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi +++ b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi @@ -139,12 +139,12 @@ fman@400000 { ethernet@e6000 { phy-handle = <&phy_rgmii_0>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; ethernet@e8000 { phy-handle = <&phy_rgmii_1>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; mdio0: mdio@fc000 { From d5342fdd163ae0553a14820021a107e03eb1ea72 Mon Sep 17 00:00:00 2001 From: Thierry Reding <treding@nvidia.com> Date: Mon, 20 Dec 2021 14:40:36 +0100 Subject: [PATCH 010/179] powerpc: dts: Fix some I2C unit addresses The unit-address for the Maxim MAX1237 ADCs on XPedite5200 boards don't match the value in the "reg" property and cause a DTC warning. Signed-off-by: Thierry Reding <treding@nvidia.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20211220134036.683309-1-thierry.reding@gmail.com --- arch/powerpc/boot/dts/xpedite5200.dts | 2 +- arch/powerpc/boot/dts/xpedite5200_xmon.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/boot/dts/xpedite5200.dts b/arch/powerpc/boot/dts/xpedite5200.dts index 840ea84bbb59..74b346f2d43c 100644 --- a/arch/powerpc/boot/dts/xpedite5200.dts +++ b/arch/powerpc/boot/dts/xpedite5200.dts @@ -132,7 +132,7 @@ reg = <0x68>; }; - dtt@48 { + dtt@34 { compatible = "maxim,max1237"; reg = <0x34>; }; diff --git a/arch/powerpc/boot/dts/xpedite5200_xmon.dts b/arch/powerpc/boot/dts/xpedite5200_xmon.dts index 449fc1b5dc23..d491c7a8f979 100644 --- a/arch/powerpc/boot/dts/xpedite5200_xmon.dts +++ b/arch/powerpc/boot/dts/xpedite5200_xmon.dts @@ -136,7 +136,7 @@ reg = <0x68>; }; - dtt@48 { + dtt@34 { compatible = "maxim,max1237"; reg = <0x34>; }; From faf01aef0570757bfbf1d655e984742c1dd38068 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy <aik@ozlabs.ru> Date: Tue, 11 Jan 2022 11:54:04 +1100 Subject: [PATCH 011/179] KVM: PPC: Merge powerpc's debugfs entry content into generic entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At the moment KVM on PPC creates 4 types of entries under the kvm debugfs: 1) "%pid-%fd" per a KVM instance (for all platforms); 2) "vm%pid" (for PPC Book3s HV KVM); 3) "vm%u_vcpu%u_timing" (for PPC Book3e KVM); 4) "kvm-xive-%p" (for XIVE PPC Book3s KVM, the same for XICS); The problem with this is that multiple VMs per process is not allowed for 2) and 3) which makes it possible for userspace to trigger errors when creating duplicated debugfs entries. This merges all these into 1). This defines kvm_arch_create_kvm_debugfs() similar to kvm_arch_create_vcpu_debugfs(). This defines 2 hooks in kvmppc_ops that allow specific KVM implementations add necessary entries, this adds the _e500 suffix to kvmppc_create_vcpu_debugfs_e500() to make it clear what platform it is for. This makes use of already existing kvm_arch_create_vcpu_debugfs() on PPC. This removes no more used debugfs_dir pointers from PPC kvm_arch structs. 
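With all of the above, the PPC entries end up under the generic per-VM directory; the resulting layout is roughly the following (exact entries depend on platform and config):

	/sys/kernel/debug/kvm/<pid>-<fd>/
		htab or radix		(Book3S HV MMU state)
		xics or xive		(interrupt controller state)
		vcpu<N>/
			timings		(Book3S HV exit timing)
			timing		(e500 exit timing)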
This stops removing vcpu entries as once created vcpus stay around for the entire life of a VM and removed when the KVM instance is closed, see commit d56f5136b010 ("KVM: let kvm_destroy_vm_debugfs clean up vCPU debugfs directories"). Suggested-by: Fabiano Rosas <farosas@linux.ibm.com> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> Reviewed-by: Cédric Le Goater <clg@kaod.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220111005404.162219-1-aik@ozlabs.ru --- arch/powerpc/include/asm/kvm_host.h | 6 ++--- arch/powerpc/include/asm/kvm_ppc.h | 2 ++ arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +- arch/powerpc/kvm/book3s_64_mmu_radix.c | 2 +- arch/powerpc/kvm/book3s_hv.c | 31 ++++++++++---------------- arch/powerpc/kvm/book3s_xics.c | 13 ++--------- arch/powerpc/kvm/book3s_xive.c | 13 ++--------- arch/powerpc/kvm/book3s_xive_native.c | 13 ++--------- arch/powerpc/kvm/e500.c | 1 + arch/powerpc/kvm/e500mc.c | 1 + arch/powerpc/kvm/powerpc.c | 16 ++++++++++--- arch/powerpc/kvm/timing.c | 21 +++++------------ arch/powerpc/kvm/timing.h | 12 +++++----- 13 files changed, 51 insertions(+), 82 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index d9bf60bf0816..faf301d0dec0 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -26,6 +26,8 @@ #include <asm/hvcall.h> #include <asm/mce.h> +#define __KVM_HAVE_ARCH_VCPU_DEBUGFS + #define KVM_MAX_VCPUS NR_CPUS #define KVM_MAX_VCORES NR_CPUS @@ -295,7 +297,6 @@ struct kvm_arch { bool dawr1_enabled; pgd_t *pgtable; u64 process_table; - struct dentry *debugfs_dir; struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */ #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE @@ -673,7 +674,6 @@ struct kvm_vcpu_arch { u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES]; u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES]; u64 timing_last_exit; - struct dentry *debugfs_exit_timing; #endif #ifdef CONFIG_PPC_BOOK3S @@ -831,8 +831,6 @@ struct kvm_vcpu_arch { struct kvmhv_tb_accumulator rm_exit; /* real-mode exit code */ struct kvmhv_tb_accumulator guest_time; /* guest execution */ struct kvmhv_tb_accumulator cede_time; /* time napping inside guest */ - - struct dentry *debugfs_dir; #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ }; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index a14dbcd1b8ce..c583d0c37f31 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -314,6 +314,8 @@ struct kvmppc_ops { int (*svm_off)(struct kvm *kvm); int (*enable_dawr1)(struct kvm *kvm); bool (*hash_v3_possible)(void); + int (*create_vm_debugfs)(struct kvm *kvm); + int (*create_vcpu_debugfs)(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry); }; extern struct kvmppc_ops *kvmppc_hv_ops; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 213232914367..0aeb51738ca9 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -2112,7 +2112,7 @@ static const struct file_operations debugfs_htab_fops = { void kvmppc_mmu_debugfs_init(struct kvm *kvm) { - debugfs_create_file("htab", 0400, kvm->arch.debugfs_dir, kvm, + debugfs_create_file("htab", 0400, kvm->debugfs_dentry, kvm, &debugfs_htab_fops); } diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 8cebe5542256..e4ce2a35483f 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ 
b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -1454,7 +1454,7 @@ static const struct file_operations debugfs_radix_fops = { void kvmhv_radix_debugfs_init(struct kvm *kvm) { - debugfs_create_file("radix", 0400, kvm->arch.debugfs_dir, kvm, + debugfs_create_file("radix", 0400, kvm->debugfs_dentry, kvm, &debugfs_radix_fops); } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 84c89f08ae9a..c7e44d75f8aa 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2767,20 +2767,17 @@ static const struct file_operations debugfs_timings_ops = { }; /* Create a debugfs directory for the vcpu */ -static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id) +static int kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry) { - char buf[16]; - struct kvm *kvm = vcpu->kvm; - - snprintf(buf, sizeof(buf), "vcpu%u", id); - vcpu->arch.debugfs_dir = debugfs_create_dir(buf, kvm->arch.debugfs_dir); - debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir, vcpu, + debugfs_create_file("timings", 0444, debugfs_dentry, vcpu, &debugfs_timings_ops); + return 0; } #else /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ -static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id) +static int kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry) { + return 0; } #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ @@ -2903,8 +2900,6 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) vcpu->arch.cpu_type = KVM_CPU_3S_64; kvmppc_sanity_check(vcpu); - debugfs_vcpu_init(vcpu, id); - return 0; } @@ -5223,7 +5218,6 @@ void kvmppc_free_host_rm_ops(void) static int kvmppc_core_init_vm_hv(struct kvm *kvm) { unsigned long lpcr, lpid; - char buf[32]; int ret; mutex_init(&kvm->arch.uvmem_lock); @@ -5356,15 +5350,14 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) kvm->arch.smt_mode = 1; kvm->arch.emul_smt_mode = 1; - /* - * Create a debugfs directory for the VM - */ - snprintf(buf, sizeof(buf), "vm%d", current->pid); - kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir); + return 0; +} + +static int kvmppc_arch_create_vm_debugfs_hv(struct kvm *kvm) +{ kvmppc_mmu_debugfs_init(kvm); if (radix_enabled()) kvmhv_radix_debugfs_init(kvm); - return 0; } @@ -5379,8 +5372,6 @@ static void kvmppc_free_vcores(struct kvm *kvm) static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) { - debugfs_remove_recursive(kvm->arch.debugfs_dir); - if (!cpu_has_feature(CPU_FTR_ARCH_300)) kvm_hv_vm_deactivated(); @@ -6042,6 +6033,8 @@ static struct kvmppc_ops kvm_ops_hv = { .svm_off = kvmhv_svm_off, .enable_dawr1 = kvmhv_enable_dawr1, .hash_v3_possible = kvmppc_hash_v3_possible, + .create_vcpu_debugfs = kvmppc_arch_create_vcpu_debugfs_hv, + .create_vm_debugfs = kvmppc_arch_create_vm_debugfs_hv, }; static int kvm_init_subcore_bitmap(void) diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 9cc466006e8b..306c85e70eea 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -1016,19 +1016,10 @@ DEFINE_SHOW_ATTRIBUTE(xics_debug); static void xics_debugfs_init(struct kvmppc_xics *xics) { - char *name; - - name = kasprintf(GFP_KERNEL, "kvm-xics-%p", xics); - if (!name) { - pr_err("%s: no memory for name\n", __func__); - return; - } - - xics->dentry = debugfs_create_file(name, 0444, arch_debugfs_dir, + xics->dentry = debugfs_create_file("xics", 0444, xics->kvm->debugfs_dentry, xics, &xics_debug_fops); - pr_debug("%s: created %s\n", __func__, name); - 
kfree(name); + pr_debug("%s: created\n", __func__); } static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm, diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index e216c068075d..37a56cbb1701 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -2354,19 +2354,10 @@ DEFINE_SHOW_ATTRIBUTE(xive_debug); static void xive_debugfs_init(struct kvmppc_xive *xive) { - char *name; - - name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive); - if (!name) { - pr_err("%s: no memory for name\n", __func__); - return; - } - - xive->dentry = debugfs_create_file(name, S_IRUGO, arch_debugfs_dir, + xive->dentry = debugfs_create_file("xive", S_IRUGO, xive->kvm->debugfs_dentry, xive, &xive_debug_fops); - pr_debug("%s: created %s\n", __func__, name); - kfree(name); + pr_debug("%s: created\n", __func__); } static void kvmppc_xive_init(struct kvm_device *dev) diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 561a5bfe0468..3c2b128e5f0f 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -1259,19 +1259,10 @@ DEFINE_SHOW_ATTRIBUTE(xive_native_debug); static void xive_native_debugfs_init(struct kvmppc_xive *xive) { - char *name; - - name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive); - if (!name) { - pr_err("%s: no memory for name\n", __func__); - return; - } - - xive->dentry = debugfs_create_file(name, 0444, arch_debugfs_dir, + xive->dentry = debugfs_create_file("xive", 0444, xive->kvm->debugfs_dentry, xive, &xive_native_debug_fops); - pr_debug("%s: created %s\n", __func__, name); - kfree(name); + pr_debug("%s: created\n", __func__); } static void kvmppc_xive_native_init(struct kvm_device *dev) diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 7e8b69015d20..c8b2b4478545 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -495,6 +495,7 @@ static struct kvmppc_ops kvm_ops_e500 = { .emulate_op = kvmppc_core_emulate_op_e500, .emulate_mtspr = kvmppc_core_emulate_mtspr_e500, .emulate_mfspr = kvmppc_core_emulate_mfspr_e500, + .create_vcpu_debugfs = kvmppc_create_vcpu_debugfs_e500, }; static int __init kvmppc_e500_init(void) diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 1c189b5aadcc..fa0d8dbbe484 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -381,6 +381,7 @@ static struct kvmppc_ops kvm_ops_e500mc = { .emulate_op = kvmppc_core_emulate_op_e500, .emulate_mtspr = kvmppc_core_emulate_mtspr_e500, .emulate_mfspr = kvmppc_core_emulate_mfspr_e500, + .create_vcpu_debugfs = kvmppc_create_vcpu_debugfs_e500, }; static int __init kvmppc_e500mc_init(void) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 82d889db2b6b..1d06b68739b0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -777,7 +777,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) rcuwait_init(&vcpu->arch.wait); vcpu->arch.waitp = &vcpu->arch.wait; - kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id); return 0; out_vcpu_uninit: @@ -794,8 +793,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) /* Make sure we're not using the vcpu anymore */ hrtimer_cancel(&vcpu->arch.dec_timer); - kvmppc_remove_vcpu_debugfs(vcpu); - switch (vcpu->arch.irq_type) { case KVMPPC_IRQ_MPIC: kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu); @@ -2521,3 +2518,16 @@ int kvm_arch_init(void *opaque) } EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr); + +void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, 
struct dentry *debugfs_dentry) +{ + if (vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs) + vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs(vcpu, debugfs_dentry); +} + +int kvm_arch_create_vm_debugfs(struct kvm *kvm) +{ + if (kvm->arch.kvm_ops->create_vm_debugfs) + kvm->arch.kvm_ops->create_vm_debugfs(kvm); + return 0; +} diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c index ba56a5cbba97..25071331f8c1 100644 --- a/arch/powerpc/kvm/timing.c +++ b/arch/powerpc/kvm/timing.c @@ -204,21 +204,10 @@ static const struct file_operations kvmppc_exit_timing_fops = { .release = single_release, }; -void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id) +int kvmppc_create_vcpu_debugfs_e500(struct kvm_vcpu *vcpu, + struct dentry *debugfs_dentry) { - static char dbg_fname[50]; - struct dentry *debugfs_file; - - snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing", - current->pid, id); - debugfs_file = debugfs_create_file(dbg_fname, 0666, kvm_debugfs_dir, - vcpu, &kvmppc_exit_timing_fops); - - vcpu->arch.debugfs_exit_timing = debugfs_file; -} - -void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) -{ - debugfs_remove(vcpu->arch.debugfs_exit_timing); - vcpu->arch.debugfs_exit_timing = NULL; + debugfs_create_file("timing", 0666, debugfs_dentry, + vcpu, &kvmppc_exit_timing_fops); + return 0; } diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h index feef7885ba82..45817ab82bb4 100644 --- a/arch/powerpc/kvm/timing.h +++ b/arch/powerpc/kvm/timing.h @@ -14,8 +14,8 @@ #ifdef CONFIG_KVM_EXIT_TIMING void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu); void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu); -void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id); -void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu); +int kvmppc_create_vcpu_debugfs_e500(struct kvm_vcpu *vcpu, + struct dentry *debugfs_dentry); static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) { @@ -26,9 +26,11 @@ static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) /* if exit timing is not configured there is no need to build the c file */ static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {} static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {} -static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, - unsigned int id) {} -static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {} +static inline int kvmppc_create_vcpu_debugfs_e500(struct kvm_vcpu *vcpu, + struct dentry *debugfs_dentry) +{ + return 0; +} static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {} #endif /* CONFIG_KVM_EXIT_TIMING */ From 8e0f353a44ff3d65d933b8c0e4fb15dc89d46617 Mon Sep 17 00:00:00 2001 From: Jason Wang <wangborong@cdjrlc.com> Date: Mon, 20 Dec 2021 11:02:43 +0800 Subject: [PATCH 012/179] powerpc/kvm: no need to initialise statics to 0 Static variables do not need to be initialised to 0, because compiler will initialise all uninitialised statics to 0. Thus, remove the unneeded initialization. 
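(Objects with static storage duration are zero-initialized by the language itself, C11 6.7.9; an explicit "= 0" is redundant and, on some older toolchains, needlessly moved the variable out of .bss into initialized data. checkpatch flags this pattern as "do not initialise statics to 0".)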
Signed-off-by: Jason Wang <wangborong@cdjrlc.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20211220030243.603435-1-wangborong@cdjrlc.com --- arch/powerpc/kvm/book3s_64_mmu_host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index c3e31fef0be1..1ae09992c9ea 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -228,7 +228,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) struct kvmppc_sid_map *map; struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); u16 sid_map_mask; - static int backwards_map = 0; + static int backwards_map; if (kvmppc_get_msr(vcpu) & MSR_PR) gvsid |= VSID_PR; From eddaa9a402758d379520f6511fb61e89990698aa Mon Sep 17 00:00:00 2001 From: Laurent Dufour <ldufour@linux.ibm.com> Date: Thu, 6 Jan 2022 17:13:39 +0100 Subject: [PATCH 013/179] powerpc/pseries: read the lpar name from the firmware The LPAR name may be changed after the LPAR has been started in the HMC. In that case lparstat command is not reporting the updated value because it reads it from the device tree which is read at boot time. However this value could be read from RTAS. Adding this value in the /proc/powerpc/lparcfg output allows to read the updated value. However the hypervisor, like Qemu/KVM, may not support this RTAS parameter. In that case the value reported in lparcfg is read from the device tree and so is not updated accordingly. Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com> Reviewed-by: Tyrel Datwyler <tyreld@linux.ibm.com> Reviewed-by: Nathan Lynch <nathanl@linux.ibm.com> [mpe: Drop doc-comment syntax, change RTAS/DT to lower case, use of_root to fix missing of_node_put(), use of_property_read_string()] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220106161339.74656-1-ldufour@linux.ibm.com --- arch/powerpc/platforms/pseries/lparcfg.c | 87 ++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index c7940fcfc911..2119c003fcf9 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -311,6 +311,92 @@ static void parse_mpp_x_data(struct seq_file *m) seq_printf(m, "coalesce_pool_spurr=%ld\n", mpp_x_data.pool_spurr_cycles); } +/* + * PAPR defines, in section "7.3.16 System Parameters Option", the token 55 to + * read the LPAR name, and the largest output data to 4000 + 2 bytes length. + */ +#define SPLPAR_LPAR_NAME_TOKEN 55 +#define GET_SYS_PARM_BUF_SIZE 4002 +#if GET_SYS_PARM_BUF_SIZE > RTAS_DATA_BUF_SIZE +#error "GET_SYS_PARM_BUF_SIZE is larger than RTAS_DATA_BUF_SIZE" +#endif + +/* + * Read the lpar name using the RTAS ibm,get-system-parameter call. + * + * The name read through this call is updated if changes are made by the end + * user on the hypervisor side. + * + * Some hypervisor (like Qemu) may not provide this value. In that case, a non + * null value is returned. 
+ */ +static int read_rtas_lpar_name(struct seq_file *m) +{ + int rc, len, token; + union { + char raw_buffer[GET_SYS_PARM_BUF_SIZE]; + struct { + __be16 len; + char name[GET_SYS_PARM_BUF_SIZE-2]; + }; + } *local_buffer; + + token = rtas_token("ibm,get-system-parameter"); + if (token == RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + local_buffer = kmalloc(sizeof(*local_buffer), GFP_KERNEL); + if (!local_buffer) + return -ENOMEM; + + do { + spin_lock(&rtas_data_buf_lock); + memset(rtas_data_buf, 0, sizeof(*local_buffer)); + rc = rtas_call(token, 3, 1, NULL, SPLPAR_LPAR_NAME_TOKEN, + __pa(rtas_data_buf), sizeof(*local_buffer)); + if (!rc) + memcpy(local_buffer->raw_buffer, rtas_data_buf, + sizeof(local_buffer->raw_buffer)); + spin_unlock(&rtas_data_buf_lock); + } while (rtas_busy_delay(rc)); + + if (!rc) { + /* Force end of string */ + len = min((int) be16_to_cpu(local_buffer->len), + (int) sizeof(local_buffer->name)-1); + local_buffer->name[len] = '\0'; + + seq_printf(m, "partition_name=%s\n", local_buffer->name); + } else + rc = -ENODATA; + + kfree(local_buffer); + return rc; +} + +/* + * Read the LPAR name from the Device Tree. + * + * The value read in the DT is not updated if the end-user is touching the LPAR + * name on the hypervisor side. + */ +static int read_dt_lpar_name(struct seq_file *m) +{ + const char *name; + + if (of_property_read_string(of_root, "ibm,partition-name", &name)) + return -ENOENT; + + seq_printf(m, "partition_name=%s\n", name); + return 0; +} + +static void read_lpar_name(struct seq_file *m) +{ + if (read_rtas_lpar_name(m) && read_dt_lpar_name(m)) + pr_err_once("Error can't get the LPAR name"); +} + #define SPLPAR_CHARACTERISTICS_TOKEN 20 #define SPLPAR_MAXLENGTH 1026*(sizeof(char)) @@ -496,6 +582,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) if (firmware_has_feature(FW_FEATURE_SPLPAR)) { /* this call handles the ibm,get-system-parameter contents */ + read_lpar_name(m); parse_system_parameter_string(m); parse_ppp_data(m); parse_mpp_data(m); From 5ebb74749202a25da4b3cc2eb15470225a05527c Mon Sep 17 00:00:00 2001 From: Maxim Kiselev <bigunclemax@gmail.com> Date: Fri, 21 Jan 2022 12:14:47 +0300 Subject: [PATCH 014/179] powerpc: dts: t1040rdb: fix ports names for Seville Ethernet switch On board rev A, the network interface labels for the switch ports written on the front panel are different than on rev B and later. This patch fixes network interface names for the switch ports according to labels that are written on the front panel of the board rev B. They start from ETH3 and end at ETH10. This patch also introduces a separate device tree for rev A. The main device tree is supposed to cover rev B and later. 
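Because the new t1040rdb-rev-a.dts simply #includes t1040rdb.dts and then overrides the affected seville_port labels by phandle, the two board revisions keep a single source of truth and the rev A file carries only the delta.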
Fixes: e69eb0824d8c ("powerpc: dts: t1040rdb: add ports for Seville Ethernet switch") Signed-off-by: Maxim Kiselev <bigunclemax@gmail.com> Reviewed-by: Maxim Kochetkov <fido_max@inbox.ru> Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220121091447.3412907-1-bigunclemax@gmail.com --- arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts | 30 ++++++++++++++++++++ arch/powerpc/boot/dts/fsl/t1040rdb.dts | 8 +++--- 2 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts new file mode 100644 index 000000000000..73f8c998c64d --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * T1040RDB-REV-A Device Tree Source + * + * Copyright 2014 - 2015 Freescale Semiconductor Inc. + * + */ + +#include "t1040rdb.dts" + +/ { + model = "fsl,T1040RDB-REV-A"; + compatible = "fsl,T1040RDB-REV-A"; +}; + +&seville_port0 { + label = "ETH5"; +}; + +&seville_port2 { + label = "ETH7"; +}; + +&seville_port4 { + label = "ETH9"; +}; + +&seville_port6 { + label = "ETH11"; +}; diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb.dts b/arch/powerpc/boot/dts/fsl/t1040rdb.dts index af0c8a6f5613..b6733e7e6580 100644 --- a/arch/powerpc/boot/dts/fsl/t1040rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1040rdb.dts @@ -119,7 +119,7 @@ managed = "in-band-status"; phy-handle = <&phy_qsgmii_0>; phy-mode = "qsgmii"; - label = "ETH5"; + label = "ETH3"; status = "okay"; }; @@ -135,7 +135,7 @@ managed = "in-band-status"; phy-handle = <&phy_qsgmii_2>; phy-mode = "qsgmii"; - label = "ETH7"; + label = "ETH5"; status = "okay"; }; @@ -151,7 +151,7 @@ managed = "in-band-status"; phy-handle = <&phy_qsgmii_4>; phy-mode = "qsgmii"; - label = "ETH9"; + label = "ETH7"; status = "okay"; }; @@ -167,7 +167,7 @@ managed = "in-band-status"; phy-handle = <&phy_qsgmii_6>; phy-mode = "qsgmii"; - label = "ETH11"; + label = "ETH9"; status = "okay"; }; From 2e7f1e2b30b5b8aa5de6547407c68670fd227ad8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman <mpe@ellerman.id.au> Date: Thu, 20 Jan 2022 12:33:20 +1100 Subject: [PATCH 015/179] powerpc/64: Move paca allocation later in boot Mahesh & Sourabh identified two problems[1][2] with ppc64_bolted_size() and paca allocation. The first is that on a Radix capable machine but with "disable_radix" on the command line, there is a window during early boot where early_radix_enabled() is true, even though it will later become false. early_init_devtree: <- early_radix_enabled() = false early_init_dt_scan_cpus: <- early_radix_enabled() = false ... check_cpu_pa_features: <- early_radix_enabled() = false ... ^ <- early_radix_enabled() = TRUE allocate_paca: | <- early_radix_enabled() = TRUE ... | ppc64_bolted_size: | <- early_radix_enabled() = TRUE if (early_radix_enabled())| <- early_radix_enabled() = TRUE return ULONG_MAX; | ... | ... | <- early_radix_enabled() = TRUE ... | <- early_radix_enabled() = TRUE mmu_early_init_devtree() V ... <- early_radix_enabled() = false This causes ppc64_bolted_size() to return ULONG_MAX for the boot CPU's paca allocation, even though later it will return a different value. This is not currently a bug because the paca allocation is also limited by the RMA size, but that is very fragile. 
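Both problems funnel through ppc64_bolted_size(), which at this point in the series reads roughly as follows (abridged to the Book3S-64 branch, from arch/powerpc/mm/book3s64/hash_utils.c); note that its answer depends both on the radix decision and on the 1T segment feature covered by the second issue below:

	unsigned long __init ppc64_bolted_size(void)
	{
		if (early_radix_enabled())
			return ULONG_MAX;
		/* Otherwise limited by segment size */
		if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
			return 1UL << SID_SHIFT_1T;
		return 1UL << SID_SHIFT;
	}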
The second issue is that when using the Hash MMU, when we call ppc64_bolted_size() for the boot CPU's paca allocation, we have not yet detected whether 1T segments are available. That causes ppc64_bolted_size() to return 256MB, even if the machine can actually support up to 1T. This is usually OK, we generally have space below 256MB for one paca, but for a kdump kernel placed above 256MB it causes the boot to fail. At boot we cannot discover all the features of the machine instantaneously, so there will always be some periods where we have incomplete knowledge of the system. However both the above problems stem from the fact that we allocate the boot CPU's paca (and paca pointers array) before we decide which MMU we are using, or discover its exact features. Moving the paca allocation slightly later still can solve both the issues described above, and means for a normal boot we don't do any permanent allocations until after we've discovered the MMU. Note that although we move the boot CPU's paca allocation later, we still have a temporary paca (boot_paca) accessible via r13, so code that does read only access to paca fields is safe. The only risk is that some code writes to the boot_paca, and that write will then be lost when we switch away from the boot_paca later in early_setup(). The additional code that runs before the paca allocation is primarily mmu_early_init_devtree(), which is scanning the device tree and populating globals and cur_cpu_spec with MMU related flags. I do not see any additional code that writes to paca fields. [1]: https://lore.kernel.org/r/20211018084434.217772-2-sourabhjain@linux.ibm.com [2]: https://lore.kernel.org/r/20211018084434.217772-3-sourabhjain@linux.ibm.com Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220124130544.408675-1-mpe@ellerman.id.au --- arch/powerpc/kernel/prom.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 3d30d40a0e9c..86c4f009563d 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -352,6 +352,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node, be32_to_cpu(intserv[found_thread])); boot_cpuid = found; + // Pass the boot CPU's hard CPU id back to our caller + *((u32 *)data) = be32_to_cpu(intserv[found_thread]); + /* * PAPR defines "logical" PVR values for cpus that * meet various levels of the architecture: @@ -388,9 +391,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node, cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; else if (!dt_cpu_ftrs_in_use()) cur_cpu_spec->cpu_features |= CPU_FTR_SMT; - allocate_paca(boot_cpuid); #endif - set_hard_smp_processor_id(found, be32_to_cpu(intserv[found_thread])); return 0; } @@ -714,6 +715,7 @@ static inline void save_fscr_to_task(void) {} void __init early_init_devtree(void *params) { + u32 boot_cpu_hwid; phys_addr_t limit; DBG(" -> early_init_devtree(%px)\n", params); @@ -790,8 +792,6 @@ void __init early_init_devtree(void *params) * FIXME .. and the initrd too? */ move_device_tree(); - allocate_paca_ptrs(); - DBG("Scanning CPUs ...\n"); dt_cpu_ftrs_scan(); @@ -799,7 +799,7 @@ void __init early_init_devtree(void *params) /* Retrieve CPU related informations from the flat tree * (altivec support, boot CPU ID, ...) 
*/ - of_scan_flat_dt(early_init_dt_scan_cpus, NULL); + of_scan_flat_dt(early_init_dt_scan_cpus, &boot_cpu_hwid); if (boot_cpuid < 0) { printk("Failed to identify boot CPU !\n"); BUG(); @@ -816,6 +816,11 @@ void __init early_init_devtree(void *params) mmu_early_init_devtree(); + // NB. paca is not installed until later in early_setup() + allocate_paca_ptrs(); + allocate_paca(boot_cpuid); + set_hard_smp_processor_id(boot_cpuid, boot_cpu_hwid); + #ifdef CONFIG_PPC_POWERNV /* Scan and build the list of machine check recoverable ranges */ of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL); From ccafe7c20b7de330d9091a114c9985305759f1ee Mon Sep 17 00:00:00 2001 From: Corentin Labbe <clabbe@baylibre.com> Date: Tue, 25 Jan 2022 13:54:21 +0000 Subject: [PATCH 016/179] macintosh: macio_asic: remove useless cast for driver.name pci_driver name is const char pointer, so the cast it not necessary. Signed-off-by: Corentin Labbe <clabbe@baylibre.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220125135421.4081740-1-clabbe@baylibre.com --- drivers/macintosh/macio_asic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c index c1fdf2896021..1943a007e2d5 100644 --- a/drivers/macintosh/macio_asic.c +++ b/drivers/macintosh/macio_asic.c @@ -756,7 +756,7 @@ MODULE_DEVICE_TABLE (pci, pci_ids); /* pci driver glue; this is a "new style" PCI driver module */ static struct pci_driver macio_pci_driver = { - .name = (char *) "macio", + .name = "macio", .id_table = pci_ids, .probe = macio_pci_probe, From 961f649fb3ad9a9e384c695a050d776d970ddabd Mon Sep 17 00:00:00 2001 From: Michael Ellerman <mpe@ellerman.id.au> Date: Sun, 30 Jan 2022 18:39:18 +0000 Subject: [PATCH 017/179] powerpc/ptdump: Fix sparse warning in hashpagetable.c As reported by sparse: arch/powerpc/mm/ptdump/hashpagetable.c:264:29: warning: restricted __be64 degrades to integer arch/powerpc/mm/ptdump/hashpagetable.c:265:49: warning: restricted __be64 degrades to integer arch/powerpc/mm/ptdump/hashpagetable.c:267:36: warning: incorrect type in assignment (different base types) arch/powerpc/mm/ptdump/hashpagetable.c:267:36: expected unsigned long long [usertype] arch/powerpc/mm/ptdump/hashpagetable.c:267:36: got restricted __be64 [usertype] v arch/powerpc/mm/ptdump/hashpagetable.c:268:36: warning: incorrect type in assignment (different base types) arch/powerpc/mm/ptdump/hashpagetable.c:268:36: expected unsigned long long [usertype] arch/powerpc/mm/ptdump/hashpagetable.c:268:36: got restricted __be64 [usertype] r The values returned by plpar_pte_read_4() are CPU endian, not __be64, so assigning them to struct hash_pte confuses sparse. As a minimal fix open code a struct to hold the values with CPU endian types. 
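For readers unfamiliar with sparse's endian checking: __be64 is a "restricted" bitwise type, so mixing it with plain integers warns unless the value goes through an explicit conversion. A minimal illustration:

	u64 x = 0xfeed;
	__be64 be = cpu_to_be64(x);
	u64 a = be;			/* sparse: restricted __be64 degrades to integer */
	u64 b = be64_to_cpu(be);	/* clean */

Since plpar_pte_read_4() fills the buffer with CPU-endian values, the open-coded struct with plain unsigned long fields matches reality and silences the warnings.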
Reported-by: kernel test robot <lkp@intel.com> Reported-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220202053039.691917-1-mpe@ellerman.id.au --- arch/powerpc/mm/ptdump/hashpagetable.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c index c7f824d294b2..9a601587836b 100644 --- a/arch/powerpc/mm/ptdump/hashpagetable.c +++ b/arch/powerpc/mm/ptdump/hashpagetable.c @@ -238,7 +238,10 @@ static int native_find(unsigned long ea, int psize, bool primary, u64 *v, u64 static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *r) { - struct hash_pte ptes[4]; + struct { + unsigned long v; + unsigned long r; + } ptes[4]; unsigned long vsid, vpn, hash, hpte_group, want_v; int i, j, ssize = mmu_kernel_ssize; long lpar_rc = 0; From 69ab6ac380a00244575de02c406dcb9491bf3368 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas <farosas@linux.ibm.com> Date: Tue, 25 Jan 2022 12:57:32 -0300 Subject: [PATCH 018/179] KVM: PPC: Book3S HV: Check return value of kvmppc_radix_init The return of the function is being shadowed by the call to kvmppc_uvmem_init. Fixes: ca9f4942670c ("KVM: PPC: Book3S HV: Support for running secure guests") Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com> Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220125155735.1018683-2-farosas@linux.ibm.com --- arch/powerpc/kvm/book3s_hv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index c7e44d75f8aa..98ec92cf837b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -6130,8 +6130,11 @@ static int kvmppc_book3s_init_hv(void) if (r) return r; - if (kvmppc_radix_possible()) + if (kvmppc_radix_possible()) { r = kvmppc_radix_init(); + if (r) + return r; + } r = kvmppc_uvmem_init(); if (r < 0) From c5d0d77b45265905bba2ce6e63c9a02bbd11c43c Mon Sep 17 00:00:00 2001 From: Fabiano Rosas <farosas@linux.ibm.com> Date: Tue, 25 Jan 2022 12:57:33 -0300 Subject: [PATCH 019/179] KVM: PPC: Book3S HV: Delay setting of kvm ops Delay the setting of kvm_hv_ops until after all init code has completed. This avoids leaving the ops still accessible if the init fails. 
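(In the failure window, a concurrent KVM_CREATE_VM would have found kvmppc_hv_ops non-NULL and tried to use a half-initialized HV implementation; assigning the ops pointer only after every init step has succeeded closes that window.)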
Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220125155735.1018683-3-farosas@linux.ibm.com --- arch/powerpc/kvm/book3s_hv.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 98ec92cf837b..ec56970175e2 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -6119,9 +6119,6 @@ static int kvmppc_book3s_init_hv(void) } #endif - kvm_ops_hv.owner = THIS_MODULE; - kvmppc_hv_ops = &kvm_ops_hv; - init_default_hcalls(); init_vcore_lists(); @@ -6137,10 +6134,15 @@ static int kvmppc_book3s_init_hv(void) } r = kvmppc_uvmem_init(); - if (r < 0) + if (r < 0) { pr_err("KVM-HV: kvmppc_uvmem_init failed %d\n", r); + return r; + } - return r; + kvm_ops_hv.owner = THIS_MODULE; + kvmppc_hv_ops = &kvm_ops_hv; + + return 0; } static void kvmppc_book3s_exit_hv(void) From 175be7e5800e2782a7e38ee9e1b64633494c4b44 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas <farosas@linux.ibm.com> Date: Tue, 25 Jan 2022 12:57:34 -0300 Subject: [PATCH 020/179] KVM: PPC: Book3S HV: Free allocated memory if module init fails The module's exit function is not called when the init fails, we need to do cleanup before returning. Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com> Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220125155735.1018683-4-farosas@linux.ibm.com --- arch/powerpc/kvm/book3s_hv.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index ec56970175e2..c886557638a1 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -6096,7 +6096,7 @@ static int kvmppc_book3s_init_hv(void) if (!cpu_has_feature(CPU_FTR_ARCH_300)) { r = kvm_init_subcore_bitmap(); if (r) - return r; + goto err; } /* @@ -6112,7 +6112,8 @@ static int kvmppc_book3s_init_hv(void) np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc"); if (!np) { pr_err("KVM-HV: Cannot determine method for accessing XICS\n"); - return -ENODEV; + r = -ENODEV; + goto err; } /* presence of intc confirmed - node can be dropped again */ of_node_put(np); @@ -6125,12 +6126,12 @@ static int kvmppc_book3s_init_hv(void) r = kvmppc_mmu_hv_init(); if (r) - return r; + goto err; if (kvmppc_radix_possible()) { r = kvmppc_radix_init(); if (r) - return r; + goto err; } r = kvmppc_uvmem_init(); @@ -6143,6 +6144,12 @@ static int kvmppc_book3s_init_hv(void) kvmppc_hv_ops = &kvm_ops_hv; return 0; + +err: + kvmhv_nested_exit(); + kvmppc_radix_exit(); + + return r; } static void kvmppc_book3s_exit_hv(void) From 4feb74aa64b35b21a4937f96d7a940adee286e5b Mon Sep 17 00:00:00 2001 From: Fabiano Rosas <farosas@linux.ibm.com> Date: Tue, 25 Jan 2022 12:57:35 -0300 Subject: [PATCH 021/179] KVM: PPC: Decrement module refcount if init_vm fails We increment the reference count for KVM-HV/PR before the call to kvmppc_core_init_vm. If that function fails we need to decrement the refcount. Also remove the check on kvm_ops->owner because try_module_get can handle a NULL module. 
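try_module_get() already treats a NULL module as trivially successful, simplified from kernel/module.c:

	bool try_module_get(struct module *module)
	{
		bool ret = true;

		if (module) {
			/* bump the refcount, unless the module is
			 * already on its way out */
		}
		return ret;
	}

so the extra kvm_ops->owner check added nothing (owner is NULL anyway when KVM is built in).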
Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com> Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220125155735.1018683-5-farosas@linux.ibm.com --- arch/powerpc/kvm/powerpc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 1d06b68739b0..9772b176e406 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -453,6 +453,8 @@ int kvm_arch_check_processor_compat(void *opaque) int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { struct kvmppc_ops *kvm_ops = NULL; + int r; + /* * if we have both HV and PR enabled, default is HV */ @@ -474,11 +476,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) } else goto err_out; - if (kvm_ops->owner && !try_module_get(kvm_ops->owner)) + if (!try_module_get(kvm_ops->owner)) return -ENOENT; kvm->arch.kvm_ops = kvm_ops; - return kvmppc_core_init_vm(kvm); + r = kvmppc_core_init_vm(kvm); + if (r) + module_put(kvm_ops->owner); + return r; err_out: return -EINVAL; } From b53c86105919d4136591e3bee198a4829c0f5062 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas <farosas@linux.ibm.com> Date: Wed, 20 Jan 2021 15:18:47 -0300 Subject: [PATCH 022/179] powerpc: Fix debug print in smp_setup_cpu_maps When figuring out the number of threads, the debug message prints "1 thread" for the first iteration of the loop, instead of the actual number of threads calculated from the length of the "ibm,ppc-interrupt-server#s" property. * /cpus/PowerPC,POWER8@20... ibm,ppc-interrupt-server#s -> 1 threads <--- WRONG thread 0 -> cpu 0 (hard id 32) thread 1 -> cpu 1 (hard id 33) thread 2 -> cpu 2 (hard id 34) thread 3 -> cpu 3 (hard id 35) thread 4 -> cpu 4 (hard id 36) thread 5 -> cpu 5 (hard id 37) thread 6 -> cpu 6 (hard id 38) thread 7 -> cpu 7 (hard id 39) * /cpus/PowerPC,POWER8@28... ibm,ppc-interrupt-server#s -> 8 threads thread 0 -> cpu 8 (hard id 40) thread 1 -> cpu 9 (hard id 41) thread 2 -> cpu 10 (hard id 42) thread 3 -> cpu 11 (hard id 43) thread 4 -> cpu 12 (hard id 44) thread 5 -> cpu 13 (hard id 45) thread 6 -> cpu 14 (hard id 46) thread 7 -> cpu 15 (hard id 47) (...) 
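(The format also changes from %d to %lu because len is now divided by sizeof(int); the sizeof operand promotes the result to size_t, which is unsigned long on ppc64.)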
Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20210120181847.952106-1-farosas@linux.ibm.com --- arch/powerpc/kernel/setup-common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index f8da937df918..518ae5aa9410 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -456,8 +456,8 @@ void __init smp_setup_cpu_maps(void) intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len); if (intserv) { - DBG(" ibm,ppc-interrupt-server#s -> %d threads\n", - nthreads); + DBG(" ibm,ppc-interrupt-server#s -> %lu threads\n", + (len / sizeof(int))); } else { DBG(" no ibm,ppc-interrupt-server#s -> 1 thread\n"); intserv = of_get_property(dn, "reg", &len); From a1c414093370ed50e5b952d96d4ae775c7a18420 Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury <unixbhaskar@gmail.com> Date: Sun, 21 Mar 2021 03:09:32 +0530 Subject: [PATCH 023/179] powerpc/epapr: Fix parmeters typo s/parmeters/parameters/ Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com> Acked-by: Randy Dunlap <rdunlap@infradead.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20210320213932.22697-1-unixbhaskar@gmail.com --- arch/powerpc/include/asm/epapr_hcalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h index c99ba08a408d..cdf3c6df5123 100644 --- a/arch/powerpc/include/asm/epapr_hcalls.h +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -65,7 +65,7 @@ * but the gcc inline assembly syntax does not allow us to specify registers * on the clobber list that are also on the input/output list. Therefore, * the lists of clobbered registers depends on the number of register - * parmeters ("+r" and "=r") passed to the hypercall. + * parameters ("+r" and "=r") passed to the hypercall. * * Each assembly block should use one of the HCALL_CLOBBERSx macros. As a * general rule, 'x' is the number of parameters passed to the assembly From 925f76c55784fdc17ab41aecde06b30439ceb73a Mon Sep 17 00:00:00 2001 From: Julia Lawall <Julia.Lawall@inria.fr> Date: Fri, 8 May 2020 09:12:56 +0000 Subject: [PATCH 024/179] powerpc/spufs: adjust list element pointer type Other uses of &gang->aff_list_head, eg in spufs_assert_affinity, indicate that the list elements have type spu_context, not spu as used here. Change the type of tmp accordingly. This has no impact on the execution, because tmp is not used in the body of the loop. 
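(list_for_each_entry() and friends use container_of() on the iterator's declared type to step from the embedded list_head back to the containing object, so a mistyped iterator yields a bogus pointer even when, as here, it is never dereferenced.)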
Fixes: c5fc8d2a92461 ("[CELL] cell: add placement computation for scheduling of affinity contexts") Signed-off-by: Julia Lawall <Julia.Lawall@inria.fr> Reviewed-by: Jeremy Kerr <jk@ozlabs.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/1588929176-28527-1-git-send-email-Julia.Lawall@inria.fr --- arch/powerpc/platforms/cell/spufs/sched.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 369206489895..d058f6233e66 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -340,8 +340,7 @@ static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff, static void aff_set_ref_point_location(struct spu_gang *gang) { int mem_aff, gs, lowest_offset; - struct spu_context *ctx; - struct spu *tmp; + struct spu_context *tmp, *ctx; mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM; lowest_offset = 0; From be7be1c6c6f8bd348f0d83abe7a8f0e21bdaeac8 Mon Sep 17 00:00:00 2001 From: Mamatha Inamdar <mamatha4@linux.vnet.ibm.com> Date: Thu, 24 Sep 2020 10:44:16 +0530 Subject: [PATCH 025/179] PCI: rpaphp: Add MODULE_DESCRIPTION This patch adds a brief MODULE_DESCRIPTION to rpadlpar_io kernel modules (descriptions taken from Kconfig file). Signed-off-by: Mamatha Inamdar <mamatha4@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20200924051343.16052.9571.stgit@localhost.localdomain --- drivers/pci/hotplug/rpadlpar_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c index 0a3c80ba66be..e6991ff67526 100644 --- a/drivers/pci/hotplug/rpadlpar_core.c +++ b/drivers/pci/hotplug/rpadlpar_core.c @@ -478,3 +478,4 @@ static void __exit rpadlpar_io_exit(void) module_init(rpadlpar_io_init); module_exit(rpadlpar_io_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("RPA Dynamic Logical Partitioning driver for I/O slots"); From d4be60fe66b7380530868ceebe549f8eebccacc5 Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho <wedsonaf@google.com> Date: Wed, 2 Feb 2022 05:51:23 +0000 Subject: [PATCH 026/179] powerpc/module_64: use module_init_section instead of patching names Without this patch, module init sections are disabled by patching their names in arch-specific code when they're loaded (which prevents code in layout_sections from finding init sections). This patch uses the new arch-specific module_init_section instead. This allows modules that have .init_array sections to have the initialisers properly called (on load, before init). Without this patch, the initialisers are not called because .init_array is renamed to _init_array, and thus isn't found by code in find_module_sections(). Signed-off-by: Wedson Almeida Filho <wedsonaf@google.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220202055123.2144842-1-wedsonaf@google.com --- arch/powerpc/kernel/module_64.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 5d77d3f5fbb5..6a45e6ddbe58 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -277,6 +277,12 @@ static Elf64_Sym *find_dot_toc(Elf64_Shdr *sechdrs, return NULL; } +bool module_init_section(const char *name) +{ + /* We don't handle .init for the moment: always return false. 
*/ + return false; +} + int module_frob_arch_sections(Elf64_Ehdr *hdr, Elf64_Shdr *sechdrs, char *secstrings, @@ -286,7 +292,6 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, /* Find .toc and .stubs sections, symtab and strtab */ for (i = 1; i < hdr->e_shnum; i++) { - char *p; if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0) me->arch.stubs_section = i; else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0) { @@ -298,10 +303,6 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, dedotify_versions((void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size); - /* We don't handle .init for the moment: rename to _init */ - while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init"))) - p[0] = '_'; - if (sechdrs[i].sh_type == SHT_SYMTAB) dedotify((void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size / sizeof(Elf64_Sym), From 0198322379c25215b2778482bf1221743a76e2b5 Mon Sep 17 00:00:00 2001 From: Athira Rajeev <atrajeev@linux.vnet.ibm.com> Date: Wed, 2 Feb 2022 09:48:37 +0530 Subject: [PATCH 027/179] powerpc/perf: Don't use perf_hw_context for trace IMC PMU Trace IMC (In-Memory collection counters) in powerpc is useful for application level profiling. For trace_imc, presently task context (task_ctx_nr) is set to perf_hw_context. But perf_hw_context should only be used for CPU PMU. See commit 26657848502b ("perf/core: Verify we have a single perf_hw_context PMU"). So for trace_imc, even though it is per thread PMU, it is preferred to use sw_context in order to be able to do application level monitoring. Hence change the task_ctx_nr to use perf_sw_context. Fixes: 012ae244845f ("powerpc/perf: Trace imc PMU functions") Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com> Reviewed-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com> [mpe: Update subject & incorporate notes into change log, reflow comment] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220202041837.65968-1-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/imc-pmu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index e106909ff9c3..e7583fbcc8fa 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1457,7 +1457,11 @@ static int trace_imc_event_init(struct perf_event *event) event->hw.idx = -1; - event->pmu->task_ctx_nr = perf_hw_context; + /* + * There can only be a single PMU for perf_hw_context events which is assigned to + * core PMU. Hence use "perf_sw_context" for trace_imc. + */ + event->pmu->task_ctx_nr = perf_sw_context; event->destroy = reset_global_refc; return 0; } From e414e2938ee26e734f19e92a60cd090ebaff37e6 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET <christophe.jaillet@wanadoo.fr> Date: Tue, 1 Feb 2022 13:31:16 +0100 Subject: [PATCH 028/179] powerpc/xive: Add some error handling code to 'xive_spapr_init()' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'xive_irq_bitmap_add()' can return -ENOMEM. In this case, we should free the memory already allocated and return 'false' to the caller. 
Also add an error path which undoes the 'tima = ioremap(...)' mapping. Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr> Reviewed-by: Cédric Le Goater <clg@kaod.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/564998101804886b151235c8a9f93020923bfd2c.1643718324.git.christophe.jaillet@wanadoo.fr --- arch/powerpc/sysdev/xive/spapr.c | 36 +++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 928f95004501..29456c255f9f 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -67,6 +67,17 @@ static int __init xive_irq_bitmap_add(int base, int count) return 0; } +static void xive_irq_bitmap_remove_all(void) +{ + struct xive_irq_bitmap *xibm, *tmp; + + list_for_each_entry_safe(xibm, tmp, &xive_irq_bitmaps, list) { + list_del(&xibm->list); + kfree(xibm->bitmap); + kfree(xibm); + } +} + static int __xive_irq_bitmap_alloc(struct xive_irq_bitmap *xibm) { int irq; @@ -803,7 +814,7 @@ bool __init xive_spapr_init(void) u32 val; u32 len; const __be32 *reg; - int i; + int i, err; if (xive_spapr_disabled()) return false; @@ -828,23 +839,26 @@ bool __init xive_spapr_init(void) } if (!xive_get_max_prio(&max_prio)) - return false; + goto err_unmap; /* Feed the IRQ number allocator with the ranges given in the DT */ reg = of_get_property(np, "ibm,xive-lisn-ranges", &len); if (!reg) { pr_err("Failed to read 'ibm,xive-lisn-ranges' property\n"); - return false; + goto err_unmap; } if (len % (2 * sizeof(u32)) != 0) { pr_err("invalid 'ibm,xive-lisn-ranges' property\n"); - return false; + goto err_unmap; } - for (i = 0; i < len / (2 * sizeof(u32)); i++, reg += 2) - xive_irq_bitmap_add(be32_to_cpu(reg[0]), - be32_to_cpu(reg[1])); + for (i = 0; i < len / (2 * sizeof(u32)); i++, reg += 2) { + err = xive_irq_bitmap_add(be32_to_cpu(reg[0]), + be32_to_cpu(reg[1])); + if (err < 0) + goto err_mem_free; + } /* Iterate the EQ sizes and pick one */ of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, reg, val) { @@ -855,10 +869,16 @@ bool __init xive_spapr_init(void) /* Initialize XIVE core with our backend */ if (!xive_core_init(np, &xive_spapr_ops, tima, TM_QW1_OS, max_prio)) - return false; + goto err_mem_free; pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10)); return true; + +err_mem_free: + xive_irq_bitmap_remove_all(); +err_unmap: + iounmap(tima); + return false; } machine_arch_initcall(pseries, xive_core_debug_init); From dd75080aa8409ce10d50fb58981c6b59bf8707d3 Mon Sep 17 00:00:00 2001 From: Chen Jingwen <chenjingwen6@huawei.com> Date: Wed, 29 Dec 2021 11:52:26 +0800 Subject: [PATCH 029/179] powerpc/kasan: Fix early region not updated correctly The shadow's page table is not updated when PTE_RPN_SHIFT is 24 and PAGE_SHIFT is 12. It not only causes false positives but also false negatives, as shown in the following text. Fix it by bringing the logic of kasan_early_shadow_page_entry here. 1.
False Positive: ================================================================== BUG: KASAN: vmalloc-out-of-bounds in pcpu_alloc+0x508/0xa50 Write of size 16 at addr f57f3be0 by task swapper/0/1 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.15.0-12267-gdebe436e77c7 #1 Call Trace: [c80d1c20] [c07fe7b8] dump_stack_lvl+0x4c/0x6c (unreliable) [c80d1c40] [c02ff668] print_address_description.constprop.0+0x88/0x300 [c80d1c70] [c02ff45c] kasan_report+0x1ec/0x200 [c80d1cb0] [c0300b20] kasan_check_range+0x160/0x2f0 [c80d1cc0] [c03018a4] memset+0x34/0x90 [c80d1ce0] [c0280108] pcpu_alloc+0x508/0xa50 [c80d1d40] [c02fd7bc] __kmem_cache_create+0xfc/0x570 [c80d1d70] [c0283d64] kmem_cache_create_usercopy+0x274/0x3e0 [c80d1db0] [c2036580] init_sd+0xc4/0x1d0 [c80d1de0] [c00044a0] do_one_initcall+0xc0/0x33c [c80d1eb0] [c2001624] kernel_init_freeable+0x2c8/0x384 [c80d1ef0] [c0004b14] kernel_init+0x24/0x170 [c80d1f10] [c001b26c] ret_from_kernel_thread+0x5c/0x64 Memory state around the buggy address: f57f3a80: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f57f3b00: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 >f57f3b80: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 ^ f57f3c00: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f57f3c80: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 ================================================================== 2. False Negative (with KASAN tests): ================================================================== Before fix: ok 45 - kmalloc_double_kzfree # vmalloc_oob: EXPECTATION FAILED at lib/test_kasan.c:1039 KASAN failure expected in "((volatile char *)area)[3100]", but none occurred not ok 46 - vmalloc_oob not ok 1 - kasan ================================================================== After fix: ok 1 - kasan Fixes: cbd18991e24fe ("powerpc/mm: Fix an Oops in kasan_mmu_init()") Cc: stable@vger.kernel.org # 5.4.x Signed-off-by: Chen Jingwen <chenjingwen6@huawei.com> Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20211229035226.59159-1-chenjingwen6@huawei.com --- arch/powerpc/mm/kasan/kasan_init_32.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index cf8770b1a692..f3e4d069e0ba 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -83,13 +83,12 @@ void __init kasan_update_early_region(unsigned long k_start, unsigned long k_end, pte_t pte) { unsigned long k_cur; - phys_addr_t pa = __pa(kasan_early_shadow_page); for (k_cur = k_start; k_cur != k_end; k_cur += PAGE_SIZE) { pmd_t *pmd = pmd_off_k(k_cur); pte_t *ptep = pte_offset_kernel(pmd, k_cur); - if ((pte_val(*ptep) & PTE_RPN_MASK) != pa) + if (pte_page(*ptep) != virt_to_page(lm_alias(kasan_early_shadow_page))) continue; __set_pte_at(&init_mm, k_cur, ptep, pte, 0); From 9872cbfb4558bf68219c5a8a65fd5c29b593323d Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Mon, 31 Jan 2022 07:15:12 +0000 Subject: [PATCH 030/179] powerpc/603: Remove outdated comment Since commit 84de6ab0e904 ("powerpc/603: don't handle PAGE_ACCESSED in TLB miss handlers.") page table is not updated anymore by TLB miss handlers. Remove the comment. 
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/38b1ffefd2146fa56bf8aa605d476ad9736bbb37.1643613296.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index fa84744d6b24..7e27b44c1d89 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -504,10 +504,6 @@ DataLoadTLBMiss: lwz r0,0(r2) /* get linux-style pte */ andc. r1,r1,r0 /* check access & ~permission */ bne- DataAddressInvalid /* return if access not permitted */ - /* - * NOTE! We are assuming this is not an SMP system, otherwise - * we would need to update the pte atomically with lwarx/stwcx. - */ /* Convert linux-style PTE to low word of PPC-style PTE */ rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */ rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ @@ -586,10 +582,6 @@ DataStoreTLBMiss: lwz r0,0(r2) /* get linux-style pte */ andc. r1,r1,r0 /* check access & ~permission */ bne- DataAddressInvalid /* return if access not permitted */ - /* - * NOTE! We are assuming this is not an SMP system, otherwise - * we would need to update the pte atomically with lwarx/stwcx. - */ /* Convert linux-style PTE to low word of PPC-style PTE */ rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ li r1,0xe06 /* clear out reserved bits & PP msb */ From 4634bf4455fe26f07dabf97c3585c9ccb86353c4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Mon, 31 Jan 2022 07:17:57 +0000 Subject: [PATCH 031/179] powerpc/603: Clear C bit when PTE is read only On book3s/32 MMU, PP bits don't offer kernel RO protection, kernel pages are always RW. However, on the 603 a page fault is always generated when the C bit (change bit = dirty bit) is not set. Enforce kernel RO protection by clearing C bit in TLB miss handler when the page doesn't have _PAGE_RW flag. Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/bbb13848ff0100a76ee9ea95118058c30ae95f2c.1643613343.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 7e27b44c1d89..d489b965f9a6 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -507,7 +507,9 @@ DataLoadTLBMiss: /* Convert linux-style PTE to low word of PPC-style PTE */ rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */ rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ + rlwimi r1,r0,32-3,24,24 /* _PAGE_RW -> _PAGE_DIRTY */ rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */ + xori r1,r1,_PAGE_DIRTY /* clear dirty when not rw */ ori r1,r1,0xe04 /* clear out reserved bits */ andc r1,r0,r1 /* PP = user? rw? 
1: 3: 0 */ BEGIN_FTR_SECTION From 535bda36dbf2d271f59e06fe252c32eff452666d Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Mon, 31 Jan 2022 08:16:48 +0000 Subject: [PATCH 032/179] powerpc/nohash: Remove pte_same() arch/powerpc/include/asm/nohash/{32/64}/pgtable.h has #define __HAVE_ARCH_PTE_SAME #define pte_same(A,B) ((pte_val(A) ^ pte_val(B)) == 0) include/linux/pgtable.h has #ifndef __HAVE_ARCH_PTE_SAME static inline int pte_same(pte_t pte_a, pte_t pte_b) { return pte_val(pte_a) == pte_val(pte_b); } #endif Remove the powerpc version, which is equivalent to the generic one. Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/83c97bd58a3596ef1b0ff28b1e41fd492d005520.1643616989.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 3 --- arch/powerpc/include/asm/nohash/64/pgtable.h | 3 --- 2 files changed, 6 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index d959c2a73fbf..a0525765c7bb 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -338,9 +338,6 @@ static inline int pte_young(pte_t pte) return pte_val(pte) & _PAGE_ACCESSED; } -#define __HAVE_ARCH_PTE_SAME -#define pte_same(A,B) ((pte_val(A) ^ pte_val(B)) == 0) - /* * Note that on Book E processors, the pmd contains the kernel virtual * (lowmem) address of the pte page. The physical address is less useful diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 2816d158280a..a441056b3eba 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -281,9 +281,6 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, flush_tlb_page(vma, address); } -#define __HAVE_ARCH_PTE_SAME -#define pte_same(A,B) ((pte_val(A) ^ pte_val(B)) == 0) - #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) #define pmd_ERROR(e) \ From 4291d085b0b07a78403e845c187428b038c901cd Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Sun, 30 Jan 2022 10:29:34 +0000 Subject: [PATCH 033/179] powerpc/32s: Make pte_update() non atomic on 603 core On the 603 core, the TLB miss handlers don't make any changes to the page tables, so pte_update() doesn't need to be atomic.
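For illustration, a minimal C sketch (not the kernel code) of the non-atomic read-modify-write that becomes sufficient when nothing else touches the PTE concurrently; the lwarx/stwcx. retry loop is only needed when another agent may update the entry in parallel. The real implementation follows in the diff below.

#include <stdint.h>
#include <stdio.h>

typedef uint32_t pte_basic_t;

/* plain read-modify-write: clear 'clr' bits, set 'set' bits */
static pte_basic_t pte_update_nonatomic(pte_basic_t *p,
					unsigned long clr, unsigned long set)
{
	pte_basic_t old = *p;

	*p = (old & ~(pte_basic_t)clr) | set;
	return old;	/* caller sees the previous value */
}

int main(void)
{
	pte_basic_t pte = 0x00000c03;	/* hypothetical PTE value */

	pte_update_nonatomic(&pte, 0x400, 0x100);
	printf("pte now 0x%08x\n", pte);
	return 0;
}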
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/cc89d3c11fc9c742d0df3454a657a3a00be24046.1643538554.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/pgtable.h | 37 ++++++++++++-------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index f8b94f78403f..772e00dc4ef1 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -298,28 +298,35 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p unsigned long clr, unsigned long set, int huge) { pte_basic_t old; - unsigned long tmp; - __asm__ __volatile__( + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) { + unsigned long tmp; + + asm volatile( #ifndef CONFIG_PTE_64BIT -"1: lwarx %0, 0, %3\n" -" andc %1, %0, %4\n" + "1: lwarx %0, 0, %3\n" + " andc %1, %0, %4\n" #else -"1: lwarx %L0, 0, %3\n" -" lwz %0, -4(%3)\n" -" andc %1, %L0, %4\n" + "1: lwarx %L0, 0, %3\n" + " lwz %0, -4(%3)\n" + " andc %1, %L0, %4\n" #endif -" or %1, %1, %5\n" -" stwcx. %1, 0, %3\n" -" bne- 1b" - : "=&r" (old), "=&r" (tmp), "=m" (*p) + " or %1, %1, %5\n" + " stwcx. %1, 0, %3\n" + " bne- 1b" + : "=&r" (old), "=&r" (tmp), "=m" (*p) #ifndef CONFIG_PTE_64BIT - : "r" (p), + : "r" (p), #else - : "b" ((unsigned long)(p) + 4), + : "b" ((unsigned long)(p) + 4), #endif - "r" (clr), "r" (set), "m" (*p) - : "cc" ); + "r" (clr), "r" (set), "m" (*p) + : "cc" ); + } else { + old = pte_val(*p); + + *p = __pte((old & ~(pte_basic_t)clr) | set); + } return old; } From 7c5ed82b800d8615cdda00729e7b62e5899f0b13 Mon Sep 17 00:00:00 2001 From: Sourabh Jain <sourabhjain@linux.ibm.com> Date: Fri, 4 Feb 2022 14:26:01 +0530 Subject: [PATCH 034/179] powerpc: Set crashkernel offset to mid of RMA region On large config LPARs (with 192 or more cores), Linux fails to boot due to insufficient memory in the first memblock. This is due to the memory reservation for the crash kernel, which starts at a 128MB offset into the first memblock. This memory reservation for the crash kernel doesn't leave enough space in the first memblock to accommodate other essential system resources. The crash kernel start address was set to a 128MB offset by default to ensure that the crash kernel gets some memory below the RMA region, which used to be 256MB in size. But given that the RMA region size can be 512MB or more, setting the crash kernel offset to the middle of the RMA region will leave enough space for the kernel to allocate memory for other system resources. Since the above crash kernel offset change is only applicable to the LPAR platform, the LPAR feature detection is pushed before the crash kernel reservation. The rest of the LPAR-specific initialization will still be done during pseries_probe_fw_features as usual. This patch depends on changes to the paca allocation for the boot CPU.
It expects the boot CPU to discover 1T segment support, which is introduced by the patch posted here: https://lists.ozlabs.org/pipermail/linuxppc-dev/2022-January/239175.html Reported-by: Abdul haleem <abdhalee@linux.vnet.ibm.com> Signed-off-by: Sourabh Jain <sourabhjain@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220204085601.107257-1-sourabhjain@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 6 ++++++ arch/powerpc/kexec/core.c | 15 +++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 733e6ef36758..1f42aabbbab3 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1313,6 +1313,12 @@ int __init early_init_dt_scan_rtas(unsigned long node, entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL); sizep = of_get_flat_dt_prop(node, "rtas-size", NULL); +#ifdef CONFIG_PPC64 + /* need this feature to decide the crashkernel offset */ + if (of_get_flat_dt_prop(node, "ibm,hypertas-functions", NULL)) + powerpc_firmware_features |= FW_FEATURE_LPAR; +#endif + if (basep && entryp && sizep) { rtas.base = *basep; rtas.entry = *entryp; diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index 8b68d9f91a03..abf5897ae88c 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -134,11 +134,18 @@ void __init reserve_crashkernel(void) if (!crashk_res.start) { #ifdef CONFIG_PPC64 /* - * On 64bit we split the RMO in half but cap it at half of - * a small SLB (128MB) since the crash kernel needs to place - * itself and some stacks to be in the first segment. + * On the LPAR platform place the crash kernel to mid of + * RMA size (512MB or more) to ensure the crash kernel + * gets enough space to place itself and some stack to be + * in the first segment. At the same time normal kernel + * also get enough space to allocate memory for essential + * system resource in the first segment. Keep the crash + * kernel starts at 128MB offset on other platforms. */ - crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2)); + if (firmware_has_feature(FW_FEATURE_LPAR)) + crashk_res.start = ppc64_rma_size / 2; + else + crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2)); #else crashk_res.start = KDUMP_KERNELBASE; #endif From d6a6c725a20467f52a41270bdaad9565c66f3b7a Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Fri, 3 Sep 2021 11:18:38 +0000 Subject: [PATCH 035/179] powerpc/machdep: Remove CONFIG_PPC_HAS_FEATURE_CALLS The last user was removed by commit 7bbd827750e6 ("[PATCH] ppc64: very basic desktop g5 sound support").
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/803779fffb4ee0801746b2173d37cea3b273f821.1630667612.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/machdep.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index e821037f74f0..75687e1f994a 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -10,11 +10,6 @@ #include <asm/setup.h> -/* We export this macro for external modules like Alsa to know if - * ppc_md.feature_call is implemented or not - */ -#define CONFIG_PPC_HAS_FEATURE_CALLS - struct pt_regs; struct pci_bus; struct device_node; From e6d03ac156db84422519aa8628efc210d24bf889 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Fri, 3 Sep 2021 11:18:40 +0000 Subject: [PATCH 036/179] powerpc/machdep: Move sys_ctrler_t definition into pmac_feature.h sys_ctrler_t definitions are tied to pmac. Move it into pmac_feature.h Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> [mpe: Move to pmac_feature.h to fix some build errors] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/7dd5ead4bbca749e2da089ff6fe2b1878d6bf40e.1630667612.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/machdep.h | 15 --------------- arch/powerpc/include/asm/pmac_feature.h | 12 ++++++++++++ arch/powerpc/platforms/powermac/pmac.h | 2 ++ drivers/macintosh/via-cuda.c | 1 + sound/ppc/pmac.h | 1 + 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 75687e1f994a..06ac7ef07c85 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -230,21 +230,6 @@ extern struct machdep_calls *machine_id; machine_id == &mach_##name; \ }) -#ifdef CONFIG_PPC_PMAC -/* - * Power macintoshes have either a CUDA, PMU or SMU controlling - * system reset, power, NVRAM, RTC. - */ -typedef enum sys_ctrler_kind { - SYS_CTRLER_UNKNOWN = 0, - SYS_CTRLER_CUDA = 1, - SYS_CTRLER_PMU = 2, - SYS_CTRLER_SMU = 3, -} sys_ctrler_t; -extern sys_ctrler_t sys_ctrler; - -#endif /* CONFIG_PPC_PMAC */ - static inline void log_error(char *buf, unsigned int err_type, int fatal) { if (ppc_md.log_error) diff --git a/arch/powerpc/include/asm/pmac_feature.h b/arch/powerpc/include/asm/pmac_feature.h index e08e829261b6..2495866f2e97 100644 --- a/arch/powerpc/include/asm/pmac_feature.h +++ b/arch/powerpc/include/asm/pmac_feature.h @@ -401,5 +401,17 @@ extern u32 __iomem *uninorth_base; */ extern int pmac_get_uninorth_variant(void); +/* + * Power macintoshes have either a CUDA, PMU or SMU controlling + * system reset, power, NVRAM, RTC. + */ +typedef enum sys_ctrler_kind { + SYS_CTRLER_UNKNOWN = 0, + SYS_CTRLER_CUDA = 1, + SYS_CTRLER_PMU = 2, + SYS_CTRLER_SMU = 3, +} sys_ctrler_t; +extern sys_ctrler_t sys_ctrler; + #endif /* __ASM_POWERPC_PMAC_FEATURE_H */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h index 29d2036dcc9d..ba8d4e97095b 100644 --- a/arch/powerpc/platforms/powermac/pmac.h +++ b/arch/powerpc/platforms/powermac/pmac.h @@ -5,6 +5,8 @@ #include <linux/pci.h> #include <linux/irq.h> +#include <asm/pmac_feature.h> + /* * Declaration for the various functions exported by the * pmac_* files. 
Mostly for use by pmac_setup diff --git a/drivers/macintosh/via-cuda.c b/drivers/macintosh/via-cuda.c index cd267392289c..3d0d0b9d471d 100644 --- a/drivers/macintosh/via-cuda.c +++ b/drivers/macintosh/via-cuda.c @@ -21,6 +21,7 @@ #ifdef CONFIG_PPC #include <asm/prom.h> #include <asm/machdep.h> +#include <asm/pmac_feature.h> #else #include <asm/macintosh.h> #include <asm/macints.h> diff --git a/sound/ppc/pmac.h b/sound/ppc/pmac.h index a758caf689d2..b6f454130463 100644 --- a/sound/ppc/pmac.h +++ b/sound/ppc/pmac.h @@ -26,6 +26,7 @@ #include <asm/dbdma.h> #include <asm/prom.h> #include <asm/machdep.h> +#include <asm/pmac_feature.h> /* maximum number of fragments */ #define PMAC_MAX_FRAGS 32 From fae65a9ac8fd2221dbf034019fa18d72b2b0c8e9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Fri, 3 Sep 2021 11:18:42 +0000 Subject: [PATCH 037/179] powerpc/mpc86xx_hpcn: Remove obsolete statement Comment says "Delete this in 2.6.27". Do so now. Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/a47bb6a69c68156bc2d555152dab5a23733856b7.1630667612.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/86xx/mpc86xx_hpcn.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c index b697918b727d..a6b8ffcbf01a 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c @@ -95,12 +95,6 @@ static int __init mpc86xx_hpcn_probe(void) if (of_machine_is_compatible("fsl,mpc8641hpcn")) return 1; /* Looks good */ - /* Be nice and don't give silent boot death. Delete this in 2.6.27 */ - if (of_machine_is_compatible("mpc86xx")) { - pr_warn("WARNING: your dts/dtb is old. You must update before the next kernel release.\n"); - return 1; - } - return 0; } From 66ada2907864cafa4578b92926cb8bc0a4bc8c9c Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Fri, 3 Sep 2021 11:18:43 +0000 Subject: [PATCH 038/179] powerpc/corenet: Change criteria to set MPIC_ENABLE_COREINT Don't use ppc_md function comparison. Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/c8ef82ee5f2713f4c36eb5d2d49b0905c7472801.1630667612.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/85xx/corenet_generic.c | 2 +- arch/powerpc/sysdev/mpic.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 8d6029099848..17ae75d62518 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -37,7 +37,7 @@ void __init corenet_gen_pic_init(void) unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU | MPIC_NO_RESET; - if (ppc_md.get_irq == mpic_get_coreint_irq) + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) && !IS_ENABLED(CONFIG_KEXEC_CORE)) flags |= MPIC_ENABLE_COREINT; mpic = mpic_alloc(NULL, 0, flags, 0, 512, " OpenPIC "); diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index d5cb48b61bbd..dbcbaa4c0663 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -1404,10 +1404,8 @@ struct mpic * __init mpic_alloc(struct device_node *node, * with device trees generated by older versions of QEMU. * fsl_version will be zero if MPIC_FSL is not set. 
*/ - if (fsl_version < 0x400 && (flags & MPIC_ENABLE_COREINT)) { - WARN_ON(ppc_md.get_irq != mpic_get_coreint_irq); + if (fsl_version < 0x400 && (flags & MPIC_ENABLE_COREINT)) ppc_md.get_irq = mpic_get_irq; - } /* Reset */ From 12318163737cd8808d13faa6e2393774191a6182 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Tue, 30 Nov 2021 13:04:49 +0100 Subject: [PATCH 039/179] powerpc/32: Remove remaining .stabs annotations The STABS debug format was superseded a long time ago by DWARF. Remove the few remaining .stabs annotations from old 32-bit code. Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/68932ec2ba6b868d35006b96e90f0890f3da3c05.1638273868.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc_asm.h | 1 - arch/powerpc/kernel/head_book3s_32.S | 3 --- arch/powerpc/lib/checksum_32.S | 3 --- arch/powerpc/lib/copy_32.S | 3 --- 4 files changed, 10 deletions(-) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index f21e6bde17a1..c4b074263bf9 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -208,7 +208,6 @@ GLUE(.,name): n: #define _GLOBAL(n) \ - .stabs __stringify(n:F-1),N_FUN,0,0,n;\ .globl n; \ n: diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index d489b965f9a6..94f88dc8269d 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -50,9 +50,6 @@ mtspr SPRN_DBAT##n##L,RB __HEAD - .stabs "arch/powerpc/kernel/",N_SO,0,0,0f - .stabs "head_book3s_32.S",N_SO,0,0,0f -0: _ENTRY(_stext); /* diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index 27d9070617df..4541e8e29467 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -116,9 +116,6 @@ EXPORT_SYMBOL(__csum_partial) EX_TABLE(8 ## n ## 7b, fault); .text - .stabs "arch/powerpc/lib/",N_SO,0,0,0f - .stabs "checksum_32.S",N_SO,0,0,0f -0: CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index a3bcf4786e4a..3e9c27c46331 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -57,9 +57,6 @@ EX_TABLE(8 ## n ## 7b,9 ## n ## 1b) .text - .stabs "arch/powerpc/lib/",N_SO,0,0,0f - .stabs "copy_32.S",N_SO,0,0,0f -0: CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT From 27e21e8f128a56d3462f0fe2fd3a59c02cc002b1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Tue, 30 Nov 2021 13:04:50 +0100 Subject: [PATCH 040/179] powerpc/32: Remove _ENTRY() macro _ENTRY() is now redundant with _GLOBAL(). Remove it.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/62a35f8dde2bb74c8d0d7a5430cce07a5a3a6fb6.1638273868.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc_asm.h | 4 ---- arch/powerpc/kernel/head_40x.S | 18 +++++++++--------- arch/powerpc/kernel/head_44x.S | 4 ++-- arch/powerpc/kernel/head_8xx.S | 4 ++-- arch/powerpc/kernel/head_book3s_32.S | 8 ++++---- arch/powerpc/kernel/head_fsl_booke.S | 6 +++--- 6 files changed, 20 insertions(+), 24 deletions(-) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index c4b074263bf9..3c06a33b5da4 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -203,10 +203,6 @@ GLUE(.,name): #else /* 32-bit */ -#define _ENTRY(n) \ - .globl n; \ -n: - #define _GLOBAL(n) \ .globl n; \ n: diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index b6c6d1de5fd5..088f500896c7 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -53,8 +53,8 @@ * This is all going to change RSN when we add bi_recs....... -- Dan */ __HEAD -_ENTRY(_stext); -_ENTRY(_start); +_GLOBAL(_stext); +_GLOBAL(_start); mr r31,r3 /* save device tree ptr */ @@ -82,19 +82,19 @@ turn_on_mmu: */ . = 0xc0 crit_save: -_ENTRY(crit_r10) +_GLOBAL(crit_r10) .space 4 -_ENTRY(crit_r11) +_GLOBAL(crit_r11) .space 4 -_ENTRY(crit_srr0) +_GLOBAL(crit_srr0) .space 4 -_ENTRY(crit_srr1) +_GLOBAL(crit_srr1) .space 4 -_ENTRY(crit_r1) +_GLOBAL(crit_r1) .space 4 -_ENTRY(crit_dear) +_GLOBAL(crit_dear) .space 4 -_ENTRY(crit_esr) +_GLOBAL(crit_esr) .space 4 /* diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index b73a56466903..f15cb9fdb692 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -52,8 +52,8 @@ * */ __HEAD -_ENTRY(_stext); -_ENTRY(_start); +_GLOBAL(_stext); +_GLOBAL(_start); /* * Reserve a word at a fixed location to store the address * of abatron_pteptrs diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 0d073b9fd52c..0b05f2be66b9 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -53,8 +53,8 @@ #define PAGE_SHIFT_8M 23 __HEAD -_ENTRY(_stext); -_ENTRY(_start); +_GLOBAL(_stext); +_GLOBAL(_start); /* MPC8xx * This port was done on an MBX board with an 860. Right now I only diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 94f88dc8269d..519b60695167 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -50,13 +50,13 @@ mtspr SPRN_DBAT##n##L,RB __HEAD -_ENTRY(_stext); +_GLOBAL(_stext); /* * _start is defined this way because the XCOFF loader in the OpenFirmware * on the powermac expects the entry point to be a procedure descriptor. */ -_ENTRY(_start); +_GLOBAL(_start); /* * These are here for legacy reasons, the kernel used to * need to look like a coff function entry for the pmac @@ -775,7 +775,7 @@ relocate_kernel: * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5. 
*/ -_ENTRY(copy_and_flush) +_GLOBAL(copy_and_flush) addi r5,r5,-4 addi r6,r6,-4 4: li r0,L1_CACHE_BYTES/4 @@ -1073,7 +1073,7 @@ BEGIN_MMU_FTR_SECTION END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blr -_ENTRY(update_bats) +_GLOBAL(update_bats) lis r4, 1f@h ori r4, r4, 1f@l tophys(r4, r4) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index ac2b4dcf5fd3..f0db4f52bc00 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -54,8 +54,8 @@ * */ __HEAD -_ENTRY(_stext); -_ENTRY(_start); +_GLOBAL(_stext); +_GLOBAL(_start); /* * Reserve a word at a fixed location to store the address * of abatron_pteptrs @@ -154,7 +154,7 @@ _ENTRY(_start); * if needed */ -_ENTRY(__early_start) +_GLOBAL(__early_start) LOAD_REG_ADDR_PIC(r20, kernstart_virt_addr) lwz r20,0(r20) From 2f293651eca3eacaeb56747dede31edace7329d2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Mon, 20 Dec 2021 16:38:02 +0000 Subject: [PATCH 041/179] livepatch: Fix build failure on 32 bits processors Trying to build livepatch on powerpc/32 results in: kernel/livepatch/core.c: In function 'klp_resolve_symbols': kernel/livepatch/core.c:221:23: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] 221 | sym = (Elf64_Sym *)sechdrs[symndx].sh_addr + ELF_R_SYM(relas[i].r_info); | ^ kernel/livepatch/core.c:221:21: error: assignment to 'Elf32_Sym *' {aka 'struct elf32_sym *'} from incompatible pointer type 'Elf64_Sym *' {aka 'struct elf64_sym *'} [-Werror=incompatible-pointer-types] 221 | sym = (Elf64_Sym *)sechdrs[symndx].sh_addr + ELF_R_SYM(relas[i].r_info); | ^ kernel/livepatch/core.c: In function 'klp_apply_section_relocs': kernel/livepatch/core.c:312:35: error: passing argument 1 of 'klp_resolve_symbols' from incompatible pointer type [-Werror=incompatible-pointer-types] 312 | ret = klp_resolve_symbols(sechdrs, strtab, symndx, sec, sec_objname); | ^~~~~~~ | | | Elf32_Shdr * {aka struct elf32_shdr *} kernel/livepatch/core.c:193:44: note: expected 'Elf64_Shdr *' {aka 'struct elf64_shdr *'} but argument is of type 'Elf32_Shdr *' {aka 'struct elf32_shdr *'} 193 | static int klp_resolve_symbols(Elf64_Shdr *sechdrs, const char *strtab, | ~~~~~~~~~~~~^~~~~~~ Fix it by using the right types instead of forcing 64 bits types. 
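For illustration, a stand-alone sketch of the size-generic ELF type pattern the fix relies on, shown in userspace form; KLP_ELFCLASS64 is a hypothetical switch standing in for the kernel's build-time class selection, not an actual kernel macro. Common code written against the size-neutral names compiles for both widths, which is exactly why forcing Elf64_* types broke the 32-bit build.

#include <elf.h>	/* system header providing Elf32_* and Elf64_* types */

#ifdef KLP_ELFCLASS64
typedef Elf64_Shdr Elf_Shdr;
typedef Elf64_Sym  Elf_Sym;
typedef Elf64_Rela Elf_Rela;
#else
typedef Elf32_Shdr Elf_Shdr;
typedef Elf32_Sym  Elf_Sym;
typedef Elf32_Rela Elf_Rela;
#endif

/* no Elf64_* name leaks into shared code; sh_addr scales with the class */
static Elf_Sym *symbol_at(Elf_Shdr *sechdrs, unsigned int symndx,
			  unsigned long idx)
{
	return (Elf_Sym *)sechdrs[symndx].sh_addr + idx;
}

int main(void)
{
	(void)symbol_at;	/* sketch only; nothing to run here */
	return 0;
}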
Fixes: 7c8e2bdd5f0d ("livepatch: Apply vmlinux-specific KLP relocations early") Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Acked-by: Petr Mladek <pmladek@suse.com> Acked-by: Joe Lawrence <joe.lawrence@redhat.com> Acked-by: Miroslav Benes <mbenes@suse.cz> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/5288e11b018a762ea3351cc8fb2d4f15093a4457.1640017960.git.christophe.leroy@csgroup.eu --- kernel/livepatch/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 585494ec464f..bc475e62279d 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -190,7 +190,7 @@ static int klp_find_object_symbol(const char *objname, const char *name, return -EINVAL; } -static int klp_resolve_symbols(Elf64_Shdr *sechdrs, const char *strtab, +static int klp_resolve_symbols(Elf_Shdr *sechdrs, const char *strtab, unsigned int symndx, Elf_Shdr *relasec, const char *sec_objname) { @@ -218,7 +218,7 @@ static int klp_resolve_symbols(Elf64_Shdr *sechdrs, const char *strtab, relas = (Elf_Rela *) relasec->sh_addr; /* For each rela in this klp relocation section */ for (i = 0; i < relasec->sh_size / sizeof(Elf_Rela); i++) { - sym = (Elf64_Sym *)sechdrs[symndx].sh_addr + ELF_R_SYM(relas[i].r_info); + sym = (Elf_Sym *)sechdrs[symndx].sh_addr + ELF_R_SYM(relas[i].r_info); if (sym->st_shndx != SHN_LIVEPATCH) { pr_err("symbol %s is not marked as a livepatch symbol\n", strtab + sym->st_name); From 0c850965d6909d39fd69d6a3602bb62b48cad417 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Mon, 20 Dec 2021 16:38:09 +0000 Subject: [PATCH 042/179] powerpc/module_32: Fix livepatching for RO modules Livepatching a loaded module involves applying relocations through apply_relocate_add(), which attempts to write to read-only memory when CONFIG_STRICT_MODULE_RWX=y. R_PPC_ADDR16_LO, R_PPC_ADDR16_HI, R_PPC_ADDR16_HA and R_PPC_REL24 are the relocation types generated by the kpatch-build userspace tool or the klp-convert kernel tree tool, as observed when applying a relocation to a post-init module. Use patch_instruction() to patch those relocations. Commit 8734b41b3efe ("powerpc/module_64: Fix livepatching for RO modules") made a similar change in module_64.
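For illustration, a stand-alone C sketch (hypothetical example values) of the masking idea behind the patch_location_16() helper introduced in the diff below: a 16-bit immediate occupies the low half of a 32-bit instruction word, so to change it through an instruction-patching primitive the whole aligned word is rebuilt, keeping the opcode half and replacing the value half.

#include <stdint.h>
#include <stdio.h>

/* rebuild a full instruction word with a new low 16-bit immediate */
static uint32_t patch_low_half(uint32_t insn, uint16_t value)
{
	return (insn & 0xffff0000u) | value;
}

int main(void)
{
	uint32_t lis_r12 = 0x3d800000;	/* "lis r12,0": immediate in low 16 bits */

	/* e.g. retarget the immediate to 0xc000 */
	printf("patched insn: 0x%08x\n", patch_low_half(lis_r12, 0xc000));
	return 0;
}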
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Acked-by: Joe Lawrence <joe.lawrence@redhat.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/d5697157cb7dba3927e19aa17c915a83bc550bb2.1640017960.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/module_32.c | 44 ++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index a491ad481d85..a0432ef46967 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -18,6 +18,7 @@ #include <linux/bug.h> #include <linux/sort.h> #include <asm/setup.h> +#include <asm/code-patching.h> /* Count how many different relocations (different symbol, different addend) */ @@ -174,15 +175,25 @@ static uint32_t do_plt_call(void *location, entry++; } - entry->jump[0] = PPC_RAW_LIS(_R12, PPC_HA(val)); - entry->jump[1] = PPC_RAW_ADDI(_R12, _R12, PPC_LO(val)); - entry->jump[2] = PPC_RAW_MTCTR(_R12); - entry->jump[3] = PPC_RAW_BCTR(); + if (patch_instruction(&entry->jump[0], ppc_inst(PPC_RAW_LIS(_R12, PPC_HA(val))))) + return 0; + if (patch_instruction(&entry->jump[1], ppc_inst(PPC_RAW_ADDI(_R12, _R12, PPC_LO(val))))) + return 0; + if (patch_instruction(&entry->jump[2], ppc_inst(PPC_RAW_MTCTR(_R12)))) + return 0; + if (patch_instruction(&entry->jump[3], ppc_inst(PPC_RAW_BCTR()))) + return 0; pr_debug("Initialized plt for 0x%x at %p\n", val, entry); return (uint32_t)entry; } +static int patch_location_16(uint32_t *loc, u16 value) +{ + loc = PTR_ALIGN_DOWN(loc, sizeof(u32)); + return patch_instruction(loc, ppc_inst((*loc & 0xffff0000) | value)); +} + int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, @@ -216,37 +227,42 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, case R_PPC_ADDR16_LO: /* Low half of the symbol */ - *(uint16_t *)location = value; + if (patch_location_16(location, PPC_LO(value))) + return -EFAULT; break; case R_PPC_ADDR16_HI: /* Higher half of the symbol */ - *(uint16_t *)location = (value >> 16); + if (patch_location_16(location, PPC_HI(value))) + return -EFAULT; break; case R_PPC_ADDR16_HA: - /* Sign-adjusted lower 16 bits: PPC ELF ABI says: - (((x >> 16) + ((x & 0x8000) ? 1 : 0))) & 0xFFFF. - This is the same, only sane. - */ - *(uint16_t *)location = (value + 0x8000) >> 16; + if (patch_location_16(location, PPC_HA(value))) + return -EFAULT; break; case R_PPC_REL24: if ((int)(value - (uint32_t)location) < -0x02000000 - || (int)(value - (uint32_t)location) >= 0x02000000) + || (int)(value - (uint32_t)location) >= 0x02000000) { value = do_plt_call(location, value, sechdrs, module); + if (!value) + return -EFAULT; + } /* Only replace bits 2 through 26 */ pr_debug("REL24 value = %08X. location = %08X\n", value, (uint32_t)location); pr_debug("Location before: %08X.\n", *(uint32_t *)location); - *(uint32_t *)location - = (*(uint32_t *)location & ~0x03fffffc) + value = (*(uint32_t *)location & ~0x03fffffc) | ((value - (uint32_t)location) & 0x03fffffc); + + if (patch_instruction(location, ppc_inst(value))) + return -EFAULT; + pr_debug("Location after: %08X.\n", *(uint32_t *)location); pr_debug("ie. jump to %08X+%08X = %08X\n", From a4520b25276500f1abcfc55d24f1251b7b08eff6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Mon, 20 Dec 2021 16:38:12 +0000 Subject: [PATCH 043/179] powerpc/ftrace: Add support for livepatch to PPC32 PPC64 needs some special logic to properly set up the TOC. 
See commit 85baa095497f ("powerpc/livepatch: Add live patching support on ppc64le") for details. PPC32 doesn't have a TOC so it doesn't need that logic; adding LIVEPATCH support is therefore straightforward. Add CONFIG_LIVEPATCH_64 and move the livepatch stack logic under that option. Livepatch sample modules all work. Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/63cb094125b6a6038c65eeac2abaabbabe63addd.1640017960.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 6 +++++- arch/powerpc/include/asm/livepatch.h | 8 +++++--- arch/powerpc/include/asm/thread_info.h | 2 +- arch/powerpc/kernel/asm-offsets.c | 2 +- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b779603978e1..1d027ce94ab1 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -9,6 +9,10 @@ config 64BIT bool default y if PPC64 +config LIVEPATCH_64 + def_bool PPC64 + depends on LIVEPATCH + config MMU bool default y @@ -221,7 +225,7 @@ config PPC select HAVE_KPROBES_ON_FTRACE select HAVE_KRETPROBES select HAVE_LD_DEAD_CODE_DATA_ELIMINATION - select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS && PPC64 + select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) select HAVE_OPTPROBES diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h index 4fe018cc207b..37af961eb74c 100644 --- a/arch/powerpc/include/asm/livepatch.h +++ b/arch/powerpc/include/asm/livepatch.h @@ -23,12 +23,14 @@ static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long ip) static inline unsigned long klp_get_ftrace_location(unsigned long faddr) { /* - * Live patch works only with -mprofile-kernel on PPC. In this case, - * the ftrace location is always within the first 16 bytes. + * Live patch works on PPC32 and only with -mprofile-kernel on PPC64. In + * both cases, the ftrace location is always within the first 16 bytes.
*/ return ftrace_location_range(faddr, faddr + 16); } +#endif /* CONFIG_LIVEPATCH */ +#ifdef CONFIG_LIVEPATCH_64 static inline void klp_init_thread_info(struct task_struct *p) { /* + 1 to account for STACK_END_MAGIC */ @@ -36,6 +38,6 @@ static inline void klp_init_thread_info(struct task_struct *p) } #else static inline void klp_init_thread_info(struct task_struct *p) { } -#endif /* CONFIG_LIVEPATCH */ +#endif #endif /* _ASM_POWERPC_LIVEPATCH_H */ diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index d6e649b3c70b..125328d1b980 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -51,7 +51,7 @@ struct thread_info { unsigned int cpu; #endif unsigned long local_flags; /* private flags for thread */ -#ifdef CONFIG_LIVEPATCH +#ifdef CONFIG_LIVEPATCH_64 unsigned long *livepatch_sp; #endif #if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC32) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 7582f3e3a330..eec536aef83a 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -94,7 +94,7 @@ int main(void) OFFSET(TASK_CPU, task_struct, thread_info.cpu); #endif -#ifdef CONFIG_LIVEPATCH +#ifdef CONFIG_LIVEPATCH_64 OFFSET(TI_livepatch_sp, thread_info, livepatch_sp); #endif From 7875bc9b07cde868784195e215f4deaa0fa928a2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Mon, 20 Dec 2021 16:38:16 +0000 Subject: [PATCH 044/179] powerpc/ftrace: Don't save again LR in ftrace_regs_caller() on PPC32 PPC32 mcount() caller already saves LR on stack, no need to save it again. Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/eadcfc770b4f1e35535ffb85e28e858a2c31dec4.1640017960.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/trace/ftrace_32.S | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S index 0a02c0cb12d9..7e2fd729116b 100644 --- a/arch/powerpc/kernel/trace/ftrace_32.S +++ b/arch/powerpc/kernel/trace/ftrace_32.S @@ -53,9 +53,6 @@ _GLOBAL(ftrace_stub) blr _GLOBAL(ftrace_regs_caller) - /* Save the original return address in A's stack frame */ - stw r0,LRSAVE(r1) - /* Create our stack frame + pt_regs */ stwu r1,-INT_FRAME_SIZE(r1) From 7bdb478c1d15cfd3a92db6331cb2d3dd3a8b9436 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Mon, 20 Dec 2021 16:38:19 +0000 Subject: [PATCH 045/179] powerpc/ftrace: Simplify PPC32's return_to_handler() return_to_handler() was copied from PPC64. For PPC32 it just needs to save r3 and r4, and doesn't require any nop after the bl. 
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/aab39b77b34fb2c4ed08ed01c547b6ed13643788.1640017960.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/trace/ftrace_32.S | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S index 7e2fd729116b..95ffea2bdc29 100644 --- a/arch/powerpc/kernel/trace/ftrace_32.S +++ b/arch/powerpc/kernel/trace/ftrace_32.S @@ -162,22 +162,18 @@ _GLOBAL(ftrace_graph_caller) _GLOBAL(return_to_handler) /* need to save return values */ - stwu r1, -32(r1) - stw r3, 20(r1) - stw r4, 16(r1) - stw r31, 12(r1) - mr r31, r1 + stwu r1, -16(r1) + stw r3, 8(r1) + stw r4, 12(r1) bl ftrace_return_to_handler - nop /* return value has real return address */ mtlr r3 - lwz r3, 20(r1) - lwz r4, 16(r1) - lwz r31,12(r1) - lwz r1, 0(r1) + lwz r3, 8(r1) + lwz r4, 12(r1) + addi r1, r1, 16 /* Jump back to real return address */ blr From d95bf254be5f74c1e4c8f7cb64e2e21b9cc91717 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Mon, 20 Dec 2021 16:38:22 +0000 Subject: [PATCH 046/179] powerpc/ftrace: Prepare PPC32's ftrace_caller() for CONFIG_DYNAMIC_FTRACE_WITH_ARGS In order to implement CONFIG_DYNAMIC_FTRACE_WITH_ARGS, change ftrace_caller() stack layout to match struct pt_regs. Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/da9734eba504998fb914aca12131c9f6bf6120a8.1640017960.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ftrace.h | 39 +-------------------------- arch/powerpc/kernel/trace/ftrace_32.S | 29 +++++++++++++++++--- 2 files changed, 26 insertions(+), 42 deletions(-) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index debe8c4f7062..b3f6184f77ea 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -10,44 +10,7 @@ #define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR -#ifdef __ASSEMBLY__ - -/* Based off of objdump output from glibc */ - -#define MCOUNT_SAVE_FRAME \ - stwu r1,-48(r1); \ - stw r3, 12(r1); \ - stw r4, 16(r1); \ - stw r5, 20(r1); \ - stw r6, 24(r1); \ - mflr r3; \ - lwz r4, 52(r1); \ - mfcr r5; \ - stw r7, 28(r1); \ - stw r8, 32(r1); \ - stw r9, 36(r1); \ - stw r10,40(r1); \ - stw r3, 44(r1); \ - stw r5, 8(r1) - -#define MCOUNT_RESTORE_FRAME \ - lwz r6, 8(r1); \ - lwz r0, 44(r1); \ - lwz r3, 12(r1); \ - mtctr r0; \ - lwz r4, 16(r1); \ - mtcr r6; \ - lwz r5, 20(r1); \ - lwz r6, 24(r1); \ - lwz r0, 52(r1); \ - lwz r7, 28(r1); \ - lwz r8, 32(r1); \ - mtlr r0; \ - lwz r9, 36(r1); \ - lwz r10,40(r1); \ - addi r1, r1, 48 - -#else /* !__ASSEMBLY__ */ +#ifndef __ASSEMBLY__ extern void _mcount(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S index 95ffea2bdc29..c4055b41af5f 100644 --- a/arch/powerpc/kernel/trace/ftrace_32.S +++ b/arch/powerpc/kernel/trace/ftrace_32.S @@ -27,17 +27,38 @@ _GLOBAL(_mcount) EXPORT_SYMBOL(_mcount) _GLOBAL(ftrace_caller) - MCOUNT_SAVE_FRAME - /* r3 ends up with link register */ + stwu r1, -INT_FRAME_SIZE(r1) + + SAVE_GPRS(3, 10, r1) + + addi r8, r1, INT_FRAME_SIZE + stw r8, GPR1(r1) + + mflr r3 + stw r3, _NIP(r1) subi r3, r3, MCOUNT_INSN_SIZE + + stw r0, _LINK(r1) + mr r4, r0 + lis r5,function_trace_op@ha lwz 
r5,function_trace_op@l(r5) - li r6, 0 + + addi r6, r1, STACK_FRAME_OVERHEAD .globl ftrace_call ftrace_call: bl ftrace_stub nop - MCOUNT_RESTORE_FRAME + + lwz r3, _NIP(r1) + mtctr r3 + + REST_GPRS(3, 10, r1) + + lwz r0, _LINK(r1) + mtlr r0 + + addi r1, r1, INT_FRAME_SIZE ftrace_caller_common: #ifdef CONFIG_FUNCTION_GRAPH_TRACER .globl ftrace_graph_call ftrace_graph_call: b ftrace_graph_stub _GLOBAL(ftrace_graph_stub) #endif From c75388a8ceffbf1bf72c61afe66a72e58aa20c74 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Mon, 20 Dec 2021 16:38:25 +0000 Subject: [PATCH 047/179] powerpc/ftrace: Prepare PPC64's ftrace_caller() for CONFIG_DYNAMIC_FTRACE_WITH_ARGS In order to implement CONFIG_DYNAMIC_FTRACE_WITH_ARGS, change ftrace_caller() to handle LIVEPATCH the same way as ftrace_regs_caller(). Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/850817333cc76593699032e8e9a70d8c36e1af1e.1640017960.git.christophe.leroy@csgroup.eu --- .../powerpc/kernel/trace/ftrace_64_mprofile.S | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index d636fc755f60..f6f787819273 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -172,14 +172,19 @@ _GLOBAL(ftrace_caller) addi r3, r3, function_trace_op@toc@l ld r5, 0(r3) +#ifdef CONFIG_LIVEPATCH_64 + SAVE_GPR(14, r1) + mr r14,r7 /* remember old NIP */ +#endif /* Calculate ip from nip-4 into r3 for call below */ subi r3, r7, MCOUNT_INSN_SIZE /* Put the original return address in r4 as parent_ip */ + std r0, _LINK(r1) mr r4, r0 - /* Set pt_regs to NULL */ - li r6, 0 + /* Load &pt_regs in r6 for call below */ + addi r6, r1 ,STACK_FRAME_OVERHEAD /* ftrace_call(r3, r4, r5, r6) */ .globl ftrace_call @@ -189,6 +194,10 @@ ftrace_call: ld r3, _NIP(r1) mtctr r3 +#ifdef CONFIG_LIVEPATCH_64 + cmpd r14, r3 /* has NIP been altered? */ + REST_GPR(14, r1) +#endif /* Restore gprs */ REST_GPRS(3, 10, r1) @@ -196,13 +205,17 @@ ftrace_call: /* Restore callee's TOC */ ld r2, 24(r1) + /* Restore possibly modified LR */ + ld r0, _LINK(r1) + mtlr r0 + /* Pop our stack frame */ addi r1, r1, SWITCH_FRAME_SIZE - /* Reload original LR */ - ld r0, LRSAVE(r1) - mtlr r0 - +#ifdef CONFIG_LIVEPATCH_64 + /* Based on the cmpd above, if the NIP was altered handle livepatch */ + bne- livepatch_handler +#endif /* Handle function_graph or go back */ b ftrace_caller_common From 40b035efe288f42bbf4483236cde652584ccb64e Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Mon, 20 Dec 2021 16:38:28 +0000 Subject: [PATCH 048/179] powerpc/ftrace: Implement CONFIG_DYNAMIC_FTRACE_WITH_ARGS Implement CONFIG_DYNAMIC_FTRACE_WITH_ARGS. It speeds up livepatching calls. Also note that, powerpc being the last architecture to convert to CONFIG_DYNAMIC_FTRACE_WITH_ARGS, it will now be possible to remove klp_arch_set_pc() on all architectures.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/5831f711a778fcd6eb51eb5898f1faae4378b35b.1640017960.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/ftrace.h | 17 +++++++++++++++++ arch/powerpc/include/asm/livepatch.h | 4 +--- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 1d027ce94ab1..2a851d31f120 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -201,6 +201,7 @@ config PPC select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_ARGS if MPROFILE_KERNEL || PPC32 select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL || PPC32 select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index b3f6184f77ea..45c3d6f11daa 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -22,6 +22,23 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) struct dyn_arch_ftrace { struct module *mod; }; + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS +struct ftrace_regs { + struct pt_regs regs; +}; + +static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) +{ + return &fregs->regs; +} + +static __always_inline void ftrace_instruction_pointer_set(struct ftrace_regs *fregs, + unsigned long ip) +{ + regs_set_return_ip(&fregs->regs, ip); +} +#endif #endif /* __ASSEMBLY__ */ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h index 37af961eb74c..6f10de6af6e3 100644 --- a/arch/powerpc/include/asm/livepatch.h +++ b/arch/powerpc/include/asm/livepatch.h @@ -14,9 +14,7 @@ #ifdef CONFIG_LIVEPATCH static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long ip) { - struct pt_regs *regs = ftrace_get_regs(fregs); - - regs_set_return_ip(regs, ip); + ftrace_instruction_pointer_set(fregs, ip); } #define klp_get_ftrace_location klp_get_ftrace_location From 0c81ed5ed43863d313cf253b0ebada6ea2f17676 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Mon, 20 Dec 2021 16:38:31 +0000 Subject: [PATCH 049/179] powerpc/ftrace: Refactor ftrace_{en/dis}able_ftrace_graph_caller ftrace_enable_ftrace_graph_caller() and ftrace_disable_ftrace_graph_caller() have common code. They will have even more common code after the following patch. Refactor into a single ftrace_modify_ftrace_graph_caller() function.
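For illustration, a stand-alone C sketch (toy stand-ins, not the kernel code) of the shape of this refactor: the enable and disable paths differ only in which operand is "old" and which is "new", so one helper taking a bool replaces the two mirror-image bodies, as the diff below does for real.

#include <stdbool.h>
#include <stdio.h>

static int modify_graph_caller(bool enable)
{
	const char *stub = "ftrace_graph_stub";
	const char *caller = "ftrace_graph_caller";
	const char *old = enable ? stub : caller;
	const char *new = enable ? caller : stub;

	/* stands in for rewriting the branch instruction at the call site */
	printf("rewrite branch target: %s -> %s\n", old, new);
	return 0;
}

int main(void)
{
	modify_graph_caller(true);	/* enable:  stub -> caller */
	modify_graph_caller(false);	/* disable: caller -> stub */
	return 0;
}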
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/f37785a531f1a8f201e1b3da45997a5c77e9d820.1640017960.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/trace/ftrace.c | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 80b6285769f2..ce673764cb69 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -910,30 +910,27 @@ int __init ftrace_dyn_arch_init(void)
 extern void ftrace_graph_call(void);
 extern void ftrace_graph_stub(void);

-int ftrace_enable_ftrace_graph_caller(void)
+static int ftrace_modify_ftrace_graph_caller(bool enable)
 {
 	unsigned long ip = (unsigned long)(&ftrace_graph_call);
 	unsigned long addr = (unsigned long)(&ftrace_graph_caller);
 	unsigned long stub = (unsigned long)(&ftrace_graph_stub);
 	ppc_inst_t old, new;

-	old = ftrace_call_replace(ip, stub, 0);
-	new = ftrace_call_replace(ip, addr, 0);
+	old = ftrace_call_replace(ip, enable ? stub : addr, 0);
+	new = ftrace_call_replace(ip, enable ? addr : stub, 0);

 	return ftrace_modify_code(ip, old, new);
 }

+int ftrace_enable_ftrace_graph_caller(void)
+{
+	return ftrace_modify_ftrace_graph_caller(true);
+}
+
 int ftrace_disable_ftrace_graph_caller(void)
 {
-	unsigned long ip = (unsigned long)(&ftrace_graph_call);
-	unsigned long addr = (unsigned long)(&ftrace_graph_caller);
-	unsigned long stub = (unsigned long)(&ftrace_graph_stub);
-	ppc_inst_t old, new;
-
-	old = ftrace_call_replace(ip, addr, 0);
-	new = ftrace_call_replace(ip, stub, 0);
-
-	return ftrace_modify_code(ip, old, new);
+	return ftrace_modify_ftrace_graph_caller(false);
 }

 /*

From 830213786c498b0c488fedd2abc15a7ce442b42f Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Mon, 20 Dec 2021 16:38:35 +0000
Subject: [PATCH 050/179] powerpc/ftrace: directly call function graph tracer
 from ftrace caller

Modify function graph tracer to be handled directly by the standard
ftrace caller. This is made possible as powerpc now supports
CONFIG_DYNAMIC_FTRACE_WITH_ARGS.

This change simplifies the function graph ftrace call path.
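A rough model, with made-up types and addresses, of what the new
ftrace_graph_func() callback does with the saved registers:

#include <stdio.h>

/* stand-in: the kernel version returns the address execution is
 * diverted to (return_to_handler) */
static unsigned long prepare_ftrace_return(unsigned long parent,
					   unsigned long ip,
					   unsigned long sp)
{
	(void)parent; (void)ip; (void)sp;
	return 0xdead;	/* placeholder for return_to_handler */
}

struct regs { unsigned long link; unsigned long gpr1; };

static void graph_func(unsigned long ip, unsigned long parent_ip,
		       struct regs *regs)
{
	/* divert the traced function's return path by rewriting the
	 * saved link register, as the real ftrace_graph_func() does */
	regs->link = prepare_ftrace_return(parent_ip, ip, regs->gpr1);
}

int main(void)
{
	struct regs regs = { .link = 0x1000, .gpr1 = 0x2000 };

	graph_func(0x4000, regs.link, &regs);
	printf("return now goes to %#lx\n", regs.link);
	return 0;
}

Because the standard caller already restores LR from the regs area, no
dedicated ftrace_graph_caller trampoline is needed any more.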
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/04d196585ff81bde06a000bd9c633a33a5b21130.1640017960.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ftrace.h | 6 ++ arch/powerpc/kernel/trace/ftrace.c | 11 ++++ arch/powerpc/kernel/trace/ftrace_32.S | 53 +-------------- .../powerpc/kernel/trace/ftrace_64_mprofile.S | 64 +------------------ 4 files changed, 20 insertions(+), 114 deletions(-) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 45c3d6f11daa..70b457097098 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -38,6 +38,12 @@ static __always_inline void ftrace_instruction_pointer_set(struct ftrace_regs *f { regs_set_return_ip(&fregs->regs, ip); } + +struct ftrace_ops; + +#define ftrace_graph_func ftrace_graph_func +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); #endif #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index ce673764cb69..74a176e394ef 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -917,6 +917,9 @@ static int ftrace_modify_ftrace_graph_caller(bool enable) unsigned long stub = (unsigned long)(&ftrace_graph_stub); ppc_inst_t old, new; + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS)) + return 0; + old = ftrace_call_replace(ip, enable ? stub : addr, 0); new = ftrace_call_replace(ip, enable ? addr : stub, 0); @@ -955,6 +958,14 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, out: return parent; } + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs) +{ + fregs->regs.link = prepare_ftrace_return(parent_ip, ip, fregs->regs.gpr[1]); +} +#endif #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ #ifdef PPC64_ELF_ABI_v1 diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S index c4055b41af5f..2b425da97a6b 100644 --- a/arch/powerpc/kernel/trace/ftrace_32.S +++ b/arch/powerpc/kernel/trace/ftrace_32.S @@ -59,13 +59,6 @@ ftrace_call: mtlr r0 addi r1, r1, INT_FRAME_SIZE -ftrace_caller_common: -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -.globl ftrace_graph_call -ftrace_graph_call: - b ftrace_graph_stub -_GLOBAL(ftrace_graph_stub) -#endif /* old link register ends up in ctr reg */ bctr @@ -135,52 +128,10 @@ ftrace_regs_call: /* Pop our stack frame */ addi r1, r1, INT_FRAME_SIZE - - b ftrace_caller_common - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -_GLOBAL(ftrace_graph_caller) - stwu r1,-48(r1) - stw r3, 12(r1) - stw r4, 16(r1) - stw r5, 20(r1) - stw r6, 24(r1) - stw r7, 28(r1) - stw r8, 32(r1) - stw r9, 36(r1) - stw r10,40(r1) - - addi r5, r1, 48 - mfctr r4 /* ftrace_caller has moved local addr here */ - stw r4, 44(r1) - mflr r3 /* ftrace_caller has restored LR from stack */ - subi r4, r4, MCOUNT_INSN_SIZE - - bl prepare_ftrace_return - nop - - /* - * prepare_ftrace_return gives us the address we divert to. - * Change the LR in the callers stack frame to this. 
- */
-	stw	r3,52(r1)
-	mtlr	r3
-	lwz	r0,44(r1)
-	mtctr	r0
-
-	lwz	r3, 12(r1)
-	lwz	r4, 16(r1)
-	lwz	r5, 20(r1)
-	lwz	r6, 24(r1)
-	lwz	r7, 28(r1)
-	lwz	r8, 32(r1)
-	lwz	r9, 36(r1)
-	lwz	r10,40(r1)
-
-	addi	r1, r1, 48
-
+	/* old link register ends up in ctr reg */
 	bctr

+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 _GLOBAL(return_to_handler)
 	/* need to save return values */
 	stwu	r1, -16(r1)
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index f6f787819273..6071e0122797 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -124,15 +124,6 @@ ftrace_regs_call:
 	/* Based on the cmpd above, if the NIP was altered handle livepatch */
 	bne-	livepatch_handler
 #endif
-
-ftrace_caller_common:
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-.globl ftrace_graph_call
-ftrace_graph_call:
-	b	ftrace_graph_stub
-_GLOBAL(ftrace_graph_stub)
-#endif
-
 	bctr			/* jump after _mcount site */

 _GLOBAL(ftrace_stub)
@@ -216,8 +207,7 @@ ftrace_call:
 	/* Based on the cmpd above, if the NIP was altered handle livepatch */
 	bne-	livepatch_handler
 #endif
-	/* Handle function_graph or go back */
-	b	ftrace_caller_common
+	bctr			/* jump after _mcount site */

 #ifdef CONFIG_LIVEPATCH
 /*
@@ -286,55 +276,3 @@ livepatch_handler:
 	/* Return to original caller of live patched function */
 	blr
 #endif /* CONFIG_LIVEPATCH */
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-_GLOBAL(ftrace_graph_caller)
-	stdu	r1, -112(r1)
-	/* with -mprofile-kernel, parameter regs are still alive at _mcount */
-	std	r10, 104(r1)
-	std	r9, 96(r1)
-	std	r8, 88(r1)
-	std	r7, 80(r1)
-	std	r6, 72(r1)
-	std	r5, 64(r1)
-	std	r4, 56(r1)
-	std	r3, 48(r1)
-
-	/* Save callee's TOC in the ABI compliant location */
-	std	r2, 24(r1)
-	ld	r2, PACATOC(r13)	/* get kernel TOC in r2 */
-
-	addi	r5, r1, 112
-	mfctr	r4		/* ftrace_caller has moved local addr here */
-	std	r4, 40(r1)
-	mflr	r3		/* ftrace_caller has restored LR from stack */
-	subi	r4, r4, MCOUNT_INSN_SIZE
-
-	bl	prepare_ftrace_return
-	nop
-
-	/*
-	 * prepare_ftrace_return gives us the address we divert to.
-	 * Change the LR to this.
-	 */
-	mtlr	r3
-
-	ld	r0, 40(r1)
-	mtctr	r0
-	ld	r10, 104(r1)
-	ld	r9, 96(r1)
-	ld	r8, 88(r1)
-	ld	r7, 80(r1)
-	ld	r6, 72(r1)
-	ld	r5, 64(r1)
-	ld	r4, 56(r1)
-	ld	r3, 48(r1)
-
-	/* Restore callee's TOC */
-	ld	r2, 24(r1)
-
-	addi	r1, r1, 112
-	mflr	r0
-	std	r0, LRSAVE(r1)
-	bctr
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */

From 41315494beed087011f256b4f1439bb3d8236904 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Mon, 20 Dec 2021 16:38:40 +0000
Subject: [PATCH 051/179] powerpc/ftrace: Prepare ftrace_64_mprofile.S for
 reuse by PPC32

PPC64 mprofile versions and PPC32 are very similar. Modify the PPC64
version so that it can be reused for PPC32.
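The reuse rests on the asm-compat.h macros that pick the right mnemonic for
the build: PPC_STL expands to std on PPC64 and stw on PPC32, PPC_LL to ld
and lwz, and so on. A loose C analogue of the selection mechanism (the real
macros emit assembler mnemonics through stringify_in_c()):

#include <stdio.h>

#ifdef __powerpc64__
#define PPC_STL "std"	/* 64-bit store */
#define PPC_LL  "ld"	/* 64-bit load */
#else
#define PPC_STL "stw"	/* 32-bit store */
#define PPC_LL  "lwz"	/* 32-bit load */
#endif

int main(void)
{
	/* one source line, width chosen at build time */
	printf("store: %s, load: %s\n", PPC_STL, PPC_LL);
	return 0;
}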
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/82a732915dc71ee766e31809350939331944006d.1640017960.git.christophe.leroy@csgroup.eu --- .../powerpc/kernel/trace/ftrace_64_mprofile.S | 73 +++++++++++++------ 1 file changed, 51 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index 6071e0122797..56da60e98327 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -34,13 +34,16 @@ */ _GLOBAL(ftrace_regs_caller) /* Save the original return address in A's stack frame */ - std r0,LRSAVE(r1) +#ifdef CONFIG_MPROFILE_KERNEL + PPC_STL r0,LRSAVE(r1) +#endif /* Create our stack frame + pt_regs */ - stdu r1,-SWITCH_FRAME_SIZE(r1) + PPC_STLU r1,-SWITCH_FRAME_SIZE(r1) /* Save all gprs to pt_regs */ SAVE_GPR(0, r1) +#ifdef CONFIG_PPC64 SAVE_GPRS(2, 11, r1) /* Ok to continue? */ @@ -49,10 +52,13 @@ _GLOBAL(ftrace_regs_caller) beq ftrace_no_trace SAVE_GPRS(12, 31, r1) +#else + stmw r2, GPR2(r1) +#endif /* Save previous stack pointer (r1) */ addi r8, r1, SWITCH_FRAME_SIZE - std r8, GPR1(r1) + PPC_STL r8, GPR1(r1) /* Load special regs for save below */ mfmsr r8 @@ -63,10 +69,11 @@ _GLOBAL(ftrace_regs_caller) /* Get the _mcount() call site out of LR */ mflr r7 /* Save it as pt_regs->nip */ - std r7, _NIP(r1) + PPC_STL r7, _NIP(r1) /* Save the read LR in pt_regs->link */ - std r0, _LINK(r1) + PPC_STL r0, _LINK(r1) +#ifdef CONFIG_PPC64 /* Save callee's TOC in the ABI compliant location */ std r2, 24(r1) ld r2,PACATOC(r13) /* get kernel TOC in r2 */ @@ -74,8 +81,12 @@ _GLOBAL(ftrace_regs_caller) addis r3,r2,function_trace_op@toc@ha addi r3,r3,function_trace_op@toc@l ld r5,0(r3) +#else + lis r3,function_trace_op@ha + lwz r5,function_trace_op@l(r3) +#endif -#ifdef CONFIG_LIVEPATCH +#ifdef CONFIG_LIVEPATCH_64 mr r14,r7 /* remember old NIP */ #endif /* Calculate ip from nip-4 into r3 for call below */ @@ -85,10 +96,10 @@ _GLOBAL(ftrace_regs_caller) mr r4, r0 /* Save special regs */ - std r8, _MSR(r1) - std r9, _CTR(r1) - std r10, _XER(r1) - std r11, _CCR(r1) + PPC_STL r8, _MSR(r1) + PPC_STL r9, _CTR(r1) + PPC_STL r10, _XER(r1) + PPC_STL r11, _CCR(r1) /* Load &pt_regs in r6 for call below */ addi r6, r1 ,STACK_FRAME_OVERHEAD @@ -100,27 +111,32 @@ ftrace_regs_call: nop /* Load ctr with the possibly modified NIP */ - ld r3, _NIP(r1) + PPC_LL r3, _NIP(r1) mtctr r3 -#ifdef CONFIG_LIVEPATCH +#ifdef CONFIG_LIVEPATCH_64 cmpd r14, r3 /* has NIP been altered? 
*/ #endif /* Restore gprs */ - REST_GPR(0, r1) +#ifdef CONFIG_PPC64 REST_GPRS(2, 31, r1) +#else + lmw r2, GPR2(r1) +#endif /* Restore possibly modified LR */ - ld r0, _LINK(r1) + PPC_LL r0, _LINK(r1) mtlr r0 +#ifdef CONFIG_PPC64 /* Restore callee's TOC */ ld r2, 24(r1) +#endif /* Pop our stack frame */ addi r1, r1, SWITCH_FRAME_SIZE -#ifdef CONFIG_LIVEPATCH +#ifdef CONFIG_LIVEPATCH_64 /* Based on the cmpd above, if the NIP was altered handle livepatch */ bne- livepatch_handler #endif @@ -129,6 +145,7 @@ ftrace_regs_call: _GLOBAL(ftrace_stub) blr +#ifdef CONFIG_PPC64 ftrace_no_trace: mflr r3 mtctr r3 @@ -136,25 +153,31 @@ ftrace_no_trace: addi r1, r1, SWITCH_FRAME_SIZE mtlr r0 bctr +#endif _GLOBAL(ftrace_caller) /* Save the original return address in A's stack frame */ - std r0, LRSAVE(r1) +#ifdef CONFIG_MPROFILE_KERNEL + PPC_STL r0, LRSAVE(r1) +#endif /* Create our stack frame + pt_regs */ - stdu r1, -SWITCH_FRAME_SIZE(r1) + PPC_STLU r1, -SWITCH_FRAME_SIZE(r1) /* Save all gprs to pt_regs */ SAVE_GPRS(3, 10, r1) +#ifdef CONFIG_PPC64 lbz r3, PACA_FTRACE_ENABLED(r13) cmpdi r3, 0 beq ftrace_no_trace +#endif /* Get the _mcount() call site out of LR */ mflr r7 - std r7, _NIP(r1) + PPC_STL r7, _NIP(r1) +#ifdef CONFIG_PPC64 /* Save callee's TOC in the ABI compliant location */ std r2, 24(r1) ld r2, PACATOC(r13) /* get kernel TOC in r2 */ @@ -162,6 +185,10 @@ _GLOBAL(ftrace_caller) addis r3, r2, function_trace_op@toc@ha addi r3, r3, function_trace_op@toc@l ld r5, 0(r3) +#else + lis r3,function_trace_op@ha + lwz r5,function_trace_op@l(r3) +#endif #ifdef CONFIG_LIVEPATCH_64 SAVE_GPR(14, r1) @@ -171,7 +198,7 @@ _GLOBAL(ftrace_caller) subi r3, r7, MCOUNT_INSN_SIZE /* Put the original return address in r4 as parent_ip */ - std r0, _LINK(r1) + PPC_STL r0, _LINK(r1) mr r4, r0 /* Load &pt_regs in r6 for call below */ @@ -183,7 +210,7 @@ ftrace_call: bl ftrace_stub nop - ld r3, _NIP(r1) + PPC_LL r3, _NIP(r1) mtctr r3 #ifdef CONFIG_LIVEPATCH_64 cmpd r14, r3 /* has NIP been altered? */ @@ -193,11 +220,13 @@ ftrace_call: /* Restore gprs */ REST_GPRS(3, 10, r1) +#ifdef CONFIG_PPC64 /* Restore callee's TOC */ ld r2, 24(r1) +#endif /* Restore possibly modified LR */ - ld r0, _LINK(r1) + PPC_LL r0, _LINK(r1) mtlr r0 /* Pop our stack frame */ @@ -209,7 +238,7 @@ ftrace_call: #endif bctr /* jump after _mcount site */ -#ifdef CONFIG_LIVEPATCH +#ifdef CONFIG_LIVEPATCH_64 /* * This function runs in the mcount context, between two functions. As * such it can only clobber registers which are volatile and used in From 4ee83a2cfbc46c13f2a08fe6d48dbcede53cdbf8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Mon, 20 Dec 2021 16:38:44 +0000 Subject: [PATCH 052/179] powerpc/ftrace: Remove ftrace_32.S Functions in ftrace_32.S are common with PPC64. Reuse the ones defined for PPC64 with slight modification when required. 
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> [mpe: Squash in fixup diff from Christophe] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/5e837fc190504c4ef834272e70d60ae33f175d49.1640017960.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/trace/Makefile | 6 +- arch/powerpc/kernel/trace/ftrace_32.S | 152 ------------------ .../trace/{ftrace_64.S => ftrace_low.S} | 14 ++ ...ftrace_64_mprofile.S => ftrace_mprofile.S} | 0 4 files changed, 17 insertions(+), 155 deletions(-) delete mode 100644 arch/powerpc/kernel/trace/ftrace_32.S rename arch/powerpc/kernel/trace/{ftrace_64.S => ftrace_low.S} (85%) rename arch/powerpc/kernel/trace/{ftrace_64_mprofile.S => ftrace_mprofile.S} (100%) diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile index 858503775c58..542aa7a8b2b4 100644 --- a/arch/powerpc/kernel/trace/Makefile +++ b/arch/powerpc/kernel/trace/Makefile @@ -8,13 +8,13 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) endif -obj32-$(CONFIG_FUNCTION_TRACER) += ftrace_32.o -obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64.o +obj32-$(CONFIG_FUNCTION_TRACER) += ftrace_mprofile.o ifdef CONFIG_MPROFILE_KERNEL -obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_mprofile.o +obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_mprofile.o else obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_pg.o endif +obj-$(CONFIG_FUNCTION_TRACER) += ftrace_low.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S deleted file mode 100644 index 2b425da97a6b..000000000000 --- a/arch/powerpc/kernel/trace/ftrace_32.S +++ /dev/null @@ -1,152 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Split from entry_32.S - */ - -#include <linux/magic.h> -#include <asm/reg.h> -#include <asm/ppc_asm.h> -#include <asm/asm-offsets.h> -#include <asm/ftrace.h> -#include <asm/export.h> -#include <asm/ptrace.h> - -_GLOBAL(mcount) -_GLOBAL(_mcount) - /* - * It is required that _mcount on PPC32 must preserve the - * link register. But we have r12 to play with. We use r12 - * to push the return address back to the caller of mcount - * into the ctr register, restore the link register and - * then jump back using the ctr register. 
- */ - mflr r12 - mtctr r12 - mtlr r0 - bctr -EXPORT_SYMBOL(_mcount) - -_GLOBAL(ftrace_caller) - stwu r1, -INT_FRAME_SIZE(r1) - - SAVE_GPRS(3, 10, r1) - - addi r8, r1, INT_FRAME_SIZE - stw r8, GPR1(r1) - - mflr r3 - stw r3, _NIP(r1) - subi r3, r3, MCOUNT_INSN_SIZE - - stw r0, _LINK(r1) - mr r4, r0 - - lis r5,function_trace_op@ha - lwz r5,function_trace_op@l(r5) - - addi r6, r1, STACK_FRAME_OVERHEAD -.globl ftrace_call -ftrace_call: - bl ftrace_stub - nop - - lwz r3, _NIP(r1) - mtctr r3 - - REST_GPRS(3, 10, r1) - - lwz r0, _LINK(r1) - mtlr r0 - - addi r1, r1, INT_FRAME_SIZE - /* old link register ends up in ctr reg */ - bctr - - -_GLOBAL(ftrace_stub) - blr - -_GLOBAL(ftrace_regs_caller) - /* Create our stack frame + pt_regs */ - stwu r1,-INT_FRAME_SIZE(r1) - - /* Save all gprs to pt_regs */ - stw r0, GPR0(r1) - stmw r2, GPR2(r1) - - /* Save previous stack pointer (r1) */ - addi r8, r1, INT_FRAME_SIZE - stw r8, GPR1(r1) - - /* Load special regs for save below */ - mfmsr r8 - mfctr r9 - mfxer r10 - mfcr r11 - - /* Get the _mcount() call site out of LR */ - mflr r7 - /* Save it as pt_regs->nip */ - stw r7, _NIP(r1) - /* Save the read LR in pt_regs->link */ - stw r0, _LINK(r1) - - lis r3,function_trace_op@ha - lwz r5,function_trace_op@l(r3) - - /* Calculate ip from nip-4 into r3 for call below */ - subi r3, r7, MCOUNT_INSN_SIZE - - /* Put the original return address in r4 as parent_ip */ - mr r4, r0 - - /* Save special regs */ - stw r8, _MSR(r1) - stw r9, _CTR(r1) - stw r10, _XER(r1) - stw r11, _CCR(r1) - - /* Load &pt_regs in r6 for call below */ - addi r6, r1, STACK_FRAME_OVERHEAD - - /* ftrace_call(r3, r4, r5, r6) */ -.globl ftrace_regs_call -ftrace_regs_call: - bl ftrace_stub - nop - - /* Load ctr with the possibly modified NIP */ - lwz r3, _NIP(r1) - mtctr r3 - - /* Restore gprs */ - lmw r2, GPR2(r1) - - /* Restore possibly modified LR */ - lwz r0, _LINK(r1) - mtlr r0 - - /* Pop our stack frame */ - addi r1, r1, INT_FRAME_SIZE - /* old link register ends up in ctr reg */ - bctr - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -_GLOBAL(return_to_handler) - /* need to save return values */ - stwu r1, -16(r1) - stw r3, 8(r1) - stw r4, 12(r1) - - bl ftrace_return_to_handler - - /* return value has real return address */ - mtlr r3 - - lwz r3, 8(r1) - lwz r4, 12(r1) - addi r1, r1, 16 - - /* Jump back to real return address */ - blr -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/trace/ftrace_64.S b/arch/powerpc/kernel/trace/ftrace_low.S similarity index 85% rename from arch/powerpc/kernel/trace/ftrace_64.S rename to arch/powerpc/kernel/trace/ftrace_low.S index 25e5b9e47c06..0bddf1fa6636 100644 --- a/arch/powerpc/kernel/trace/ftrace_64.S +++ b/arch/powerpc/kernel/trace/ftrace_low.S @@ -10,6 +10,7 @@ #include <asm/ppc-opcode.h> #include <asm/export.h> +#ifdef CONFIG_PPC64 .pushsection ".tramp.ftrace.text","aw",@progbits; .globl ftrace_tramp_text ftrace_tramp_text: @@ -21,6 +22,7 @@ ftrace_tramp_text: ftrace_tramp_init: .space 64 .popsection +#endif _GLOBAL(mcount) _GLOBAL(_mcount) @@ -33,6 +35,7 @@ EXPORT_SYMBOL(_mcount) #ifdef CONFIG_FUNCTION_GRAPH_TRACER _GLOBAL(return_to_handler) /* need to save return values */ +#ifdef CONFIG_PPC64 std r4, -32(r1) std r3, -24(r1) /* save TOC */ @@ -46,6 +49,11 @@ _GLOBAL(return_to_handler) * Switch to our TOC to run inside the core kernel. 
*/
 	ld	r2, PACATOC(r13)
+#else
+	stwu	r1, -16(r1)
+	stw	r3, 8(r1)
+	stw	r4, 12(r1)
+#endif

 	bl	ftrace_return_to_handler
 	nop
@@ -53,11 +61,17 @@ _GLOBAL(return_to_handler)
 	/* return value has real return address */
 	mtlr	r3

+#ifdef CONFIG_PPC64
 	ld	r1, 0(r1)
 	ld	r4, -32(r1)
 	ld	r3, -24(r1)
 	ld	r2, -16(r1)
 	ld	r31, -8(r1)
+#else
+	lwz	r3, 8(r1)
+	lwz	r4, 12(r1)
+	addi	r1, r1, 16
+#endif

 	/* Jump back to real return address */
 	blr
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S
similarity index 100%
rename from arch/powerpc/kernel/trace/ftrace_64_mprofile.S
rename to arch/powerpc/kernel/trace/ftrace_mprofile.S

From a4c182ecf33584b9b2d1aa9dad073014a504c01f Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Fri, 24 Dec 2021 11:07:33 +0000
Subject: [PATCH 053/179] powerpc/set_memory: Avoid spinlock recursion in
 change_page_attr()

Commit 1f9ad21c3b38 ("powerpc/mm: Implement set_memory() routines")
added a spin_lock() to change_page_attr() in order to safely perform
its three-step operation. But then commit 9f7853d7609d ("powerpc/mm:
Fix set_memory_*() against concurrent accesses") modified it to use
pte_update() and do the operation safely against concurrent accesses.

In the meantime, Maxime reported some spinlock recursion.

[ 15.351649] BUG: spinlock recursion on CPU#0, kworker/0:2/217
[ 15.357540] lock: init_mm+0x3c/0x420, .magic: dead4ead, .owner: kworker/0:2/217, .owner_cpu: 0
[ 15.366563] CPU: 0 PID: 217 Comm: kworker/0:2 Not tainted 5.15.0+ #523
[ 15.373350] Workqueue: events do_free_init
[ 15.377615] Call Trace:
[ 15.380232] [e4105ac0] [800946a4] do_raw_spin_lock+0xf8/0x120 (unreliable)
[ 15.387340] [e4105ae0] [8001f4ec] change_page_attr+0x40/0x1d4
[ 15.393413] [e4105b10] [801424e0] __apply_to_page_range+0x164/0x310
[ 15.400009] [e4105b60] [80169620] free_pcp_prepare+0x1e4/0x4a0
[ 15.406045] [e4105ba0] [8016c5a0] free_unref_page+0x40/0x2b8
[ 15.411979] [e4105be0] [8018724c] kasan_depopulate_vmalloc_pte+0x6c/0x94
[ 15.418989] [e4105c00] [801424e0] __apply_to_page_range+0x164/0x310
[ 15.425451] [e4105c50] [80187834] kasan_release_vmalloc+0xbc/0x134
[ 15.431898] [e4105c70] [8015f7a8] __purge_vmap_area_lazy+0x4e4/0xdd8
[ 15.438560] [e4105d30] [80160d10] _vm_unmap_aliases.part.0+0x17c/0x24c
[ 15.445283] [e4105d60] [801642d0] __vunmap+0x2f0/0x5c8
[ 15.450684] [e4105db0] [800e32d0] do_free_init+0x68/0x94
[ 15.456181] [e4105dd0] [8005d094] process_one_work+0x4bc/0x7b8
[ 15.462283] [e4105e90] [8005d614] worker_thread+0x284/0x6e8
[ 15.468227] [e4105f00] [8006aaec] kthread+0x1f0/0x210
[ 15.473489] [e4105f40] [80017148] ret_from_kernel_thread+0x14/0x1c

Remove the read / modify / write sequence to make the operation atomic
and remove the spin_lock() in change_page_attr().

To do the operation atomically, we can't use pte modification helpers
anymore. Because all platforms have different combinations of bits, it
is not easy to use those bits directly. But all have the
_PAGE_KERNEL_{RO/ROX/RW/RWX} set of flags. All we need is to compare
two sets to know which bits are set or cleared.

For instance, by comparing _PAGE_KERNEL_ROX and _PAGE_KERNEL_RO you
know which bit gets cleared and which bit gets set when changing exec
permission.
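A worked example of that delta trick, using made-up flag values rather than
any platform's real PTE bits: the bits to clear are (old & ~new) and the
bits to set are (new & ~old), so one atomic pte_update() call suffices.

#include <stdio.h>

#define PAGE_RO   0x001UL	/* illustrative values only */
#define PAGE_EXEC 0x004UL

#define KERNEL_RO  (PAGE_RO)
#define KERNEL_ROX (PAGE_RO | PAGE_EXEC)

int main(void)
{
	unsigned long old = KERNEL_ROX, new = KERNEL_RO;

	/* clear = 0x4 (the exec bit), set = 0: exactly SET_MEMORY_NX */
	printf("clear: %#lx, set: %#lx\n", old & ~new, new & ~old);
	return 0;
}

These two masks are what pte_update_delta() below feeds into pte_update().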
Reported-by: Maxime Bizon <mbizon@freebox.fr>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/all/20211212112152.GA27070@sakura/
Link: https://lore.kernel.org/r/43c3c76a1175ae6dc1a3d3b5c3f7ecb48f683eea.1640344012.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/mm/pageattr.c | 32 +++++++++++++-------------------
 1 file changed, 13 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c
index edea388e9d3f..8812454e70ff 100644
--- a/arch/powerpc/mm/pageattr.c
+++ b/arch/powerpc/mm/pageattr.c
@@ -15,12 +15,14 @@

 #include <asm/pgtable.h>

+static pte_basic_t pte_update_delta(pte_t *ptep, unsigned long addr,
+				    unsigned long old, unsigned long new)
+{
+	return pte_update(&init_mm, addr, ptep, old & ~new, new & ~old, 0);
+}
+
 /*
- * Updates the attributes of a page in three steps:
- *
- * 1. take the page_table_lock
- * 2. install the new entry with the updated attributes
- * 3. flush the TLB
+ * Updates the attributes of a page atomically.
 *
 * This sequence is safe against concurrent updates, and also allows updating the
 * attributes of a page currently being executed or accessed.
@@ -28,41 +30,33 @@
 static int change_page_attr(pte_t *ptep, unsigned long addr, void *data)
 {
 	long action = (long)data;
-	pte_t pte;

-	spin_lock(&init_mm.page_table_lock);
-
-	pte = ptep_get(ptep);
-
-	/* modify the PTE bits as desired, then apply */
+	/* modify the PTE bits as desired */
 	switch (action) {
 	case SET_MEMORY_RO:
-		pte = pte_wrprotect(pte);
+		/* Don't clear DIRTY bit */
+		pte_update_delta(ptep, addr, _PAGE_KERNEL_RW & ~_PAGE_DIRTY, _PAGE_KERNEL_RO);
 		break;
 	case SET_MEMORY_RW:
-		pte = pte_mkwrite(pte_mkdirty(pte));
+		pte_update_delta(ptep, addr, _PAGE_KERNEL_RO, _PAGE_KERNEL_RW);
 		break;
 	case SET_MEMORY_NX:
-		pte = pte_exprotect(pte);
+		pte_update_delta(ptep, addr, _PAGE_KERNEL_ROX, _PAGE_KERNEL_RO);
 		break;
 	case SET_MEMORY_X:
-		pte = pte_mkexec(pte);
+		pte_update_delta(ptep, addr, _PAGE_KERNEL_RO, _PAGE_KERNEL_ROX);
 		break;
 	default:
 		WARN_ON_ONCE(1);
 		break;
 	}

-	pte_update(&init_mm, addr, ptep, ~0UL, pte_val(pte), 0);
-
 	/* See ptesync comment in radix__set_pte_at() */
 	if (radix_enabled())
 		asm volatile("ptesync": : :"memory");

 	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);

-	spin_unlock(&init_mm.page_table_lock);
-
 	return 0;
 }

From f222ab83df92acf72691a2021e1f0d99880dcdf1 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Fri, 24 Dec 2021 11:07:40 +0000
Subject: [PATCH 054/179] powerpc: Add set_memory_{p/np}() and remove
 set_memory_attr()

set_memory_attr() was implemented by commit 4d1755b6a762 ("powerpc/mm:
implement set_memory_attr()") because the set_memory_xx() functions
couldn't be used at that time to modify memory "on the fly" as
explained in that commit.

But set_memory_attr() uses set_pte_at() which leads to warnings when
CONFIG_DEBUG_VM is selected, because set_pte_at() is not meant for
updating existing page table entries.

The check could be bypassed by using __set_pte_at() instead, as was
the case before commit c988cfd38e48 ("powerpc/32: use
set_memory_attr()") but since commit 9f7853d7609d ("powerpc/mm: Fix
set_memory_*() against concurrent accesses") it is now possible to use
set_memory_xx() functions to update page table entries "on the fly"
because the update is now atomic.

For DEBUG_PAGEALLOC we need to clear and set back _PAGE_PRESENT.
Add set_memory_np() and set_memory_p() for that.
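As a sketch of the two new actions, with pte_update() modeled as a plain
read-modify-write over (clear, set) masks; the kernel version is atomic,
and the bit value used here is illustrative:

#include <stdio.h>

#define _PAGE_PRESENT 0x1UL	/* stand-in value for the demo */

static unsigned long pte_update(unsigned long *ptep, unsigned long clr,
				unsigned long set)
{
	unsigned long old = *ptep;

	*ptep = (old & ~clr) | set;	/* the kernel does this atomically */
	return old;
}

int main(void)
{
	unsigned long pte = 0x1003;	/* present, plus other bits */

	pte_update(&pte, _PAGE_PRESENT, 0);	/* SET_MEMORY_NP */
	printf("np: %#lx\n", pte);
	pte_update(&pte, 0, _PAGE_PRESENT);	/* SET_MEMORY_P */
	printf("p:  %#lx\n", pte);
	return 0;
}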
Replace all uses of set_memory_attr() by the relevant set_memory_xx() and remove set_memory_attr(). Fixes: c988cfd38e48 ("powerpc/32: use set_memory_attr()") Cc: stable@vger.kernel.org Reported-by: Maxime Bizon <mbizon@freebox.fr> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Tested-by: Maxime Bizon <mbizon@freebox.fr> Reviewed-by: Russell Currey <ruscur@russell.cc> Depends-on: 9f7853d7609d ("powerpc/mm: Fix set_memory_*() against concurrent accesses") Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/cda2b44b55c96f9ac69fa92e68c01084ec9495c5.1640344012.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/set_memory.h | 12 ++++++++- arch/powerpc/mm/pageattr.c | 39 +++++---------------------- arch/powerpc/mm/pgtable_32.c | 24 ++++++++--------- 3 files changed, 28 insertions(+), 47 deletions(-) diff --git a/arch/powerpc/include/asm/set_memory.h b/arch/powerpc/include/asm/set_memory.h index b040094f7920..7ebc807aa8cc 100644 --- a/arch/powerpc/include/asm/set_memory.h +++ b/arch/powerpc/include/asm/set_memory.h @@ -6,6 +6,8 @@ #define SET_MEMORY_RW 1 #define SET_MEMORY_NX 2 #define SET_MEMORY_X 3 +#define SET_MEMORY_NP 4 /* Set memory non present */ +#define SET_MEMORY_P 5 /* Set memory present */ int change_memory_attr(unsigned long addr, int numpages, long action); @@ -29,6 +31,14 @@ static inline int set_memory_x(unsigned long addr, int numpages) return change_memory_attr(addr, numpages, SET_MEMORY_X); } -int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot); +static inline int set_memory_np(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_NP); +} + +static inline int set_memory_p(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_P); +} #endif diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c index 8812454e70ff..85753e32a4de 100644 --- a/arch/powerpc/mm/pageattr.c +++ b/arch/powerpc/mm/pageattr.c @@ -46,6 +46,12 @@ static int change_page_attr(pte_t *ptep, unsigned long addr, void *data) case SET_MEMORY_X: pte_update_delta(ptep, addr, _PAGE_KERNEL_RO, _PAGE_KERNEL_ROX); break; + case SET_MEMORY_NP: + pte_update(&init_mm, addr, ptep, _PAGE_PRESENT, 0, 0); + break; + case SET_MEMORY_P: + pte_update(&init_mm, addr, ptep, 0, _PAGE_PRESENT, 0); + break; default: WARN_ON_ONCE(1); break; @@ -90,36 +96,3 @@ int change_memory_attr(unsigned long addr, int numpages, long action) return apply_to_existing_page_range(&init_mm, start, size, change_page_attr, (void *)action); } - -/* - * Set the attributes of a page: - * - * This function is used by PPC32 at the end of init to set final kernel memory - * protection. It includes changing the maping of the page it is executing from - * and data pages it is using. 
- */ -static int set_page_attr(pte_t *ptep, unsigned long addr, void *data) -{ - pgprot_t prot = __pgprot((unsigned long)data); - - spin_lock(&init_mm.page_table_lock); - - set_pte_at(&init_mm, addr, ptep, pte_modify(*ptep, prot)); - flush_tlb_kernel_range(addr, addr + PAGE_SIZE); - - spin_unlock(&init_mm.page_table_lock); - - return 0; -} - -int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot) -{ - unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE); - unsigned long sz = numpages * PAGE_SIZE; - - if (numpages <= 0) - return 0; - - return apply_to_existing_page_range(&init_mm, start, sz, set_page_attr, - (void *)pgprot_val(prot)); -} diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 906e4e4328b2..f71ededdc02a 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -135,10 +135,12 @@ void mark_initmem_nx(void) unsigned long numpages = PFN_UP((unsigned long)_einittext) - PFN_DOWN((unsigned long)_sinittext); - if (v_block_mapped((unsigned long)_sinittext)) + if (v_block_mapped((unsigned long)_sinittext)) { mmu_mark_initmem_nx(); - else - set_memory_attr((unsigned long)_sinittext, numpages, PAGE_KERNEL); + } else { + set_memory_nx((unsigned long)_sinittext, numpages); + set_memory_rw((unsigned long)_sinittext, numpages); + } } #ifdef CONFIG_STRICT_KERNEL_RWX @@ -152,18 +154,14 @@ void mark_rodata_ro(void) return; } - numpages = PFN_UP((unsigned long)_etext) - - PFN_DOWN((unsigned long)_stext); - - set_memory_attr((unsigned long)_stext, numpages, PAGE_KERNEL_ROX); /* - * mark .rodata as read only. Use __init_begin rather than __end_rodata - * to cover NOTES and EXCEPTION_TABLE. + * mark .text and .rodata as read only. Use __init_begin rather than + * __end_rodata to cover NOTES and EXCEPTION_TABLE. */ numpages = PFN_UP((unsigned long)__init_begin) - - PFN_DOWN((unsigned long)__start_rodata); + PFN_DOWN((unsigned long)_stext); - set_memory_attr((unsigned long)__start_rodata, numpages, PAGE_KERNEL_RO); + set_memory_ro((unsigned long)_stext, numpages); // mark_initmem_nx() should have already run by now ptdump_check_wx(); @@ -179,8 +177,8 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) return; if (enable) - set_memory_attr(addr, numpages, PAGE_KERNEL); + set_memory_p(addr, numpages); else - set_memory_attr(addr, numpages, __pgprot(0)); + set_memory_np(addr, numpages); } #endif /* CONFIG_DEBUG_PAGEALLOC */ From a8936569a07bf27cc9cfc2a39a1e5ea91273b2d4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Mon, 10 Jan 2022 12:29:42 +0000 Subject: [PATCH 055/179] powerpc/bpf: Always reallocate BPF_REG_5, BPF_REG_AX and TMP_REG when possible BPF_REG_5, BPF_REG_AX and TMP_REG are mapped on non volatile registers because there are not enough volatile registers, but they don't need to be preserved on function calls. So when some volatile registers become available, those registers can always be reallocated regardless of whether SEEN_FUNC is set or not. Suggested-by: Naveen N. Rao <naveen.n.rao@linux.ibm.com> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Reviewed-by: Naveen N. 
Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/b04c246874b716911139c04bc004b3b14eed07ef.1641817763.git.christophe.leroy@csgroup.eu --- arch/powerpc/net/bpf_jit.h | 3 --- arch/powerpc/net/bpf_jit_comp32.c | 16 ++++++++++++---- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index b20a2a83a6e7..b75507fc8f6b 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -127,9 +127,6 @@ #define SEEN_FUNC 0x20000000 /* might call external helpers */ #define SEEN_TAILCALL 0x40000000 /* uses tail calls */ -#define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */ -#define SEEN_NVREG_MASK 0x0003ffff /* Non volatile registers r14-r31 */ - #ifdef CONFIG_PPC64 extern const int b2p[MAX_BPF_JIT_REG + 2]; #else diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index cf8dd8aea386..43643f1c1034 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -77,14 +77,22 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) return BPF_PPC_STACKFRAME(ctx) - 4; } +#define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */ +#define SEEN_NVREG_FULL_MASK 0x0003ffff /* Non volatile registers r14-r31 */ +#define SEEN_NVREG_TEMP_MASK 0x00001e01 /* BPF_REG_5, BPF_REG_AX, TMP_REG */ + void bpf_jit_realloc_regs(struct codegen_context *ctx) { - if (ctx->seen & SEEN_FUNC) - return; + unsigned int nvreg_mask; - while (ctx->seen & SEEN_NVREG_MASK && + if (ctx->seen & SEEN_FUNC) + nvreg_mask = SEEN_NVREG_TEMP_MASK; + else + nvreg_mask = SEEN_NVREG_FULL_MASK; + + while (ctx->seen & nvreg_mask && (ctx->seen & SEEN_VREG_MASK) != SEEN_VREG_MASK) { - int old = 32 - fls(ctx->seen & (SEEN_NVREG_MASK & 0xaaaaaaab)); + int old = 32 - fls(ctx->seen & (nvreg_mask & 0xaaaaaaab)); int new = 32 - fls(~ctx->seen & (SEEN_VREG_MASK & 0xaaaaaaaa)); int i; From 0670010f3b10aeaad0dfdf0dad0bcd020fc70eb5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Mon, 17 Jan 2022 10:06:39 +0000 Subject: [PATCH 056/179] powerpc/32s: Enable STRICT_MODULE_RWX for the 603 core The book3s/32 MMU doesn't support per page execution protection and doesn't support RO protection for kernel pages. However, on the 603 which implements software loaded TLBs, execution protection is honored by the TLB Miss handler which doesn't load Instruction TLB for non executable pages. And RO protection is honored by clearing the C bit for RO pages, leading to DSI. So on the 603, STRICT_MODULE_RWX is possible without much effort. Don't disable STRICT_MODULE_RWX on book3s/32 and print a warning in case STRICT_MODULE_RWX has been selected and the platform has a Hardware HASH MMU. 
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/1e6162f334167e75f1140082932e3a354b16daba.1642413973.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 2 +- arch/powerpc/mm/pgtable_32.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 2a851d31f120..28e4047e99e8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -135,7 +135,7 @@ config PPC select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX if (PPC_BOOK3S || PPC_8xx || 40x) && !HIBERNATION select ARCH_HAS_STRICT_KERNEL_RWX if FSL_BOOKE && !HIBERNATION && !RANDOMIZE_BASE - select ARCH_HAS_STRICT_MODULE_RWX if ARCH_HAS_STRICT_KERNEL_RWX && !PPC_BOOK3S_32 + select ARCH_HAS_STRICT_MODULE_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE select ARCH_HAS_UBSAN_SANITIZE_ALL diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index f71ededdc02a..a56ade39dc68 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -148,6 +148,9 @@ void mark_rodata_ro(void) { unsigned long numpages; + if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX) && mmu_has_feature(MMU_FTR_HPTE_TABLE)) + pr_warn("This platform has HASH MMU, STRICT_MODULE_RWX won't work\n"); + if (v_block_mapped((unsigned long)_stext + 1)) { mmu_mark_rodata_ro(); ptdump_check_wx(); From 9d44d1bd93b9a881f407b3202dc13fbd85fb5f1a Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Fri, 21 Jan 2022 07:58:47 +0000 Subject: [PATCH 057/179] powerpc: Use the newly added is_tsk_32bit_task() macro Two places deserve using the macro is_tsk_32bit_task() added by commit 252745240ba0 ("powerpc/audit: Fix syscall_get_arch()") Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/7304a889dbe885aefad8a8333673c81ee4b8f7a6.1642751874.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/ptrace/ptrace-view.c | 2 +- arch/powerpc/perf/perf_regs.c | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index b8be1d6668b5..f15bc78caf71 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -841,7 +841,7 @@ static const struct user_regset_view user_ppc_compat_view = { const struct user_regset_view *task_user_regset_view(struct task_struct *task) { - if (IS_ENABLED(CONFIG_PPC64) && test_tsk_thread_flag(task, TIF_32BIT)) + if (IS_ENABLED(CONFIG_COMPAT) && is_tsk_32bit_task(task)) return &user_ppc_compat_view; return &user_ppc_native_view; } diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index 51d31b65e423..350dccb0143c 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -134,12 +134,10 @@ int perf_reg_validate(u64 mask) u64 perf_reg_abi(struct task_struct *task) { -#ifdef CONFIG_PPC64 - if (!test_tsk_thread_flag(task, TIF_32BIT)) - return PERF_SAMPLE_REGS_ABI_64; + if (is_tsk_32bit_task(task)) + return PERF_SAMPLE_REGS_ABI_32; else -#endif - return PERF_SAMPLE_REGS_ABI_32; + return PERF_SAMPLE_REGS_ABI_64; } void perf_get_regs_user(struct perf_regs *regs_user, From 67484e0de9c93b4a9187bb49f45dfdaa8dc03c0b Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> 
Date: Fri, 21 Jan 2022 08:06:27 +0000 Subject: [PATCH 058/179] powerpc/lib/sstep: Use l1_dcache_bytes() instead of opencoding Don't opencode dcache size retrieval based on whether that's ppc32 or ppc64. Use l1_dcache_bytes() Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/6c608fd4795e2d8ea1a0a449405a0087f76d8bb3.1642752375.git.christophe.leroy@csgroup.eu --- arch/powerpc/lib/sstep.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index a94b0cd0bdc5..b7316d697d80 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1065,14 +1065,11 @@ Efault: int emulate_dcbz(unsigned long ea, struct pt_regs *regs) { int err; - unsigned long size; + unsigned long size = l1_dcache_bytes(); #ifdef __powerpc64__ - size = ppc64_caches.l1d.block_size; if (!(regs->msr & MSR_64BIT)) ea &= 0xffffffffUL; -#else - size = L1_CACHE_BYTES; #endif ea &= ~(size - 1); if (!address_ok(regs, ea, size)) From 7c3bba91999075f4cfcab0542e4eb74d2d63554b Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Fri, 21 Jan 2022 08:06:32 +0000 Subject: [PATCH 059/179] powerpc/lib/sstep: Remove unneeded #ifdef __powerpc64__ MSR_64BIT is always defined, no need to hide code using MSR_64BIT inside an #ifdef __powerpc64__ Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/ee61b693bc7e046eed1abb7a34909eb4878a9442.1642752375.git.christophe.leroy@csgroup.eu --- arch/powerpc/lib/sstep.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index b7316d697d80..4aabe3854484 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -75,10 +75,8 @@ extern int do_stqcx(unsigned long ea, unsigned long val0, unsigned long val1, static nokprobe_inline unsigned long truncate_if_32bit(unsigned long msr, unsigned long val) { -#ifdef __powerpc64__ if ((msr & MSR_64BIT) == 0) val &= 0xffffffffUL; -#endif return val; } @@ -1067,10 +1065,8 @@ int emulate_dcbz(unsigned long ea, struct pt_regs *regs) int err; unsigned long size = l1_dcache_bytes(); -#ifdef __powerpc64__ if (!(regs->msr & MSR_64BIT)) ea &= 0xffffffffUL; -#endif ea &= ~(size - 1); if (!address_ok(regs, ea, size)) return -EFAULT; @@ -1136,10 +1132,8 @@ static nokprobe_inline void set_cr0(const struct pt_regs *regs, op->type |= SETCC; op->ccval = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000); -#ifdef __powerpc64__ if (!(regs->msr & MSR_64BIT)) val = (int) val; -#endif if (val < 0) op->ccval |= 0x80000000; else if (val > 0) @@ -1170,12 +1164,10 @@ static nokprobe_inline void add_with_carry(const struct pt_regs *regs, op->type = COMPUTE + SETREG + SETXER; op->reg = rd; op->val = val; -#ifdef __powerpc64__ if (!(regs->msr & MSR_64BIT)) { val = (unsigned int) val; val1 = (unsigned int) val1; } -#endif op->xerval = regs->xer; if (val < val1 || (carry_in && val == val1)) op->xerval |= XER_CA; From 6836f099039e6c72fb548bf527345aa4345c3308 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Fri, 21 Jan 2022 08:06:38 +0000 Subject: [PATCH 060/179] powerpc/lib/sstep: use truncate_if_32bit() Use truncate_if_32bit() when possible instead of open coding. truncate_if_32bit() returns an unsigned long, so don't use it when a signed value is expected. 
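For reference, a small demo of the helper's semantics: when MSR_64BIT is
clear the CPU is in 32-bit mode and effective values wrap at 4GB, so the
upper half is masked off. The MSR_64BIT position used here is a stand-in,
not necessarily the real bit.

#include <stdio.h>

#define MSR_64BIT (1ULL << 63)	/* placeholder bit position */

static unsigned long long truncate_if_32bit(unsigned long long msr,
					    unsigned long long val)
{
	if ((msr & MSR_64BIT) == 0)
		val &= 0xffffffffULL;	/* 32-bit mode: keep low word */
	return val;
}

int main(void)
{
	unsigned long long ea = 0x123456789abcdef0ULL;

	printf("64-bit mode: %#llx\n", truncate_if_32bit(MSR_64BIT, ea));
	printf("32-bit mode: %#llx\n", truncate_if_32bit(0, ea));
	return 0;
}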
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/7e1c07123f13156d4a27991a2e2694fb584bc068.1642752375.git.christophe.leroy@csgroup.eu --- arch/powerpc/lib/sstep.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 4aabe3854484..ca38d026fd88 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1065,8 +1065,7 @@ int emulate_dcbz(unsigned long ea, struct pt_regs *regs) int err; unsigned long size = l1_dcache_bytes(); - if (!(regs->msr & MSR_64BIT)) - ea &= 0xffffffffUL; + ea = truncate_if_32bit(regs->msr, ea); ea &= ~(size - 1); if (!address_ok(regs, ea, size)) return -EFAULT; @@ -1164,10 +1163,8 @@ static nokprobe_inline void add_with_carry(const struct pt_regs *regs, op->type = COMPUTE + SETREG + SETXER; op->reg = rd; op->val = val; - if (!(regs->msr & MSR_64BIT)) { - val = (unsigned int) val; - val1 = (unsigned int) val1; - } + val = truncate_if_32bit(regs->msr, val); + val1 = truncate_if_32bit(regs->msr, val1); op->xerval = regs->xer; if (val < val1 || (carry_in && val == val1)) op->xerval |= XER_CA; From f061fb03ee611c5657010ee4fa2a3fa64dfe3bd0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Fri, 21 Jan 2022 16:30:21 +0000 Subject: [PATCH 061/179] powerpc/vdso: augment VDSO32 functions to support 64 bits build VDSO64 cacheflush.S datapage.S gettimeofday.S and vgettimeofday.c are very similar to their VDSO32 counterpart. VDSO32 counterpart is already more complete than the VDSO64 version as it supports both PPC32 vdso and 32 bits VDSO for PPC64. Use compat macros wherever necessary in PPC32 files so that they can also be used to build VDSO64. vdso64/note.S is already a link to vdso32/note.S so no change is required. Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/c2cbb8f046b7efc251053521dc39b752795e26b7.1642782130.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/asm-compat.h | 2 ++ arch/powerpc/kernel/vdso32/cacheflush.S | 4 ++-- arch/powerpc/kernel/vdso32/datapage.S | 10 ++++++-- arch/powerpc/kernel/vdso32/getcpu.S | 4 ++-- arch/powerpc/kernel/vdso32/gettimeofday.S | 8 +++++-- arch/powerpc/kernel/vdso32/vgettimeofday.c | 27 +++++++++++++++++----- 6 files changed, 41 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h index 2b736d9fbb1b..2bc53c646ccd 100644 --- a/arch/powerpc/include/asm/asm-compat.h +++ b/arch/powerpc/include/asm/asm-compat.h @@ -21,6 +21,7 @@ #define PPC_STLCX stringify_in_c(stdcx.) #define PPC_CNTLZL stringify_in_c(cntlzd) #define PPC_MTOCRF(FXM, RS) MTOCRF((FXM), RS) +#define PPC_SRL stringify_in_c(srd) #define PPC_LR_STKOFF 16 #define PPC_MIN_STKFRM 112 @@ -54,6 +55,7 @@ #define PPC_STLCX stringify_in_c(stwcx.) 
#define PPC_CNTLZL stringify_in_c(cntlzw) #define PPC_MTOCRF stringify_in_c(mtcrf) +#define PPC_SRL stringify_in_c(srw) #define PPC_LR_STKOFF 4 #define PPC_MIN_STKFRM 16 diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S index f340e82d1981..d4e43ab2d5df 100644 --- a/arch/powerpc/kernel/vdso32/cacheflush.S +++ b/arch/powerpc/kernel/vdso32/cacheflush.S @@ -46,7 +46,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) add r8,r8,r5 /* ensure we get enough */ #ifdef CONFIG_PPC64 lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10) - srw. r8,r8,r9 /* compute line count */ + PPC_SRL. r8,r8,r9 /* compute line count */ #else srwi. r8, r8, L1_CACHE_SHIFT mr r7, r6 @@ -72,7 +72,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) subf r8,r6,r4 /* compute length */ add r8,r8,r5 lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10) - srw. r8,r8,r9 /* compute line count */ + PPC_SRL. r8,r8,r9 /* compute line count */ crclr cr0*4+so beqlr /* nothing to do? */ #endif diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S index 65244416ab94..db8e167f0166 100644 --- a/arch/powerpc/kernel/vdso32/datapage.S +++ b/arch/powerpc/kernel/vdso32/datapage.S @@ -30,11 +30,15 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) mr. r4,r3 get_datapage r3 mtlr r12 +#ifdef __powerpc64__ + addi r3,r3,CFG_SYSCALL_MAP64 +#else addi r3,r3,CFG_SYSCALL_MAP32 +#endif + crclr cr0*4+so beqlr li r0,NR_syscalls stw r0,0(r4) - crclr cr0*4+so blr .cfi_endproc V_FUNCTION_END(__kernel_get_syscall_map) @@ -49,8 +53,10 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) mflr r12 .cfi_register lr,r12 get_datapage r3 +#ifndef __powerpc64__ lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3) - lwz r3,CFG_TB_TICKS_PER_SEC(r3) +#endif + PPC_LL r3,CFG_TB_TICKS_PER_SEC(r3) mtlr r12 crclr cr0*4+so blr diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso32/getcpu.S index ff5e214fec41..8e08ccf19062 100644 --- a/arch/powerpc/kernel/vdso32/getcpu.S +++ b/arch/powerpc/kernel/vdso32/getcpu.S @@ -19,8 +19,8 @@ V_FUNCTION_BEGIN(__kernel_getcpu) .cfi_startproc mfspr r5,SPRN_SPRG_VDSO_READ - cmpwi cr0,r3,0 - cmpwi cr1,r4,0 + PPC_LCMPI cr0,r3,0 + PPC_LCMPI cr1,r4,0 clrlwi r6,r5,16 rlwinm r7,r5,16,31-15,31-0 beq cr0,1f diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index d21d08140a5e..c875312274aa 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* - * Userland implementation of gettimeofday() for 32 bits processes in a - * ppc64 kernel for use in the vDSO + * Userland implementation of gettimeofday() for processes + * for use in the vDSO * * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org, * IBM Corp. @@ -41,9 +41,11 @@ V_FUNCTION_END(__kernel_clock_gettime) * int __kernel_clock_gettime64(clockid_t clock_id, struct __timespec64 *ts); * */ +#ifndef __powerpc64__ V_FUNCTION_BEGIN(__kernel_clock_gettime64) cvdso_call __c_kernel_clock_gettime64 V_FUNCTION_END(__kernel_clock_gettime64) +#endif /* * Exact prototype of clock_getres() @@ -69,6 +71,7 @@ V_FUNCTION_END(__kernel_time) /* Routines for restoring integer registers, called by the compiler. */ /* Called with r11 pointing to the stack header word of the caller of the */ /* function, just beyond the end of the integer restore area. 
*/
+#ifndef __powerpc64__
 _GLOBAL(_restgpr_31_x)
 _GLOBAL(_rest32gpr_31_x)
 	lwz	r0,4(r11)
@@ -76,3 +79,4 @@ _GLOBAL(_rest32gpr_31_x)
 	mtlr	r0
 	mr	r1,r11
 	blr
+#endif
diff --git a/arch/powerpc/kernel/vdso32/vgettimeofday.c b/arch/powerpc/kernel/vdso32/vgettimeofday.c
index 65fb03fb1731..55a287c9a736 100644
--- a/arch/powerpc/kernel/vdso32/vgettimeofday.c
+++ b/arch/powerpc/kernel/vdso32/vgettimeofday.c
@@ -2,8 +2,22 @@
 /*
 * Powerpc userspace implementations of gettimeofday() and similar.
 */
+#include <linux/time.h>
 #include <linux/types.h>

+#ifdef __powerpc64__
+int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts,
+			     const struct vdso_data *vd)
+{
+	return __cvdso_clock_gettime_data(vd, clock, ts);
+}
+
+int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res,
+			    const struct vdso_data *vd)
+{
+	return __cvdso_clock_getres_data(vd, clock_id, res);
+}
+#else
 int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts,
			     const struct vdso_data *vd)
 {
@@ -16,18 +30,19 @@ int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts,
 	return __cvdso_clock_gettime_data(vd, clock, ts);
 }

+int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res,
+			    const struct vdso_data *vd)
+{
+	return __cvdso_clock_getres_time32_data(vd, clock_id, res);
+}
+#endif
+
 int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz,
			    const struct vdso_data *vd)
 {
 	return __cvdso_gettimeofday_data(vd, tv, tz);
 }

-int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res,
-			    const struct vdso_data *vd)
-{
-	return __cvdso_clock_getres_time32_data(vd, clock_id, res);
-}
-
 __kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_data *vd)
 {
 	return __cvdso_time_data(vd, time);

From d88378d8d2c776154c6b606f2a423a81d7795f6f Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Fri, 21 Jan 2022 16:30:23 +0000
Subject: [PATCH 062/179] powerpc/vdso: Rework VDSO32 makefile to add a suffix
 to object files

In order to merge the vdso32 and vdso64 builds in a following patch,
rework the Makefile in order to add a -32 suffix to VDSO32 object files.

Also change sigtramp.S to sigtramp32.S as VDSO64 sigtramp.S is too
different to be squashed into VDSO32 sigtramp.S in the first place.
gen_vdso_offsets.sh also becomes gen_vdso32_offsets.sh Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/0c421b704a57b228e75a891512568339c53667ad.1642782130.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/vdso32/Makefile | 47 +++++++++---------- ..._vdso_offsets.sh => gen_vdso32_offsets.sh} | 0 .../vdso32/{sigtramp.S => sigtramp32.S} | 0 3 files changed, 21 insertions(+), 26 deletions(-) rename arch/powerpc/kernel/vdso32/{gen_vdso_offsets.sh => gen_vdso32_offsets.sh} (100%) rename arch/powerpc/kernel/vdso32/{sigtramp.S => sigtramp32.S} (100%) diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index 7d9a6fee0e3d..7d7b38d90ca5 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -5,15 +5,16 @@ ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN|R_PPC_REL24 include $(srctree)/lib/vdso/Makefile -obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o +obj-vdso32 = sigtramp32-32.o gettimeofday-32.o datapage-32.o cacheflush-32.o note-32.o getcpu-32.o ifneq ($(c-gettimeofday-y),) - CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) - CFLAGS_vgettimeofday.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) - CFLAGS_vgettimeofday.o += $(call cc-option, -fno-stack-protector) - CFLAGS_vgettimeofday.o += -DDISABLE_BRANCH_PROFILING - CFLAGS_vgettimeofday.o += -ffreestanding -fasynchronous-unwind-tables - CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) + CFLAGS_vgettimeofday-32.o += -include $(c-gettimeofday-y) + CFLAGS_vgettimeofday-32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + CFLAGS_vgettimeofday-32.o += $(call cc-option, -fno-stack-protector) + CFLAGS_vgettimeofday-32.o += -DDISABLE_BRANCH_PROFILING + CFLAGS_vgettimeofday-32.o += -ffreestanding -fasynchronous-unwind-tables + CFLAGS_REMOVE_vgettimeofday-32.o = $(CC_FLAGS_FTRACE) + CFLAGS_REMOVE_vgettimeofday-32.o += -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc endif # Build rules @@ -24,13 +25,7 @@ else VDSOCC := $(CC) endif -CC32FLAGS := -ifdef CONFIG_PPC64 -CC32FLAGS += -m32 -KBUILD_CFLAGS := $(filter-out -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc,$(KBUILD_CFLAGS)) -endif - -targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday.o +targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday-32.o obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) GCOV_PROFILE := n @@ -38,36 +33,36 @@ KCOV_INSTRUMENT := n UBSAN_SANITIZE := n KASAN_SANITIZE := n -ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ - -Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both -asflags-y := -D__VDSO32__ -s +ccflags-y := -shared -fno-common -fno-builtin -nostdlib -Wl,--hash-style=both + +CC32FLAGS := -Wl,-soname=linux-vdso32.so.1 -m32 +AS32FLAGS := -D__VDSO32__ -s -obj-y += vdso32_wrapper.o targets += vdso32.lds CPPFLAGS_vdso32.lds += -P -C -Upowerpc # link rule for the .so file, .lds has to be first -$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday.o FORCE +$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o FORCE $(call if_changed,vdso32ld_and_check) # assembly rules for the .S files -$(obj-vdso32): %.o: %.S FORCE +$(obj-vdso32): %-32.o: %.S FORCE $(call if_changed_dep,vdso32as) -$(obj)/vgettimeofday.o: %.o: %.c FORCE +$(obj)/vgettimeofday-32.o: %-32.o: %.c FORCE $(call 
if_changed_dep,vdso32cc) # Generate VDSO offsets using helper script -gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh -quiet_cmd_vdsosym = VDSOSYM $@ - cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ +gen-vdso32sym := $(srctree)/$(src)/gen_vdso32_offsets.sh +quiet_cmd_vdso32sym = VDSO32SYM $@ + cmd_vdso32sym = $(NM) $< | $(gen-vdso32sym) | LC_ALL=C sort > $@ include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE - $(call if_changed,vdsosym) + $(call if_changed,vdso32sym) # actual build commands quiet_cmd_vdso32ld_and_check = VDSO32L $@ cmd_vdso32ld_and_check = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) ; $(cmd_vdso_check) quiet_cmd_vdso32as = VDSO32A $@ - cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) -c -o $@ $< + cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) $(AS32FLAGS) -c -o $@ $< quiet_cmd_vdso32cc = VDSO32C $@ cmd_vdso32cc = $(VDSOCC) $(c_flags) $(CC32FLAGS) -c -o $@ $< diff --git a/arch/powerpc/kernel/vdso32/gen_vdso_offsets.sh b/arch/powerpc/kernel/vdso32/gen_vdso32_offsets.sh similarity index 100% rename from arch/powerpc/kernel/vdso32/gen_vdso_offsets.sh rename to arch/powerpc/kernel/vdso32/gen_vdso32_offsets.sh diff --git a/arch/powerpc/kernel/vdso32/sigtramp.S b/arch/powerpc/kernel/vdso32/sigtramp32.S similarity index 100% rename from arch/powerpc/kernel/vdso32/sigtramp.S rename to arch/powerpc/kernel/vdso32/sigtramp32.S From fd1feade75fb1a9275c39d76c5ccdbbbe6b37aa3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Fri, 21 Jan 2022 16:30:27 +0000 Subject: [PATCH 063/179] powerpc/vdso: Merge vdso64 and vdso32 into a single directory merge vdso64 into vdso32 and rename it vdso. Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/4dbe05cc130f6a0858d09ac72e436c373cb08b70.1642782130.git.christophe.leroy@csgroup.eu --- arch/powerpc/Makefile | 4 +- arch/powerpc/kernel/Makefile | 6 +- .../kernel/{vdso32 => vdso}/.gitignore | 2 + arch/powerpc/kernel/{vdso32 => vdso}/Makefile | 35 +++++++++ .../kernel/{vdso32 => vdso}/cacheflush.S | 0 .../kernel/{vdso32 => vdso}/datapage.S | 0 .../{vdso32 => vdso}/gen_vdso32_offsets.sh | 0 .../gen_vdso64_offsets.sh} | 0 arch/powerpc/kernel/{vdso32 => vdso}/getcpu.S | 0 .../kernel/{vdso32 => vdso}/gettimeofday.S | 0 arch/powerpc/kernel/{vdso32 => vdso}/note.S | 0 .../kernel/{vdso32 => vdso}/sigtramp32.S | 0 .../{vdso64/sigtramp.S => vdso/sigtramp64.S} | 0 .../kernel/{vdso32 => vdso}/vdso32.lds.S | 0 .../kernel/{vdso64 => vdso}/vdso64.lds.S | 0 .../kernel/{vdso32 => vdso}/vgettimeofday.c | 0 arch/powerpc/kernel/vdso32_wrapper.S | 2 +- arch/powerpc/kernel/vdso64/.gitignore | 3 - arch/powerpc/kernel/vdso64/Makefile | 56 -------------- arch/powerpc/kernel/vdso64/cacheflush.S | 75 ------------------- arch/powerpc/kernel/vdso64/datapage.S | 59 --------------- arch/powerpc/kernel/vdso64/getcpu.S | 33 -------- arch/powerpc/kernel/vdso64/gettimeofday.S | 58 -------------- arch/powerpc/kernel/vdso64/note.S | 1 - arch/powerpc/kernel/vdso64/vgettimeofday.c | 29 ------- arch/powerpc/kernel/vdso64_wrapper.S | 2 +- 26 files changed, 44 insertions(+), 321 deletions(-) rename arch/powerpc/kernel/{vdso32 => vdso}/.gitignore (72%) rename arch/powerpc/kernel/{vdso32 => vdso}/Makefile (56%) rename arch/powerpc/kernel/{vdso32 => vdso}/cacheflush.S (100%) rename arch/powerpc/kernel/{vdso32 => vdso}/datapage.S (100%) rename 
arch/powerpc/kernel/{vdso32 => vdso}/gen_vdso32_offsets.sh (100%) rename arch/powerpc/kernel/{vdso64/gen_vdso_offsets.sh => vdso/gen_vdso64_offsets.sh} (100%) rename arch/powerpc/kernel/{vdso32 => vdso}/getcpu.S (100%) rename arch/powerpc/kernel/{vdso32 => vdso}/gettimeofday.S (100%) rename arch/powerpc/kernel/{vdso32 => vdso}/note.S (100%) rename arch/powerpc/kernel/{vdso32 => vdso}/sigtramp32.S (100%) rename arch/powerpc/kernel/{vdso64/sigtramp.S => vdso/sigtramp64.S} (100%) rename arch/powerpc/kernel/{vdso32 => vdso}/vdso32.lds.S (100%) rename arch/powerpc/kernel/{vdso64 => vdso}/vdso64.lds.S (100%) rename arch/powerpc/kernel/{vdso32 => vdso}/vgettimeofday.c (100%) delete mode 100644 arch/powerpc/kernel/vdso64/.gitignore delete mode 100644 arch/powerpc/kernel/vdso64/Makefile delete mode 100644 arch/powerpc/kernel/vdso64/cacheflush.S delete mode 100644 arch/powerpc/kernel/vdso64/datapage.S delete mode 100644 arch/powerpc/kernel/vdso64/getcpu.S delete mode 100644 arch/powerpc/kernel/vdso64/gettimeofday.S delete mode 100644 arch/powerpc/kernel/vdso64/note.S delete mode 100644 arch/powerpc/kernel/vdso64/vgettimeofday.c diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 5f16ac1583c5..ddc5a706760a 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -421,9 +421,9 @@ ifeq ($(KBUILD_EXTMOD),) prepare: vdso_prepare vdso_prepare: prepare0 $(if $(CONFIG_VDSO32),$(Q)$(MAKE) \ - $(build)=arch/powerpc/kernel/vdso32 include/generated/vdso32-offsets.h) + $(build)=arch/powerpc/kernel/vdso include/generated/vdso32-offsets.h) $(if $(CONFIG_PPC64),$(Q)$(MAKE) \ - $(build)=arch/powerpc/kernel/vdso64 include/generated/vdso64-offsets.h) + $(build)=arch/powerpc/kernel/vdso include/generated/vdso64-offsets.h) endif archprepare: checkbin diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 4d7829399570..4ddd161aef32 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -194,8 +194,8 @@ targets += prom_init_check clean-files := vmlinux.lds # Force dependency (incbin is bad) -$(obj)/vdso32_wrapper.o : $(obj)/vdso32/vdso32.so.dbg -$(obj)/vdso64_wrapper.o : $(obj)/vdso64/vdso64.so.dbg +$(obj)/vdso32_wrapper.o : $(obj)/vdso/vdso32.so.dbg +$(obj)/vdso64_wrapper.o : $(obj)/vdso/vdso64.so.dbg # for cleaning -subdir- += vdso32 vdso64 +subdir- += vdso diff --git a/arch/powerpc/kernel/vdso32/.gitignore b/arch/powerpc/kernel/vdso/.gitignore similarity index 72% rename from arch/powerpc/kernel/vdso32/.gitignore rename to arch/powerpc/kernel/vdso/.gitignore index 824b863ec6bd..dd9bdd67758b 100644 --- a/arch/powerpc/kernel/vdso32/.gitignore +++ b/arch/powerpc/kernel/vdso/.gitignore @@ -1,3 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only vdso32.lds vdso32.so.dbg +vdso64.lds +vdso64.so.dbg diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso/Makefile similarity index 56% rename from arch/powerpc/kernel/vdso32/Makefile rename to arch/powerpc/kernel/vdso/Makefile index 7d7b38d90ca5..954974287ee7 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -6,6 +6,7 @@ ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_ include $(srctree)/lib/vdso/Makefile obj-vdso32 = sigtramp32-32.o gettimeofday-32.o datapage-32.o cacheflush-32.o note-32.o getcpu-32.o +obj-vdso64 = sigtramp64-64.o gettimeofday-64.o datapage-64.o cacheflush-64.o note-64.o getcpu-64.o ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday-32.o += -include $(c-gettimeofday-y) @@ -15,6 +16,17 @@ ifneq 
($(c-gettimeofday-y),) CFLAGS_vgettimeofday-32.o += -ffreestanding -fasynchronous-unwind-tables CFLAGS_REMOVE_vgettimeofday-32.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_vgettimeofday-32.o += -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc + CFLAGS_vgettimeofday-64.o += -include $(c-gettimeofday-y) + CFLAGS_vgettimeofday-64.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + CFLAGS_vgettimeofday-64.o += $(call cc-option, -fno-stack-protector) + CFLAGS_vgettimeofday-64.o += -DDISABLE_BRANCH_PROFILING + CFLAGS_vgettimeofday-64.o += -ffreestanding -fasynchronous-unwind-tables + CFLAGS_REMOVE_vgettimeofday-64.o = $(CC_FLAGS_FTRACE) +# Go prior to 1.16.x assumes r30 is not clobbered by any VDSO code. That used to be true +# by accident when the VDSO was hand-written asm code, but may not be now that the VDSO is +# compiler generated. To avoid breaking Go tell GCC not to use r30. Impact on code +# generation is minimal, it will just use r29 instead. + CFLAGS_vgettimeofday-64.o += $(call cc-option, -ffixed-r30) endif # Build rules @@ -27,6 +39,8 @@ endif targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday-32.o obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) +targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o +obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) GCOV_PROFILE := n KCOV_INSTRUMENT := n @@ -38,26 +52,42 @@ ccflags-y := -shared -fno-common -fno-builtin -nostdlib -Wl,--hash-style=both CC32FLAGS := -Wl,-soname=linux-vdso32.so.1 -m32 AS32FLAGS := -D__VDSO32__ -s +CC64FLAGS := -Wl,-soname=linux-vdso64.so.1 +AS64FLAGS := -D__VDSO64__ -s + targets += vdso32.lds CPPFLAGS_vdso32.lds += -P -C -Upowerpc +targets += vdso64.lds +CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) # link rule for the .so file, .lds has to be first $(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o FORCE $(call if_changed,vdso32ld_and_check) +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o FORCE + $(call if_changed,vdso64ld_and_check) # assembly rules for the .S files $(obj-vdso32): %-32.o: %.S FORCE $(call if_changed_dep,vdso32as) $(obj)/vgettimeofday-32.o: %-32.o: %.c FORCE $(call if_changed_dep,vdso32cc) +$(obj-vdso64): %-64.o: %.S FORCE + $(call if_changed_dep,vdso64as) +$(obj)/vgettimeofday-64.o: %-64.o: %.c FORCE + $(call if_changed_dep,cc_o_c) # Generate VDSO offsets using helper script gen-vdso32sym := $(srctree)/$(src)/gen_vdso32_offsets.sh quiet_cmd_vdso32sym = VDSO32SYM $@ cmd_vdso32sym = $(NM) $< | $(gen-vdso32sym) | LC_ALL=C sort > $@ +gen-vdso64sym := $(srctree)/$(src)/gen_vdso64_offsets.sh +quiet_cmd_vdso64sym = VDSO64SYM $@ + cmd_vdso64sym = $(NM) $< | $(gen-vdso64sym) | LC_ALL=C sort > $@ include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE $(call if_changed,vdso32sym) +include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE + $(call if_changed,vdso64sym) # actual build commands quiet_cmd_vdso32ld_and_check = VDSO32L $@ @@ -66,3 +96,8 @@ quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) $(AS32FLAGS) -c -o $@ $< quiet_cmd_vdso32cc = VDSO32C $@ cmd_vdso32cc = $(VDSOCC) $(c_flags) $(CC32FLAGS) -c -o $@ $< + +quiet_cmd_vdso64ld_and_check = VDSO64L $@ + cmd_vdso64ld_and_check = $(VDSOCC) $(c_flags) $(CC64FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) ; $(cmd_vdso_check) +quiet_cmd_vdso64as = VDSO64A $@ + cmd_vdso64as = $(VDSOCC) $(a_flags) $(CC64FLAGS) $(AS64FLAGS) -c -o $@ $< diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso/cacheflush.S similarity index 100% rename from 
arch/powerpc/kernel/vdso32/cacheflush.S rename to arch/powerpc/kernel/vdso/cacheflush.S diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso/datapage.S similarity index 100% rename from arch/powerpc/kernel/vdso32/datapage.S rename to arch/powerpc/kernel/vdso/datapage.S diff --git a/arch/powerpc/kernel/vdso32/gen_vdso32_offsets.sh b/arch/powerpc/kernel/vdso/gen_vdso32_offsets.sh similarity index 100% rename from arch/powerpc/kernel/vdso32/gen_vdso32_offsets.sh rename to arch/powerpc/kernel/vdso/gen_vdso32_offsets.sh diff --git a/arch/powerpc/kernel/vdso64/gen_vdso_offsets.sh b/arch/powerpc/kernel/vdso/gen_vdso64_offsets.sh similarity index 100% rename from arch/powerpc/kernel/vdso64/gen_vdso_offsets.sh rename to arch/powerpc/kernel/vdso/gen_vdso64_offsets.sh diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso/getcpu.S similarity index 100% rename from arch/powerpc/kernel/vdso32/getcpu.S rename to arch/powerpc/kernel/vdso/getcpu.S diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S similarity index 100% rename from arch/powerpc/kernel/vdso32/gettimeofday.S rename to arch/powerpc/kernel/vdso/gettimeofday.S diff --git a/arch/powerpc/kernel/vdso32/note.S b/arch/powerpc/kernel/vdso/note.S similarity index 100% rename from arch/powerpc/kernel/vdso32/note.S rename to arch/powerpc/kernel/vdso/note.S diff --git a/arch/powerpc/kernel/vdso32/sigtramp32.S b/arch/powerpc/kernel/vdso/sigtramp32.S similarity index 100% rename from arch/powerpc/kernel/vdso32/sigtramp32.S rename to arch/powerpc/kernel/vdso/sigtramp32.S diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso/sigtramp64.S similarity index 100% rename from arch/powerpc/kernel/vdso64/sigtramp.S rename to arch/powerpc/kernel/vdso/sigtramp64.S diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso/vdso32.lds.S similarity index 100% rename from arch/powerpc/kernel/vdso32/vdso32.lds.S rename to arch/powerpc/kernel/vdso/vdso32.lds.S diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso/vdso64.lds.S similarity index 100% rename from arch/powerpc/kernel/vdso64/vdso64.lds.S rename to arch/powerpc/kernel/vdso/vdso64.lds.S diff --git a/arch/powerpc/kernel/vdso32/vgettimeofday.c b/arch/powerpc/kernel/vdso/vgettimeofday.c similarity index 100% rename from arch/powerpc/kernel/vdso32/vgettimeofday.c rename to arch/powerpc/kernel/vdso/vgettimeofday.c diff --git a/arch/powerpc/kernel/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32_wrapper.S index 3f5ef035b0a9..10f92f265d51 100644 --- a/arch/powerpc/kernel/vdso32_wrapper.S +++ b/arch/powerpc/kernel/vdso32_wrapper.S @@ -7,7 +7,7 @@ .globl vdso32_start, vdso32_end .balign PAGE_SIZE vdso32_start: - .incbin "arch/powerpc/kernel/vdso32/vdso32.so.dbg" + .incbin "arch/powerpc/kernel/vdso/vdso32.so.dbg" .balign PAGE_SIZE vdso32_end: diff --git a/arch/powerpc/kernel/vdso64/.gitignore b/arch/powerpc/kernel/vdso64/.gitignore deleted file mode 100644 index 84151a7ba31d..000000000000 --- a/arch/powerpc/kernel/vdso64/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -vdso64.lds -vdso64.so.dbg diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile deleted file mode 100644 index 3c5baaa6f1e7..000000000000 --- a/arch/powerpc/kernel/vdso64/Makefile +++ /dev/null @@ -1,56 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# List of files in the vdso, has to be asm only for now - -ARCH_REL_TYPE_ABS := 
R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN|R_PPC_REL24 -include $(srctree)/lib/vdso/Makefile - -obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o - -ifneq ($(c-gettimeofday-y),) - CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) - CFLAGS_vgettimeofday.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) - CFLAGS_vgettimeofday.o += $(call cc-option, -fno-stack-protector) - CFLAGS_vgettimeofday.o += -DDISABLE_BRANCH_PROFILING - CFLAGS_vgettimeofday.o += -ffreestanding -fasynchronous-unwind-tables - CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -endif - -# Build rules - -targets := $(obj-vdso64) vdso64.so.dbg vgettimeofday.o -obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) - -GCOV_PROFILE := n -KCOV_INSTRUMENT := n -UBSAN_SANITIZE := n -KASAN_SANITIZE := n - -ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ - -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both - -# Go prior to 1.16.x assumes r30 is not clobbered by any VDSO code. That used to be true -# by accident when the VDSO was hand-written asm code, but may not be now that the VDSO is -# compiler generated. To avoid breaking Go tell GCC not to use r30. Impact on code -# generation is minimal, it will just use r29 instead. -ccflags-y += $(call cc-option, -ffixed-r30) - -asflags-y := -D__VDSO64__ -s - -targets += vdso64.lds -CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) - -# link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday.o FORCE - $(call if_changed,vdso64ld_and_check) - -# Generate VDSO offsets using helper script -gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh -quiet_cmd_vdsosym = VDSOSYM $@ - cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ - -include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE - $(call if_changed,vdsosym) - -# actual build commands -quiet_cmd_vdso64ld_and_check = VDSO64L $@ - cmd_vdso64ld_and_check = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check) diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S deleted file mode 100644 index 76c3c8cf8ece..000000000000 --- a/arch/powerpc/kernel/vdso64/cacheflush.S +++ /dev/null @@ -1,75 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * vDSO provided cache flush routines - * - * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), - * IBM Corp. - */ -#include <asm/processor.h> -#include <asm/ppc_asm.h> -#include <asm/vdso.h> -#include <asm/vdso_datapage.h> -#include <asm/asm-offsets.h> - - .text - -/* - * Default "generic" version of __kernel_sync_dicache. - * - * void __kernel_sync_dicache(unsigned long start, unsigned long end) - * - * Flushes the data cache & invalidate the instruction cache for the - * provided range [start, end[ - */ -V_FUNCTION_BEGIN(__kernel_sync_dicache) - .cfi_startproc -BEGIN_FTR_SECTION - b 3f -END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) - mflr r12 - .cfi_register lr,r12 - get_datapage r10 - mtlr r12 - .cfi_restore lr - - lwz r7,CFG_DCACHE_BLOCKSZ(r10) - addi r5,r7,-1 - andc r6,r3,r5 /* round low to line bdy */ - subf r8,r6,r4 /* compute length */ - add r8,r8,r5 /* ensure we get enough */ - lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10) - srd. r8,r8,r9 /* compute line count */ - crclr cr0*4+so - beqlr /* nothing to do? 
*/ - mtctr r8 -1: dcbst 0,r6 - add r6,r6,r7 - bdnz 1b - sync - -/* Now invalidate the instruction cache */ - - lwz r7,CFG_ICACHE_BLOCKSZ(r10) - addi r5,r7,-1 - andc r6,r3,r5 /* round low to line bdy */ - subf r8,r6,r4 /* compute length */ - add r8,r8,r5 - lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10) - srd. r8,r8,r9 /* compute line count */ - crclr cr0*4+so - beqlr /* nothing to do? */ - mtctr r8 -2: icbi 0,r6 - add r6,r6,r7 - bdnz 2b - isync - li r3,0 - blr -3: - crclr cr0*4+so - sync - isync - li r3,0 - blr - .cfi_endproc -V_FUNCTION_END(__kernel_sync_dicache) diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S deleted file mode 100644 index 00760dc69d68..000000000000 --- a/arch/powerpc/kernel/vdso64/datapage.S +++ /dev/null @@ -1,59 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Access to the shared data page by the vDSO & syscall map - * - * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp. - */ - -#include <asm/processor.h> -#include <asm/ppc_asm.h> -#include <asm/asm-offsets.h> -#include <asm/unistd.h> -#include <asm/vdso.h> -#include <asm/vdso_datapage.h> - - .text - -/* - * void *__kernel_get_syscall_map(unsigned int *syscall_count) ; - * - * returns a pointer to the syscall map. the map is agnostic to the - * size of "long", unlike kernel bitops, it stores bits from top to - * bottom so that memory actually contains a linear bitmap - * check for syscall N by testing bit (0x80000000 >> (N & 0x1f)) of - * 32 bits int at N >> 5. - */ -V_FUNCTION_BEGIN(__kernel_get_syscall_map) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - mr r4,r3 - get_datapage r3 - mtlr r12 - addi r3,r3,CFG_SYSCALL_MAP64 - cmpldi cr0,r4,0 - crclr cr0*4+so - beqlr - li r0,NR_syscalls - stw r0,0(r4) - blr - .cfi_endproc -V_FUNCTION_END(__kernel_get_syscall_map) - - -/* - * void unsigned long __kernel_get_tbfreq(void); - * - * returns the timebase frequency in HZ - */ -V_FUNCTION_BEGIN(__kernel_get_tbfreq) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - get_datapage r3 - ld r3,CFG_TB_TICKS_PER_SEC(r3) - mtlr r12 - crclr cr0*4+so - blr - .cfi_endproc -V_FUNCTION_END(__kernel_get_tbfreq) diff --git a/arch/powerpc/kernel/vdso64/getcpu.S b/arch/powerpc/kernel/vdso64/getcpu.S deleted file mode 100644 index 12bbf236cdc4..000000000000 --- a/arch/powerpc/kernel/vdso64/getcpu.S +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * - * Copyright (C) IBM Corporation, 2012 - * - * Author: Anton Blanchard <anton@au.ibm.com> - */ -#include <asm/ppc_asm.h> -#include <asm/vdso.h> - - .text -/* - * Exact prototype of getcpu - * - * int __kernel_getcpu(unsigned *cpu, unsigned *node); - * - */ -V_FUNCTION_BEGIN(__kernel_getcpu) - .cfi_startproc - mfspr r5,SPRN_SPRG_VDSO_READ - cmpdi cr0,r3,0 - cmpdi cr1,r4,0 - clrlwi r6,r5,16 - rlwinm r7,r5,16,31-15,31-0 - beq cr0,1f - stw r6,0(r3) -1: beq cr1,2f - stw r7,0(r4) -2: crclr cr0*4+so - li r3,0 /* always success */ - blr - .cfi_endproc -V_FUNCTION_END(__kernel_getcpu) diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S deleted file mode 100644 index d7a7bfb51081..000000000000 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ /dev/null @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Userland implementation of gettimeofday() for 64 bits processes in a - * ppc64 kernel for use in the vDSO - * - * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), - * IBM Corp. 
- */ -#include <asm/processor.h> -#include <asm/ppc_asm.h> -#include <asm/vdso.h> -#include <asm/vdso_datapage.h> -#include <asm/asm-offsets.h> -#include <asm/unistd.h> -#include <asm/vdso/gettimeofday.h> - - .text -/* - * Exact prototype of gettimeofday - * - * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); - * - */ -V_FUNCTION_BEGIN(__kernel_gettimeofday) - cvdso_call __c_kernel_gettimeofday -V_FUNCTION_END(__kernel_gettimeofday) - - -/* - * Exact prototype of clock_gettime() - * - * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); - * - */ -V_FUNCTION_BEGIN(__kernel_clock_gettime) - cvdso_call __c_kernel_clock_gettime -V_FUNCTION_END(__kernel_clock_gettime) - - -/* - * Exact prototype of clock_getres() - * - * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); - * - */ -V_FUNCTION_BEGIN(__kernel_clock_getres) - cvdso_call __c_kernel_clock_getres -V_FUNCTION_END(__kernel_clock_getres) - -/* - * Exact prototype of time() - * - * time_t time(time *t); - * - */ -V_FUNCTION_BEGIN(__kernel_time) - cvdso_call_time __c_kernel_time -V_FUNCTION_END(__kernel_time) diff --git a/arch/powerpc/kernel/vdso64/note.S b/arch/powerpc/kernel/vdso64/note.S deleted file mode 100644 index dc2a509f7e8a..000000000000 --- a/arch/powerpc/kernel/vdso64/note.S +++ /dev/null @@ -1 +0,0 @@ -#include "../vdso32/note.S" diff --git a/arch/powerpc/kernel/vdso64/vgettimeofday.c b/arch/powerpc/kernel/vdso64/vgettimeofday.c deleted file mode 100644 index 5b5500058344..000000000000 --- a/arch/powerpc/kernel/vdso64/vgettimeofday.c +++ /dev/null @@ -1,29 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Powerpc userspace implementations of gettimeofday() and similar. - */ -#include <linux/time.h> -#include <linux/types.h> - -int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts, - const struct vdso_data *vd) -{ - return __cvdso_clock_gettime_data(vd, clock, ts); -} - -int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz, - const struct vdso_data *vd) -{ - return __cvdso_gettimeofday_data(vd, tv, tz); -} - -int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res, - const struct vdso_data *vd) -{ - return __cvdso_clock_getres_data(vd, clock_id, res); -} - -__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_data *vd) -{ - return __cvdso_time_data(vd, time); -} diff --git a/arch/powerpc/kernel/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64_wrapper.S index 1d56d81fe3b3..839d1a61411d 100644 --- a/arch/powerpc/kernel/vdso64_wrapper.S +++ b/arch/powerpc/kernel/vdso64_wrapper.S @@ -7,7 +7,7 @@ .globl vdso64_start, vdso64_end .balign PAGE_SIZE vdso64_start: - .incbin "arch/powerpc/kernel/vdso64/vdso64.so.dbg" + .incbin "arch/powerpc/kernel/vdso/vdso64.so.dbg" .balign PAGE_SIZE vdso64_end: From 9b97bea90072a075363a200dd7b54ad4a24e9491 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Fri, 21 Jan 2022 16:30:30 +0000 Subject: [PATCH 064/179] powerpc/vdso: Remove cvdso_call_time macro cvdso_call_time macro is very similar to cvdso_call macro. Add a call_time argument to cvdso_call which is 0 by default and set to 1 when using cvdso_call to call __c_kernel_time(). Return returned value as is with CR[SO] cleared when it is used for time(). 
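For illustration, the difference between the two cases is just which argument register receives the vDSO data page pointer; a sketch based on the prototypes in vgettimeofday.c (shown above, not part of this change):

	/* call_time=0: data pointer is the last argument, set up in r5 */
	int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts,
				     const struct vdso_data *vd);
	/* call_time=1: time() takes it as the second argument, set up in r4 */
	__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time,
					    const struct vdso_data *vd);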
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/837a260ad86fc1ce297a562c2117fd69be5f7b5c.1642782130.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/vdso/gettimeofday.h | 37 ++++++-------------- arch/powerpc/kernel/vdso/gettimeofday.S | 2 +- 2 files changed, 11 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index 1faff0be1111..df00e91c9a90 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -9,12 +9,12 @@ #include <asm/ppc_asm.h> /* - * The macros sets two stack frames, one for the caller and one for the callee + * The macro sets two stack frames, one for the caller and one for the callee * because there are no requirement for the caller to set a stack frame when * calling VDSO so it may have omitted to set one, especially on PPC64 */ -.macro cvdso_call funct +.macro cvdso_call funct call_time=0 .cfi_startproc PPC_STLU r1, -PPC_MIN_STKFRM(r1) mflr r0 @@ -25,45 +25,28 @@ PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) #endif get_datapage r5 + .ifeq \call_time addi r5, r5, VDSO_DATA_OFFSET + .else + addi r4, r5, VDSO_DATA_OFFSET + .endif bl DOTSYM(\funct) PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) #ifdef __powerpc64__ PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) #endif + .ifeq \call_time cmpwi r3, 0 + .endif mtlr r0 .cfi_restore lr addi r1, r1, 2 * PPC_MIN_STKFRM crclr so + .ifeq \call_time beqlr+ crset so neg r3, r3 - blr - .cfi_endproc -.endm - -.macro cvdso_call_time funct - .cfi_startproc - PPC_STLU r1, -PPC_MIN_STKFRM(r1) - mflr r0 - .cfi_register lr, r0 - PPC_STLU r1, -PPC_MIN_STKFRM(r1) - PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) -#ifdef __powerpc64__ - PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) -#endif - get_datapage r4 - addi r4, r4, VDSO_DATA_OFFSET - bl DOTSYM(\funct) - PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) -#ifdef __powerpc64__ - PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) -#endif - crclr so - mtlr r0 - .cfi_restore lr - addi r1, r1, 2 * PPC_MIN_STKFRM + .endif blr .cfi_endproc .endm diff --git a/arch/powerpc/kernel/vdso/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S index c875312274aa..397f290015bc 100644 --- a/arch/powerpc/kernel/vdso/gettimeofday.S +++ b/arch/powerpc/kernel/vdso/gettimeofday.S @@ -65,7 +65,7 @@ V_FUNCTION_END(__kernel_clock_getres) * */ V_FUNCTION_BEGIN(__kernel_time) - cvdso_call_time __c_kernel_time + cvdso_call __c_kernel_time call_time=1 V_FUNCTION_END(__kernel_time) /* Routines for restoring integer registers, called by the compiler. */ From 692b21d78046851e75dc25bba773189c670b49c2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Fri, 21 Jan 2022 16:30:34 +0000 Subject: [PATCH 065/179] powerpc/vdso: Move cvdso_call macro into gettimeofday.S Now that gettimeofday.S is unique, move cvdso_call macro into that file which is the only user. 
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/72720359d4c58e3a3b96dd74952741225faac3de.1642782130.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/vdso/gettimeofday.h | 52 +------------------- arch/powerpc/kernel/vdso/gettimeofday.S | 44 ++++++++++++++++- 2 files changed, 45 insertions(+), 51 deletions(-) diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index df00e91c9a90..f0a4cf01e85c 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -2,57 +2,9 @@ #ifndef _ASM_POWERPC_VDSO_GETTIMEOFDAY_H #define _ASM_POWERPC_VDSO_GETTIMEOFDAY_H +#ifndef __ASSEMBLY__ + #include <asm/page.h> - -#ifdef __ASSEMBLY__ - -#include <asm/ppc_asm.h> - -/* - * The macro sets two stack frames, one for the caller and one for the callee - * because there are no requirement for the caller to set a stack frame when - * calling VDSO so it may have omitted to set one, especially on PPC64 - */ - -.macro cvdso_call funct call_time=0 - .cfi_startproc - PPC_STLU r1, -PPC_MIN_STKFRM(r1) - mflr r0 - .cfi_register lr, r0 - PPC_STLU r1, -PPC_MIN_STKFRM(r1) - PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) -#ifdef __powerpc64__ - PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) -#endif - get_datapage r5 - .ifeq \call_time - addi r5, r5, VDSO_DATA_OFFSET - .else - addi r4, r5, VDSO_DATA_OFFSET - .endif - bl DOTSYM(\funct) - PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) -#ifdef __powerpc64__ - PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) -#endif - .ifeq \call_time - cmpwi r3, 0 - .endif - mtlr r0 - .cfi_restore lr - addi r1, r1, 2 * PPC_MIN_STKFRM - crclr so - .ifeq \call_time - beqlr+ - crset so - neg r3, r3 - .endif - blr - .cfi_endproc -.endm - -#else - #include <asm/vdso/timebase.h> #include <asm/barrier.h> #include <asm/unistd.h> diff --git a/arch/powerpc/kernel/vdso/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S index 397f290015bc..eb9c81e1c218 100644 --- a/arch/powerpc/kernel/vdso/gettimeofday.S +++ b/arch/powerpc/kernel/vdso/gettimeofday.S @@ -12,7 +12,49 @@ #include <asm/vdso_datapage.h> #include <asm/asm-offsets.h> #include <asm/unistd.h> -#include <asm/vdso/gettimeofday.h> + +/* + * The macro sets two stack frames, one for the caller and one for the callee + * because there are no requirement for the caller to set a stack frame when + * calling VDSO so it may have omitted to set one, especially on PPC64 + */ + +.macro cvdso_call funct call_time=0 + .cfi_startproc + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + mflr r0 + .cfi_register lr, r0 + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) +#ifdef __powerpc64__ + PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) +#endif + get_datapage r5 + .ifeq \call_time + addi r5, r5, VDSO_DATA_OFFSET + .else + addi r4, r5, VDSO_DATA_OFFSET + .endif + bl DOTSYM(\funct) + PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) +#ifdef __powerpc64__ + PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) +#endif + .ifeq \call_time + cmpwi r3, 0 + .endif + mtlr r0 + .cfi_restore lr + addi r1, r1, 2 * PPC_MIN_STKFRM + crclr so + .ifeq \call_time + beqlr+ + crset so + neg r3, r3 + .endif + blr + .cfi_endproc +.endm .text /* From 92e6dc257bd5771cf8db662e4d371fdb58fcbf43 Mon Sep 17 00:00:00 2001 From: Nathan Lynch <nathanl@linux.ibm.com> Date: Mon, 7 Feb 2022 16:12:47 -0600 Subject: [PATCH 066/179] powerpc/pseries: make pseries_devicetree_update() static 
pseries_devicetree_update() has only one call site, in the same file in which it is defined. Make it static. Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220207221247.354454-1-nathanl@linux.ibm.com --- arch/powerpc/include/asm/rtas.h | 1 - arch/powerpc/platforms/pseries/mobility.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 82e5b055fa2a..00531af17ce0 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -274,7 +274,6 @@ extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal); #ifdef CONFIG_PPC_PSERIES extern time64_t last_rtas_event; extern int clobbering_unread_rtas_event(void); -extern int pseries_devicetree_update(s32 scope); extern void post_mobility_fixup(void); int rtas_syscall_dispatch_ibm_suspend_me(u64 handle); #else diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 85033f392c78..94077fa91959 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -265,7 +265,7 @@ static int add_dt_node(struct device_node *parent_dn, __be32 drc_index) return rc; } -int pseries_devicetree_update(s32 scope) +static int pseries_devicetree_update(s32 scope) { char *rtas_buf; __be32 *data; From 2504e5b9827f7fc76ed0e4593adc852ac7a19851 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani <riteshh@linux.ibm.com> Date: Mon, 13 Sep 2021 11:47:20 +0530 Subject: [PATCH 067/179] selftests/powerpc/copyloops: Add memmove_64 test While debugging an issue, we wanted to check whether the arch-specific kernel memmove implementation is correct. This selftest could help test that.
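For context, a minimal sketch of how the selftest links the kernel code into a userspace binary: the _GLOBAL redefinition lives in the selftest's local asm/ppc_asm.h shim, and TEST_MEMMOVE is supplied with -D on the compile line, as the Makefile rule below shows:

	/* kernel's _GLOBAL(memmove) in mem_64.S assembles as test_memmove */
	#define _GLOBAL(A)	FUNC_START(test_ ## A)

	/* the validator then calls it through the macro chosen at build time */
	void *TEST_MEMMOVE(const void *s1, const void *s2, size_t n);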
Suggested-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> Suggested-by: Vaibhav Jain <vaibhav@linux.ibm.com> Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/57242c1fe7aba6b7f0fcd0490303bfd5f222ee00.1631512686.git.riteshh@linux.ibm.com --- .../selftests/powerpc/copyloops/.gitignore | 1 + .../selftests/powerpc/copyloops/Makefile | 9 ++- .../selftests/powerpc/copyloops/asm/ppc_asm.h | 1 + .../selftests/powerpc/copyloops/mem_64.S | 1 + .../powerpc/copyloops/memcpy_stubs.S | 8 +++ .../powerpc/copyloops/memmove_validate.c | 58 +++++++++++++++++++ 6 files changed, 77 insertions(+), 1 deletion(-) create mode 120000 tools/testing/selftests/powerpc/copyloops/mem_64.S create mode 100644 tools/testing/selftests/powerpc/copyloops/memcpy_stubs.S create mode 100644 tools/testing/selftests/powerpc/copyloops/memmove_validate.c diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore index 994b11af765c..7283e8b07b75 100644 --- a/tools/testing/selftests/powerpc/copyloops/.gitignore +++ b/tools/testing/selftests/powerpc/copyloops/.gitignore @@ -13,3 +13,4 @@ copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 copy_mc_64 +memmove_64 diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile index 3095b1f1c02b..77594e697f2f 100644 --- a/tools/testing/selftests/powerpc/copyloops/Makefile +++ b/tools/testing/selftests/powerpc/copyloops/Makefile @@ -13,7 +13,8 @@ TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \ copyuser_p7_t0 copyuser_p7_t1 \ memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \ memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \ - copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 + copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 \ + memmove_64 EXTRA_SOURCES := validate.c ../harness.c stubs.S @@ -56,3 +57,9 @@ $(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \ -D COPY_LOOP=test___copy_tofrom_user_base \ -D SELFTEST_CASE=$(subst copyuser_64_exc_t,,$(notdir $@)) \ -o $@ $^ + +$(OUTPUT)/memmove_64: mem_64.S memcpy_64.S memmove_validate.c ../harness.c \ + memcpy_stubs.S + $(CC) $(CPPFLAGS) $(CFLAGS) \ + -D TEST_MEMMOVE=test_memmove \ + -o $@ $^ diff --git a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h index 58c1cef3e399..003e1b3d9300 100644 --- a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h +++ b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h @@ -26,6 +26,7 @@ #define _GLOBAL(A) FUNC_START(test_ ## A) #define _GLOBAL_TOC(A) _GLOBAL(A) #define _GLOBAL_TOC_KASAN(A) _GLOBAL(A) +#define _GLOBAL_KASAN(A) _GLOBAL(A) #define PPC_MTOCRF(A, B) mtocrf A, B diff --git a/tools/testing/selftests/powerpc/copyloops/mem_64.S b/tools/testing/selftests/powerpc/copyloops/mem_64.S new file mode 120000 index 000000000000..db254c9a5f5c --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/mem_64.S @@ -0,0 +1 @@ +../../../../../arch/powerpc/lib/mem_64.S \ No newline at end of file diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_stubs.S b/tools/testing/selftests/powerpc/copyloops/memcpy_stubs.S new file mode 100644 index 000000000000..d9baa832fa49 --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/memcpy_stubs.S @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <asm/ppc_asm.h> + +FUNC_START(memcpy) + b test_memcpy + +FUNC_START(backwards_memcpy) 
+ b test_backwards_memcpy diff --git a/tools/testing/selftests/powerpc/copyloops/memmove_validate.c b/tools/testing/selftests/powerpc/copyloops/memmove_validate.c new file mode 100644 index 000000000000..1a23218b5757 --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/memmove_validate.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <malloc.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include "utils.h" + +void *TEST_MEMMOVE(const void *s1, const void *s2, size_t n); + +#define BUF_LEN 65536 +#define MAX_OFFSET 512 + +size_t max(size_t a, size_t b) +{ + if (a >= b) + return a; + return b; +} + +static int testcase_run(void) +{ + size_t i, src_off, dst_off, len; + + char *usermap = memalign(BUF_LEN, BUF_LEN); + char *kernelmap = memalign(BUF_LEN, BUF_LEN); + + assert(usermap != NULL); + assert(kernelmap != NULL); + + memset(usermap, 0, BUF_LEN); + memset(kernelmap, 0, BUF_LEN); + + for (i = 0; i < BUF_LEN; i++) { + usermap[i] = i & 0xff; + kernelmap[i] = i & 0xff; + } + + for (src_off = 0; src_off < MAX_OFFSET; src_off++) { + for (dst_off = 0; dst_off < MAX_OFFSET; dst_off++) { + for (len = 1; len < MAX_OFFSET - max(src_off, dst_off); len++) { + + memmove(usermap + dst_off, usermap + src_off, len); + TEST_MEMMOVE(kernelmap + dst_off, kernelmap + src_off, len); + if (memcmp(usermap, kernelmap, MAX_OFFSET) != 0) { + printf("memmove failed at %ld %ld %ld\n", + src_off, dst_off, len); + abort(); + } + } + } + } + return 0; +} + +int main(void) +{ + return test_harness(testcase_run, "memmove"); +} From 2354ad252b66695be02f4acd18e37bf6264f0464 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> Date: Fri, 11 Feb 2022 12:22:15 +0530 Subject: [PATCH 068/179] powerpc/mm: Update default hugetlb size early commit d9c234005227 ("Do not depend on MAX_ORDER when grouping pages by mobility") introduced pageblock_order which will be used to group pages better. The kernel now groups pages based on the value of HPAGE_SHIFT. Hence HPAGE_SHIFT should be set before we call set_pageblock_order. set_pageblock_order happens early in the boot and the default hugetlb page size should be initialized before that to compute the right pageblock_order value. Currently, the default hugetlb page size is set via arch_initcalls which happens late in the boot as shown via the below callstack: [c000000007383b10] [c000000001289328] hugetlbpage_init+0x2b8/0x2f8 [c000000007383bc0] [c0000000012749e4] do_one_initcall+0x14c/0x320 [c000000007383c90] [c00000000127505c] kernel_init_freeable+0x410/0x4e8 [c000000007383da0] [c000000000012664] kernel_init+0x30/0x15c [c000000007383e10] [c00000000000cf14] ret_from_kernel_thread+0x5c/0x64 and the pageblock_order initialization is done early during the boot. [c0000000018bfc80] [c0000000012ae120] set_pageblock_order+0x50/0x64 [c0000000018bfca0] [c0000000012b3d94] sparse_init+0x188/0x268 [c0000000018bfd60] [c000000001288bfc] initmem_init+0x28c/0x328 [c0000000018bfe50] [c00000000127b370] setup_arch+0x410/0x480 [c0000000018bfed0] [c00000000127401c] start_kernel+0xb8/0x934 [c0000000018bff90] [c00000000000d984] start_here_common+0x1c/0x98 Delaying default hugetlb page size initialization implies the kernel will initialize pageblock_order to (MAX_ORDER - 1) which is not an optimal value for mobility grouping. IIUC we always had this issue. But it was not a problem for hash translation mode because (MAX_ORDER - 1) is the same as HUGETLB_PAGE_ORDER (8) in the case of hash (16MB). 
With radix, HUGETLB_PAGE_ORDER will be 5 (2M size) and hence pageblock_order should be 5 instead of 8. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220211065215.101767-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/hugetlb.h | 5 ++++- arch/powerpc/mm/book3s64/hugetlbpage.c | 2 +- arch/powerpc/mm/hugetlbpage.c | 5 +---- arch/powerpc/mm/init_64.c | 4 ++++ 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 962708fa1017..6a1a1ac5743b 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -15,7 +15,7 @@ extern bool hugetlb_disabled; -void __init hugetlbpage_init_default(void); +void __init hugetlbpage_init_defaultsize(void); int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len); @@ -76,6 +76,9 @@ static inline void __init gigantic_hugetlb_cma_reserve(void) { } +static inline void __init hugetlbpage_init_defaultsize(void) +{ +} #endif /* CONFIG_HUGETLB_PAGE */ #endif /* _ASM_POWERPC_HUGETLB_H */ diff --git a/arch/powerpc/mm/book3s64/hugetlbpage.c b/arch/powerpc/mm/book3s64/hugetlbpage.c index ea8f83afb0ae..3bc0eb21b2a0 100644 --- a/arch/powerpc/mm/book3s64/hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/hugetlbpage.c @@ -150,7 +150,7 @@ void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr set_huge_pte_at(vma->vm_mm, addr, ptep, pte); } -void __init hugetlbpage_init_default(void) +void __init hugetlbpage_init_defaultsize(void) { /* Set default large page size. Currently, we pick 16M or 1M * depending on what is available diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index ddead41e2194..b642a5a8668f 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -664,10 +664,7 @@ static int __init hugetlbpage_init(void) configured = true; } - if (configured) { - if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE)) - hugetlbpage_init_default(); - } else + if (!configured) pr_info("Failed to initialize. Disabling HugeTLB"); return 0; diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 35f46bf54281..83c0ee9fbf05 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -59,6 +59,7 @@ #include <asm/sections.h> #include <asm/iommu.h> #include <asm/vdso.h> +#include <asm/hugetlb.h> #include <mm/mmu_decl.h> @@ -513,6 +514,9 @@ void __init mmu_early_init_devtree(void) } else hash__early_init_devtree(); + if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE)) + hugetlbpage_init_defaultsize(); + if (!(cur_cpu_spec->mmu_features & MMU_FTR_HPTE_TABLE) && !(cur_cpu_spec->mmu_features & MMU_FTR_TYPE_RADIX)) panic("kernel does not support any MMU type offered by platform"); From 5a72345e6a78120368fcc841b570331b6c5a50da Mon Sep 17 00:00:00 2001 From: Michael Ellerman <mpe@ellerman.id.au> Date: Fri, 11 Feb 2022 17:32:37 +1100 Subject: [PATCH 069/179] powerpc: Fix STACKTRACE=n build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our skiroot_defconfig doesn't enable FTRACE, and so doesn't get STACKTRACE enabled either. That leads to a build failure since commit 1614b2b11fab ("arch: Make ARCH_STACKWALK independent of STACKTRACE") made stacktrace.c build even when STACKTRACE=n. 
arch/powerpc/kernel/stacktrace.c: In function ‘handle_backtrace_ipi’: arch/powerpc/kernel/stacktrace.c:171:2: error: implicit declaration of function ‘nmi_cpu_backtrace’ 171 | nmi_cpu_backtrace(regs); | ^~~~~~~~~~~~~~~~~ arch/powerpc/kernel/stacktrace.c: In function ‘arch_trigger_cpumask_backtrace’: arch/powerpc/kernel/stacktrace.c:226:2: error: implicit declaration of function ‘nmi_trigger_cpumask_backtrace’ 226 | nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This happens because our headers haven't defined arch_trigger_cpumask_backtrace, which causes lib/nmi_backtrace.c not to build nmi_cpu_backtrace(). The code in question doesn't actually depend on STACKTRACE=y; that was just added because arch_trigger_cpumask_backtrace() lived in stacktrace.c for convenience. So drop the dependency on CONFIG_STACKTRACE, which causes lib/nmi_backtrace.c to build nmi_cpu_backtrace() etc. and fixes the build. Fixes: 1614b2b11fab ("arch: Make ARCH_STACKWALK independent of STACKTRACE") Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220212111349.2806972-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/nmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h index 160abcb8e9fa..ea0e487f87b1 100644 --- a/arch/powerpc/include/asm/nmi.h +++ b/arch/powerpc/include/asm/nmi.h @@ -9,7 +9,7 @@ long soft_nmi_interrupt(struct pt_regs *regs); static inline void arch_touch_nmi_watchdog(void) {} #endif -#if defined(CONFIG_NMI_IPI) && defined(CONFIG_STACKTRACE) +#ifdef CONFIG_NMI_IPI extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self); #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace From 38a1756861b8fc2ea9afb93e231194c642a4e261 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Sun, 13 Feb 2022 10:02:41 +0100 Subject: [PATCH 070/179] powerpc: Don't allow the use of EMIT_BUG_ENTRY with BUGFLAG_WARNING Warnings in assembly must use EMIT_WARN_ENTRY in order to generate the necessary entry in the exception table. Check in EMIT_BUG_ENTRY that flags don't include BUGFLAG_WARNING. This change avoids problems like the one fixed by commit fd1eaaaaa686 ("powerpc/64s: Use EMIT_WARN_ENTRY for SRR debug warnings"). 
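As a hypothetical example (not taken from the tree), a call site like

	1:	tdnei	r4, 0
	EMIT_BUG_ENTRY	1b, __FILE__, __LINE__, BUGFLAG_WARNING

now fails the build via the .err directive, pointing the author at EMIT_WARN_ENTRY, instead of silently emitting a warning entry that lacks the exception-table entry needed to resume after the trap.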
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/ddcb422102a37eb45f57694c7ef0ec6187964dff.1644742951.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/bug.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 02c08d1492f8..ecbae1832de3 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -11,7 +11,7 @@ #ifdef __ASSEMBLY__ #include <asm/asm-offsets.h> #ifdef CONFIG_DEBUG_BUGVERBOSE -.macro EMIT_BUG_ENTRY addr,file,line,flags +.macro __EMIT_BUG_ENTRY addr,file,line,flags .section __bug_table,"aw" 5001: .4byte \addr - 5001b, 5002f - 5001b .short \line, \flags @@ -22,7 +22,7 @@ .previous .endm #else -.macro EMIT_BUG_ENTRY addr,file,line,flags +.macro __EMIT_BUG_ENTRY addr,file,line,flags .section __bug_table,"aw" 5001: .4byte \addr - 5001b .short \flags @@ -33,7 +33,14 @@ .macro EMIT_WARN_ENTRY addr,file,line,flags EX_TABLE(\addr,\addr+4) - EMIT_BUG_ENTRY \addr,\file,\line,\flags + __EMIT_BUG_ENTRY \addr,\file,\line,\flags +.endm + +.macro EMIT_BUG_ENTRY addr,file,line,flags + .if \flags & 1 /* BUGFLAG_WARNING */ + .err /* Use EMIT_WARN_ENTRY for warnings */ + .endif + __EMIT_BUG_ENTRY \addr,\file,\line,\flags .endm #else /* !__ASSEMBLY__ */ From cb7356986db020c96f37532042fdae6706e81df7 Mon Sep 17 00:00:00 2001 From: Paul Menzel <pmenzel@molgen.mpg.de> Date: Mon, 14 Feb 2022 07:55:43 +0100 Subject: [PATCH 071/179] powerpc/boot: Add `otheros-too-big.bld` to .gitignore Currently, `git status` lists the file as untracked by git, so tell git to ignore it. Fixes: aa3bc365ee73 ("powerpc/ps3: Add check for otheros image size") Signed-off-by: Paul Menzel <pmenzel@molgen.mpg.de> Acked-by: Geoff Levand <geoff@infradead.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220214065543.198992-1-pmenzel@molgen.mpg.de --- arch/powerpc/boot/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore index 1eee61b82341..a4716d138cfc 100644 --- a/arch/powerpc/boot/.gitignore +++ b/arch/powerpc/boot/.gitignore @@ -16,6 +16,7 @@ kernel-vmlinux.strip.c kernel-vmlinux.strip.gz mktree otheros.bld +otheros-too-big.bld uImage cuImage.* dtbImage.* From 34d8dac807f0ee3dc42ab45bdb284a3caf2b5ed1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Tue, 15 Feb 2022 19:31:22 +0100 Subject: [PATCH 072/179] powerpc/ftrace: Also save r1 in ftrace_caller() Also save r1 in ftrace_caller() r1 is needed during unwinding when the function_graph tracer is active. Fixes: 830213786c49 ("powerpc/ftrace: directly call of function graph tracer by ftrace caller") Reported-by: Naveen N. 
Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/ff535e86d3a69376a6d89168511d4e403835f18b.1644949750.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/trace/ftrace_mprofile.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S index 56da60e98327..8443902d5a05 100644 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S @@ -173,6 +173,10 @@ _GLOBAL(ftrace_caller) beq ftrace_no_trace #endif + /* Save previous stack pointer (r1) */ + addi r8, r1, SWITCH_FRAME_SIZE + PPC_STL r8, GPR1(r1) + /* Get the _mcount() call site out of LR */ mflr r7 PPC_STL r7, _NIP(r1) From df45a55788286c541449d82ee09fef3ac5ff77a1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Tue, 15 Feb 2022 19:31:23 +0100 Subject: [PATCH 073/179] powerpc/ftrace: Add recursion protection in prepare_ftrace_return() function_graph_enter() does not provide any recursion protection. Add a protection in prepare_ftrace_return() in case function_graph_enter() calls something that gets function graph traced. Fixes: 830213786c49 ("powerpc/ftrace: directly call of function graph tracer by ftrace caller") Reported-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/74edf2ff0a60e66b0d9225a137100a86a0557032.1644949750.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/trace/ftrace.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 74a176e394ef..f21b8fbd418e 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -944,6 +944,7 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp) { unsigned long return_hooker; + int bit; if (unlikely(ftrace_graph_is_dead())) goto out; @@ -951,10 +952,16 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, if (unlikely(atomic_read(&current->tracing_graph_pause))) goto out; + bit = ftrace_test_recursion_trylock(ip, parent); + if (bit < 0) + goto out; + return_hooker = ppc_function_entry(return_to_handler); if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp)) parent = return_hooker; + + ftrace_test_recursion_unlock(bit); out: return parent; } From fc75f87337983229b7355d6b77f30fb6e7f359ee Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Tue, 15 Feb 2022 19:31:24 +0100 Subject: [PATCH 074/179] powerpc/ftrace: Have arch_ftrace_get_regs() return NULL unless FL_SAVE_REGS is set When FL_SAVE_REGS is not set we get here via ftrace_caller() which doesn't save all registers. ftrace_caller() explicitly clears regs.msr, so we can rely on it to know where we come from. We don't expect the MSR register to be 0 at all when ftrace is involved. Fixes: 40b035efe288 ("powerpc/ftrace: Implement CONFIG_DYNAMIC_FTRACE_WITH_ARGS") Reported-by: Naveen N. 
Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/2f9a7e898c93cc7438ef5ccd47cb9c3a9c5b53ef.1644949750.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ftrace.h | 3 ++- arch/powerpc/kernel/trace/ftrace_mprofile.S | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 70b457097098..ff034ae4e472 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -30,7 +30,8 @@ struct ftrace_regs { static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) { - return &fregs->regs; + /* We clear regs.msr in ftrace_call */ + return fregs->regs.msr ? &fregs->regs : NULL; } static __always_inline void ftrace_instruction_pointer_set(struct ftrace_regs *fregs, diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S index 8443902d5a05..eb077270ec2f 100644 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S @@ -205,6 +205,10 @@ _GLOBAL(ftrace_caller) PPC_STL r0, _LINK(r1) mr r4, r0 + /* Clear MSR to flag as ftrace_caller versus frace_regs_caller */ + li r8, 0 + PPC_STL r8, _MSR(r1) + /* Load &pt_regs in r6 for call below */ addi r6, r1 ,STACK_FRAME_OVERHEAD From 76b372814b088aeb76f0f753d968c8aa6d297f2a Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Tue, 15 Feb 2022 19:31:25 +0100 Subject: [PATCH 075/179] powerpc/ftrace: Style cleanup in ftrace_mprofile.S Add some line breaks to better match the file's style, add some space after comma and fix a couple of misplaced blanks. Suggested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/973506292d0c7b05c06530c8e11803ce38e5eda2.1644949750.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/trace/ftrace_mprofile.S | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S index eb077270ec2f..89639e64acd1 100644 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S @@ -87,8 +87,9 @@ _GLOBAL(ftrace_regs_caller) #endif #ifdef CONFIG_LIVEPATCH_64 - mr r14,r7 /* remember old NIP */ + mr r14, r7 /* remember old NIP */ #endif + /* Calculate ip from nip-4 into r3 for call below */ subi r3, r7, MCOUNT_INSN_SIZE @@ -102,7 +103,7 @@ _GLOBAL(ftrace_regs_caller) PPC_STL r11, _CCR(r1) /* Load &pt_regs in r6 for call below */ - addi r6, r1 ,STACK_FRAME_OVERHEAD + addi r6, r1, STACK_FRAME_OVERHEAD /* ftrace_call(r3, r4, r5, r6) */ .globl ftrace_regs_call @@ -113,6 +114,7 @@ ftrace_regs_call: /* Load ctr with the possibly modified NIP */ PPC_LL r3, _NIP(r1) mtctr r3 + #ifdef CONFIG_LIVEPATCH_64 cmpd r14, r3 /* has NIP been altered? 
*/ #endif @@ -196,7 +198,7 @@ _GLOBAL(ftrace_caller) #ifdef CONFIG_LIVEPATCH_64 SAVE_GPR(14, r1) - mr r14,r7 /* remember old NIP */ + mr r14, r7 /* remember old NIP */ #endif /* Calculate ip from nip-4 into r3 for call below */ subi r3, r7, MCOUNT_INSN_SIZE @@ -210,7 +212,7 @@ PPC_STL r8, _MSR(r1) /* Load &pt_regs in r6 for call below */ - addi r6, r1 ,STACK_FRAME_OVERHEAD + addi r6, r1, STACK_FRAME_OVERHEAD /* ftrace_call(r3, r4, r5, r6) */ .globl ftrace_call ftrace_call: PPC_LL r3, _NIP(r1) mtctr r3 + #ifdef CONFIG_LIVEPATCH_64 cmpd r14, r3 /* has NIP been altered? */ REST_GPR(14, r1) @@ -244,6 +247,7 @@ ftrace_call: /* Based on the cmpd above, if the NIP was altered handle livepatch */ bne- livepatch_handler #endif + bctr /* jump after _mcount site */ #ifdef CONFIG_LIVEPATCH_64 From bbbca72352bb9484bc057c91a408332b35ee8f4c Mon Sep 17 00:00:00 2001 From: Vaibhav Jain <vaibhav@linux.ibm.com> Date: Tue, 25 Jan 2022 01:52:04 +0530 Subject: [PATCH 076/179] powerpc/papr_scm: Implement initial support for injecting smart errors Presently PAPR doesn't support injecting smart errors on an NVDIMM. This makes testing the NVDIMM health reporting functionality difficult, as simulating NVDIMM health-related events needs a hacked-up qemu version. To solve this problem this patch proposes simulating a certain set of NVDIMM health-related events in papr_scm. Specifically 'fatal' health state and 'dirty' shutdown state. These errors can be injected via the user-space 'ndctl-inject-smart(1)' command. With the proposed patch and corresponding ndctl patches, the following command flow is expected: $ sudo ndctl list -DH -d nmem0 ... "health_state":"ok", "shutdown_state":"clean", ... # inject unsafe shutdown and fatal health error $ sudo ndctl inject-smart nmem0 -Uf ... "health_state":"fatal", "shutdown_state":"dirty", ... # uninject all errors $ sudo ndctl inject-smart nmem0 -N ... "health_state":"ok", "shutdown_state":"clean", ... The patch adds a new member 'health_bitmap_inject_mask' inside struct papr_scm_priv which is then bitwise ORed into the health bitmap fetched from the hypervisor. The value of 'health_bitmap_inject_mask' is accessible from sysfs at nmemX/papr/health_bitmap_inject. A new PDSM named 'SMART_INJECT' is proposed that accepts the newly introduced 'struct nd_papr_pdsm_smart_inject' as payload that is exchanged between libndctl and papr_scm to indicate the requested smart-error states. When processing the PDSM 'SMART_INJECT', papr_pdsm_smart_inject() constructs a pair of 'inject_mask' and 'clear_mask' bitmaps from the payload and applies them to the 'health_bitmap_inject_mask'. This ensures that, after being fetched from the hypervisor, the health_bitmap reflects the requested smart-error states.
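Expressed as a short sketch (names taken from the patch below), the override applied whenever health is queried is:

	u64 bitmap = ret[0] & ret[1];	/* from the H_SCM_HEALTH hcall */
	/* force-set the injected bits; equivalent to bitmap |= inject mask */
	bitmap = (bitmap & ~p->health_bitmap_inject_mask) |
		 p->health_bitmap_inject_mask;
	WRITE_ONCE(p->health_bitmap, bitmap);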
Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com> Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220124202204.1488346-1-vaibhav@linux.ibm.com --- Documentation/ABI/testing/sysfs-bus-papr-pmem | 12 +++ arch/powerpc/include/uapi/asm/papr_pdsm.h | 18 ++++ arch/powerpc/platforms/pseries/papr_scm.c | 90 ++++++++++++++++++- 3 files changed, 117 insertions(+), 3 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-bus-papr-pmem b/Documentation/ABI/testing/sysfs-bus-papr-pmem index 95254cec92bf..4ac0673901e7 100644 --- a/Documentation/ABI/testing/sysfs-bus-papr-pmem +++ b/Documentation/ABI/testing/sysfs-bus-papr-pmem @@ -61,3 +61,15 @@ Description: * "CchRHCnt" : Cache Read Hit Count * "CchWHCnt" : Cache Write Hit Count * "FastWCnt" : Fast Write Count + +What: /sys/bus/nd/devices/nmemX/papr/health_bitmap_inject +Date: Jan, 2022 +KernelVersion: v5.17 +Contact: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, nvdimm@lists.linux.dev, +Description: + (RO) Reports the health bitmap inject bitmap that is applied to + bitmap received from PowerVM via the H_SCM_HEALTH. This is used + to forcibly set specific bits returned from Hcall. These is then + used to simulate various health or shutdown states for an nvdimm + and are set by user-space tools like ndctl by issuing a PAPR DSM. + diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h index 82488b1e7276..17439925045c 100644 --- a/arch/powerpc/include/uapi/asm/papr_pdsm.h +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h @@ -116,6 +116,22 @@ struct nd_papr_pdsm_health { }; }; +/* Flags for injecting specific smart errors */ +#define PDSM_SMART_INJECT_HEALTH_FATAL (1 << 0) +#define PDSM_SMART_INJECT_BAD_SHUTDOWN (1 << 1) + +struct nd_papr_pdsm_smart_inject { + union { + struct { + /* One or more of PDSM_SMART_INJECT_ */ + __u32 flags; + __u8 fatal_enable; + __u8 unsafe_shutdown_enable; + }; + __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; + }; +}; + /* * Methods to be embedded in ND_CMD_CALL request. 
These are sent to the kernel * via 'nd_cmd_pkg.nd_command' member of the ioctl struct @@ -123,12 +139,14 @@ struct nd_papr_pdsm_health { enum papr_pdsm { PAPR_PDSM_MIN = 0x0, PAPR_PDSM_HEALTH, + PAPR_PDSM_SMART_INJECT, PAPR_PDSM_MAX, }; /* Maximal union that can hold all possible payload types */ union nd_pdsm_payload { struct nd_papr_pdsm_health health; + struct nd_papr_pdsm_smart_inject smart_inject; __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; } __packed; diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index f48e87ac89c9..20aafd387840 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -120,6 +120,10 @@ struct papr_scm_priv { /* length of the stat buffer as expected by phyp */ size_t stat_buffer_len; + + /* The bits which needs to be overridden */ + u64 health_bitmap_inject_mask; + }; static int papr_scm_pmem_flush(struct nd_region *nd_region, @@ -347,19 +351,29 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p, static int __drc_pmem_query_health(struct papr_scm_priv *p) { unsigned long ret[PLPAR_HCALL_BUFSIZE]; + u64 bitmap = 0; long rc; /* issue the hcall */ rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index); - if (rc != H_SUCCESS) { + if (rc == H_SUCCESS) + bitmap = ret[0] & ret[1]; + else if (rc == H_FUNCTION) + dev_info_once(&p->pdev->dev, + "Hcall H_SCM_HEALTH not implemented, assuming empty health bitmap"); + else { + dev_err(&p->pdev->dev, "Failed to query health information, Err:%ld\n", rc); return -ENXIO; } p->lasthealth_jiffies = jiffies; - p->health_bitmap = ret[0] & ret[1]; - + /* Allow injecting specific health bits via inject mask. */ + if (p->health_bitmap_inject_mask) + bitmap = (bitmap & ~p->health_bitmap_inject_mask) | + p->health_bitmap_inject_mask; + WRITE_ONCE(p->health_bitmap, bitmap); dev_dbg(&p->pdev->dev, "Queried dimm health info. 
Bitmap:0x%016lx Mask:0x%016lx\n", ret[0], ret[1]); @@ -669,6 +683,56 @@ out: return rc; } +/* Inject a smart error Add the dirty-shutdown-counter value to the pdsm */ +static int papr_pdsm_smart_inject(struct papr_scm_priv *p, + union nd_pdsm_payload *payload) +{ + int rc; + u32 supported_flags = 0; + u64 inject_mask = 0, clear_mask = 0; + u64 mask; + + /* Check for individual smart error flags and update inject/clear masks */ + if (payload->smart_inject.flags & PDSM_SMART_INJECT_HEALTH_FATAL) { + supported_flags |= PDSM_SMART_INJECT_HEALTH_FATAL; + if (payload->smart_inject.fatal_enable) + inject_mask |= PAPR_PMEM_HEALTH_FATAL; + else + clear_mask |= PAPR_PMEM_HEALTH_FATAL; + } + + if (payload->smart_inject.flags & PDSM_SMART_INJECT_BAD_SHUTDOWN) { + supported_flags |= PDSM_SMART_INJECT_BAD_SHUTDOWN; + if (payload->smart_inject.unsafe_shutdown_enable) + inject_mask |= PAPR_PMEM_SHUTDOWN_DIRTY; + else + clear_mask |= PAPR_PMEM_SHUTDOWN_DIRTY; + } + + dev_dbg(&p->pdev->dev, "[Smart-inject] inject_mask=%#llx clear_mask=%#llx\n", + inject_mask, clear_mask); + + /* Prevent concurrent access to dimm health bitmap related members */ + rc = mutex_lock_interruptible(&p->health_mutex); + if (rc) + return rc; + + /* Use inject/clear masks to set health_bitmap_inject_mask */ + mask = READ_ONCE(p->health_bitmap_inject_mask); + mask = (mask & ~clear_mask) | inject_mask; + WRITE_ONCE(p->health_bitmap_inject_mask, mask); + + /* Invalidate cached health bitmap */ + p->lasthealth_jiffies = 0; + + mutex_unlock(&p->health_mutex); + + /* Return the supported flags back to userspace */ + payload->smart_inject.flags = supported_flags; + + return sizeof(struct nd_papr_pdsm_health); +} + /* * 'struct pdsm_cmd_desc' * Identifies supported PDSMs' expected length of in/out payloads @@ -702,6 +766,12 @@ static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = { .size_out = sizeof(struct nd_papr_pdsm_health), .service = papr_pdsm_health, }, + + [PAPR_PDSM_SMART_INJECT] = { + .size_in = sizeof(struct nd_papr_pdsm_smart_inject), + .size_out = sizeof(struct nd_papr_pdsm_smart_inject), + .service = papr_pdsm_smart_inject, + }, /* Empty */ [PAPR_PDSM_MAX] = { .size_in = 0, @@ -838,6 +908,19 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, return 0; } +static ssize_t health_bitmap_inject_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvdimm *dimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(dimm); + + return sprintf(buf, "%#llx\n", + READ_ONCE(p->health_bitmap_inject_mask)); +} + +static DEVICE_ATTR_ADMIN_RO(health_bitmap_inject); + static ssize_t perf_stats_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -952,6 +1035,7 @@ static struct attribute *papr_nd_attributes[] = { &dev_attr_flags.attr, &dev_attr_perf_stats.attr, &dev_attr_dirty_shutdown.attr, + &dev_attr_health_bitmap_inject.attr, NULL, }; From 81df21de8fb45d3a55d41da9c7f5724797d51ce6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Tue, 15 Feb 2022 13:40:56 +0100 Subject: [PATCH 077/179] powerpc: Fix 'sparse' checking on PPC64le 'sparse' is architecture agnostic and knows nothing about the ELF ABI version. Just like it gets the arch, the powerpc type and the endianness from the Makefile, it also needs to get _CALL_ELF from there, otherwise it won't set the PPC64_ELF_ABI_v2 macro for PPC64le and won't check the correct code.
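To illustrate, the ABI selection that the checker needs to reproduce is essentially the following (a sketch of the logic in arch/powerpc/include/asm/types.h; the compiler predefines _CALL_ELF, while sparse only sees what the Makefile passes via -D):

	#ifdef __powerpc64__
	#if defined(_CALL_ELF) && _CALL_ELF == 2
	#define PPC64_ELF_ABI_v2
	#else
	#define PPC64_ELF_ABI_v1
	#endif
	#endif

Without -D_CALL_ELF=2, sparse silently takes the ELFv1 branch on PPC64le and checks the wrong half of every #ifdef PPC64_ELF_ABI_v2 block.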
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Reviewed-by: Kees Cook <keescook@chromium.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/ac1312f2451aa558bb2a8806b4d0aa2020f0c176.1644928018.git.christophe.leroy@csgroup.eu --- arch/powerpc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 5f16ac1583c5..e499f6023783 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -213,7 +213,7 @@ CHECKFLAGS += -m$(BITS) -D__powerpc__ -D__powerpc$(BITS)__ ifdef CONFIG_CPU_BIG_ENDIAN CHECKFLAGS += -D__BIG_ENDIAN__ else -CHECKFLAGS += -D__LITTLE_ENDIAN__ +CHECKFLAGS += -D__LITTLE_ENDIAN__ -D_CALL_ELF=2 endif ifdef CONFIG_476FPE_ERR46 From 5b23cb8cc6b0aab0535253cc2aa362572bab7072 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Tue, 15 Feb 2022 13:40:57 +0100 Subject: [PATCH 078/179] powerpc: Move and rename func_descr_t There are three architectures with function descriptors; try to have common names for the address they contain, in order to refactor some functions into generic functions later. powerpc has 'entry' ia64 has 'ip' parisc has 'addr' Vote for 'addr' and update 'func_descr_t' accordingly. Move it to asm/elf.h to have it at the same place on all three architectures, remove the typedef which hides its real type, and change it to a smoother name, 'struct func_desc'. Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Reviewed-by: Kees Cook <keescook@chromium.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/529b2ba1d001e8f628ef0d30e8044c9b3d0a4921.1644928018.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/code-patching.h | 2 +- arch/powerpc/include/asm/elf.h | 6 ++++++ arch/powerpc/include/asm/types.h | 6 ------ arch/powerpc/kernel/signal_64.c | 8 ++++---- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index e26080539c31..409483b2d0ce 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -118,7 +118,7 @@ static inline unsigned long ppc_function_entry(void *func) * function's descriptor. The first entry in the descriptor is the * address of the function text.
*/ - return ((func_descr_t *)func)->entry; + return ((struct func_desc *)func)->addr; #else return (unsigned long)func; #endif diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index b8425e3cfd81..971589a21bc0 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -176,4 +176,10 @@ do { \ /* Relocate the kernel image to @final_address */ void relocate(unsigned long final_address); +struct func_desc { + unsigned long addr; + unsigned long toc; + unsigned long env; +}; + #endif /* _ASM_POWERPC_ELF_H */ diff --git a/arch/powerpc/include/asm/types.h b/arch/powerpc/include/asm/types.h index f1630c553efe..97da77bc48c9 100644 --- a/arch/powerpc/include/asm/types.h +++ b/arch/powerpc/include/asm/types.h @@ -23,12 +23,6 @@ typedef __vector128 vector128; -typedef struct { - unsigned long entry; - unsigned long toc; - unsigned long env; -} func_descr_t; - #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_TYPES_H */ diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index d1e1fc0acbea..73d483b07ff3 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -936,11 +936,11 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, * descriptor is the entry address of signal and the second * entry is the TOC value we need to use. */ - func_descr_t __user *funct_desc_ptr = - (func_descr_t __user *) ksig->ka.sa.sa_handler; + struct func_desc __user *ptr = + (struct func_desc __user *)ksig->ka.sa.sa_handler; - err |= get_user(regs->ctr, &funct_desc_ptr->entry); - err |= get_user(regs->gpr[2], &funct_desc_ptr->toc); + err |= get_user(regs->ctr, &ptr->addr); + err |= get_user(regs->gpr[2], &ptr->toc); } /* enter the signal handler in native-endian mode */ From d3e32b997a4ca2e7be71cb770bcb2c000ee20b36 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Tue, 15 Feb 2022 13:40:58 +0100 Subject: [PATCH 079/179] powerpc: Use 'struct func_desc' instead of 'struct ppc64_opd_entry' 'struct ppc64_opd_entry' is somehow redundant with 'struct func_desc'; the latter is more correct/complete, as it includes the third field, which is unused.
So use 'struct func_desc' instead of 'struct ppc64_opd_entry'. Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Reviewed-by: Kees Cook <keescook@chromium.org> Reviewed-by: Daniel Axtens <dja@axtens.net> Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/34e76bac6cbe95a63ecd37df69fb7feb93b0ea7c.1644928018.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/sections.h | 4 ++-- arch/powerpc/kernel/module_64.c | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index 38f79e42bf3c..baca39f4c6d3 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -61,10 +61,10 @@ static inline int overlaps_kernel_text(unsigned long start, unsigned long end) #undef dereference_function_descriptor static inline void *dereference_function_descriptor(void *ptr) { - struct ppc64_opd_entry *desc = ptr; + struct func_desc *desc = ptr; void *p; - if (!get_kernel_nofault(p, (void *)&desc->funcaddr)) + if (!get_kernel_nofault(p, (void *)&desc->addr)) ptr = p; return ptr; } diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 5d77d3f5fbb5..d2082f236bc1 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -64,19 +64,19 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym) #else /* An address is address of the OPD entry, which contains address of fn. */ -typedef struct ppc64_opd_entry func_desc_t; +typedef struct func_desc func_desc_t; static func_desc_t func_desc(unsigned long addr) { - return *(struct ppc64_opd_entry *)addr; + return *(struct func_desc *)addr; } static unsigned long func_addr(unsigned long addr) { - return func_desc(addr).funcaddr; + return func_desc(addr).addr; } static unsigned long stub_func_addr(func_desc_t func) { - return func.funcaddr; + return func.addr; } static unsigned int local_entry_offset(const Elf64_Sym *sym) { @@ -187,7 +187,7 @@ static int relacmp(const void *_x, const void *_y) static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, const Elf64_Shdr *sechdrs) { - /* One extra reloc so it's always 0-funcaddr terminated */ + /* One extra reloc so it's always 0-addr terminated */ unsigned long relocs = 1; unsigned i; From 0a9c5ae279c963149df9a84588281d3d607f7a1f Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Tue, 15 Feb 2022 13:40:59 +0100 Subject: [PATCH 080/179] powerpc: Remove 'struct ppc64_opd_entry' 'struct ppc64_opd_entry' doesn't belong in uapi/asm/elf.h. It was initially in module_64.c, and commit 2d291e902791 ("Fix compile failure with non modular builds") moved it into asm/elf.h. But it was added outside of the __KERNEL__ section by mistake, so commit c3617f72036c ("UAPI: (Scripted) Disintegrate arch/powerpc/include/asm") moved it to uapi/asm/elf.h. Now that it is not used anymore by the kernel, remove it.
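For readers who have not met OPD entries before, a minimal userspace sketch shows why the descriptor has this shape (assumes a PPC64 big-endian ELFv1 toolchain, where a function pointer addresses the descriptor rather than the code; field names follow the kernel's struct func_desc):

	#include <stdio.h>

	struct func_desc {		/* same layout as the kernel's */
		unsigned long addr;	/* entry point of the function text */
		unsigned long toc;	/* TOC base the callee expects in r2 */
		unsigned long env;	/* environment pointer, unused by C */
	};

	static void target(void) { }

	int main(void)
	{
		/* On ELFv1, 'target' evaluates to the OPD entry's address */
		struct func_desc *desc = (struct func_desc *)target;

		printf("text=%#lx toc=%#lx\n", desc->addr, desc->toc);
		return 0;
	}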
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Reviewed-by: Kees Cook <keescook@chromium.org> Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/c309ccee65ec2e3802df7a7fe761d0a298584809.1644928018.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/uapi/asm/elf.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h index 860c59291bfc..308857123a08 100644 --- a/arch/powerpc/include/uapi/asm/elf.h +++ b/arch/powerpc/include/uapi/asm/elf.h @@ -289,12 +289,4 @@ typedef elf_fpreg_t elf_vsrreghalf_t32[ELF_NVSRHALFREG]; /* Keep this the last entry. */ #define R_PPC64_NUM 253 -/* There's actually a third entry here, but it's unused */ -struct ppc64_opd_entry -{ - unsigned long funcaddr; - unsigned long r2; -}; - - #endif /* _UAPI_ASM_POWERPC_ELF_H */ From 2fd986377d546bedaf27e36554dc9090d272f15d Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Tue, 15 Feb 2022 13:41:00 +0100 Subject: [PATCH 081/179] powerpc: Prepare func_desc_t for refactorisation In preparation of making func_desc_t generic, change the ELFv2 version to a struct containing 'addr' element. This allows using single helpers common to ELFv1 and ELFv2 and reduces the amount of #ifdef's Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Reviewed-by: Kees Cook <keescook@chromium.org> Acked-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/5c36105e08b27b98450535bff48d71b690c19739.1644928018.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/module_64.c | 36 ++++++++++++++++----------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index d2082f236bc1..f81bab3eb8e9 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -33,19 +33,17 @@ #ifdef PPC64_ELF_ABI_v2 /* An address is simply the address of the function. */ -typedef unsigned long func_desc_t; +typedef struct { + unsigned long addr; +} func_desc_t; static func_desc_t func_desc(unsigned long addr) { - return addr; -} -static unsigned long func_addr(unsigned long addr) -{ - return addr; -} -static unsigned long stub_func_addr(func_desc_t func) -{ - return func; + func_desc_t desc = { + .addr = addr, + }; + + return desc; } /* PowerPC64 specific values for the Elf64_Sym st_other field. 
*/ @@ -70,14 +68,6 @@ static func_desc_t func_desc(unsigned long addr) { return *(struct func_desc *)addr; } -static unsigned long func_addr(unsigned long addr) -{ - return func_desc(addr).addr; -} -static unsigned long stub_func_addr(func_desc_t func) -{ - return func.addr; -} static unsigned int local_entry_offset(const Elf64_Sym *sym) { @@ -93,6 +83,16 @@ void *dereference_module_function_descriptor(struct module *mod, void *ptr) } #endif +static unsigned long func_addr(unsigned long addr) +{ + return func_desc(addr).addr; +} + +static unsigned long stub_func_addr(func_desc_t func) +{ + return func.addr; +} + #define STUB_MAGIC 0x73747562 /* stub */ /* Like PPC32, we need little trampolines to do > 24-bit jumps (into From 41a88b45479da873bfc5d29ba1a545a780c5329a Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Tue, 15 Feb 2022 13:41:01 +0100 Subject: [PATCH 082/179] ia64: Rename 'ip' to 'addr' in 'struct fdesc' There are three architectures with function descriptors; try to have common names for the address they contain, in order to refactor some functions into generic functions later. powerpc has 'entry' ia64 has 'ip' parisc has 'addr' Vote for 'addr' and update 'struct fdesc' accordingly. Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Reviewed-by: Kees Cook <keescook@chromium.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/65b73ac614e4c002c5819d40b42f6f426d2ee52b.1644928018.git.christophe.leroy@csgroup.eu --- arch/ia64/include/asm/elf.h | 2 +- arch/ia64/include/asm/sections.h | 2 +- arch/ia64/kernel/module.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/ia64/include/asm/elf.h b/arch/ia64/include/asm/elf.h index 6629301a2620..2ef5f9966ad1 100644 --- a/arch/ia64/include/asm/elf.h +++ b/arch/ia64/include/asm/elf.h @@ -226,7 +226,7 @@ struct got_entry { * Layout of the Function Descriptor */ struct fdesc { - uint64_t ip; + uint64_t addr; uint64_t gp; }; diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h index 3a033d2008b3..35f24e52149a 100644 --- a/arch/ia64/include/asm/sections.h +++ b/arch/ia64/include/asm/sections.h @@ -35,7 +35,7 @@ static inline void *dereference_function_descriptor(void *ptr) struct fdesc *desc = ptr; void *p; - if (!get_kernel_nofault(p, (void *)&desc->ip)) + if (!get_kernel_nofault(p, (void *)&desc->addr)) ptr = p; return ptr; } diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c index 360f36b0eb3f..8f62cf97f691 100644 --- a/arch/ia64/kernel/module.c +++ b/arch/ia64/kernel/module.c @@ -602,15 +602,15 @@ get_fdesc (struct module *mod, uint64_t value, int *okp) return value; /* Look for existing function descriptor. */ - while (fdesc->ip) { - if (fdesc->ip == value) + while (fdesc->addr) { + if (fdesc->addr == value) return (uint64_t)fdesc; if ((uint64_t) ++fdesc >= mod->arch.opd->sh_addr + mod->arch.opd->sh_size) BUG(); } /* Create new one */ - fdesc->ip = value; + fdesc->addr = value; fdesc->gp = mod->arch.gp; return (uint64_t) fdesc; } From a257cacc38718c83cee003487e03197f237f5c3f Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Tue, 15 Feb 2022 13:41:02 +0100 Subject: [PATCH 083/179] asm-generic: Define CONFIG_HAVE_FUNCTION_DESCRIPTORS Replace HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR by a config option named CONFIG_HAVE_FUNCTION_DESCRIPTORS, and use it instead of the 'dereference_function_descriptor' macro to know whether an arch has function descriptors.
To limit churn in one of the following patches, use an #ifdef/#else construct with an empty first part instead of an #ifndef in asm-generic/sections.h. On powerpc, make sure the config option matches the ABI used by the compiler with a BUILD_BUG_ON(), and add the missing _CALL_ELF=2 when calling 'sparse' so that sparse sees the same piece of code as GCC. And include a helper to check whether an arch has function descriptors or not: have_function_descriptors(). Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Reviewed-by: Kees Cook <keescook@chromium.org> Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Acked-by: Helge Deller <deller@gmx.de> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/4a0f11fb0ea74a3197bc44dd7ba25e53a24fd03d.1644928018.git.christophe.leroy@csgroup.eu --- arch/Kconfig | 3 +++ arch/ia64/Kconfig | 1 + arch/ia64/include/asm/sections.h | 2 -- arch/parisc/Kconfig | 1 + arch/parisc/include/asm/sections.h | 2 -- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/sections.h | 2 -- arch/powerpc/kernel/ptrace/ptrace.c | 6 ++++++ include/asm-generic/sections.h | 8 +++++++- include/linux/kallsyms.h | 2 +- 10 files changed, 20 insertions(+), 8 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 678a80713b21..fe24174cb63c 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -205,6 +205,9 @@ config HAVE_FUNCTION_ERROR_INJECTION config HAVE_NMI bool +config HAVE_FUNCTION_DESCRIPTORS + bool + config TRACE_IRQFLAGS_SUPPORT bool diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index a7e01573abd8..da85c3b23b16 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -35,6 +35,7 @@ config IA64 select HAVE_SETUP_PER_CPU_AREA select TTY select HAVE_ARCH_TRACEHOOK + select HAVE_FUNCTION_DESCRIPTORS select HAVE_VIRT_CPU_ACCOUNTING select HUGETLB_PAGE_SIZE_VARIABLE if HUGETLB_PAGE select VIRT_TO_BUS diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h index 35f24e52149a..2460d365a057 100644 --- a/arch/ia64/include/asm/sections.h +++ b/arch/ia64/include/asm/sections.h @@ -27,8 +27,6 @@ extern char __start_gate_brl_fsys_bubble_down_patchlist[], __end_gate_brl_fsys_b extern char __start_unwind[], __end_unwind[]; extern char __start_ivt_text[], __end_ivt_text[]; -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 - #undef dereference_function_descriptor static inline void *dereference_function_descriptor(void *ptr) { diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 43c1c880def6..82e7ab1a9764 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -69,6 +69,7 @@ config PARISC select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_SOFTIRQ_ON_OWN_STACK if IRQSTACKS select TRACE_IRQFLAGS_SUPPORT + select HAVE_FUNCTION_DESCRIPTORS if 64BIT help The PA-RISC microprocessor is designed by Hewlett-Packard and used diff --git a/arch/parisc/include/asm/sections.h b/arch/parisc/include/asm/sections.h index bb52aea0cb21..c8092e4d94de 100644 --- a/arch/parisc/include/asm/sections.h +++ b/arch/parisc/include/asm/sections.h @@ -9,8 +9,6 @@ extern char __alt_instructions[], __alt_instructions_end[]; #ifdef CONFIG_64BIT -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 - #undef dereference_function_descriptor void *dereference_function_descriptor(void *); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b779603978e1..a0c9cd0bbc85 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -202,6 +202,7 @@ config PPC select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) select HAVE_FAST_GUP select
HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FUNCTION_DESCRIPTORS if PPC64 && !CPU_LITTLE_ENDIAN select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index baca39f4c6d3..7728a7a146c3 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -56,8 +56,6 @@ static inline int overlaps_kernel_text(unsigned long start, unsigned long end) #ifdef PPC64_ELF_ABI_v1 -#define HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR 1 - #undef dereference_function_descriptor static inline void *dereference_function_descriptor(void *ptr) { diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index c43f77e2ac31..1212a812a7ab 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -445,4 +445,10 @@ void __init pt_regs_check(void) * real registers. */ BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long)); + +#ifdef PPC64_ELF_ABI_v1 + BUILD_BUG_ON(!IS_ENABLED(CONFIG_HAVE_FUNCTION_DESCRIPTORS)); +#else + BUILD_BUG_ON(IS_ENABLED(CONFIG_HAVE_FUNCTION_DESCRIPTORS)); +#endif } diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 690f741764e1..3ef83e1aebee 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -59,11 +59,17 @@ extern char __noinstr_text_start[], __noinstr_text_end[]; extern __visible const void __nosave_begin, __nosave_end; /* Function descriptor handling (if any). Override in asm/sections.h */ -#ifndef dereference_function_descriptor +#ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS +#else #define dereference_function_descriptor(p) ((void *)(p)) #define dereference_kernel_function_descriptor(p) ((void *)(p)) #endif +static inline bool have_function_descriptors(void) +{ + return IS_ENABLED(CONFIG_HAVE_FUNCTION_DESCRIPTORS); +} + /** * memory_contains - checks if an object is contained within a memory region * @begin: virtual address of the beginning of the memory region diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 4176c7eca7b5..ce1bd2fbf23e 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -48,7 +48,7 @@ static inline int is_ksym_addr(unsigned long addr) static inline void *dereference_symbol_descriptor(void *ptr) { -#ifdef HAVE_DEREFERENCE_FUNCTION_DESCRIPTOR +#ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS struct module *mod; ptr = dereference_kernel_function_descriptor(ptr); From 0dc690e4ef5b901e9d4b53520854fbd5c749e09d Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Tue, 15 Feb 2022 13:41:03 +0100 Subject: [PATCH 084/179] asm-generic: Define 'func_desc_t' to commonly describe function descriptors We have three architectures using function descriptors, each with its own type and name. Add a common typedef that can be used in generic code. Also add a stub typedef for architectures without function descriptors, to avoid a forest of #ifdefs.
It replaces the similar 'func_desc_t' previously defined in arch/powerpc/kernel/module_64.c Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Reviewed-by: Kees Cook <keescook@chromium.org> Acked-by: Arnd Bergmann <arnd@arndb.de> Acked-by: Helge Deller <deller@gmx.de> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/f1f91b142b3c1082bdc1586ce71c9bac1e75213c.1644928018.git.christophe.leroy@csgroup.eu --- arch/ia64/include/asm/sections.h | 3 +++ arch/parisc/include/asm/sections.h | 5 +++++ arch/powerpc/include/asm/sections.h | 4 ++++ arch/powerpc/kernel/module_64.c | 8 -------- include/asm-generic/sections.h | 5 +++++ 5 files changed, 17 insertions(+), 8 deletions(-) diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h index 2460d365a057..3abe0562b01a 100644 --- a/arch/ia64/include/asm/sections.h +++ b/arch/ia64/include/asm/sections.h @@ -9,6 +9,9 @@ #include <linux/elf.h> #include <linux/uaccess.h> + +typedef struct fdesc func_desc_t; + #include <asm-generic/sections.h> extern char __phys_per_cpu_start[]; diff --git a/arch/parisc/include/asm/sections.h b/arch/parisc/include/asm/sections.h index c8092e4d94de..ace1d4047a0b 100644 --- a/arch/parisc/include/asm/sections.h +++ b/arch/parisc/include/asm/sections.h @@ -2,6 +2,11 @@ #ifndef _PARISC_SECTIONS_H #define _PARISC_SECTIONS_H +#ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS +#include <asm/elf.h> +typedef Elf64_Fdesc func_desc_t; +#endif + /* nothing to see, move along */ #include <asm-generic/sections.h> diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index 7728a7a146c3..fddfb3937868 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -6,6 +6,10 @@ #include <linux/elf.h> #include <linux/uaccess.h> +#ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS +typedef struct func_desc func_desc_t; +#endif + #include <asm-generic/sections.h> extern char __head_end[]; diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index f81bab3eb8e9..0337b46424bc 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -32,11 +32,6 @@ #ifdef PPC64_ELF_ABI_v2 -/* An address is simply the address of the function. */ -typedef struct { - unsigned long addr; -} func_desc_t; - static func_desc_t func_desc(unsigned long addr) { func_desc_t desc = { @@ -61,9 +56,6 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym) } #else -/* An address is address of the OPD entry, which contains address of fn. */ -typedef struct func_desc func_desc_t; - static func_desc_t func_desc(unsigned long addr) { return *(struct func_desc *)addr; diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 3ef83e1aebee..bbf97502470c 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -63,6 +63,11 @@ extern __visible const void __nosave_begin, __nosave_end; #else #define dereference_function_descriptor(p) ((void *)(p)) #define dereference_kernel_function_descriptor(p) ((void *)(p)) + +/* An address is simply the address of the function. 
*/ +typedef struct { + unsigned long addr; +} func_desc_t; #endif static inline bool have_function_descriptors(void) From e1478d8eaf27704db17a44dee4c53696ed01fc9c Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Tue, 15 Feb 2022 13:41:04 +0100 Subject: [PATCH 085/179] asm-generic: Refactor dereference_[kernel]_function_descriptor() dereference_function_descriptor() and dereference_kernel_function_descriptor() are identical on the three architectures implementing them. Make them common and put them out-of-line in kernel/extable.c which is one of the users and has similar type of functions. Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Reviewed-by: Kees Cook <keescook@chromium.org> Reviewed-by: Arnd Bergmann <arnd@arndb.de> Acked-by: Helge Deller <deller@gmx.de> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/449db09b2eba57f4ab05f80102a67d8675bc8bcd.1644928018.git.christophe.leroy@csgroup.eu --- arch/ia64/include/asm/sections.h | 19 ------------------- arch/parisc/include/asm/sections.h | 9 --------- arch/parisc/kernel/process.c | 21 --------------------- arch/powerpc/include/asm/sections.h | 23 ----------------------- include/asm-generic/sections.h | 2 ++ kernel/extable.c | 23 ++++++++++++++++++++++- 6 files changed, 24 insertions(+), 73 deletions(-) diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h index 3abe0562b01a..8e0875cf6071 100644 --- a/arch/ia64/include/asm/sections.h +++ b/arch/ia64/include/asm/sections.h @@ -30,23 +30,4 @@ extern char __start_gate_brl_fsys_bubble_down_patchlist[], __end_gate_brl_fsys_b extern char __start_unwind[], __end_unwind[]; extern char __start_ivt_text[], __end_ivt_text[]; -#undef dereference_function_descriptor -static inline void *dereference_function_descriptor(void *ptr) -{ - struct fdesc *desc = ptr; - void *p; - - if (!get_kernel_nofault(p, (void *)&desc->addr)) - ptr = p; - return ptr; -} - -#undef dereference_kernel_function_descriptor -static inline void *dereference_kernel_function_descriptor(void *ptr) -{ - if (ptr < (void *)__start_opd || ptr >= (void *)__end_opd) - return ptr; - return dereference_function_descriptor(ptr); -} - #endif /* _ASM_IA64_SECTIONS_H */ diff --git a/arch/parisc/include/asm/sections.h b/arch/parisc/include/asm/sections.h index ace1d4047a0b..33df42b5cc6d 100644 --- a/arch/parisc/include/asm/sections.h +++ b/arch/parisc/include/asm/sections.h @@ -12,13 +12,4 @@ typedef Elf64_Fdesc func_desc_t; extern char __alt_instructions[], __alt_instructions_end[]; -#ifdef CONFIG_64BIT - -#undef dereference_function_descriptor -void *dereference_function_descriptor(void *); - -#undef dereference_kernel_function_descriptor -void *dereference_kernel_function_descriptor(void *); -#endif - #endif diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index ea3d83b6fb62..2030c77592d3 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -263,27 +263,6 @@ __get_wchan(struct task_struct *p) return 0; } -#ifdef CONFIG_64BIT -void *dereference_function_descriptor(void *ptr) -{ - Elf64_Fdesc *desc = ptr; - void *p; - - if (!get_kernel_nofault(p, (void *)&desc->addr)) - ptr = p; - return ptr; -} - -void *dereference_kernel_function_descriptor(void *ptr) -{ - if (ptr < (void *)__start_opd || - ptr >= (void *)__end_opd) - return ptr; - - return dereference_function_descriptor(ptr); -} -#endif - static inline unsigned long brk_rnd(void) { return (get_random_int() & BRK_RND_MASK) << 
PAGE_SHIFT; diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index fddfb3937868..8be2c491c733 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -58,29 +58,6 @@ static inline int overlaps_kernel_text(unsigned long start, unsigned long end) (unsigned long)_stext < end; } -#ifdef PPC64_ELF_ABI_v1 - -#undef dereference_function_descriptor -static inline void *dereference_function_descriptor(void *ptr) -{ - struct func_desc *desc = ptr; - void *p; - - if (!get_kernel_nofault(p, (void *)&desc->addr)) - ptr = p; - return ptr; -} - -#undef dereference_kernel_function_descriptor -static inline void *dereference_kernel_function_descriptor(void *ptr) -{ - if (ptr < (void *)__start_opd || ptr >= (void *)__end_opd) - return ptr; - - return dereference_function_descriptor(ptr); -} -#endif /* PPC64_ELF_ABI_v1 */ - #endif #endif /* __KERNEL__ */ diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index bbf97502470c..d0f7bdd2fdf2 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -60,6 +60,8 @@ extern __visible const void __nosave_begin, __nosave_end; /* Function descriptor handling (if any). Override in asm/sections.h */ #ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS +void *dereference_function_descriptor(void *ptr); +void *dereference_kernel_function_descriptor(void *ptr); #else #define dereference_function_descriptor(p) ((void *)(p)) #define dereference_kernel_function_descriptor(p) ((void *)(p)) diff --git a/kernel/extable.c b/kernel/extable.c index b6f330f0fe74..394c39b86e38 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -3,6 +3,7 @@ Copyright (C) 2001 Rusty Russell, 2002 Rusty Russell IBM. */ +#include <linux/elf.h> #include <linux/ftrace.h> #include <linux/memory.h> #include <linux/extable.h> @@ -132,12 +133,32 @@ out: } /* - * On some architectures (PPC64, IA64) function pointers + * On some architectures (PPC64, IA64, PARISC) function pointers * are actually only tokens to some data that then holds the * real function address. As a result, to find if a function * pointer is part of the kernel text, we need to do some * special dereferencing first. */ +#ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS +void *dereference_function_descriptor(void *ptr) +{ + func_desc_t *desc = ptr; + void *p; + + if (!get_kernel_nofault(p, (void *)&desc->addr)) + ptr = p; + return ptr; +} + +void *dereference_kernel_function_descriptor(void *ptr) +{ + if (ptr < (void *)__start_opd || ptr >= (void *)__end_opd) + return ptr; + + return dereference_function_descriptor(ptr); +} +#endif + int func_ptr_is_kernel_text(void *ptr) { unsigned long addr; From 69b420ed8fd3917ac7073256b4929aa246b6fe31 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Tue, 15 Feb 2022 13:41:05 +0100 Subject: [PATCH 086/179] lkdtm: Force do_nothing() out of line LKDTM tests display that they run do_nothing() at a given address, but in reality do_nothing() is inlined into the caller. Force it out of line so that it really runs text at the displayed address.
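The same pitfall is easy to reproduce in userspace (a compiler-specific sketch, assuming GCC or Clang):

	#include <stdio.h>

	/* Without the attribute, the call below may be folded into main(),
	 * so the printed address would be text that never actually runs. */
	static __attribute__((noinline)) void do_nothing(void)
	{
		asm volatile("");	/* keep the empty body from being elided */
	}

	int main(void)
	{
		printf("running code at %p\n", (void *)do_nothing);
		do_nothing();
		return 0;
	}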
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Acked-by: Kees Cook <keescook@chromium.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/a5dcf4d2088e6aca47ab3b4c6d5c0f7fa064e25a.1644928018.git.christophe.leroy@csgroup.eu --- drivers/misc/lkdtm/perms.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c index 2dede2ef658f..60b3b2fe929d 100644 --- a/drivers/misc/lkdtm/perms.c +++ b/drivers/misc/lkdtm/perms.c @@ -21,7 +21,7 @@ /* This is non-const, so it will end up in the .data section. */ static u8 data_area[EXEC_SIZE]; -/* This is cost, so it will end up in the .rodata section. */ +/* This is const, so it will end up in the .rodata section. */ static const unsigned long rodata = 0xAA55AA55; /* This is marked __ro_after_init, so it should ultimately be .rodata. */ @@ -31,7 +31,7 @@ static unsigned long ro_after_init __ro_after_init = 0x55AA5500; * This just returns to the caller. It is designed to be copied into * non-executable memory regions. */ -static void do_nothing(void) +static noinline void do_nothing(void) { return; } From b64913394f123e819bffabc79a0e48f98e78dc5d Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Tue, 15 Feb 2022 13:41:06 +0100 Subject: [PATCH 087/179] lkdtm: Really write into kernel text in WRITE_KERN WRITE_KERN is supposed to overwrite some kernel text, namely the do_overwritten() function. But as it stands it overwrites do_overwritten()'s function descriptor, not the function text. Fix it by dereferencing the function descriptor to obtain the function text pointer. Export dereference_function_descriptor() for when LKDTM is built as a module. And make do_overwritten() noinline so that it is really do_overwritten() which is called by lkdtm_WRITE_KERN(). Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Acked-by: Kees Cook <keescook@chromium.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/31e58eaffb5bc51c07d8d4891d1982100ade8cfc.1644928018.git.christophe.leroy@csgroup.eu --- drivers/misc/lkdtm/perms.c | 8 +++++--- kernel/extable.c | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c index 60b3b2fe929d..035fcca441f0 100644 --- a/drivers/misc/lkdtm/perms.c +++ b/drivers/misc/lkdtm/perms.c @@ -10,6 +10,7 @@ #include <linux/mman.h> #include <linux/uaccess.h> #include <asm/cacheflush.h> +#include <asm/sections.h> /* Whether or not to fill the target memory area with do_nothing(). */ #define CODE_WRITE true @@ -37,7 +38,7 @@ static noinline void do_nothing(void) } /* Must immediately follow do_nothing for size calculuations to work out.
*/ -static void do_overwritten(void) +static noinline void do_overwritten(void) { pr_info("do_overwritten wasn't overwritten!\n"); return; @@ -113,8 +114,9 @@ void lkdtm_WRITE_KERN(void) size_t size; volatile unsigned char *ptr; - size = (unsigned long)do_overwritten - (unsigned long)do_nothing; - ptr = (unsigned char *)do_overwritten; + size = (unsigned long)dereference_function_descriptor(do_overwritten) - + (unsigned long)dereference_function_descriptor(do_nothing); + ptr = dereference_function_descriptor(do_overwritten); pr_info("attempting bad %zu byte write at %px\n", size, ptr); memcpy((void *)ptr, (unsigned char *)do_nothing, size); diff --git a/kernel/extable.c b/kernel/extable.c index 394c39b86e38..bda5e9761541 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -149,6 +149,7 @@ void *dereference_function_descriptor(void *ptr) ptr = p; return ptr; } +EXPORT_SYMBOL_GPL(dereference_function_descriptor); void *dereference_kernel_function_descriptor(void *ptr) { From 72a86433049dcfe918886645ac3d19c1eaaa67ab Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Tue, 15 Feb 2022 13:41:07 +0100 Subject: [PATCH 088/179] lkdtm: Fix execute_[user]_location() execute_location() and execute_user_location() intend to copy the do_nothing() text and execute it at a new location. However, as it stands they don't copy the do_nothing() function but its function descriptor, which still points to the original text. So in the end do_nothing() still executes at its original location, although through a copied function descriptor. So fix that by really copying the do_nothing() text, and build a new function descriptor by copying do_nothing()'s function descriptor and updating the target address with the new location. Also fix the displayed addresses by dereferencing do_nothing()'s function descriptor.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Acked-by: Kees Cook <keescook@chromium.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/4055839683d8d643cd99be121f4767c7c611b970.1644928018.git.christophe.leroy@csgroup.eu --- drivers/misc/lkdtm/perms.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c index 035fcca441f0..1cf24c4a79e9 100644 --- a/drivers/misc/lkdtm/perms.c +++ b/drivers/misc/lkdtm/perms.c @@ -44,19 +44,34 @@ static noinline void do_overwritten(void) return; } +static void *setup_function_descriptor(func_desc_t *fdesc, void *dst) +{ + if (!have_function_descriptors()) + return dst; + + memcpy(fdesc, do_nothing, sizeof(*fdesc)); + fdesc->addr = (unsigned long)dst; + barrier(); + + return fdesc; +} + static noinline void execute_location(void *dst, bool write) { - void (*func)(void) = dst; + void (*func)(void); + func_desc_t fdesc; + void *do_nothing_text = dereference_function_descriptor(do_nothing); - pr_info("attempting ok execution at %px\n", do_nothing); + pr_info("attempting ok execution at %px\n", do_nothing_text); do_nothing(); if (write == CODE_WRITE) { - memcpy(dst, do_nothing, EXEC_SIZE); + memcpy(dst, do_nothing_text, EXEC_SIZE); flush_icache_range((unsigned long)dst, (unsigned long)dst + EXEC_SIZE); } - pr_info("attempting bad execution at %px\n", func); + pr_info("attempting bad execution at %px\n", dst); + func = setup_function_descriptor(&fdesc, dst); func(); pr_err("FAIL: func returned\n"); } @@ -66,16 +81,19 @@ static void execute_user_location(void *dst) int copied; /* Intentionally crossing kernel/user memory boundary. */ - void (*func)(void) = dst; + void (*func)(void); + func_desc_t fdesc; + void *do_nothing_text = dereference_function_descriptor(do_nothing); - pr_info("attempting ok execution at %px\n", do_nothing); + pr_info("attempting ok execution at %px\n", do_nothing_text); do_nothing(); - copied = access_process_vm(current, (unsigned long)dst, do_nothing, + copied = access_process_vm(current, (unsigned long)dst, do_nothing_text, EXEC_SIZE, FOLL_WRITE); if (copied < EXEC_SIZE) return; - pr_info("attempting bad execution at %px\n", func); + pr_info("attempting bad execution at %px\n", dst); + func = setup_function_descriptor(&fdesc, dst); func(); pr_err("FAIL: func returned\n"); } @@ -153,7 +171,8 @@ void lkdtm_EXEC_VMALLOC(void) void lkdtm_EXEC_RODATA(void) { - execute_location(lkdtm_rodata_do_nothing, CODE_AS_IS); + execute_location(dereference_function_descriptor(lkdtm_rodata_do_nothing), + CODE_AS_IS); } void lkdtm_EXEC_USERSPACE(void) From 5e5a6c5441654d1b9e576ce4ca8a1759e701079e Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Tue, 15 Feb 2022 13:41:08 +0100 Subject: [PATCH 089/179] lkdtm: Add a test for function descriptors protection Add WRITE_OPD to check that you can't modify function descriptors. Gives the following result when function descriptors are not protected: lkdtm: Performing direct entry WRITE_OPD lkdtm: attempting bad 16 bytes write at c00000000269b358 lkdtm: FAIL: survived bad write lkdtm: do_nothing was hijacked! Looks like a standard compiler barrier() is not enough to force GCC to use the modified function descriptor. Had to add a fake empty inline assembly to force GCC to reload the function descriptor. 
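The reload trick generalises beyond LKDTM; a minimal standalone sketch of the constraint (not the LKDTM code itself, which follows in the diff):

	#include <stdio.h>

	static void do_nothing(void) { }

	int main(void)
	{
		void (*func)(void) = do_nothing;

		/* An empty asm whose "=m" output claims to overwrite 'func'
		 * forces the compiler to reload it from memory for the call
		 * below, instead of reusing a value cached before the asm. */
		asm("" : "=m"(func));
		func();
		return 0;
	}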
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Acked-by: Kees Cook <keescook@chromium.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/7eeba50d16a35e9d799820e43304150225f20197.1644928018.git.christophe.leroy@csgroup.eu --- drivers/misc/lkdtm/core.c | 1 + drivers/misc/lkdtm/lkdtm.h | 1 + drivers/misc/lkdtm/perms.c | 22 ++++++++++++++++++++++ tools/testing/selftests/lkdtm/tests.txt | 1 + 4 files changed, 25 insertions(+) diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c index f69b964b9952..e2228b6fc09b 100644 --- a/drivers/misc/lkdtm/core.c +++ b/drivers/misc/lkdtm/core.c @@ -149,6 +149,7 @@ static const struct crashtype crashtypes[] = { CRASHTYPE(WRITE_RO), CRASHTYPE(WRITE_RO_AFTER_INIT), CRASHTYPE(WRITE_KERN), + CRASHTYPE(WRITE_OPD), CRASHTYPE(REFCOUNT_INC_OVERFLOW), CRASHTYPE(REFCOUNT_ADD_OVERFLOW), CRASHTYPE(REFCOUNT_INC_NOT_ZERO_OVERFLOW), diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h index d6137c70ebbe..305fc2ec3f25 100644 --- a/drivers/misc/lkdtm/lkdtm.h +++ b/drivers/misc/lkdtm/lkdtm.h @@ -106,6 +106,7 @@ void __init lkdtm_perms_init(void); void lkdtm_WRITE_RO(void); void lkdtm_WRITE_RO_AFTER_INIT(void); void lkdtm_WRITE_KERN(void); +void lkdtm_WRITE_OPD(void); void lkdtm_EXEC_DATA(void); void lkdtm_EXEC_STACK(void); void lkdtm_EXEC_KMALLOC(void); diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c index 1cf24c4a79e9..2c6aba3ff32b 100644 --- a/drivers/misc/lkdtm/perms.c +++ b/drivers/misc/lkdtm/perms.c @@ -44,6 +44,11 @@ static noinline void do_overwritten(void) return; } +static noinline void do_almost_nothing(void) +{ + pr_info("do_nothing was hijacked!\n"); +} + static void *setup_function_descriptor(func_desc_t *fdesc, void *dst) { if (!have_function_descriptors()) @@ -144,6 +149,23 @@ void lkdtm_WRITE_KERN(void) do_overwritten(); } +void lkdtm_WRITE_OPD(void) +{ + size_t size = sizeof(func_desc_t); + void (*func)(void) = do_nothing; + + if (!have_function_descriptors()) { + pr_info("XFAIL: Platform doesn't use function descriptors.\n"); + return; + } + pr_info("attempting bad %zu bytes write at %px\n", size, do_nothing); + memcpy(do_nothing, do_almost_nothing, size); + pr_err("FAIL: survived bad write\n"); + + asm("" : "=m"(func)); + func(); +} + void lkdtm_EXEC_DATA(void) { execute_location(data_area, CODE_WRITE); diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt index 6b36b7f5dcf9..243c781f0780 100644 --- a/tools/testing/selftests/lkdtm/tests.txt +++ b/tools/testing/selftests/lkdtm/tests.txt @@ -44,6 +44,7 @@ ACCESS_NULL WRITE_RO WRITE_RO_AFTER_INIT WRITE_KERN +WRITE_OPD REFCOUNT_INC_OVERFLOW REFCOUNT_ADD_OVERFLOW REFCOUNT_INC_NOT_ZERO_OVERFLOW From 406a8c1d8fa59ae6a6462a6fb6ff892f6a4f7499 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Tue, 22 Feb 2022 15:05:30 +0100 Subject: [PATCH 090/179] powerpc: Remove remaining stab codes Following commit 12318163737c ("powerpc/32: Remove remaining .stabs annotations"), stab codes are not used anymore. Remove them.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/d8b33342d7454f6ca4f368f5206896558dfa06f4.1645538722.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc_asm.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 3c06a33b5da4..4dea2d963738 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -692,12 +692,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) #define evr30 30 #define evr31 31 -/* some stab codes */ -#define N_FUN 36 -#define N_RSYM 64 -#define N_SLINE 68 -#define N_SO 100 - #define RFSCV .long 0x4c0000a4 /* From 8b91cee5eadd2021f55e6775f2d50bd56d00c217 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin <npiggin@gmail.com> Date: Fri, 4 Feb 2022 13:53:48 +1000 Subject: [PATCH 091/179] powerpc/64s/hash: Make hash faults work in NMI context Hash faults are not resolved in NMI context, instead causing the access to fail. This is done because perf interrupts can get backtraces including walking the user stack, and taking a hash fault on those could deadlock on the HPTE lock if the perf interrupt hits while the same HPTE lock is being held by the hash fault code. The user access for the stack walking will notice the access failed and deal with that in the perf code. The reason to allow perf interrupts in is to better profile hash faults. The problem with this is any hash fault on a kernel access that happens in NMI context will crash, because kernel accesses must not fail. Hard lockups, system reset, and machine checks that access vmalloc space (including modules, stack backtracing and symbol lookup in modules, per-cpu data, etc.) could all run into this problem. Fix this by disallowing perf interrupts in the hash fault code (the direct hash fault is covered by MSR[EE]=0, so the PMI disable just needs to extend to the preload case). This simplifies the tricky logic in hash faults and perf, at the cost of reduced profiling of hash faults. perf can still latch addresses when interrupts are disabled, it just won't get the stack trace at that point, so it would still find hot spots, just sometimes with confusing stack chains. An alternative could be to allow perf interrupts here but always do the slowpath stack walk if we are in NMI context, but that slows down all perf interrupt stack walking on hash, and it does not remove as much tricky code.
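Distilled from the diff below, the resulting pattern around the H_PAGE_BUSY section is roughly this (a sketch, not the verbatim kernel code):

	unsigned long flags;

	/* Unlike local_irq_save(), this also masks performance monitor
	 * interrupts, so a perf "NMI" cannot arrive and take a hash fault
	 * while this CPU holds H_PAGE_BUSY. */
	powerpc_local_irq_pmu_save(flags);

	/* ... __hash_page_*() work that takes and drops H_PAGE_BUSY ... */

	powerpc_local_irq_pmu_restore(flags);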
Reported-by: Laurent Dufour <ldufour@linux.ibm.com> Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Tested-by: Laurent Dufour <ldufour@linux.ibm.com> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220204035348.545435-1-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 2 +- arch/powerpc/mm/book3s64/hash_utils.c | 54 ++++----------------------- arch/powerpc/perf/callchain.h | 9 +---- arch/powerpc/perf/callchain_64.c | 27 -------------- 4 files changed, 10 insertions(+), 82 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index fc28f46d2f9d..5404f7abbcf8 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -612,7 +612,7 @@ DECLARE_INTERRUPT_HANDLER_RAW(do_slb_fault); DECLARE_INTERRUPT_HANDLER(do_bad_segment_interrupt); /* hash_utils.c */ -DECLARE_INTERRUPT_HANDLER_RAW(do_hash_fault); +DECLARE_INTERRUPT_HANDLER(do_hash_fault); /* fault.c */ DECLARE_INTERRUPT_HANDLER(do_page_fault); diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 7abf82a698d3..985cabdd7f67 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1621,8 +1621,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap, } EXPORT_SYMBOL_GPL(hash_page); -DECLARE_INTERRUPT_HANDLER(__do_hash_fault); -DEFINE_INTERRUPT_HANDLER(__do_hash_fault) +DEFINE_INTERRUPT_HANDLER(do_hash_fault) { unsigned long ea = regs->dar; unsigned long dsisr = regs->dsisr; @@ -1681,35 +1680,6 @@ DEFINE_INTERRUPT_HANDLER(__do_hash_fault) } } -/* - * The _RAW interrupt entry checks for the in_nmi() case before - * running the full handler. - */ -DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault) -{ - /* - * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then - * don't call hash_page, just fail the fault. This is required to - * prevent re-entrancy problems in the hash code, namely perf - * interrupts hitting while something holds H_PAGE_BUSY, and taking a - * hash fault. See the comment in hash_preload(). - * - * We come here as a result of a DSI at a point where we don't want - * to call hash_page, such as when we are accessing memory (possibly - * user memory) inside a PMU interrupt that occurred while interrupts - * were soft-disabled. We want to invoke the exception handler for - * the access, or panic if there isn't a handler. - */ - if (unlikely(in_nmi())) { - do_bad_page_fault_segv(regs); - return 0; - } - - __do_hash_fault(regs); - - return 0; -} - #ifdef CONFIG_PPC_MM_SLICES static bool should_hash_preload(struct mm_struct *mm, unsigned long ea) { @@ -1776,26 +1746,18 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea, #endif /* CONFIG_PPC_64K_PAGES */ /* - * __hash_page_* must run with interrupts off, as it sets the - * H_PAGE_BUSY bit. It's possible for perf interrupts to hit at any - * time and may take a hash fault reading the user stack, see - * read_user_stack_slow() in the powerpc/perf code. + * __hash_page_* must run with interrupts off, including PMI interrupts + * off, as it sets the H_PAGE_BUSY bit. * - * If that takes a hash fault on the same page as we lock here, it - * will bail out when seeing H_PAGE_BUSY set, and retry the access - * leading to an infinite loop. 
- * - * Disabling interrupts here does not prevent perf interrupts, but it - * will prevent them taking hash faults (see the NMI test in - * do_hash_page), then read_user_stack's copy_from_user_nofault will - * fail and perf will fall back to read_user_stack_slow(), which - * walks the Linux page tables. + * It's otherwise possible for perf interrupts to hit at any time and + * may take a hash fault reading the user stack, which could take a + * hash miss and deadlock on the same H_PAGE_BUSY bit. * * Interrupts must also be off for the duration of the * mm_is_thread_local test and update, to prevent preempt running the * mm on another CPU (XXX: this may be racy vs kthread_use_mm). */ - local_irq_save(flags); + powerpc_local_irq_pmu_save(flags); /* Is that local to this CPU ? */ if (mm_is_thread_local(mm)) @@ -1820,7 +1782,7 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea, mm_ctx_user_psize(&mm->context), pte_val(*ptep)); - local_irq_restore(flags); + powerpc_local_irq_pmu_restore(flags); } /* diff --git a/arch/powerpc/perf/callchain.h b/arch/powerpc/perf/callchain.h index d6fa6e25234f..19a8d051ddf1 100644 --- a/arch/powerpc/perf/callchain.h +++ b/arch/powerpc/perf/callchain.h @@ -2,7 +2,6 @@ #ifndef _POWERPC_PERF_CALLCHAIN_H #define _POWERPC_PERF_CALLCHAIN_H -int read_user_stack_slow(const void __user *ptr, void *buf, int nb); void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, @@ -26,17 +25,11 @@ static inline int __read_user_stack(const void __user *ptr, void *ret, size_t size) { unsigned long addr = (unsigned long)ptr; - int rc; if (addr > TASK_SIZE - size || (addr & (size - 1))) return -EFAULT; - rc = copy_from_user_nofault(ret, ptr, size); - - if (IS_ENABLED(CONFIG_PPC64) && !radix_enabled() && rc) - return read_user_stack_slow(ptr, ret, size); - - return rc; + return copy_from_user_nofault(ret, ptr, size); } #endif /* _POWERPC_PERF_CALLCHAIN_H */ diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c index 8d0df4226328..488e8a21a11e 100644 --- a/arch/powerpc/perf/callchain_64.c +++ b/arch/powerpc/perf/callchain_64.c @@ -18,33 +18,6 @@ #include "callchain.h" -/* - * On 64-bit we don't want to invoke hash_page on user addresses from - * interrupt context, so if the access faults, we read the page tables - * to find which page (if any) is mapped and access it directly. Radix - * has no need for this so it doesn't use read_user_stack_slow. - */ -int read_user_stack_slow(const void __user *ptr, void *buf, int nb) -{ - - unsigned long addr = (unsigned long) ptr; - unsigned long offset; - struct page *page; - void *kaddr; - - if (get_user_page_fast_only(addr, FOLL_WRITE, &page)) { - kaddr = page_address(page); - - /* align address to page boundary */ - offset = addr & ~PAGE_MASK; - - memcpy(buf, kaddr + offset, nb); - put_page(page); - return 0; - } - return -EFAULT; -} - static int read_user_stack_64(const unsigned long __user *ptr, unsigned long *ret) { return __read_user_stack(ptr, ret, sizeof(*ret)); From 8a0edc72bec25fa62450bfef1a150483558e1289 Mon Sep 17 00:00:00 2001 From: Guo Zhengkui <guozhengkui@vivo.com> Date: Wed, 23 Feb 2022 15:54:23 +0800 Subject: [PATCH 092/179] powerpc/module_64: fix array_size.cocci warning Fix following coccicheck warning: ./arch/powerpc/kernel/module_64.c:432:40-41: WARNING: Use ARRAY_SIZE. ARRAY_SIZE(arr) is a macro provided by the kernel. 
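For context, the kernel's definition (from include/linux/kernel.h at the time, lightly abridged) is:

	#define ARRAY_SIZE(arr) \
		(sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))

__must_be_array() evaluates to 0 for a real array but breaks the build for a pointer, so ARRAY_SIZE(p) on a 'u32 *p' is a compile-time error, whereas the open-coded sizeof(p) / sizeof(p[0]) silently yields 2 on 64-bit.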
It makes sure that arr is an array, so it's safer than sizeof(arr) / sizeof(arr[0]) and more standard. Signed-off-by: Guo Zhengkui <guozhengkui@vivo.com> Reviewed-by: Russell Currey <ruscur@russell.cc> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220223075426.20939-1-guozhengkui@vivo.com --- arch/powerpc/kernel/module_64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 6a45e6ddbe58..94d14cf99bca 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -14,6 +14,7 @@ #include <linux/ftrace.h> #include <linux/bug.h> #include <linux/uaccess.h> +#include <linux/kernel.h> #include <asm/module.h> #include <asm/firmware.h> #include <asm/code-patching.h> @@ -429,7 +430,7 @@ static inline int create_stub(const Elf64_Shdr *sechdrs, if (is_mprofile_ftrace_call(name)) return create_ftrace_stub(entry, addr, me); - for (i = 0; i < sizeof(ppc64_stub_insns) / sizeof(u32); i++) { + for (i = 0; i < ARRAY_SIZE(ppc64_stub_insns); i++) { if (patch_instruction(&entry->jump[i], ppc_inst(ppc64_stub_insns[i]))) return 0; From f961e20f15ed35e9ca154a099897d600b78b0311 Mon Sep 17 00:00:00 2001 From: Athira Rajeev <atrajeev@linux.vnet.ibm.com> Date: Thu, 27 Jan 2022 12:49:53 +0530 Subject: [PATCH 093/179] selftests/powerpc/pmu: Include mmap_buffer field as part of struct event To enable the capturing of samples as part of perf event, add a new field "mmap_buffer" to "struct event". This field is a place-holder for sample collection Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220127072012.662451-2-kjain@linux.ibm.com --- tools/testing/selftests/powerpc/pmu/event.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/powerpc/pmu/event.h b/tools/testing/selftests/powerpc/pmu/event.h index 302eaab51706..23d20340a160 100644 --- a/tools/testing/selftests/powerpc/pmu/event.h +++ b/tools/testing/selftests/powerpc/pmu/event.h @@ -22,6 +22,11 @@ struct event { u64 running; u64 enabled; } result; + /* + * mmap buffer used while recording sample. + * Accessed as "struct perf_event_mmap_page" + */ + void *mmap_buffer; }; void event_init(struct event *e, u64 config); From c315669e2fbd71bb9387066f60f0d91b0ceb28f3 Mon Sep 17 00:00:00 2001 From: Athira Rajeev <atrajeev@linux.vnet.ibm.com> Date: Thu, 27 Jan 2022 12:49:54 +0530 Subject: [PATCH 094/179] selftests/powerpc/pmu: Add support for perf sampling tests Add support functions for enabling perf sampling test in a new folder "sampling_tests" under "selftests/powerpc/pmu". This includes support functions for allocating and processing the mmap buffer. These functions are added/defined in "sampling_tests/misc.*" files. Also updates the corresponding Makefiles in "selftests/powerpc" and "sampling_tests" folder. 
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com> [mpe: Drop unneeded bits from the Makefile] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220127072012.662451-3-kjain@linux.ibm.com --- tools/testing/selftests/powerpc/pmu/Makefile | 11 +- .../powerpc/pmu/sampling_tests/Makefile | 7 ++ .../powerpc/pmu/sampling_tests/misc.c | 105 ++++++++++++++++++ .../powerpc/pmu/sampling_tests/misc.h | 9 ++ 4 files changed, 130 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile index 904672fb78dd..edbd96d3b2ab 100644 --- a/tools/testing/selftests/powerpc/pmu/Makefile +++ b/tools/testing/selftests/powerpc/pmu/Makefile @@ -8,7 +8,7 @@ EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c top_srcdir = ../../../../.. include ../../lib.mk -all: $(TEST_GEN_PROGS) ebb +all: $(TEST_GEN_PROGS) ebb sampling_tests $(TEST_GEN_PROGS): $(EXTRA_SOURCES) @@ -26,25 +26,32 @@ DEFAULT_RUN_TESTS := $(RUN_TESTS) override define RUN_TESTS $(DEFAULT_RUN_TESTS) TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests + TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests endef DEFAULT_EMIT_TESTS := $(EMIT_TESTS) override define EMIT_TESTS $(DEFAULT_EMIT_TESTS) TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests + TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests endef DEFAULT_INSTALL_RULE := $(INSTALL_RULE) override define INSTALL_RULE $(DEFAULT_INSTALL_RULE) TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install + TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install endef clean: $(RM) $(TEST_GEN_PROGS) $(OUTPUT)/loop.o TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean + TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean ebb: TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all -.PHONY: all run_tests clean ebb +sampling_tests: + TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all + +.PHONY: all run_tests clean ebb sampling_tests diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile new file mode 100644 index 000000000000..ac3d03ffc428 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -m64 + +top_srcdir = ../../../../../.. 
+include ../../../lib.mk + +$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c misc.c misc.h diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c new file mode 100644 index 000000000000..4779b107f43b --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <unistd.h> +#include <sys/syscall.h> +#include <string.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <stdlib.h> +#include <ctype.h> + +#include "misc.h" + +#define PAGE_SIZE sysconf(_SC_PAGESIZE) + +/* + * Allocate mmap buffer of "mmap_pages" number of + * pages. + */ +void *event_sample_buf_mmap(int fd, int mmap_pages) +{ + size_t page_size = sysconf(_SC_PAGESIZE); + size_t mmap_size; + void *buff; + + if (mmap_pages <= 0) + return NULL; + + if (fd <= 0) + return NULL; + + mmap_size = page_size * (1 + mmap_pages); + buff = mmap(NULL, mmap_size, + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + + if (buff == MAP_FAILED) { + perror("mmap() failed."); + return NULL; + } + return buff; +} + +/* + * Post process the mmap buffer. + * - If sample_count != NULL then return count of total + * number of samples present in the mmap buffer. + * - If sample_count == NULL then return the address + * of first sample from the mmap buffer + */ +void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count) +{ + size_t page_size = sysconf(_SC_PAGESIZE); + struct perf_event_header *header = sample_buff + page_size; + struct perf_event_mmap_page *metadata_page = sample_buff; + unsigned long data_head, data_tail; + + /* + * PERF_RECORD_SAMPLE: + * struct { + * struct perf_event_header hdr; + * u64 data[]; + * }; + */ + + data_head = metadata_page->data_head; + /* sync memory before reading sample */ + mb(); + data_tail = metadata_page->data_tail; + + /* Check for sample_count */ + if (sample_count) + *sample_count = 0; + + while (1) { + /* + * Reads the mmap data buffer by moving + * the data_tail to know the last read data. + * data_head points to head in data buffer. + * refer "struct perf_event_mmap_page" in + * "include/uapi/linux/perf_event.h". + */ + if (data_head - data_tail < sizeof(header)) + return NULL; + + data_tail += sizeof(header); + if (header->type == PERF_RECORD_SAMPLE) { + *size = (header->size - sizeof(header)); + if (!sample_count) + return sample_buff + page_size + data_tail; + data_tail += *size; + *sample_count += 1; + } else { + *size = (header->size - sizeof(header)); + if ((metadata_page->data_tail + *size) > metadata_page->data_head) + data_tail = metadata_page->data_head; + else + data_tail += *size; + } + header = (struct perf_event_header *)((void *)header + header->size); + } + return NULL; +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h new file mode 100644 index 000000000000..291f9adba817 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2022, Athira Rajeev, IBM Corp. 
+ */
+
+#include "../event.h"
+
+void *event_sample_buf_mmap(int fd, int mmap_pages);
+void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count);

From 6523dce86222451e5ca2df8a46597a217084bfdf Mon Sep 17 00:00:00 2001
From: Madhavan Srinivasan <maddy@linux.ibm.com>
Date: Thu, 27 Jan 2022 12:49:55 +0530
Subject: [PATCH 095/179] selftests/powerpc/pmu: Add macros to parse event codes

Each platform has a raw event encoding format which specifies the bit
positions for the different fields. The fields from the event code get
translated into performance monitoring mode control register (MMCRx)
settings. Add macros to extract the individual fields from the event
code, and add functions for sanity checks, since the testcases are
currently only supported on power9 and power10.

Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
[mpe: Read PVR directly rather than using /proc/cpuinfo]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220127072012.662451-4-kjain@linux.ibm.com
---
 tools/testing/selftests/powerpc/include/reg.h |   4 +
 .../powerpc/pmu/sampling_tests/misc.c         | 132 ++++++++++++++++++
 .../powerpc/pmu/sampling_tests/misc.h         |  36 +++++
 .../selftests/powerpc/security/spectre_v2.c   |   2 -
 4 files changed, 172 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h
index c0f2742a3a59..c422be8a42b2 100644
--- a/tools/testing/selftests/powerpc/include/reg.h
+++ b/tools/testing/selftests/powerpc/include/reg.h
@@ -52,6 +52,9 @@
 #define SPRN_TFHAR     0x80    /* Transaction Failure Handler Addr */
 #define SPRN_TAR       0x32f   /* Target Address Register */
 
+#define PVR_VER(pvr)   (((pvr) >> 16) & 0xFFFF)
+#define SPRN_PVR       0x11F
+
 #define SPRN_DSCR_PRIV 0x11    /* Privilege State DSCR */
 #define SPRN_DSCR      0x03    /* Data Stream Control Register */
 #define SPRN_PPR       896     /* Program Priority Register */
@@ -84,6 +87,7 @@
 #define TEXASR_ROT     0x0000000002000000
 
 /* MSR register bits */
+#define MSR_HV         (1ul << 60)     /* Hypervisor state */
 #define MSR_TS_S_LG    33              /* Trans Mem state: Suspended */
 #define MSR_TS_T_LG    34              /* Trans Mem state: Active */
 
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c
index 4779b107f43b..a86d7d125955 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2022, Athira Rajeev, IBM Corp.
+ * Copyright 2022, Madhavan Srinivasan, IBM Corp.
*/ #include <unistd.h> @@ -16,6 +17,137 @@ #define PAGE_SIZE sysconf(_SC_PAGESIZE) +/* Storage for platform version */ +int pvr; +u64 platform_extended_mask; + +/* Mask and Shift for Event code fields */ +int ev_mask_pmcxsel, ev_shift_pmcxsel; //pmcxsel field +int ev_mask_marked, ev_shift_marked; //marked filed +int ev_mask_comb, ev_shift_comb; //combine field +int ev_mask_unit, ev_shift_unit; //unit field +int ev_mask_pmc, ev_shift_pmc; //pmc field +int ev_mask_cache, ev_shift_cache; //Cache sel field +int ev_mask_sample, ev_shift_sample; //Random sampling field +int ev_mask_thd_sel, ev_shift_thd_sel; //thresh_sel field +int ev_mask_thd_start, ev_shift_thd_start; //thresh_start field +int ev_mask_thd_stop, ev_shift_thd_stop; //thresh_stop field +int ev_mask_thd_cmp, ev_shift_thd_cmp; //thresh cmp field +int ev_mask_sm, ev_shift_sm; //SDAR mode field +int ev_mask_rsq, ev_shift_rsq; //radix scope qual field +int ev_mask_l2l3, ev_shift_l2l3; //l2l3 sel field +int ev_mask_mmcr3_src, ev_shift_mmcr3_src; //mmcr3 field + +static void init_ev_encodes(void) +{ + ev_mask_pmcxsel = 0xff; + ev_shift_pmcxsel = 0; + ev_mask_marked = 1; + ev_shift_marked = 8; + ev_mask_unit = 0xf; + ev_shift_unit = 12; + ev_mask_pmc = 0xf; + ev_shift_pmc = 16; + ev_mask_sample = 0x1f; + ev_shift_sample = 24; + ev_mask_thd_sel = 0x7; + ev_shift_thd_sel = 29; + ev_mask_thd_start = 0xf; + ev_shift_thd_start = 36; + ev_mask_thd_stop = 0xf; + ev_shift_thd_stop = 32; + + switch (pvr) { + case POWER10: + ev_mask_rsq = 1; + ev_shift_rsq = 9; + ev_mask_comb = 3; + ev_shift_comb = 10; + ev_mask_cache = 3; + ev_shift_cache = 20; + ev_mask_sm = 0x3; + ev_shift_sm = 22; + ev_mask_l2l3 = 0x1f; + ev_shift_l2l3 = 40; + ev_mask_mmcr3_src = 0x7fff; + ev_shift_mmcr3_src = 45; + break; + case POWER9: + ev_mask_comb = 3; + ev_shift_comb = 10; + ev_mask_cache = 0xf; + ev_shift_cache = 20; + ev_mask_thd_cmp = 0x3ff; + ev_shift_thd_cmp = 40; + ev_mask_sm = 0x3; + ev_shift_sm = 50; + break; + default: + FAIL_IF_EXIT(1); + } +} + +/* Return the extended regs mask value */ +static u64 perf_get_platform_reg_mask(void) +{ + if (have_hwcap2(PPC_FEATURE2_ARCH_3_1)) + return PERF_POWER10_MASK; + if (have_hwcap2(PPC_FEATURE2_ARCH_3_00)) + return PERF_POWER9_MASK; + + return -1; +} + +int check_extended_regs_support(void) +{ + int fd; + struct event event; + + event_init(&event, 0x1001e); + + event.attr.type = 4; + event.attr.sample_period = 1; + event.attr.disabled = 1; + event.attr.sample_type = PERF_SAMPLE_REGS_INTR; + event.attr.sample_regs_intr = platform_extended_mask; + + fd = event_open(&event); + if (fd != -1) + return 0; + + return -1; +} + +int check_pvr_for_sampling_tests(void) +{ + pvr = PVR_VER(mfspr(SPRN_PVR)); + + platform_extended_mask = perf_get_platform_reg_mask(); + + /* + * Check for supported platforms + * for sampling test + */ + if ((pvr != POWER10) && (pvr != POWER9)) + goto out; + + /* + * Check PMU driver registered by looking for + * PPC_FEATURE2_EBB bit in AT_HWCAP2 + */ + if (!have_hwcap2(PPC_FEATURE2_EBB)) + goto out; + + /* check if platform supports extended regs */ + if (check_extended_regs_support()) + goto out; + + init_ev_encodes(); + return 0; +out: + printf("%s: Sampling tests un-supported\n", __func__); + return -1; +} /* * Allocate mmap buffer of "mmap_pages" number of * pages. 
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
index 291f9adba817..c8f64e8e749c 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
@@ -1,9 +1,45 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright 2022, Athira Rajeev, IBM Corp.
+ * Copyright 2022, Madhavan Srinivasan, IBM Corp.
  */
 
 #include "../event.h"
 
+#define POWER10 0x80
+#define POWER9 0x4e
+#define PERF_POWER9_MASK 0x7f8ffffffffffff
+#define PERF_POWER10_MASK 0x7ffffffffffffff
+
+extern int ev_mask_pmcxsel, ev_shift_pmcxsel;
+extern int ev_mask_marked, ev_shift_marked;
+extern int ev_mask_comb, ev_shift_comb;
+extern int ev_mask_unit, ev_shift_unit;
+extern int ev_mask_pmc, ev_shift_pmc;
+extern int ev_mask_cache, ev_shift_cache;
+extern int ev_mask_sample, ev_shift_sample;
+extern int ev_mask_thd_sel, ev_shift_thd_sel;
+extern int ev_mask_thd_start, ev_shift_thd_start;
+extern int ev_mask_thd_stop, ev_shift_thd_stop;
+extern int ev_mask_thd_cmp, ev_shift_thd_cmp;
+extern int ev_mask_sm, ev_shift_sm;
+extern int ev_mask_rsq, ev_shift_rsq;
+extern int ev_mask_l2l3, ev_shift_l2l3;
+extern int ev_mask_mmcr3_src, ev_shift_mmcr3_src;
+extern int pvr;
+extern u64 platform_extended_mask;
+extern int check_pvr_for_sampling_tests(void);
+
+/*
+ * Event code field extraction macro.
+ * A raw event code is a combination of multiple
+ * fields. Macro to extract individual fields.
+ *
+ * x - Raw event code value
+ * y - Field to extract
+ */
+#define EV_CODE_EXTRACT(x, y) \
+	((x >> ev_shift_##y) & ev_mask_##y)
+
 void *event_sample_buf_mmap(int fd, int mmap_pages);
 void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count);
diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c
index 83647b8277e7..d42ca8c676c3 100644
--- a/tools/testing/selftests/powerpc/security/spectre_v2.c
+++ b/tools/testing/selftests/powerpc/security/spectre_v2.c
@@ -125,8 +125,6 @@ static enum spectre_v2_state get_sysfs_state(void)
 #define PM_BR_PRED_PCACHE	0x048a0 // P9 only
 #define PM_BR_MPRED_PCACHE	0x048b0 // P9 only
 
-#define SPRN_PVR 287
-
 int spectre_v2_test(void)
 {
 	enum spectre_v2_state state;

From 5f6c3061af7ca3b0f9f8a20ec7a445671f7e6b5a Mon Sep 17 00:00:00 2001
From: Kajol Jain <kjain@linux.ibm.com>
Date: Thu, 27 Jan 2022 12:50:00 +0530
Subject: [PATCH 096/179] selftests/powerpc/pmu: Add utility functions to post
 process the mmap buffer

Add a couple of basic utility functions to post process the mmap
buffer: a function to read the total number of samples present in the
mmap buffer, and a function to get the address of the first sample.

Add the function "get_intr_regs", which returns a pointer to the
interrupt registers present in the sample in case the sample type
PERF_SAMPLE_REGS_INTR is set. Also add the function "get_reg_value",
which can be used to read any interrupt register value from a given
sample.
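A minimal usage sketch (illustrative only, assuming a sampling event
that has already been opened, overflowed and disabled as in the later
tests in this series):

    u64 *intr_regs;

    /* At least one sample should have landed in the mmap buffer */
    FAIL_IF(!collect_samples(event.mmap_buffer));

    intr_regs = get_intr_regs(&event, event.mmap_buffer);
    FAIL_IF(!intr_regs);

    /* get_reg_value() maps the register name to its slot in the
     * PERF_SAMPLE_REGS_INTR dump for this platform */
    printf("MMCR0: 0x%llx\n",
           (unsigned long long)get_reg_value(intr_regs, "MMCR0"));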
Signed-off-by: Kajol Jain <kjain@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220127072012.662451-5-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/misc.c | 175 ++++++++++++++++++ .../powerpc/pmu/sampling_tests/misc.h | 4 + 2 files changed, 179 insertions(+) diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c index a86d7d125955..fca054bbc094 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c @@ -2,6 +2,7 @@ /* * Copyright 2022, Athira Rajeev, IBM Corp. * Copyright 2022, Madhavan Srinivasan, IBM Corp. + * Copyright 2022, Kajol Jain, IBM Corp. */ #include <unistd.h> @@ -235,3 +236,177 @@ void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count) } return NULL; } + +int collect_samples(void *sample_buff) +{ + u64 sample_count; + size_t size = 0; + + __event_read_samples(sample_buff, &size, &sample_count); + return sample_count; +} + +static void *perf_read_first_sample(void *sample_buff, size_t *size) +{ + return __event_read_samples(sample_buff, size, NULL); +} + +u64 *get_intr_regs(struct event *event, void *sample_buff) +{ + u64 type = event->attr.sample_type; + u64 *intr_regs; + size_t size = 0; + + if ((type ^ PERF_SAMPLE_REGS_INTR)) + return NULL; + + intr_regs = (u64 *)perf_read_first_sample(sample_buff, &size); + if (!intr_regs) + return NULL; + + /* + * First entry in the sample buffer used to specify + * PERF_SAMPLE_REGS_ABI_64, skip perf regs abi to access + * interrupt registers. + */ + ++intr_regs; + + return intr_regs; +} + +static const unsigned int __perf_reg_mask(const char *register_name) +{ + if (!strcmp(register_name, "R0")) + return 0; + else if (!strcmp(register_name, "R1")) + return 1; + else if (!strcmp(register_name, "R2")) + return 2; + else if (!strcmp(register_name, "R3")) + return 3; + else if (!strcmp(register_name, "R4")) + return 4; + else if (!strcmp(register_name, "R5")) + return 5; + else if (!strcmp(register_name, "R6")) + return 6; + else if (!strcmp(register_name, "R7")) + return 7; + else if (!strcmp(register_name, "R8")) + return 8; + else if (!strcmp(register_name, "R9")) + return 9; + else if (!strcmp(register_name, "R10")) + return 10; + else if (!strcmp(register_name, "R11")) + return 11; + else if (!strcmp(register_name, "R12")) + return 12; + else if (!strcmp(register_name, "R13")) + return 13; + else if (!strcmp(register_name, "R14")) + return 14; + else if (!strcmp(register_name, "R15")) + return 15; + else if (!strcmp(register_name, "R16")) + return 16; + else if (!strcmp(register_name, "R17")) + return 17; + else if (!strcmp(register_name, "R18")) + return 18; + else if (!strcmp(register_name, "R19")) + return 19; + else if (!strcmp(register_name, "R20")) + return 20; + else if (!strcmp(register_name, "R21")) + return 21; + else if (!strcmp(register_name, "R22")) + return 22; + else if (!strcmp(register_name, "R23")) + return 23; + else if (!strcmp(register_name, "R24")) + return 24; + else if (!strcmp(register_name, "R25")) + return 25; + else if (!strcmp(register_name, "R26")) + return 26; + else if (!strcmp(register_name, "R27")) + return 27; + else if (!strcmp(register_name, "R28")) + return 28; + else if (!strcmp(register_name, "R29")) + return 29; + else if (!strcmp(register_name, "R30")) + return 30; + else if (!strcmp(register_name, "R31")) + return 31; + else if (!strcmp(register_name, 
"NIP")) + return 32; + else if (!strcmp(register_name, "MSR")) + return 33; + else if (!strcmp(register_name, "ORIG_R3")) + return 34; + else if (!strcmp(register_name, "CTR")) + return 35; + else if (!strcmp(register_name, "LINK")) + return 36; + else if (!strcmp(register_name, "XER")) + return 37; + else if (!strcmp(register_name, "CCR")) + return 38; + else if (!strcmp(register_name, "SOFTE")) + return 39; + else if (!strcmp(register_name, "TRAP")) + return 40; + else if (!strcmp(register_name, "DAR")) + return 41; + else if (!strcmp(register_name, "DSISR")) + return 42; + else if (!strcmp(register_name, "SIER")) + return 43; + else if (!strcmp(register_name, "MMCRA")) + return 44; + else if (!strcmp(register_name, "MMCR0")) + return 45; + else if (!strcmp(register_name, "MMCR1")) + return 46; + else if (!strcmp(register_name, "MMCR2")) + return 47; + else if (!strcmp(register_name, "MMCR3")) + return 48; + else if (!strcmp(register_name, "SIER2")) + return 49; + else if (!strcmp(register_name, "SIER3")) + return 50; + else if (!strcmp(register_name, "PMC1")) + return 51; + else if (!strcmp(register_name, "PMC2")) + return 52; + else if (!strcmp(register_name, "PMC3")) + return 53; + else if (!strcmp(register_name, "PMC4")) + return 54; + else if (!strcmp(register_name, "PMC5")) + return 55; + else if (!strcmp(register_name, "PMC6")) + return 56; + else if (!strcmp(register_name, "SDAR")) + return 57; + else if (!strcmp(register_name, "SIAR")) + return 58; + else + return -1; +} + +u64 get_reg_value(u64 *intr_regs, char *register_name) +{ + int register_bit_position; + + register_bit_position = __perf_reg_mask(register_name); + + if (register_bit_position < 0 || (!((platform_extended_mask >> + (register_bit_position - 1)) & 1))) + return -1; + + return *(intr_regs + register_bit_position); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h index c8f64e8e749c..a8d67fcad9ae 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h @@ -2,6 +2,7 @@ /* * Copyright 2022, Athira Rajeev, IBM Corp. * Copyright 2022, Madhavan Srinivasan, IBM Corp. + * Copyright 2022, Kajol Jain, IBM Corp. */ #include "../event.h" @@ -43,3 +44,6 @@ extern int check_pvr_for_sampling_tests(void); void *event_sample_buf_mmap(int fd, int mmap_pages); void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count); +int collect_samples(void *sample_buff); +u64 *get_intr_regs(struct event *event, void *sample_buff); +u64 get_reg_value(u64 *intr_regs, char *register_name); From 54d4ba7f22d1ed5bfbc915771cf2e3e147cf03b2 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan <maddy@linux.ibm.com> Date: Thu, 27 Jan 2022 12:49:57 +0530 Subject: [PATCH 097/179] selftests/powerpc/pmu: Add event_init_sampling function Extended event_init_opts() to include initialization of sampling testcases. Patch adds an event_init_sampling() wrapper to initialize event attribute fields for sampling events. This includes initializing sample period, sample type and event type. 
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220127072012.662451-6-kjain@linux.ibm.com --- tools/testing/selftests/powerpc/pmu/event.c | 19 ++++++++++++++++++- tools/testing/selftests/powerpc/pmu/event.h | 1 + 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/pmu/event.c b/tools/testing/selftests/powerpc/pmu/event.c index 48e3a413b15d..0c1c1bdba081 100644 --- a/tools/testing/selftests/powerpc/pmu/event.c +++ b/tools/testing/selftests/powerpc/pmu/event.c @@ -8,6 +8,7 @@ #include <sys/syscall.h> #include <string.h> #include <stdio.h> +#include <stdbool.h> #include <sys/ioctl.h> #include "event.h" @@ -20,7 +21,8 @@ int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, group_fd, flags); } -void event_init_opts(struct event *e, u64 config, int type, char *name) +static void __event_init_opts(struct event *e, u64 config, + int type, char *name, bool sampling) { memset(e, 0, sizeof(*e)); @@ -32,6 +34,16 @@ void event_init_opts(struct event *e, u64 config, int type, char *name) /* This has to match the structure layout in the header */ e->attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | \ PERF_FORMAT_TOTAL_TIME_RUNNING; + if (sampling) { + e->attr.sample_period = 1000; + e->attr.sample_type = PERF_SAMPLE_REGS_INTR; + e->attr.disabled = 1; + } +} + +void event_init_opts(struct event *e, u64 config, int type, char *name) +{ + __event_init_opts(e, config, type, name, false); } void event_init_named(struct event *e, u64 config, char *name) @@ -44,6 +56,11 @@ void event_init(struct event *e, u64 config) event_init_opts(e, config, PERF_TYPE_RAW, "event"); } +void event_init_sampling(struct event *e, u64 config) +{ + __event_init_opts(e, config, PERF_TYPE_RAW, "event", true); +} + #define PERF_CURRENT_PID 0 #define PERF_NO_PID -1 #define PERF_NO_CPU -1 diff --git a/tools/testing/selftests/powerpc/pmu/event.h b/tools/testing/selftests/powerpc/pmu/event.h index 23d20340a160..51aad0b6d9ad 100644 --- a/tools/testing/selftests/powerpc/pmu/event.h +++ b/tools/testing/selftests/powerpc/pmu/event.h @@ -32,6 +32,7 @@ struct event { void event_init(struct event *e, u64 config); void event_init_named(struct event *e, u64 config, char *name); void event_init_opts(struct event *e, u64 config, int type, char *name); +void event_init_sampling(struct event *e, u64 config); int event_open_with_options(struct event *e, pid_t pid, int cpu, int group_fd); int event_open_with_group(struct event *e, int group_fd); int event_open_with_pid(struct event *e, pid_t pid); From 79c4e6aba8dfc9206acc68884498080f451121f7 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan <maddy@linux.ibm.com> Date: Thu, 27 Jan 2022 12:49:58 +0530 Subject: [PATCH 098/179] selftests/powerpc/pmu: Add macros to extract mmcr fields Along with it, Add macros and utility functions to fetch individual fields from Monitor Mode Control Register 2(MMCR2) register. 
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220127072012.662451-7-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/misc.h | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h index a8d67fcad9ae..bac447e92cf6 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h @@ -47,3 +47,55 @@ void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count); int collect_samples(void *sample_buff); u64 *get_intr_regs(struct event *event, void *sample_buff); u64 get_reg_value(u64 *intr_regs, char *register_name); + +static inline int get_mmcr2_fcs(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (63 - (((pmc) - 1) * 9)))) >> (63 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcp(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (62 - (((pmc) - 1) * 9)))) >> (62 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcpc(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (61 - (((pmc) - 1) * 9)))) >> (61 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcm1(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (60 - (((pmc) - 1) * 9)))) >> (60 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcm0(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (59 - (((pmc) - 1) * 9)))) >> (59 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcwait(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (58 - (((pmc) - 1) * 9)))) >> (58 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fch(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (57 - (((pmc) - 1) * 9)))) >> (57 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcti(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (56 - (((pmc) - 1) * 9)))) >> (56 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_fcta(u64 mmcr2, int pmc) +{ + return ((mmcr2 & (1ull << (55 - (((pmc) - 1) * 9)))) >> (55 - (((pmc) - 1) * 9))); +} + +static inline int get_mmcr2_l2l3(u64 mmcr2, int pmc) +{ + if (pvr == POWER10) + return ((mmcr2 & 0xf8) >> 3); + return 0; +} From 2b49e641063e7569493371ef433b9c4ce8c8dd8b Mon Sep 17 00:00:00 2001 From: Athira Rajeev <atrajeev@linux.vnet.ibm.com> Date: Thu, 27 Jan 2022 12:49:59 +0530 Subject: [PATCH 099/179] selftests/powerpc/pmu: Add macro to extract mmcr0/mmcr1 fields Add macro and utility functions to fetch individual fields from Monitor Mode Control Register 0(MMCR0) and Monitor Mode Control Register 1(MMCR1) PMU register. 
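A hedged example of how a test might combine these accessors with
get_reg_value() from the earlier patch (names as defined in this
series):

    u64 mmcr0 = get_reg_value(intr_regs, "MMCR0");
    u64 mmcr1 = get_reg_value(intr_regs, "MMCR1");

    int fc56 = get_mmcr0_fc56(mmcr0, 5);        /* PMC5/6 freeze bit */
    int pmcxsel = get_mmcr1_pmcxsel(mmcr1, 1);  /* event select, PMC1 */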
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220127072012.662451-8-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/misc.h | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h index bac447e92cf6..77690ca1dfc1 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h @@ -12,6 +12,10 @@ #define PERF_POWER9_MASK 0x7f8ffffffffffff #define PERF_POWER10_MASK 0x7ffffffffffffff +#define MMCR0_FC56 0x00000010UL /* freeze counters 5 and 6 */ +#define MMCR0_PMCCEXT 0x00000200UL /* PMCCEXT control */ +#define MMCR1_RSQ 0x200000000000ULL /* radix scope qual field */ + extern int ev_mask_pmcxsel, ev_shift_pmcxsel; extern int ev_mask_marked, ev_shift_marked; extern int ev_mask_comb, ev_shift_comb; @@ -48,6 +52,66 @@ int collect_samples(void *sample_buff); u64 *get_intr_regs(struct event *event, void *sample_buff); u64 get_reg_value(u64 *intr_regs, char *register_name); +static inline int get_mmcr0_fc56(u64 mmcr0, int pmc) +{ + return (mmcr0 & MMCR0_FC56); +} + +static inline int get_mmcr0_pmccext(u64 mmcr0, int pmc) +{ + return (mmcr0 & MMCR0_PMCCEXT); +} + +static inline int get_mmcr0_pmao(u64 mmcr0, int pmc) +{ + return ((mmcr0 >> 7) & 0x1); +} + +static inline int get_mmcr0_cc56run(u64 mmcr0, int pmc) +{ + return ((mmcr0 >> 8) & 0x1); +} + +static inline int get_mmcr0_pmcjce(u64 mmcr0, int pmc) +{ + return ((mmcr0 >> 14) & 0x1); +} + +static inline int get_mmcr0_pmc1ce(u64 mmcr0, int pmc) +{ + return ((mmcr0 >> 15) & 0x1); +} + +static inline int get_mmcr0_pmae(u64 mmcr0, int pmc) +{ + return ((mmcr0 >> 27) & 0x1); +} + +static inline int get_mmcr1_pmcxsel(u64 mmcr1, int pmc) +{ + return ((mmcr1 >> ((24 - (((pmc) - 1) * 8))) & 0xff)); +} + +static inline int get_mmcr1_unit(u64 mmcr1, int pmc) +{ + return ((mmcr1 >> ((60 - (4 * ((pmc) - 1))))) & 0xf); +} + +static inline int get_mmcr1_comb(u64 mmcr1, int pmc) +{ + return ((mmcr1 >> (38 - ((pmc - 1) * 2))) & 0x3); +} + +static inline int get_mmcr1_cache(u64 mmcr1, int pmc) +{ + return ((mmcr1 >> 46) & 0x3); +} + +static inline int get_mmcr1_rsq(u64 mmcr1, int pmc) +{ + return mmcr1 & MMCR1_RSQ; +} + static inline int get_mmcr2_fcs(u64 mmcr2, int pmc) { return ((mmcr2 & (1ull << (63 - (((pmc) - 1) * 9)))) >> (63 - (((pmc) - 1) * 9))); From 13307f9584ea9408d9959302074dc4e8308b6cab Mon Sep 17 00:00:00 2001 From: Kajol Jain <kjain@linux.ibm.com> Date: Thu, 27 Jan 2022 12:50:00 +0530 Subject: [PATCH 100/179] selftests/powerpc/pmu: Add macro to extract mmcr3 and mmcra fields Add macro and utility functions to fetch individual fields from Monitor Mode Control Register 3(MMCR3)and Monitor Mode Control Register A(MMCRA) PMU registers Signed-off-by: Kajol Jain <kjain@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220127072012.662451-9-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/misc.h | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h index 77690ca1dfc1..7675f3177725 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h @@ -15,6 +15,7 @@ #define MMCR0_FC56 0x00000010UL 
/* freeze counters 5 and 6 */
 #define MMCR0_PMCCEXT 0x00000200UL /* PMCCEXT control */
 #define MMCR1_RSQ 0x200000000000ULL /* radix scope qual field */
+#define BHRB_DISABLE 0x2000000000ULL /* MMCRA BHRB DISABLE bit */
 
 extern int ev_mask_pmcxsel, ev_shift_pmcxsel;
 extern int ev_mask_marked, ev_shift_marked;
@@ -163,3 +164,64 @@ static inline int get_mmcr2_l2l3(u64 mmcr2, int pmc)
 		return ((mmcr2 & 0xf8) >> 3);
 	return 0;
 }
+
+static inline int get_mmcr3_src(u64 mmcr3, int pmc)
+{
+	if (pvr != POWER10)
+		return 0;
+	return ((mmcr3 >> ((49 - (15 * ((pmc) - 1))))) & 0x7fff);
+}
+
+static inline int get_mmcra_thd_cmp(u64 mmcra, int pmc)
+{
+	if (pvr == POWER10)
+		return ((mmcra >> 45) & 0x7ff);
+	return ((mmcra >> 45) & 0x3ff);
+}
+
+static inline int get_mmcra_sm(u64 mmcra, int pmc)
+{
+	return ((mmcra >> 42) & 0x3);
+}
+
+static inline int get_mmcra_bhrb_disable(u64 mmcra, int pmc)
+{
+	if (pvr == POWER10)
+		return mmcra & BHRB_DISABLE;
+	return 0;
+}
+
+static inline int get_mmcra_ifm(u64 mmcra, int pmc)
+{
+	return ((mmcra >> 30) & 0x3);
+}
+
+static inline int get_mmcra_thd_sel(u64 mmcra, int pmc)
+{
+	return ((mmcra >> 16) & 0x7);
+}
+
+static inline int get_mmcra_thd_start(u64 mmcra, int pmc)
+{
+	return ((mmcra >> 12) & 0xf);
+}
+
+static inline int get_mmcra_thd_stop(u64 mmcra, int pmc)
+{
+	return ((mmcra >> 8) & 0xf);
+}
+
+static inline int get_mmcra_rand_samp_elig(u64 mmcra, int pmc)
+{
+	return ((mmcra >> 4) & 0x7);
+}
+
+static inline int get_mmcra_sample_mode(u64 mmcra, int pmc)
+{
+	return ((mmcra >> 1) & 0x3);
+}
+
+static inline int get_mmcra_marked(u64 mmcra, int pmc)
+{
+	return mmcra & 0x1;
+}

From eb7aa044df18c6f7a88bc17fc4c9f4524652a290 Mon Sep 17 00:00:00 2001
From: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Date: Thu, 27 Jan 2022 12:50:01 +0530
Subject: [PATCH 101/179] selftests/powerpc/pmu/: Add interface test for mmcr0
 exception bits

The testcase uses the "instructions" event to verify two bits (PMAE and
PMAO) in Monitor Mode Control Register 0 (MMCR0). At the time of the
interrupt, the pmae bit (which enables performance monitor exceptions)
is expected to be cleared, and the pmao bit (which indicates a
performance monitor alert) is expected to be set in MMCR0. The testcase
handles these checks.
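The core assertions reduce to the following (mirroring the test added
below):

    u64 mmcr0 = get_reg_value(intr_regs, "MMCR0");

    FAIL_IF(get_mmcr0_pmae(mmcr0, 5));   /* PMAE must be clear at overflow */
    FAIL_IF(!get_mmcr0_pmao(mmcr0, 5));  /* PMAO must be set at overflow */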
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com> [mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220127072012.662451-10-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/.gitignore | 1 + .../powerpc/pmu/sampling_tests/Makefile | 4 +- .../sampling_tests/mmcr0_exceptionbits_test.c | 59 +++++++++++++++++++ 3 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_exceptionbits_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore new file mode 100644 index 000000000000..067b9f3a7f84 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore @@ -0,0 +1 @@ +mmcr0_exceptionbits_test diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index ac3d03ffc428..c81db8b553f6 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -1,7 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -m64 +TEST_GEN_PROGS := mmcr0_exceptionbits_test + top_srcdir = ../../../../../.. include ../../../lib.mk -$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c misc.c misc.h +$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c misc.c misc.h ../loop.S diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_exceptionbits_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_exceptionbits_test.c new file mode 100644 index 000000000000..982aa56d2171 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_exceptionbits_test.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* + * A perf sampling test for mmcr0 + * fields : pmae, pmao. 
+ */ +static int mmcr0_exceptionbits(void) +{ + struct event event; + u64 *intr_regs; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Init the event for the sampling test */ + event_init_sampling(&event, 0x500fa); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that pmae is cleared and pmao is set in MMCR0 */ + FAIL_IF(get_mmcr0_pmae(get_reg_value(intr_regs, "MMCR0"), 5)); + FAIL_IF(!get_mmcr0_pmao(get_reg_value(intr_regs, "MMCR0"), 5)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr0_exceptionbits, "mmcr0_exceptionbits"); +} From a7c0ab2e61484c0844eae2f208a06cc940338d83 Mon Sep 17 00:00:00 2001 From: Athira Rajeev <atrajeev@linux.vnet.ibm.com> Date: Thu, 27 Jan 2022 12:50:02 +0530 Subject: [PATCH 102/179] selftests/powerpc/pmu/: Add interface test for mmcr0_cc56run field The testcase uses event code 0x500fa ("instructions") to check the CC56RUN bit setting in Monitor Mode Control Register 0(MMCR0). In ISA v3.1 platform, this bit is expected to be set in MMCR0 when using Performance Monitor Counter 5 and 6 (PMC5 and PMC6). Verify this is done correctly by perf interface. CC56RUN bit makes PMC5 and PMC6 count regardless of the run latch state. This bit is set in power10 since PMC5 and PMC6 is used in power10 for counting instructions and cycles. Hence added a check to skip this test in other platforms Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com> [mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220127072012.662451-11-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/.gitignore | 1 + .../powerpc/pmu/sampling_tests/Makefile | 2 +- .../pmu/sampling_tests/mmcr0_cc56run_test.c | 59 +++++++++++++++++++ 3 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_cc56run_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore index 067b9f3a7f84..641634e3bace 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore @@ -1 +1,2 @@ mmcr0_exceptionbits_test +mmcr0_cc56run_test diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index c81db8b553f6..fc0b39957522 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -m64 -TEST_GEN_PROGS := mmcr0_exceptionbits_test +TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test top_srcdir = ../../../../../.. 
include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_cc56run_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_cc56run_test.c new file mode 100644 index 000000000000..ae4172f83817 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_cc56run_test.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* + * A perf sampling test for mmcr0 + * field: cc56run. + */ +static int mmcr0_cc56run(void) +{ + struct event event; + u64 *intr_regs; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, 0x500fa); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that cc56run bit is set in MMCR0 */ + FAIL_IF(!get_mmcr0_cc56run(get_reg_value(intr_regs, "MMCR0"), 5)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr0_cc56run, "mmcr0_cc56run"); +} From b24142b9d2401468bcd8df157013306d5b4f6626 Mon Sep 17 00:00:00 2001 From: Athira Rajeev <atrajeev@linux.vnet.ibm.com> Date: Thu, 27 Jan 2022 12:50:03 +0530 Subject: [PATCH 103/179] selftests/powerpc/pmu/: Add interface test for mmcr0_pmccext bit The testcase uses cycles event to check the PMCCEXT bit setting in Monitor Mode Control Register 0 (MMCR0). Check if perf interface sets this control bit in MMCR0 on ISA v3.1 platform. 
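The check reduces to the following (mirroring the test added below):

    /* PMCCEXT is an ISA v3.1 addition, hence the hwcap2 gate */
    SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
    ...
    FAIL_IF(!get_mmcr0_pmccext(get_reg_value(intr_regs, "MMCR0"), 4));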
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com> [mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220127072012.662451-12-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/.gitignore | 1 + .../powerpc/pmu/sampling_tests/Makefile | 2 +- .../pmu/sampling_tests/mmcr0_pmccext_test.c | 59 +++++++++++++++++++ 3 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmccext_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore index 641634e3bace..991ed33eda20 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore @@ -1,2 +1,3 @@ mmcr0_exceptionbits_test mmcr0_cc56run_test +mmcr0_pmccext_test diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index fc0b39957522..7feaf5d387c2 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -m64 -TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test +TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmccext_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmccext_test.c new file mode 100644 index 000000000000..dfd186cd8eec --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmccext_test.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. 
+ */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* + * A perf sampling test for mmcr0 + * field: pmccext + */ +static int mmcr0_pmccext(void) +{ + struct event event; + u64 *intr_regs; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, 0x4001e); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that pmccext field is set in MMCR0 */ + FAIL_IF(!get_mmcr0_pmccext(get_reg_value(intr_regs, "MMCR0"), 4)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr0_pmccext, "mmcr0_pmccext"); +} From 9ac7c6d5e4b570f416d849b263a6f67a617b4fa5 Mon Sep 17 00:00:00 2001 From: Athira Rajeev <atrajeev@linux.vnet.ibm.com> Date: Thu, 27 Jan 2022 12:50:04 +0530 Subject: [PATCH 104/179] selftests/powerpc/pmu/: Add interface test for mmcr0_pmcjce field The testcase uses event code 0x500fa ("instructions") to verify the PMCjCE bit setting in Monitor Mode Control Register 0 (MMCR0). This bit is expected to be set in MMCR0 when using Performance Monitor Counter 5 (PMC5). Checks if perf interface sets this bit correctly. Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com> [mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220127072012.662451-13-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/.gitignore | 1 + .../powerpc/pmu/sampling_tests/Makefile | 3 +- .../pmu/sampling_tests/mmcr0_pmcjce_test.c | 58 +++++++++++++++++++ 3 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmcjce_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore index 991ed33eda20..0bc68fed074d 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore @@ -1,3 +1,4 @@ mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test +mmcr0_pmcjce_test diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index 7feaf5d387c2..28029c0f1399 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -m64 -TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test +TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \ + mmcr0_pmcjce_test top_srcdir = ../../../../../.. 
include ../../../lib.mk

diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmcjce_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmcjce_test.c
new file mode 100644
index 000000000000..fdd8ed9bf725
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmcjce_test.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void thirty_two_instruction_loop(int loops);
+
+/*
+ * A perf sampling test for mmcr0
+ * field: pmcjce
+ */
+static int mmcr0_pmcjce(void)
+{
+	struct event event;
+	u64 *intr_regs;
+
+	/* Check for platform support for the test */
+	SKIP_IF(check_pvr_for_sampling_tests());
+
+	/* Init the event for the sampling test */
+	event_init_sampling(&event, 0x500fa);
+	event.attr.sample_regs_intr = platform_extended_mask;
+	FAIL_IF(event_open(&event));
+	event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+	FAIL_IF(event_enable(&event));
+
+	/* workload to make the event overflow */
+	thirty_two_instruction_loop(10000);
+
+	FAIL_IF(event_disable(&event));
+
+	/* Check for sample count */
+	FAIL_IF(!collect_samples(event.mmap_buffer));
+
+	intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+	/* Check for intr_regs */
+	FAIL_IF(!intr_regs);
+
+	/* Verify that pmcjce field is set in MMCR0 */
+	FAIL_IF(!get_mmcr0_pmcjce(get_reg_value(intr_regs, "MMCR0"), 5));
+
+	event_close(&event);
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(mmcr0_pmcjce, "mmcr0_pmcjce");
+}

From d5172f2585cd0fc9788aa9b25d3dce6483321792 Mon Sep 17 00:00:00 2001
From: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Date: Thu, 27 Jan 2022 12:50:05 +0530
Subject: [PATCH 105/179] selftests/powerpc/pmu/: Add interface test for
 mmcr0_fc56 field using pmc1

The testcase uses event code 0x1001e to verify two bit settings (FC5-6
and PMC1CE) in Monitor Mode Control Register 0 (MMCR0). The FC5-6 bit
is expected to be set in MMCR0 when not using Performance Monitor
Counters 5 and 6 (PMC5 and PMC6), and PMC1CE is expected to be set when
using PMC1. Test that these fields are programmed correctly via the
perf interface.
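In short, the test asserts (mirroring the checks in the file added
below):

    u64 mmcr0 = get_reg_value(intr_regs, "MMCR0");

    FAIL_IF(!get_mmcr0_fc56(mmcr0, 1));    /* PMC5/6 frozen while unused */
    FAIL_IF(!get_mmcr0_pmc1ce(mmcr0, 1));  /* PMC1 condition enable set */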
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com> [mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220127072012.662451-14-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/.gitignore | 1 + .../powerpc/pmu/sampling_tests/Makefile | 2 +- .../sampling_tests/mmcr0_fc56_pmc1ce_test.c | 59 +++++++++++++++++++ 3 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc1ce_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore index 0bc68fed074d..3229f088f542 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore @@ -2,3 +2,4 @@ mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test mmcr0_pmcjce_test +mmcr0_fc56_pmc1ce_test diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index 28029c0f1399..b6bc066e5047 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -2,7 +2,7 @@ CFLAGS += -m64 TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \ - mmcr0_pmcjce_test + mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc1ce_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc1ce_test.c new file mode 100644 index 000000000000..1c1813c182c0 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc1ce_test.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* + * A perf sampling test for mmcr0 + * fields: fc56, pmc1ce. 
+ */ +static int mmcr0_fc56_pmc1ce(void) +{ + struct event event; + u64 *intr_regs; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Init the event for the sampling test */ + event_init_sampling(&event, 0x1001e); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that fc56, pmc1ce fields are set in MMCR0 */ + FAIL_IF(!get_mmcr0_fc56(get_reg_value(intr_regs, "MMCR0"), 1)); + FAIL_IF(!get_mmcr0_pmc1ce(get_reg_value(intr_regs, "MMCR0"), 1)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr0_fc56_pmc1ce, "mmcr0_fc56_pmc1ce"); +} From 6e11374b08723b2c43ae83bd5d48000d695f13a0 Mon Sep 17 00:00:00 2001 From: Athira Rajeev <atrajeev@linux.vnet.ibm.com> Date: Thu, 27 Jan 2022 12:50:06 +0530 Subject: [PATCH 106/179] selftests/powerpc/pmu/: Add interface test for mmcr0_pmc56 using pmc5 The testcase uses event code 0x500fa to verify the FC5-6 bit setting in Monitor Mode Control Register 0 (MMCR0). Check if FC5-6 bit is not set in MMCR0 when using Performance Monitor Counter 5 and 6 (PMC5 and PMC6). Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com> [mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220127072012.662451-15-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/.gitignore | 1 + .../powerpc/pmu/sampling_tests/Makefile | 2 +- .../sampling_tests/mmcr0_fc56_pmc56_test.c | 58 +++++++++++++++++++ 3 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc56_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore index 3229f088f542..1a3b7323acd1 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore @@ -3,3 +3,4 @@ mmcr0_cc56run_test mmcr0_pmccext_test mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test +mmcr0_fc56_pmc56_test diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index b6bc066e5047..790a7ff21a90 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -2,7 +2,7 @@ CFLAGS += -m64 TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \ - mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test + mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc56_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc56_test.c new file mode 100644 index 000000000000..332d24b5ab9c --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc56_test.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. 
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void thirty_two_instruction_loop(int loops);
+
+/*
+ * A perf sampling test for mmcr0
+ * fields: fc56_pmc56
+ */
+static int mmcr0_fc56_pmc56(void)
+{
+	struct event event;
+	u64 *intr_regs;
+
+	/* Check for platform support for the test */
+	SKIP_IF(check_pvr_for_sampling_tests());
+
+	/* Init the event for the sampling test */
+	event_init_sampling(&event, 0x500fa);
+	event.attr.sample_regs_intr = platform_extended_mask;
+	FAIL_IF(event_open(&event));
+	event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+	FAIL_IF(event_enable(&event));
+
+	/* workload to make the event overflow */
+	thirty_two_instruction_loop(10000);
+
+	FAIL_IF(event_disable(&event));
+
+	/* Check for sample count */
+	FAIL_IF(!collect_samples(event.mmap_buffer));
+
+	intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+	/* Check for intr_regs */
+	FAIL_IF(!intr_regs);
+
+	/* Verify that fc56 is not set in MMCR0 when using PMC5 */
+	FAIL_IF(get_mmcr0_fc56(get_reg_value(intr_regs, "MMCR0"), 5));
+
+	event_close(&event);
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(mmcr0_fc56_pmc56, "mmcr0_fc56_pmc56");
+}

From 2becea3b6acf308642d6c0e9bd41caf7820753f5 Mon Sep 17 00:00:00 2001
From: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Date: Thu, 27 Jan 2022 12:50:07 +0530
Subject: [PATCH 107/179] selftests/powerpc/pmu/: Add interface test for
 mmcr1_comb field

The testcase uses event code "0x26880" to verify the settings of
different fields in Monitor Mode Control Register 1 (MMCR1); the fields
include PMCxCOMB. Check that these fields are translated correctly to
MMCR1 via the perf interface. Add a selftest for the mmcr1 comb field.

Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
[mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220127072012.662451-16-kjain@linux.ibm.com
---
 .../powerpc/pmu/sampling_tests/.gitignore     |  1 +
 .../powerpc/pmu/sampling_tests/Makefile       |  3 +-
 .../pmu/sampling_tests/mmcr1_comb_test.c      | 66 +++++++++++++++++++
 3 files changed, 69 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_comb_test.c

diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
index 1a3b7323acd1..f0ad66d78ee0 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
@@ -4,3 +4,4 @@ mmcr0_pmccext_test
 mmcr0_pmcjce_test
 mmcr0_fc56_pmc1ce_test
 mmcr0_fc56_pmc56_test
+mmcr1_comb_test
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
index 790a7ff21a90..da87ffddc568 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
@@ -2,7 +2,8 @@
 CFLAGS += -m64
 
 TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \
-		  mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test
+		  mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \
+		  mmcr1_comb_test
 
 top_srcdir = ../../../../../..
include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_comb_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_comb_test.c new file mode 100644 index 000000000000..5aea6499ee9a --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_comb_test.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +/* All successful D-side store dispatches for this thread that were L2 Miss */ +#define EventCode 0x46880 + +extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target); + +/* + * A perf sampling test for mmcr1 + * fields : comb. + */ +static int mmcr1_comb(void) +{ + struct event event; + u64 *intr_regs; + u64 dummy; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop_with_ll_sc(10000000, &dummy); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* + * Verify that comb field match with + * corresponding event code fields + */ + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, comb) != + get_mmcr1_comb(get_reg_value(intr_regs, "MMCR1"), 4)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr1_comb, "mmcr1_comb"); +} From ac575b2606bf99a0d01a054196e24e1ad82c194d Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan <maddy@linux.ibm.com> Date: Thu, 27 Jan 2022 12:50:09 +0530 Subject: [PATCH 108/179] selftests/powerpc/pmu/: Add interface test for mmcr2_l2l3 field The testcases uses event code 0x010000046080 to verify the l2l3 bit setting for Monitor Mode Control Register 2 (MMCR2). check if this bit is set correctly via perf interface in ISA v3.1 platform. 
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com> [mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220127072012.662451-18-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/.gitignore | 1 + .../powerpc/pmu/sampling_tests/Makefile | 2 +- .../pmu/sampling_tests/mmcr2_l2l3_test.c | 74 +++++++++++++++++++ 3 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_l2l3_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore index f0ad66d78ee0..84bfc4d0e51c 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore @@ -5,3 +5,4 @@ mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test mmcr1_comb_test +mmcr2_l2l3_test diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index da87ffddc568..ec4c758e91a9 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -3,7 +3,7 @@ CFLAGS += -m64 TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \ mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \ - mmcr1_comb_test + mmcr1_comb_test mmcr2_l2l3_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_l2l3_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_l2l3_test.c new file mode 100644 index 000000000000..ceca597016b2 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_l2l3_test.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Madhavan Srinivasan, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +/* All successful D-side store dispatches for this thread */ +#define EventCode 0x010000046080 + +#define MALLOC_SIZE (0x10000 * 10) /* Ought to be enough .. 
*/ + +/* + * A perf sampling test for mmcr2 + * fields : l2l3 + */ +static int mmcr2_l2l3(void) +{ + struct event event; + u64 *intr_regs; + char *p; + int i; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + p = malloc(MALLOC_SIZE); + FAIL_IF(!p); + + for (i = 0; i < MALLOC_SIZE; i += 0x10000) + p[i] = i; + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* + * Verify that l2l3 field of MMCR2 match with + * corresponding event code field + */ + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, l2l3) != + get_mmcr2_l2l3(get_reg_value(intr_regs, "MMCR2"), 4)); + + event_close(&event); + free(p); + + return 0; +} + +int main(void) +{ + return test_harness(mmcr2_l2l3, "mmcr2_l2l3"); +} From 9ee241f1b1447c7e8ca90902ab1888aa9e7a3c00 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan <maddy@linux.ibm.com> Date: Thu, 27 Jan 2022 12:50:10 +0530 Subject: [PATCH 109/179] selftests/powerpc/pmu/: Add interface test for mmcr2_fcs_fch fields The testcases uses cycles event to verify the freeze counter settings in Monitor Mode Control Register 2 (MMCR2). Event modifier (exclude_kernel) setting is used for the event attribute to check the FCxS and FCxH ( Freeze counter in privileged and hypervisor state ) settings via perf interface. Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com> [mpe: Add error checking, check MSR for MSR_HV, drop GET_MMCR_FIELD, add to .gitignore] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220127072012.662451-19-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/.gitignore | 1 + .../powerpc/pmu/sampling_tests/Makefile | 2 +- .../pmu/sampling_tests/mmcr2_fcs_fch_test.c | 85 +++++++++++++++++++ 3 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_fcs_fch_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore index 84bfc4d0e51c..58d7551f4c2a 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore @@ -6,3 +6,4 @@ mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test mmcr1_comb_test mmcr2_l2l3_test +mmcr2_fcs_fch_test diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index ec4c758e91a9..b4271509fe70 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -3,7 +3,7 @@ CFLAGS += -m64 TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \ mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \ - mmcr1_comb_test mmcr2_l2l3_test + mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test top_srcdir = ../../../../../.. 
include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_fcs_fch_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_fcs_fch_test.c new file mode 100644 index 000000000000..4e242fd61b25 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_fcs_fch_test.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Madhavan Srinivasan, IBM Corp. + */ + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +static bool is_hv; + +static void sig_usr2_handler(int signum, siginfo_t *info, void *data) +{ + ucontext_t *uctx = data; + + is_hv = !!(uctx->uc_mcontext.gp_regs[PT_MSR] & MSR_HV); +} + +/* + * A perf sampling test for mmcr2 + * fields : fcs, fch. + */ +static int mmcr2_fcs_fch(void) +{ + struct sigaction sigact = { + .sa_sigaction = sig_usr2_handler, + .sa_flags = SA_SIGINFO + }; + struct event event; + u64 *intr_regs; + + FAIL_IF(sigaction(SIGUSR2, &sigact, NULL)); + FAIL_IF(kill(getpid(), SIGUSR2)); + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Init the event for the sampling test */ + event_init_sampling(&event, 0x1001e); + event.attr.sample_regs_intr = platform_extended_mask; + event.attr.exclude_kernel = 1; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* + * Verify that fcs and fch field of MMCR2 match + * with corresponding modifier fields. + */ + if (is_hv) + FAIL_IF(event.attr.exclude_kernel != + get_mmcr2_fch(get_reg_value(intr_regs, "MMCR2"), 1)); + else + FAIL_IF(event.attr.exclude_kernel != + get_mmcr2_fcs(get_reg_value(intr_regs, "MMCR2"), 1)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr2_fcs_fch, "mmcr2_fcs_fch"); +} From 02f02feb6b50c67171fd56bc3fd0fd96118c5c12 Mon Sep 17 00:00:00 2001 From: Kajol Jain <kjain@linux.ibm.com> Date: Thu, 27 Jan 2022 12:50:11 +0530 Subject: [PATCH 110/179] selftests/powerpc/pmu/: Add interface test for mmcr3_src fields The testcase uses event code 0x1340000001c040 to verify the settings for different src fields in Monitor Mode Control Register 3 (MMCR3). Checks if these fields are translated correctly via perf interface to MMCR3 on ISA v3.1 platform. 
Signed-off-by: Kajol Jain <kjain@linux.ibm.com> [mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220127072012.662451-20-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/.gitignore | 1 + .../powerpc/pmu/sampling_tests/Makefile | 3 +- .../pmu/sampling_tests/mmcr3_src_test.c | 67 +++++++++++++++++++ 3 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr3_src_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore index 58d7551f4c2a..2969a9e9ba72 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore @@ -7,3 +7,4 @@ mmcr0_fc56_pmc56_test mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test +mmcr3_src_test diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index b4271509fe70..cd2704b173b3 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -3,7 +3,8 @@ CFLAGS += -m64 TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \ mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \ - mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test + mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test \ + mmcr3_src_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr3_src_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr3_src_test.c new file mode 100644 index 000000000000..e154e2a4cc3a --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr3_src_test.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target); + +/* The data cache was reloaded from local core's L3 due to a demand load */ +#define EventCode 0x1340000001c040 + +/* + * A perf sampling test for mmcr3 + * fields. 
+ */ +static int mmcr3_src(void) +{ + struct event event; + u64 *intr_regs; + u64 dummy; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make event overflow */ + thirty_two_instruction_loop_with_ll_sc(1000000, &dummy); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* + * Verify that src field of MMCR3 match with + * corresponding event code field + */ + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, mmcr3_src) != + get_mmcr3_src(get_reg_value(intr_regs, "MMCR3"), 1)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcr3_src, "mmcr3_src"); +} From 29cf373c5766e6bd1b97056d2d678a41777669aa Mon Sep 17 00:00:00 2001 From: Kajol Jain <kjain@linux.ibm.com> Date: Thu, 27 Jan 2022 12:50:12 +0530 Subject: [PATCH 111/179] selftests/powerpc/pmu: Add interface test for mmcra register fields The testcase uses event code 0x35340401e0 to verify the settings for different fields in Monitor Mode Control Register A (MMCRA). The fields include thresh_start, thresh_stop thresh_select, sdar mode, sample and marked bit. Checks if these fields are translated correctly via perf interface to MMCRA. Signed-off-by: Kajol Jain <kjain@linux.ibm.com> [mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220127072012.662451-21-kjain@linux.ibm.com --- .../powerpc/pmu/sampling_tests/.gitignore | 1 + .../powerpc/pmu/sampling_tests/Makefile | 2 +- .../mmcra_thresh_marked_sample_test.c | 80 +++++++++++++++++++ 3 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_marked_sample_test.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore index 2969a9e9ba72..0fce5a694684 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore @@ -8,3 +8,4 @@ mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test mmcr3_src_test +mmcra_thresh_marked_sample_test diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index cd2704b173b3..a785c6a173b9 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -4,7 +4,7 @@ CFLAGS += -m64 TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \ mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \ mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test \ - mmcr3_src_test + mmcr3_src_test mmcra_thresh_marked_sample_test top_srcdir = ../../../../../.. 
include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_marked_sample_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_marked_sample_test.c new file mode 100644 index 000000000000..022cc1655eb5 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_marked_sample_test.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +/* + * Primary PMU event used here is PM_MRK_INST_CMPL (0x401e0) + * Threshold event selection used is issue to complete for cycles + * Sampling criteria is Load only sampling + */ +#define EventCode 0x35340401e0 + +extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target); + +/* A perf sampling test to test mmcra fields */ +static int mmcra_thresh_marked_sample(void) +{ + struct event event; + u64 *intr_regs; + u64 dummy; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop_with_ll_sc(1000000, &dummy); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* + * Verify that thresh sel/start/stop, marked, random sample + * eligibility, sdar mode and sample mode fields match with + * the corresponding event code fields + */ + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, thd_sel) != + get_mmcra_thd_sel(get_reg_value(intr_regs, "MMCRA"), 4)); + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, thd_start) != + get_mmcra_thd_start(get_reg_value(intr_regs, "MMCRA"), 4)); + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, thd_stop) != + get_mmcra_thd_stop(get_reg_value(intr_regs, "MMCRA"), 4)); + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, marked) != + get_mmcra_marked(get_reg_value(intr_regs, "MMCRA"), 4)); + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, sample >> 2) != + get_mmcra_rand_samp_elig(get_reg_value(intr_regs, "MMCRA"), 4)); + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, sample & 0x3) != + get_mmcra_sample_mode(get_reg_value(intr_regs, "MMCRA"), 4)); + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, sm) != + get_mmcra_sm(get_reg_value(intr_regs, "MMCRA"), 4)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcra_thresh_marked_sample, "mmcra_thresh_marked_sample"); +} From 607451ce0aa9bdff590db4d087173edba6d7a29d Mon Sep 17 00:00:00 2001 From: Hari Bathini <hbathini@linux.ibm.com> Date: Tue, 1 Feb 2022 16:23:05 +0530 Subject: [PATCH 112/179] powerpc/fadump: register for fadump as early as possible Crash recovery (fadump) is setup in the userspace by some service. This service rebuilds initrd with dump capture capability, if it is not already dump capture capable before proceeding to register for firmware assisted dump (echo 1 > /sys/kernel/fadump/registered). But arming the kernel with crash recovery support does not have to wait for userspace configuration. So, register for fadump while setting it up itself. 
This can at worst lead to a scenario, where /proc/vmcore is ready afer crash but the initrd does not know how/where to offload it, which is always better than not having a /proc/vmcore at all due to incomplete configuration in the userspace at the time of crash. Commit 0823c68b054b ("powerpc/fadump: re-register firmware-assisted dump if already registered") ensures this change does not break userspace. Signed-off-by: Hari Bathini <hbathini@linux.ibm.com> [mpe: Reword comment] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220201105305.155511-1-hbathini@linux.ibm.com --- arch/powerpc/kernel/fadump.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index d03e488cfe9c..cf92f3fb17d2 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1637,9 +1637,11 @@ int __init setup_fadump(void) if (fw_dump.ops->fadump_process(&fw_dump) < 0) fadump_invalidate_release_mem(); } - /* Initialize the kernel dump memory structure for FAD registration. */ - else if (fw_dump.reserve_dump_area_size) + /* Initialize the kernel dump memory structure and register with f/w */ + else if (fw_dump.reserve_dump_area_size) { fw_dump.ops->fadump_init_mem_struct(&fw_dump); + register_fadump(); + } /* * In case of panic, fadump is triggered via ppc_panic_event() @@ -1651,7 +1653,12 @@ int __init setup_fadump(void) return 1; } -subsys_initcall(setup_fadump); +/* + * Use subsys_initcall_sync() here because there is dependency with + * crash_save_vmcoreinfo_init(), which mush run first to ensure vmcoreinfo initialization + * is done before regisering with f/w. + */ +subsys_initcall_sync(setup_fadump); #else /* !CONFIG_PRESERVE_FA_DUMP */ /* Scan the Firmware Assisted dump configuration details. */ From 973e2e6462405d85d3e8bb02d516d5fe6d1193ed Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Fri, 25 Feb 2022 17:36:22 +0100 Subject: [PATCH 113/179] powerpc/interrupt: Remove struct interrupt_state Since commit ceff77efa4f8 ("powerpc/64e/interrupt: Use new interrupt context tracking scheme") struct interrupt_state has been empty and unused. Remove it. Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/1d862ce3eab3da6ca7ac47d4a78a18f154462511.1645806970.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/interrupt.h | 32 +++++++++++----------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 5404f7abbcf8..f3b2c93a5db1 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -123,9 +123,6 @@ static inline void nap_adjust_return(struct pt_regs *regs) #endif } -struct interrupt_state { -}; - static inline void booke_restore_dbcr0(void) { #ifdef CONFIG_PPC_ADV_DEBUG_REGS @@ -138,7 +135,7 @@ static inline void booke_restore_dbcr0(void) #endif } -static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) +static inline void interrupt_enter_prepare(struct pt_regs *regs) { #ifdef CONFIG_PPC32 if (!arch_irq_disabled_regs(regs)) @@ -228,17 +225,17 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup * However interrupt_nmi_exit_prepare does return directly to regs, because * NMIs do not do "exit work" or replay soft-masked interrupts. 
*/ -static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) +static inline void interrupt_exit_prepare(struct pt_regs *regs) { } -static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) +static inline void interrupt_async_enter_prepare(struct pt_regs *regs) { #ifdef CONFIG_PPC64 /* Ensure interrupt_enter_prepare does not enable MSR[EE] */ local_paca->irq_happened |= PACA_IRQ_HARD_DIS; #endif - interrupt_enter_prepare(regs, state); + interrupt_enter_prepare(regs); #ifdef CONFIG_PPC_BOOK3S_64 /* * RI=1 is set by interrupt_enter_prepare, so this thread flags access @@ -251,7 +248,7 @@ static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct in irq_enter(); } -static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) +static inline void interrupt_async_exit_prepare(struct pt_regs *regs) { /* * Adjust at exit so the main handler sees the true NIA. This must @@ -262,7 +259,7 @@ static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct int nap_adjust_return(regs); irq_exit(); - interrupt_exit_prepare(regs, state); + interrupt_exit_prepare(regs); } struct interrupt_nmi_state { @@ -447,13 +444,11 @@ static __always_inline void ____##func(struct pt_regs *regs); \ \ interrupt_handler void func(struct pt_regs *regs) \ { \ - struct interrupt_state state; \ - \ - interrupt_enter_prepare(regs, &state); \ + interrupt_enter_prepare(regs); \ \ ____##func (regs); \ \ - interrupt_exit_prepare(regs, &state); \ + interrupt_exit_prepare(regs); \ } \ NOKPROBE_SYMBOL(func); \ \ @@ -482,14 +477,13 @@ static __always_inline long ____##func(struct pt_regs *regs); \ \ interrupt_handler long func(struct pt_regs *regs) \ { \ - struct interrupt_state state; \ long ret; \ \ - interrupt_enter_prepare(regs, &state); \ + interrupt_enter_prepare(regs); \ \ ret = ____##func (regs); \ \ - interrupt_exit_prepare(regs, &state); \ + interrupt_exit_prepare(regs); \ \ return ret; \ } \ @@ -518,13 +512,11 @@ static __always_inline void ____##func(struct pt_regs *regs); \ \ interrupt_handler void func(struct pt_regs *regs) \ { \ - struct interrupt_state state; \ - \ - interrupt_async_enter_prepare(regs, &state); \ + interrupt_async_enter_prepare(regs); \ \ ____##func (regs); \ \ - interrupt_async_exit_prepare(regs, &state); \ + interrupt_async_exit_prepare(regs); \ } \ NOKPROBE_SYMBOL(func); \ \ From 749ed4a20657bcea66a6e082ca3dc0d228cbec80 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza <danielhb413@gmail.com> Date: Thu, 24 Feb 2022 15:23:12 -0300 Subject: [PATCH 114/179] powerpc/mm/numa: skip NUMA_NO_NODE onlining in parse_numa_properties() Executing node_set_online() when nid = NUMA_NO_NODE results in an undefined behavior. node_set_online() will call node_set_state(), into __node_set(), into set_bit(), and since NUMA_NO_NODE is -1 we'll end up doing a negative shift operation inside arch/powerpc/include/asm/bitops.h. This potential UB was detected running a kernel with CONFIG_UBSAN. The behavior was introduced by commit 10f78fd0dabb ("powerpc/numa: Fix a regression on memoryless node 0"), where the check for nid > 0 was removed to fix a problem that was happening with nid = 0, but the result is that now we're trying to online NUMA_NO_NODE nids as well. Checking for nid >= 0 will allow node 0 to be onlined while avoiding this UB with NUMA_NO_NODE. 
Fixes: 10f78fd0dabb ("powerpc/numa: Fix a regression on memoryless node 0") Reported-by: Ping Fang <pifang@redhat.com> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220224182312.1012527-1-danielhb413@gmail.com --- arch/powerpc/mm/numa.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 9d5f710d2c20..b9b7fefbb64b 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -956,7 +956,9 @@ static int __init parse_numa_properties(void) of_node_put(cpu); } - node_set_online(nid); + /* node_set_online() is an UB if 'nid' is negative */ + if (likely(nid >= 0)) + node_set_online(nid); } get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); From 2863dd2db23e0407f6c50b8ba5c0e55abef894f1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman <mpe@ellerman.id.au> Date: Tue, 15 Feb 2022 22:28:58 +1100 Subject: [PATCH 115/179] powerpc/Makefile: Don't pass -mcpu=powerpc64 when building 32-bit When CONFIG_GENERIC_CPU=y (true for all our defconfigs) we pass -mcpu=powerpc64 to the compiler, even when we're building a 32-bit kernel. This happens because we have an ifdef CONFIG_PPC_BOOK3S_64/else block in the Makefile that was written before 32-bit supported GENERIC_CPU. Prior to that the else block only applied to 64-bit Book3E. The GCC man page says -mcpu=powerpc64 "[specifies] a pure ... 64-bit big endian PowerPC ... architecture machine [type], with an appropriate, generic processor model assumed for scheduling purposes." It's unclear how that interacts with -m32, which we are also passing, although obviously -m32 is taking precedence in some sense, as the 32-bit kernel only contains 32-bit instructions. This was noticed by inspection, not via any bug reports, but it does affect code generation. Comparing before/after code generation, there are some changes to instruction scheduling, and the after case (with -mcpu=powerpc64 removed) the compiler seems more keen to use r8. Fix it by making the else case only apply to Book3E 64, which excludes 32-bit. Fixes: 0e00a8c9fd92 ("powerpc: Allow CPU selection also on PPC32") Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220215112858.304779-1-mpe@ellerman.id.au --- arch/powerpc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index ddc5a706760a..1e1ef4352f62 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -171,7 +171,7 @@ else CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,$(call cc-option,-mtune=power5)) CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mcpu=power5,-mcpu=power4) endif -else +else ifdef CONFIG_PPC_BOOK3E_64 CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64 endif From a633cb1edddaa643fadc70abc88f89a408fa834a Mon Sep 17 00:00:00 2001 From: Anders Roxell <anders.roxell@linaro.org> Date: Thu, 24 Feb 2022 17:22:13 +0100 Subject: [PATCH 116/179] powerpc/lib/sstep: Fix 'sthcx' instruction Looks like there been a copy paste mistake when added the instruction 'stbcx' twice and one was probably meant to be 'sthcx'. Changing to 'sthcx' from 'stbcx'. 
Fixes: 350779a29f11 ("powerpc: Handle most loads and stores in instruction emulation code") Cc: stable@vger.kernel.org # v4.14+ Reported-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Anders Roxell <anders.roxell@linaro.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220224162215.3406642-1-anders.roxell@linaro.org --- arch/powerpc/lib/sstep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index ca38d026fd88..31de2c5b586e 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -3373,7 +3373,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) __put_user_asmx(op->val, ea, err, "stbcx.", cr); break; case 2: - __put_user_asmx(op->val, ea, err, "stbcx.", cr); + __put_user_asmx(op->val, ea, err, "sthcx.", cr); break; #endif case 4: From 8667d0d64dd1f84fd41b5897fd87fa9113ae05e3 Mon Sep 17 00:00:00 2001 From: Anders Roxell <anders.roxell@linaro.org> Date: Thu, 24 Feb 2022 17:22:14 +0100 Subject: [PATCH 117/179] powerpc: Fix build errors with newer binutils Building tinyconfig with gcc (Debian 11.2.0-16) and assembler (Debian 2.37.90.20220207) the following build error shows up: {standard input}: Assembler messages: {standard input}:1190: Error: unrecognized opcode: `stbcix' {standard input}:1433: Error: unrecognized opcode: `lwzcix' {standard input}:1453: Error: unrecognized opcode: `stbcix' {standard input}:1460: Error: unrecognized opcode: `stwcix' {standard input}:1596: Error: unrecognized opcode: `stbcix' ... Rework to add assembler directives [1] around the instruction. Going through them one by one shows that the changes should be safe. Like __get_user_atomic_128_aligned() is only called in p9_hmi_special_emu(), which according to the name is specific to power9. And __raw_rm_read*() are only called in things that are powernv or book3s_hv specific. 
[1] https://sourceware.org/binutils/docs/as/PowerPC_002dPseudo.html#PowerPC_002dPseudo Cc: stable@vger.kernel.org Co-developed-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Anders Roxell <anders.roxell@linaro.org> Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org> [mpe: Make commit subject more descriptive] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220224162215.3406642-2-anders.roxell@linaro.org --- arch/powerpc/include/asm/io.h | 40 ++++++++++++++++++++++------ arch/powerpc/include/asm/uaccess.h | 3 +++ arch/powerpc/platforms/powernv/rng.c | 6 ++++- 3 files changed, 40 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index beba4979bff9..fee979d3a1aa 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -359,25 +359,37 @@ static inline void __raw_writeq_be(unsigned long v, volatile void __iomem *addr) */ static inline void __raw_rm_writeb(u8 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stbcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stbcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writew(u16 val, volatile void __iomem *paddr) { - __asm__ __volatile__("sthcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + sthcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writel(u32 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stwcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stwcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stdcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stdcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } @@ -389,7 +401,10 @@ static inline void __raw_rm_writeq_be(u64 val, volatile void __iomem *paddr) static inline u8 __raw_rm_readb(volatile void __iomem *paddr) { u8 ret; - __asm__ __volatile__("lbzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lbzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -397,7 +412,10 @@ static inline u8 __raw_rm_readb(volatile void __iomem *paddr) static inline u16 __raw_rm_readw(volatile void __iomem *paddr) { u16 ret; - __asm__ __volatile__("lhzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lhzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -405,7 +423,10 @@ static inline u16 __raw_rm_readw(volatile void __iomem *paddr) static inline u32 __raw_rm_readl(volatile void __iomem *paddr) { u32 ret; - __asm__ __volatile__("lwzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lwzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -413,7 +434,10 @@ static inline u32 __raw_rm_readl(volatile void __iomem *paddr) static inline u64 __raw_rm_readq(volatile void __iomem *paddr) { u64 ret; - __asm__ __volatile__("ldcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + ldcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 
63316100080c..4a35423f766d 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -125,8 +125,11 @@ do { \ */ #define __get_user_atomic_128_aligned(kaddr, uaddr, err) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine altivec\n" \ "1: lvx 0,0,%1 # get user\n" \ " stvx 0,0,%2 # put kernel\n" \ + ".machine pop\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index b4386714494a..e3d44b36ae98 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -43,7 +43,11 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val) unsigned long parity; /* Calculate the parity of the value */ - asm ("popcntd %0,%1" : "=r" (parity) : "r" (val)); + asm (".machine push; \ + .machine power7; \ + popcntd %0,%1; \ + .machine pop;" + : "=r" (parity) : "r" (val)); /* xor our value with the previous mask */ val ^= rng->mask; From 8219d31effa7be5dbc7ff915d7970672e028c701 Mon Sep 17 00:00:00 2001 From: Anders Roxell <anders.roxell@linaro.org> Date: Thu, 24 Feb 2022 17:22:15 +0100 Subject: [PATCH 118/179] powerpc/lib/sstep: Fix build errors with newer binutils Building tinyconfig with gcc (Debian 11.2.0-16) and assembler (Debian 2.37.90.20220207) the following build error shows up: {standard input}: Assembler messages: {standard input}:10576: Error: unrecognized opcode: `stbcx.' {standard input}:10680: Error: unrecognized opcode: `lharx' {standard input}:10694: Error: unrecognized opcode: `lbarx' Rework to add assembler directives [1] around the instruction. The problem with this might be that we can trick a power6 into single-stepping through an stbcx. for instance, and it will execute that in kernel mode. 
[1] https://sourceware.org/binutils/docs/as/PowerPC_002dPseudo.html#PowerPC_002dPseudo Fixes: 350779a29f11 ("powerpc: Handle most loads and stores in instruction emulation code") Cc: stable@vger.kernel.org # v4.14+ Co-developed-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Anders Roxell <anders.roxell@linaro.org> Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220224162215.3406642-3-anders.roxell@linaro.org --- arch/powerpc/lib/sstep.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 31de2c5b586e..ef6ea8e156ed 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1089,7 +1089,10 @@ NOKPROBE_SYMBOL(emulate_dcbz); #define __put_user_asmx(x, addr, err, op, cr) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine power8\n" \ "1: " op " %2,0,%3\n" \ + ".machine pop\n" \ " mfcr %1\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ @@ -1102,7 +1105,10 @@ NOKPROBE_SYMBOL(emulate_dcbz); #define __get_user_asmx(x, addr, err, op) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine power8\n" \ "1: "op" %1,0,%2\n" \ + ".machine pop\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ From e40b38a41ce916d6a3a4751d59a01b6c0c03afd0 Mon Sep 17 00:00:00 2001 From: Nour-eddine Taleb <kernel.noureddine@gmail.com> Date: Thu, 3 Mar 2022 15:34:16 +0100 Subject: [PATCH 119/179] KVM: PPC: Book3S HV: remove unnecessary casts Remove unnecessary casts, from "void *" to "struct kvmppc_xics *" Signed-off-by: Nour-eddine Taleb <kernel.noureddine@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220303143416.201851-1-kernel.noureddine@gmail.com --- arch/powerpc/kvm/book3s_xics.c | 2 +- arch/powerpc/kvm/book3s_xive.c | 2 +- arch/powerpc/kvm/book3s_xive_native.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 306c85e70eea..ab6d37d78c62 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -1431,7 +1431,7 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type) static void kvmppc_xics_init(struct kvm_device *dev) { - struct kvmppc_xics *xics = (struct kvmppc_xics *)dev->private; + struct kvmppc_xics *xics = dev->private; xics_debugfs_init(xics); } diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 37a56cbb1701..c0ce5531d9bc 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -2362,7 +2362,7 @@ static void xive_debugfs_init(struct kvmppc_xive *xive) static void kvmppc_xive_init(struct kvm_device *dev) { - struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private; + struct kvmppc_xive *xive = dev->private; /* Register some debug interfaces */ xive_debugfs_init(xive); diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 3c2b128e5f0f..f81ba6f84e72 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -1267,7 +1267,7 @@ static void xive_native_debugfs_init(struct kvmppc_xive *xive) static void kvmppc_xive_native_init(struct kvm_device *dev) { - struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private; + struct kvmppc_xive *xive = dev->private; /* Register some debug interfaces */ xive_native_debugfs_init(xive); From 
40562fe4fa3d94c7462ec909ab89b075e26c59ac Mon Sep 17 00:00:00 2001 From: Haren Myneni <haren@linux.ibm.com> Date: Mon, 28 Feb 2022 17:11:28 -0800 Subject: [PATCH 120/179] powerpc/pseries/vas: Use common names in VAS capability structure nr_total/nr_used_credits provides credits usage to user space via sysfs and the same interface can be used on PowerNV in future. Changed with proper naming so that applicable on both pseries and PowerNV. Signed-off-by: Haren Myneni <haren@linux.ibm.com> Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/f4313e9f198ee4f8d4fa4d015d8d1873e17851e6.camel@linux.ibm.com --- arch/powerpc/platforms/pseries/vas.c | 10 +++++----- arch/powerpc/platforms/pseries/vas.h | 5 ++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index d243ddc58827..18aae037ffe9 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -310,8 +310,8 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, cop_feat_caps = &caps->caps; - if (atomic_inc_return(&cop_feat_caps->used_lpar_creds) > - atomic_read(&cop_feat_caps->target_lpar_creds)) { + if (atomic_inc_return(&cop_feat_caps->nr_used_credits) > + atomic_read(&cop_feat_caps->nr_total_credits)) { pr_err("Credits are not available to allocate window\n"); rc = -EINVAL; goto out; @@ -385,7 +385,7 @@ out_free: free_irq_setup(txwin); h_deallocate_vas_window(txwin->vas_win.winid); out: - atomic_dec(&cop_feat_caps->used_lpar_creds); + atomic_dec(&cop_feat_caps->nr_used_credits); kfree(txwin); return ERR_PTR(rc); } @@ -445,7 +445,7 @@ static int vas_deallocate_window(struct vas_window *vwin) } list_del(&win->win_list); - atomic_dec(&caps->used_lpar_creds); + atomic_dec(&caps->nr_used_credits); mutex_unlock(&vas_pseries_mutex); put_vas_user_win_ref(&vwin->task_ref); @@ -521,7 +521,7 @@ static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, } caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds); caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds); - atomic_set(&caps->target_lpar_creds, + atomic_set(&caps->nr_total_credits, be16_to_cpu(hv_caps->target_lpar_creds)); if (feat == VAS_GZIP_DEF_FEAT) { caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds); diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h index 4ecb3fcabd10..d6ea8ab8b07a 100644 --- a/arch/powerpc/platforms/pseries/vas.h +++ b/arch/powerpc/platforms/pseries/vas.h @@ -72,9 +72,8 @@ struct vas_cop_feat_caps { }; /* Total LPAR available credits. Can be different from max LPAR */ /* credits due to DLPAR operation */ - atomic_t target_lpar_creds; - atomic_t used_lpar_creds; /* Used credits so far */ - u16 avail_lpar_creds; /* Remaining available credits */ + atomic_t nr_total_credits; /* Total credits assigned to LPAR */ + atomic_t nr_used_credits; /* Used credits so far */ }; /* From 976410cd2cb4c6ed53bd12c192fc46bbcc0fbce7 Mon Sep 17 00:00:00 2001 From: Haren Myneni <haren@linux.ibm.com> Date: Mon, 28 Feb 2022 17:12:04 -0800 Subject: [PATCH 121/179] powerpc/pseries/vas: Save PID in pseries_vas_window struct The kernel sets the VAS window with PID when it is opened in the hypervisor. During DLPAR operation, windows can be closed and reopened in the hypervisor when the credit is available. 
So saves this PID in pseries_vas_window struct when the window is opened initially and reuse it later during DLPAR operation. Signed-off-by: Haren Myneni <haren@linux.ibm.com> Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/a57cbe6d292fe49ad55a0b49c5679d6a24d8fe73.camel@linux.ibm.com --- arch/powerpc/platforms/pseries/vas.c | 9 +++++---- arch/powerpc/platforms/pseries/vas.h | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index 18aae037ffe9..1035446f985b 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -107,7 +107,6 @@ static int h_deallocate_vas_window(u64 winid) static int h_modify_vas_window(struct pseries_vas_window *win) { long rc; - u32 lpid = mfspr(SPRN_PID); /* * AMR value is not supported in Linux VAS implementation. @@ -115,7 +114,7 @@ static int h_modify_vas_window(struct pseries_vas_window *win) */ do { rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW, - win->vas_win.winid, lpid, 0, + win->vas_win.winid, win->pid, 0, VAS_MOD_WIN_FLAGS, 0); rc = hcall_return_busy_check(rc); @@ -124,8 +123,8 @@ static int h_modify_vas_window(struct pseries_vas_window *win) if (rc == H_SUCCESS) return 0; - pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u lpid %u\n", - rc, win->vas_win.winid, lpid); + pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n", + rc, win->vas_win.winid, win->pid); return -EIO; } @@ -338,6 +337,8 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, } } + txwin->pid = mfspr(SPRN_PID); + /* * Allocate / Deallocate window hcalls and setup / free IRQs * have to be protected with mutex. diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h index d6ea8ab8b07a..2872532ed72a 100644 --- a/arch/powerpc/platforms/pseries/vas.h +++ b/arch/powerpc/platforms/pseries/vas.h @@ -114,6 +114,7 @@ struct pseries_vas_window { u64 domain[6]; /* Associativity domain Ids */ /* this window is allocated */ u64 util; + u32 pid; /* PID associated with this window */ /* List of windows opened which is used for LPM */ struct list_head win_list; From 1fe3a33ba0a37e7aa0df0acbe31d5dda7610c16e Mon Sep 17 00:00:00 2001 From: Haren Myneni <haren@linux.ibm.com> Date: Mon, 28 Feb 2022 17:12:41 -0800 Subject: [PATCH 122/179] powerpc/vas: Add paste address mmap fault handler The user space opens VAS windows and issues NX requests by pasting CRB on the corresponding paste address mmap. When the system lost credits due to core removal, the kernel has to close the window in the hypervisor and make the window inactive by unmapping this paste address. Also the OS has to handle NX request page faults if the user space issue NX requests. This handler maps the new paste address with the same VMA when the window is active again (due to core add with DLPAR). Otherwise returns paste failure. 
Signed-off-by: Haren Myneni <haren@linux.ibm.com> Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/3956e1c1fdfde69127055ff1c0256c7d71104030.camel@linux.ibm.com --- arch/powerpc/include/asm/vas.h | 10 ++++ arch/powerpc/platforms/book3s/vas-api.c | 68 +++++++++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 57573d9c1e09..27251af18c65 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -29,6 +29,12 @@ #define VAS_THRESH_FIFO_GT_QTR_FULL 2 #define VAS_THRESH_FIFO_GT_EIGHTH_FULL 3 +/* + * VAS window Linux status bits + */ +#define VAS_WIN_ACTIVE 0x0 /* Used in platform independent */ + /* vas mmap() */ + /* * Get/Set bit fields */ @@ -59,6 +65,9 @@ struct vas_user_win_ref { struct pid *pid; /* PID of owner */ struct pid *tgid; /* Thread group ID of owner */ struct mm_struct *mm; /* Linux process mm_struct */ + struct mutex mmap_mutex; /* protects paste address mmap() */ + /* with DLPAR close/open windows */ + struct vm_area_struct *vma; /* Save VMA and used in DLPAR ops */ }; /* @@ -67,6 +76,7 @@ struct vas_user_win_ref { struct vas_window { u32 winid; u32 wcreds_max; /* Window credits */ + u32 status; /* Window status used in OS */ enum vas_cop_type cop; struct vas_user_win_ref task_ref; char *dbgname; diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index 4d82c92ddd52..217b4a624d09 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -316,6 +316,7 @@ static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg) return PTR_ERR(txwin); } + mutex_init(&txwin->task_ref.mmap_mutex); cp_inst->txwin = txwin; return 0; @@ -350,6 +351,70 @@ static int coproc_release(struct inode *inode, struct file *fp) return 0; } +/* + * This fault handler is invoked when the core generates page fault on + * the paste address. Happens if the kernel closes window in hypervisor + * (on pseries) due to lost credit or the paste address is not mapped. + */ +static vm_fault_t vas_mmap_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct file *fp = vma->vm_file; + struct coproc_instance *cp_inst = fp->private_data; + struct vas_window *txwin; + vm_fault_t fault; + u64 paste_addr; + + /* + * window is not opened. Shouldn't expect this error. + */ + if (!cp_inst || !cp_inst->txwin) { + pr_err("%s(): Unexpected fault on paste address with TX window closed\n", + __func__); + return VM_FAULT_SIGBUS; + } + + txwin = cp_inst->txwin; + /* + * When the LPAR lost credits due to core removal or during + * migration, invalidate the existing mapping for the current + * paste addresses and set windows in-active (zap_page_range in + * reconfig_close_windows()). + * New mapping will be done later after migration or new credits + * available. So continue to receive faults if the user space + * issue NX request. + */ + if (txwin->task_ref.vma != vmf->vma) { + pr_err("%s(): No previous mapping with paste address\n", + __func__); + return VM_FAULT_SIGBUS; + } + + mutex_lock(&txwin->task_ref.mmap_mutex); + /* + * The window may be inactive due to lost credit (Ex: core + * removal with DLPAR). If the window is active again when + * the credit is available, map the new paste address at the + * the window virtual address. 
+ */ + if (txwin->status == VAS_WIN_ACTIVE) { + paste_addr = cp_inst->coproc->vops->paste_addr(txwin); + if (paste_addr) { + fault = vmf_insert_pfn(vma, vma->vm_start, + (paste_addr >> PAGE_SHIFT)); + mutex_unlock(&txwin->task_ref.mmap_mutex); + return fault; + } + } + mutex_unlock(&txwin->task_ref.mmap_mutex); + + return VM_FAULT_SIGBUS; +} + +static const struct vm_operations_struct vas_vm_ops = { + .fault = vas_mmap_fault, +}; + static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) { struct coproc_instance *cp_inst = fp->private_data; @@ -398,6 +463,9 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__, paste_addr, vma->vm_start, rc); + txwin->task_ref.vma = vma; + vma->vm_ops = &vas_vm_ops; + return rc; } From b5c63d90cc2de8ac6724fec84d1d72cfebcae41d Mon Sep 17 00:00:00 2001 From: Haren Myneni <haren@linux.ibm.com> Date: Mon, 28 Feb 2022 17:13:15 -0800 Subject: [PATCH 123/179] powerpc/vas: Return paste instruction failure if no active window The VAS window may not be active if the system looses credits and the NX generates page fault when it receives request on unmap paste address. The kernel handles the fault by remap new paste address if the window is active again, Otherwise return the paste instruction failure if the executed instruction that caused the fault was a paste. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Haren Myneni <haren@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/492b9aefd593061d51dda67ee4d2fc449c000dce.camel@linux.ibm.com --- arch/powerpc/include/asm/ppc-opcode.h | 2 + arch/powerpc/platforms/book3s/vas-api.c | 54 +++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 9675303b724e..82f1f0041c6f 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -262,6 +262,8 @@ #define PPC_INST_MFSPR_PVR 0x7c1f42a6 #define PPC_INST_MFSPR_PVR_MASK 0xfc1ffffe #define PPC_INST_MTMSRD 0x7c000164 +#define PPC_INST_PASTE 0x7c20070d +#define PPC_INST_PASTE_MASK 0xfc2007ff #define PPC_INST_POPCNTB 0x7c0000f4 #define PPC_INST_POPCNTB_MASK 0xfc0007fe #define PPC_INST_RFEBB 0x4c000124 diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index 217b4a624d09..82f32781c5d2 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -351,6 +351,41 @@ static int coproc_release(struct inode *inode, struct file *fp) return 0; } +/* + * If the executed instruction that caused the fault was a paste, then + * clear regs CR0[EQ], advance NIP, and return 0. Else return error code. + */ +static int do_fail_paste(void) +{ + struct pt_regs *regs = current->thread.regs; + u32 instword; + + if (WARN_ON_ONCE(!regs)) + return -EINVAL; + + if (WARN_ON_ONCE(!user_mode(regs))) + return -EINVAL; + + /* + * If we couldn't translate the instruction, the driver should + * return success without handling the fault, it will be retried + * or the instruction fetch will fault. + */ + if (get_user(instword, (u32 __user *)(regs->nip))) + return -EAGAIN; + + /* + * Not a paste instruction, driver may fail the fault. 
+ */ + if ((instword & PPC_INST_PASTE_MASK) != PPC_INST_PASTE) + return -ENOENT; + + regs->ccr &= ~0xe0000000; /* Clear CR0[0-2] to fail paste */ + regs_add_return_ip(regs, 4); /* Emulate the paste */ + + return 0; +} + /* * This fault handler is invoked when the core generates page fault on * the paste address. Happens if the kernel closes window in hypervisor @@ -364,6 +399,7 @@ static vm_fault_t vas_mmap_fault(struct vm_fault *vmf) struct vas_window *txwin; vm_fault_t fault; u64 paste_addr; + int ret; /* * window is not opened. Shouldn't expect this error. @@ -408,6 +444,24 @@ static vm_fault_t vas_mmap_fault(struct vm_fault *vmf) } mutex_unlock(&txwin->task_ref.mmap_mutex); + /* + * Received this fault due to closing the actual window. + * It can happen during migration or lost credits. + * Since no mapping, return the paste instruction failure + * to the user space. + */ + ret = do_fail_paste(); + /* + * The user space can retry several times until success (needed + * for migration) or should fallback to SW compression or + * manage with the existing open windows if available. + * Looking at sysfs interface, it can determine whether these + * failures are coming during migration or core removal: + * nr_used_credits > nr_total_credits when lost credits + */ + if (!ret || (ret == -EAGAIN)) + return VM_FAULT_NOPAGE; + return VM_FAULT_SIGBUS; } From 6a8d4ca891aa5f9402973eab5d7d9cf3929678b7 Mon Sep 17 00:00:00 2001 From: Haren Myneni <haren@linux.ibm.com> Date: Mon, 28 Feb 2022 17:13:54 -0800 Subject: [PATCH 124/179] powerpc/vas: Map paste address only if window is active The paste address mapping is done with mmap() after the window is opened with ioctl. The partition has to close VAS windows in the hypervisor if it lost credits due to DLPAR core removal. But the kernel marks these windows inactive until the previously lost credits are available later. If the window is inactive due to DLPAR after this mmap(), the paste instruction returns failure until the the OS reopens this window again. Before the user space issuing mmap(), there is a possibility of happening DLPAR core removal event which causes the corresponding window inactive. So if the window is not active, return mmap() failure with -EACCES and expects the user space reissue mmap() when the window is active or open a new window when the credit is available. Signed-off-by: Haren Myneni <haren@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/bbb203c26b324534e25658cb1dbbcb5160a2f93a.camel@linux.ibm.com --- arch/powerpc/platforms/book3s/vas-api.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index 82f32781c5d2..f9a1615b74da 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -497,10 +497,29 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) return -EACCES; } + /* + * The initial mmap is done after the window is opened + * with ioctl. But before mmap(), this window can be closed in + * the hypervisor due to lost credit (core removal on pseries). + * So if the window is not active, return mmap() failure with + * -EACCES and expects the user space reissue mmap() when it + * is active again or open new window when the credit is available. + * mmap_mutex protects the paste address mmap() with DLPAR + * close/open event and allows mmap() only when the window is + * active. 
+	 */
+	mutex_lock(&txwin->task_ref.mmap_mutex);
+	if (txwin->status != VAS_WIN_ACTIVE) {
+		pr_err("%s(): Window is not active\n", __func__);
+		rc = -EACCES;
+		goto out;
+	}
+
 	paste_addr = cp_inst->coproc->vops->paste_addr(txwin);
 	if (!paste_addr) {
 		pr_err("%s(): Window paste address failed\n", __func__);
-		return -EINVAL;
+		rc = -EINVAL;
+		goto out;
 	}
 
 	pfn = paste_addr >> PAGE_SHIFT;
@@ -520,6 +539,8 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma)
 	txwin->task_ref.vma = vma;
 	vma->vm_ops = &vas_vm_ops;
 
+out:
+	mutex_unlock(&txwin->task_ref.mmap_mutex);
 	return rc;
 }
 
From 8ef7b9e1765a52c8023d9133a2438ac9f6da486a Mon Sep 17 00:00:00 2001
From: Haren Myneni <haren@linux.ibm.com>
Date: Mon, 28 Feb 2022 17:14:28 -0800
Subject: [PATCH 125/179] powerpc/pseries/vas: Close windows with DLPAR core
 removal

The hypervisor assigns VAS credits (windows) for each LPAR based on
the number of cores configured in that system. The OS is expected to
release credits when cores are removed, and may allocate more when
cores are added. So there is a possibility of using excessive credits
(windows) in the LPAR, and the hypervisor expects the system to close
the excessive windows so that NX load can be equally distributed
across all LPARs in the system.

When the OS closes the excessive windows in the hypervisor, it sets
the window status inactive and invalidates the window's virtual
address mapping. User space receives a paste instruction failure if
any NX requests are issued on the inactive window. It can then carry
on with the available open windows or retry NX requests until this
window is active again.

This patch also adds a notifier for core removal/add to close windows
in the hypervisor if the system lost credits (core removal) and to
reopen them when the previously lost credits become available.

Signed-off-by: Haren Myneni <haren@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/108928f9c00a48cc6a722315d482d07cf66acf5a.camel@linux.ibm.com
---
 arch/powerpc/include/asm/vas.h       |   2 +
 arch/powerpc/platforms/pseries/vas.c | 207 +++++++++++++++++++++++++--
 arch/powerpc/platforms/pseries/vas.h |   3 +
 3 files changed, 204 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
index 27251af18c65..6baf7b9ffed4 100644
--- a/arch/powerpc/include/asm/vas.h
+++ b/arch/powerpc/include/asm/vas.h
@@ -34,6 +34,8 @@
  */
 #define VAS_WIN_ACTIVE		0x0	/* Used in platform independent */
					/* vas mmap() */
+/* Window is closed in the hypervisor due to lost credit */
+#define VAS_WIN_NO_CRED_CLOSE	0x00000001
 
 /*
  * Get/Set bit fields
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index 1035446f985b..a297720bcdae 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -370,13 +370,28 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
 	if (rc)
 		goto out_free;
 
-	vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
 	txwin->win_type = cop_feat_caps->win_type;
 	mutex_lock(&vas_pseries_mutex);
-	list_add(&txwin->win_list, &caps->list);
+	/*
+	 * Possible to lose the acquired credit with DLPAR core
+	 * removal after the window is opened. So if there are any
+	 * closed windows (means with lost credits), do not give new
+	 * window to user space. New windows will be opened only
+	 * after the existing windows are reopened when credits are
+	 * available.
+ */ + if (!caps->nr_close_wins) { + list_add(&txwin->win_list, &caps->list); + caps->nr_open_windows++; + mutex_unlock(&vas_pseries_mutex); + vas_user_win_add_mm_context(&txwin->vas_win.task_ref); + return &txwin->vas_win; + } mutex_unlock(&vas_pseries_mutex); - return &txwin->vas_win; + put_vas_user_win_ref(&txwin->vas_win.task_ref); + rc = -EBUSY; + pr_err("No credit is available to allocate window\n"); out_free: /* @@ -439,14 +454,24 @@ static int vas_deallocate_window(struct vas_window *vwin) caps = &vascaps[win->win_type].caps; mutex_lock(&vas_pseries_mutex); - rc = deallocate_free_window(win); - if (rc) { - mutex_unlock(&vas_pseries_mutex); - return rc; - } + /* + * VAS window is already closed in the hypervisor when + * lost the credit. So just remove the entry from + * the list, remove task references and free vas_window + * struct. + */ + if (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) { + rc = deallocate_free_window(win); + if (rc) { + mutex_unlock(&vas_pseries_mutex); + return rc; + } + } else + vascaps[win->win_type].nr_close_wins--; list_del(&win->win_list); atomic_dec(&caps->nr_used_credits); + vascaps[win->win_type].nr_open_windows--; mutex_unlock(&vas_pseries_mutex); put_vas_user_win_ref(&vwin->task_ref); @@ -501,6 +526,7 @@ static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, memset(vcaps, 0, sizeof(*vcaps)); INIT_LIST_HEAD(&vcaps->list); + vcaps->feat = feat; caps = &vcaps->caps; rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat, @@ -539,6 +565,168 @@ static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, return 0; } +/* + * The hypervisor reduces the available credits if the LPAR lost core. It + * means the excessive windows should not be active and the user space + * should not be using these windows to send compression requests to NX. + * So the kernel closes the excessive windows and unmap the paste address + * such that the user space receives paste instruction failure. Then up to + * the user space to fall back to SW compression and manage with the + * existing windows. + */ +static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds) +{ + struct pseries_vas_window *win, *tmp; + struct vas_user_win_ref *task_ref; + struct vm_area_struct *vma; + int rc = 0; + + list_for_each_entry_safe(win, tmp, &vcap->list, win_list) { + /* + * This window is already closed due to lost credit + * before. Go for next window. + */ + if (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) + continue; + + task_ref = &win->vas_win.task_ref; + mutex_lock(&task_ref->mmap_mutex); + vma = task_ref->vma; + /* + * Number of available credits are reduced, So select + * and close windows. + */ + win->vas_win.status |= VAS_WIN_NO_CRED_CLOSE; + + mmap_write_lock(task_ref->mm); + /* + * vma is set in the original mapping. But this mapping + * is done with mmap() after the window is opened with ioctl. + * so we may not see the original mapping if the core remove + * is done before the original mmap() and after the ioctl. + */ + if (vma) + zap_page_range(vma, vma->vm_start, + vma->vm_end - vma->vm_start); + + mmap_write_unlock(task_ref->mm); + mutex_unlock(&task_ref->mmap_mutex); + /* + * Close VAS window in the hypervisor, but do not + * free vas_window struct since it may be reused + * when the credit is available later (DLPAR with + * adding cores). This struct will be used + * later when the process issued with close(FD). 
+ */ + rc = deallocate_free_window(win); + if (rc) + return rc; + + vcap->nr_close_wins++; + + if (!--excess_creds) + break; + } + + return 0; +} + +/* + * Get new VAS capabilities when the core add/removal configuration + * changes. Reconfig window configurations based on the credits + * availability from this new capabilities. + */ +static int vas_reconfig_capabilties(u8 type) +{ + struct hv_vas_cop_feat_caps *hv_caps; + struct vas_cop_feat_caps *caps; + int old_nr_creds, new_nr_creds; + struct vas_caps *vcaps; + int rc = 0, nr_active_wins; + + if (type >= VAS_MAX_FEAT_TYPE) { + pr_err("Invalid credit type %d\n", type); + return -EINVAL; + } + + vcaps = &vascaps[type]; + caps = &vcaps->caps; + + hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL); + if (!hv_caps) + return -ENOMEM; + + mutex_lock(&vas_pseries_mutex); + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, vcaps->feat, + (u64)virt_to_phys(hv_caps)); + if (rc) + goto out; + + new_nr_creds = be16_to_cpu(hv_caps->target_lpar_creds); + + old_nr_creds = atomic_read(&caps->nr_total_credits); + + atomic_set(&caps->nr_total_credits, new_nr_creds); + /* + * The total number of available credits may be decreased or + * inceased with DLPAR operation. Means some windows have to be + * closed / reopened. Hold the vas_pseries_mutex so that the + * the user space can not open new windows. + */ + if (old_nr_creds > new_nr_creds) { + /* + * # active windows is more than new LPAR available + * credits. So close the excessive windows. + * On pseries, each window will have 1 credit. + */ + nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins; + if (nr_active_wins > new_nr_creds) + rc = reconfig_close_windows(vcaps, + nr_active_wins - new_nr_creds); + } + +out: + mutex_unlock(&vas_pseries_mutex); + kfree(hv_caps); + return rc; +} +/* + * Total number of default credits available (target_credits) + * in LPAR depends on number of cores configured. It varies based on + * whether processors are in shared mode or dedicated mode. + * Get the notifier when CPU configuration is changed with DLPAR + * operation so that get the new target_credits (vas default capabilities) + * and then update the existing windows usage if needed. 
+ */
+static int pseries_vas_notifier(struct notifier_block *nb,
+				unsigned long action, void *data)
+{
+	struct of_reconfig_data *rd = data;
+	struct device_node *dn = rd->dn;
+	const __be32 *intserv = NULL;
+	int len, rc = 0;
+
+	if ((action == OF_RECONFIG_ATTACH_NODE) ||
+	    (action == OF_RECONFIG_DETACH_NODE))
+		intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
+					  &len);
+	/*
+	 * Processor config is not changed
+	 */
+	if (!intserv)
+		return NOTIFY_OK;
+
+	rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE);
+	if (rc)
+		pr_err("Failed reconfig VAS capabilities with DLPAR\n");
+
+	return rc;
+}
+
+static struct notifier_block pseries_vas_nb = {
+	.notifier_call = pseries_vas_notifier,
+};
+
 static int __init pseries_vas_init(void)
 {
 	struct hv_vas_cop_feat_caps *hv_cop_caps;
@@ -592,6 +780,9 @@ static int __init pseries_vas_init(void)
 		goto out_cop;
 	}
 
+	if (copypaste_feat && firmware_has_feature(FW_FEATURE_LPAR))
+		of_reconfig_notifier_register(&pseries_vas_nb);
+
 	pr_info("GZIP feature is available\n");
 
 out_cop:
diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h
index 2872532ed72a..701363cfd7c1 100644
--- a/arch/powerpc/platforms/pseries/vas.h
+++ b/arch/powerpc/platforms/pseries/vas.h
@@ -83,6 +83,9 @@ struct vas_cop_feat_caps {
 struct vas_caps {
 	struct vas_cop_feat_caps caps;
 	struct list_head list;	/* List of open windows */
+	int nr_close_wins;	/* closed windows in the hypervisor for DLPAR */
+	int nr_open_windows;	/* Number of successful open windows */
+	u8 feat;		/* Feature type */
 };
 
 /*
From c656cfe571a9b8b882e31177f554bd79141fc015 Mon Sep 17 00:00:00 2001
From: Haren Myneni <haren@linux.ibm.com>
Date: Mon, 28 Feb 2022 17:15:04 -0800
Subject: [PATCH 126/179] powerpc/pseries/vas: Reopen windows with DLPAR core
 add

VAS windows can be closed in the hypervisor due to lost credits when a
core is removed. If NX requests are issued on these inactive windows,
the OS gets page faults and a paste failure is returned to user space.
If the lost credits become available later with a core add, reopen
these windows and set them active. When the OS later sees page faults
on these active windows, it creates a mapping at the new paste
address. User space can then continue to use these windows and send HW
compression requests to NX successfully.

Signed-off-by: Haren Myneni <haren@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/d9f360e21355e6826142c81146acfa9b60bc7ecc.camel@linux.ibm.com
---
 arch/powerpc/platforms/pseries/vas.c | 91 +++++++++++++++++++++++++++-
 1 file changed, 90 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index a297720bcdae..96178dd58adf 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -565,6 +565,88 @@ static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
 	return 0;
 }
 
+/*
+ * VAS windows can be closed due to lost credits when the core is
+ * removed. So reopen them if credits are available due to DLPAR
+ * core add and set the window active status. When NX sees the page
+ * fault on the unmapped paste address, the kernel handles the fault
+ * by setting the remapping to new paste address if the window is
+ * active.
+ */ +static int reconfig_open_windows(struct vas_caps *vcaps, int creds) +{ + long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; + struct vas_cop_feat_caps *caps = &vcaps->caps; + struct pseries_vas_window *win = NULL, *tmp; + int rc, mv_ents = 0; + + /* + * Nothing to do if there are no closed windows. + */ + if (!vcaps->nr_close_wins) + return 0; + + /* + * For the core removal, the hypervisor reduces the credits + * assigned to the LPAR and the kernel closes VAS windows + * in the hypervisor depends on reduced credits. The kernel + * uses LIFO (the last windows that are opened will be closed + * first) and expects to open in the same order when credits + * are available. + * For example, 40 windows are closed when the LPAR lost 2 cores + * (dedicated). If 1 core is added, this LPAR can have 20 more + * credits. It means the kernel can reopen 20 windows. So move + * 20 entries in the VAS windows lost and reopen next 20 windows. + */ + if (vcaps->nr_close_wins > creds) + mv_ents = vcaps->nr_close_wins - creds; + + list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) { + if (!mv_ents) + break; + + mv_ents--; + } + + list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) { + /* + * Nothing to do on this window if it is not closed + * with VAS_WIN_NO_CRED_CLOSE + */ + if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) + continue; + + rc = allocate_setup_window(win, (u64 *)&domain[0], + caps->win_type); + if (rc) + return rc; + + rc = h_modify_vas_window(win); + if (rc) + goto out; + + mutex_lock(&win->vas_win.task_ref.mmap_mutex); + /* + * Set window status to active + */ + win->vas_win.status &= ~VAS_WIN_NO_CRED_CLOSE; + mutex_unlock(&win->vas_win.task_ref.mmap_mutex); + win->win_type = caps->win_type; + if (!--vcaps->nr_close_wins) + break; + } + + return 0; +out: + /* + * Window modify HCALL failed. So close the window to the + * hypervisor and return. + */ + free_irq_setup(win); + h_deallocate_vas_window(win->vas_win.winid); + return rc; +} + /* * The hypervisor reduces the available credits if the LPAR lost core. It * means the excessive windows should not be active and the user space @@ -673,7 +755,14 @@ static int vas_reconfig_capabilties(u8 type) * closed / reopened. Hold the vas_pseries_mutex so that the * the user space can not open new windows. */ - if (old_nr_creds > new_nr_creds) { + if (old_nr_creds < new_nr_creds) { + /* + * If the existing target credits is less than the new + * target, reopen windows if they are closed due to + * the previous DLPAR (core removal). + */ + rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds); + } else { /* * # active windows is more than new LPAR available * credits. So close the excessive windows. From b903737bc522e0ef3f45a2a60c364ff547572c9b Mon Sep 17 00:00:00 2001 From: Haren Myneni <haren@linux.ibm.com> Date: Mon, 28 Feb 2022 17:15:36 -0800 Subject: [PATCH 127/179] powerpc/pseries/vas: sysfs interface to export capabilities The hypervisor provides the available VAS GZIP capabilities such as default or QoS window type and the target available credits in each type. This patch creates sysfs entries and exports the target, used and the available credits for each feature. This interface can be used by the user space to determine the credits usage or to set the target credits in the case of QoS type (for DLPAR). /sys/devices/vas/vas0/gzip/default_capabilities (default GZIP capabilities) nr_total_credits /* Total credits available. 
Can be changed with DLPAR operation */
	nr_used_credits		/* Used credits */

/sys/devices/vas/vas0/gzip/qos_capabilities (QoS GZIP capabilities)
	nr_total_credits
	nr_used_credits

Signed-off-by: Haren Myneni <haren@linux.ibm.com>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/702d8b626ebfac2b52f4995eebeafe1c9a6fcb75.camel@linux.ibm.com
---
 arch/powerpc/platforms/pseries/Makefile    |   2 +-
 arch/powerpc/platforms/pseries/vas-sysfs.c | 226 +++++++++++++++++++++
 arch/powerpc/platforms/pseries/vas.c       |   6 +
 arch/powerpc/platforms/pseries/vas.h       |   6 +
 4 files changed, 239 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/platforms/pseries/vas-sysfs.c

diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index ee60b59024b4..29b522d2c755 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -29,6 +29,6 @@ obj-$(CONFIG_PPC_SVM)		+= svm.o
 obj-$(CONFIG_FA_DUMP)		+= rtas-fadump.o
 obj-$(CONFIG_SUSPEND)		+= suspend.o
-obj-$(CONFIG_PPC_VAS)		+= vas.o
+obj-$(CONFIG_PPC_VAS)		+= vas.o vas-sysfs.o
 
 obj-$(CONFIG_ARCH_HAS_CC_PLATFORM)	+= cc_platform.o
diff --git a/arch/powerpc/platforms/pseries/vas-sysfs.c b/arch/powerpc/platforms/pseries/vas-sysfs.c
new file mode 100644
index 000000000000..e24d3edb3021
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/vas-sysfs.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2022-23 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+
+#include "vas.h"
+
+#ifdef CONFIG_SYSFS
+static struct kobject *pseries_vas_kobj;
+static struct kobject *gzip_caps_kobj;
+
+struct vas_caps_entry {
+	struct kobject kobj;
+	struct vas_cop_feat_caps *caps;
+};
+
+#define to_caps_entry(entry) container_of(entry, struct vas_caps_entry, kobj)
+
+#define sysfs_caps_entry_read(_name)					\
+static ssize_t _name##_show(struct vas_cop_feat_caps *caps, char *buf)	\
+{									\
+	return sprintf(buf, "%d\n", atomic_read(&caps->_name));	\
+}
+
+struct vas_sysfs_entry {
+	struct attribute attr;
+	ssize_t (*show)(struct vas_cop_feat_caps *, char *);
+	ssize_t (*store)(struct vas_cop_feat_caps *, const char *, size_t);
+};
+
+#define VAS_ATTR_RO(_name)						\
+	sysfs_caps_entry_read(_name);					\
+	static struct vas_sysfs_entry _name##_attribute = __ATTR(_name,	\
+				0444, _name##_show, NULL);
+
+/*
+ * Create sysfs interface:
+ * /sys/devices/vas/vas0/gzip/default_capabilities
+ *	This directory contains the following VAS GZIP capabilities
+ *	for the defaule credit type.
+ * /sys/devices/vas/vas0/gzip/default_capabilities/nr_total_credits
+ *	Total number of default credits assigned to the LPAR which
+ *	can be changed with DLPAR operation.
+ * /sys/devices/vas/vas0/gzip/default_capabilities/nr_used_credits
+ *	Number of credits used by the user space. One credit will
+ *	be assigned for each window open.
+ *
+ * /sys/devices/vas/vas0/gzip/qos_capabilities
+ *	This directory contains the following VAS GZIP capabilities
+ *	for the Quality of Service (QoS) credit type.
+ * /sys/devices/vas/vas0/gzip/qos_capabilities/nr_total_credits
+ *	Total number of QoS credits assigned to the LPAR. The user
+ *	has to define this value using HMC interface. It can be
+ *	changed dynamically by the user.
+ * /sys/devices/vas/vas0/gzip/qos_capabilities/nr_used_credits
+ *	Number of credits used by the user space.
+ */
+
+VAS_ATTR_RO(nr_total_credits);
+VAS_ATTR_RO(nr_used_credits);
+
+static struct attribute *vas_capab_attrs[] = {
+	&nr_total_credits_attribute.attr,
+	&nr_used_credits_attribute.attr,
+	NULL,
+};
+
+static ssize_t vas_type_show(struct kobject *kobj, struct attribute *attr,
+			     char *buf)
+{
+	struct vas_caps_entry *centry;
+	struct vas_cop_feat_caps *caps;
+	struct vas_sysfs_entry *entry;
+
+	centry = to_caps_entry(kobj);
+	caps = centry->caps;
+	entry = container_of(attr, struct vas_sysfs_entry, attr);
+
+	if (!entry->show)
+		return -EIO;
+
+	return entry->show(caps, buf);
+}
+
+static ssize_t vas_type_store(struct kobject *kobj, struct attribute *attr,
+			      const char *buf, size_t count)
+{
+	struct vas_caps_entry *centry;
+	struct vas_cop_feat_caps *caps;
+	struct vas_sysfs_entry *entry;
+
+	centry = to_caps_entry(kobj);
+	caps = centry->caps;
+	entry = container_of(attr, struct vas_sysfs_entry, attr);
+	if (!entry->store)
+		return -EIO;
+
+	return entry->store(caps, buf, count);
+}
+
+static void vas_type_release(struct kobject *kobj)
+{
+	struct vas_caps_entry *centry = to_caps_entry(kobj);
+	kfree(centry);
+}
+
+static const struct sysfs_ops vas_sysfs_ops = {
+	.show	= vas_type_show,
+	.store	= vas_type_store,
+};
+
+static struct kobj_type vas_attr_type = {
+	.release	= vas_type_release,
+	.sysfs_ops	= &vas_sysfs_ops,
+	.default_attrs	= vas_capab_attrs,
+};
+
+static char *vas_caps_kobj_name(struct vas_cop_feat_caps *caps,
+				struct kobject **kobj)
+{
+	if (caps->descriptor == VAS_GZIP_QOS_CAPABILITIES) {
+		*kobj = gzip_caps_kobj;
+		return "qos_capabilities";
+	} else if (caps->descriptor == VAS_GZIP_DEFAULT_CAPABILITIES) {
+		*kobj = gzip_caps_kobj;
+		return "default_capabilities";
+	} else
+		return "Unknown";
+}
+
+/*
+ * Add feature specific capability dir entry.
+ * Ex: VDefGzip or VQosGzip
+ */
+int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps)
+{
+	struct vas_caps_entry *centry;
+	struct kobject *kobj = NULL;
+	int ret = 0;
+	char *name;
+
+	centry = kzalloc(sizeof(*centry), GFP_KERNEL);
+	if (!centry)
+		return -ENOMEM;
+
+	kobject_init(&centry->kobj, &vas_attr_type);
+	centry->caps = caps;
+	name = vas_caps_kobj_name(caps, &kobj);
+
+	if (kobj) {
+		ret = kobject_add(&centry->kobj, kobj, "%s", name);
+
+		if (ret) {
+			pr_err("VAS: sysfs kobject add / event failed %d\n",
+			       ret);
+			kobject_put(&centry->kobj);
+		}
+	}
+
+	return ret;
+}
+
+static struct miscdevice vas_miscdev = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "vas",
+};
+
+/*
+ * Add VAS and VasCaps (overall capabilities) dir entries.
+ */
+int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps)
+{
+	int ret;
+
+	ret = misc_register(&vas_miscdev);
+	if (ret < 0) {
+		pr_err("%s: register vas misc device failed\n", __func__);
+		return ret;
+	}
+
+	/*
+	 * The hypervisor does not expose multiple VAS instances, but can
+	 * see multiple VAS instances on PowerNV. So create 'vas0' directory
+	 * on pseries.
+	 */
+	pseries_vas_kobj = kobject_create_and_add("vas0",
+						  &vas_miscdev.this_device->kobj);
+	if (!pseries_vas_kobj) {
+		pr_err("Failed to create VAS sysfs entry\n");
+		return -ENOMEM;
+	}
+
+	if ((vas_caps->feat_type & VAS_GZIP_QOS_FEAT_BIT) ||
+	    (vas_caps->feat_type & VAS_GZIP_DEF_FEAT_BIT)) {
+		gzip_caps_kobj = kobject_create_and_add("gzip",
+							pseries_vas_kobj);
+		if (!gzip_caps_kobj) {
+			pr_err("Failed to create VAS GZIP capability entry\n");
+			kobject_put(pseries_vas_kobj);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+#else
+int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps)
+{
+	return 0;
+}
+
+int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps)
+{
+	return 0;
+}
+#endif
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index 96178dd58adf..ca0ad191229d 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -560,6 +560,10 @@ static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
 		}
 	}
 
+	rc = sysfs_add_vas_caps(caps);
+	if (rc)
+		return rc;
+
 	copypaste_feat = true;
 
 	return 0;
@@ -844,6 +848,8 @@ static int __init pseries_vas_init(void)
 	caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
 	caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);
 
+	sysfs_pseries_vas_init(&caps_all);
+
 	hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL);
 	if (!hv_cop_caps) {
 		rc = -ENOMEM;
diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h
index 701363cfd7c1..f1bdb776021e 100644
--- a/arch/powerpc/platforms/pseries/vas.h
+++ b/arch/powerpc/platforms/pseries/vas.h
@@ -30,6 +30,9 @@
 #define VAS_COPY_PASTE_USER_MODE	0x00000001
 #define VAS_COP_OP_USER_MODE		0x00000010
 
+#define VAS_GZIP_QOS_CAPABILITIES	0x56516F73477A6970
+#define VAS_GZIP_DEFAULT_CAPABILITIES	0x56446566477A6970
+
 /*
  * Co-processor feature - GZIP QoS windows or GZIP default windows
 */
@@ -125,4 +128,7 @@ struct pseries_vas_window {
 	char *name;
 	int fault_virq;
 };
+
+int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps);
+int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps);
 #endif /* _VAS_H */
From 45f06eac30e5abebecc66e41e7c89d5b4413bac1 Mon Sep 17 00:00:00 2001
From: Haren Myneni <haren@linux.ibm.com>
Date: Mon, 28 Feb 2022 17:16:10 -0800
Subject: [PATCH 128/179] powerpc/pseries/vas: Add 'update_total_credits'
 entry for QoS capabilities

pseries supports two types of credits - Default (uses normal priority
FIFO) and Quality of service (QoS, which uses a high priority FIFO).
The user decides the number of QoS credits and sets this value with
the HMC interface. The total credits for QoS capabilities can be
changed dynamically with the HMC interface, which invokes drmgr to
communicate the change to the kernel.

This patch creates an 'update_total_credits' entry for QoS
capabilities so that the drmgr command can write the new target QoS
credits in sysfs. Instead of using this value directly, the kernel
gets the new QoS capabilities from the hypervisor whenever
update_total_credits is updated, to stay in sync with the QoS target
credits in the hypervisor.
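For example, the re-read could be triggered from user space with a
plain sysfs write. A minimal, illustrative sketch only (not part of
this patch; it assumes the sysfs path created earlier in this series,
and the written value is just a trigger that must parse as a u16):

	/* Hypothetical userspace trigger: the kernel ignores the value
	 * beyond parsing it and re-queries the QoS capabilities from
	 * the hypervisor. */
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		const char *path = "/sys/devices/vas/vas0/gzip/"
				   "qos_capabilities/update_total_credits";
		int fd = open(path, O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		if (write(fd, "0", 1) < 0)
			perror("write");
		close(fd);
		return 0;
	}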
Signed-off-by: Haren Myneni <haren@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/b01ef31a0f964686d00243e7de7f09c73c07e69e.camel@linux.ibm.com
---
 arch/powerpc/platforms/pseries/vas-sysfs.c | 54 +++++++++++++++++++---
 arch/powerpc/platforms/pseries/vas.c       |  2 +-
 arch/powerpc/platforms/pseries/vas.h       |  1 +
 3 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/vas-sysfs.c b/arch/powerpc/platforms/pseries/vas-sysfs.c
index e24d3edb3021..4a7fcde5afc0 100644
--- a/arch/powerpc/platforms/pseries/vas-sysfs.c
+++ b/arch/powerpc/platforms/pseries/vas-sysfs.c
@@ -25,6 +25,27 @@ struct vas_caps_entry {
 
 #define to_caps_entry(entry) container_of(entry, struct vas_caps_entry, kobj)
 
+/*
+ * This function is used to get the notification from the drmgr when
+ * QoS credits are changed. Though receiving the target total QoS
+ * credits here, get the official QoS capabilities from the hypervisor.
+ */
+static ssize_t update_total_credits_trigger(struct vas_cop_feat_caps *caps,
+					    const char *buf, size_t count)
+{
+	int err;
+	u16 creds;
+
+	err = kstrtou16(buf, 0, &creds);
+	if (!err)
+		err = vas_reconfig_capabilties(caps->win_type);
+
+	if (err)
+		return -EINVAL;
+
+	return count;
+}
+
 #define sysfs_caps_entry_read(_name)					\
 static ssize_t _name##_show(struct vas_cop_feat_caps *caps, char *buf)	\
 {									\
@@ -63,17 +84,29 @@ struct vas_sysfs_entry {
 *	changed dynamically by the user.
 * /sys/devices/vas/vas0/gzip/qos_capabilities/nr_used_credits
 *	Number of credits used by the user space.
+ * /sys/devices/vas/vas0/gzip/qos_capabilities/update_total_credits
+ *	Update total QoS credits dynamically
 */
 
 VAS_ATTR_RO(nr_total_credits);
 VAS_ATTR_RO(nr_used_credits);
 
-static struct attribute *vas_capab_attrs[] = {
+static struct vas_sysfs_entry update_total_credits_attribute =
+	__ATTR(update_total_credits, 0200, NULL, update_total_credits_trigger);
+
+static struct attribute *vas_def_capab_attrs[] = {
 	&nr_total_credits_attribute.attr,
 	&nr_used_credits_attribute.attr,
 	NULL,
 };
 
+static struct attribute *vas_qos_capab_attrs[] = {
+	&nr_total_credits_attribute.attr,
+	&nr_used_credits_attribute.attr,
+	&update_total_credits_attribute.attr,
+	NULL,
+};
+
 static ssize_t vas_type_show(struct kobject *kobj, struct attribute *attr,
 			     char *buf)
 {
@@ -118,19 +151,29 @@ static const struct sysfs_ops vas_sysfs_ops = {
 	.store	= vas_type_store,
 };
 
-static struct kobj_type vas_attr_type = {
+static struct kobj_type vas_def_attr_type = {
 	.release	= vas_type_release,
 	.sysfs_ops	= &vas_sysfs_ops,
-	.default_attrs	= vas_capab_attrs,
+	.default_attrs	= vas_def_capab_attrs,
 };
 
-static char *vas_caps_kobj_name(struct vas_cop_feat_caps *caps,
+static struct kobj_type vas_qos_attr_type = {
+	.release	= vas_type_release,
+	.sysfs_ops	= &vas_sysfs_ops,
+	.default_attrs	= vas_qos_capab_attrs,
+};
+
+static char *vas_caps_kobj_name(struct vas_caps_entry *centry,
 				struct kobject **kobj)
 {
+	struct vas_cop_feat_caps *caps = centry->caps;
+
 	if (caps->descriptor == VAS_GZIP_QOS_CAPABILITIES) {
+		kobject_init(&centry->kobj, &vas_qos_attr_type);
 		*kobj = gzip_caps_kobj;
 		return "qos_capabilities";
 	} else if (caps->descriptor == VAS_GZIP_DEFAULT_CAPABILITIES) {
+		kobject_init(&centry->kobj, &vas_def_attr_type);
 		*kobj = gzip_caps_kobj;
 		return "default_capabilities";
 	} else
@@ -152,9 +195,8 @@ int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps)
 	if (!centry)
 		return -ENOMEM;
 
-	kobject_init(&centry->kobj, &vas_attr_type);
 	centry->caps = caps;
-	name = vas_caps_kobj_name(caps, &kobj);
+	name = vas_caps_kobj_name(centry, &kobj);
 
 	if (kobj) {
 		ret = kobject_add(&centry->kobj, kobj, "%s", name);
 
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index ca0ad191229d..591c7597db5a 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -722,7 +722,7 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds)
 * changes. Reconfig window configurations based on the credits
 * availability from this new capabilities.
 */
-static int vas_reconfig_capabilties(u8 type)
+int vas_reconfig_capabilties(u8 type)
 {
diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h
index f1bdb776021e..4ddb1001a0aa 100644
--- a/arch/powerpc/platforms/pseries/vas.h
+++ b/arch/powerpc/platforms/pseries/vas.h
@@ -130,5 +130,6 @@ struct pseries_vas_window {
 };
 
 int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps);
+int vas_reconfig_capabilties(u8 type);
 int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps);
 #endif /* _VAS_H */
From 278fe1cc2205a05bfd92c794be3d207372b17289 Mon Sep 17 00:00:00 2001
From: Haren Myneni <haren@linux.ibm.com>
Date: Sun, 27 Feb 2022 23:51:28 -0800
Subject: [PATCH 129/179] powerpc/pseries/vas: Define global hv_cop_caps
 struct

The coprocessor capabilities struct is used to get default and QoS
capabilities from the hypervisor during init, DLPAR event and
migration. So instead of allocating this struct for each event, define
a global struct and reuse it, which allows the migration code to avoid
adding an error path. Also disable the copy/paste feature flag if any
capabilities HCALL fails.

Signed-off-by: Haren Myneni <haren@linux.ibm.com>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Acked-by: Nathan Lynch <nathanl@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/57da6a270fcb9308cd57be7c88037029343080f7.camel@linux.ibm.com
---
 arch/powerpc/platforms/pseries/vas.c | 49 ++++++++++++----------------
 1 file changed, 21 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index 591c7597db5a..3bb219f54806 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -26,6 +26,7 @@ static struct vas_all_caps caps_all;
 
 static bool copypaste_feat;
+static struct hv_vas_cop_feat_caps hv_cop_caps;
 
 static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
 static DEFINE_MUTEX(vas_pseries_mutex);
@@ -724,7 +725,6 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds)
 */
 int vas_reconfig_capabilties(u8 type)
 {
-	struct hv_vas_cop_feat_caps *hv_caps;
 	struct vas_cop_feat_caps *caps;
 	int old_nr_creds, new_nr_creds;
 	struct vas_caps *vcaps;
@@ -738,17 +738,13 @@ int vas_reconfig_capabilties(u8 type)
 	vcaps = &vascaps[type];
 	caps = &vcaps->caps;
 
-	hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
-	if (!hv_caps)
-		return -ENOMEM;
-
 	mutex_lock(&vas_pseries_mutex);
 	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, vcaps->feat,
-				      (u64)virt_to_phys(hv_caps));
+				      (u64)virt_to_phys(&hv_cop_caps));
 	if (rc)
 		goto out;
 
-	new_nr_creds = be16_to_cpu(hv_caps->target_lpar_creds);
+	new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
 
 	old_nr_creds = atomic_read(&caps->nr_total_credits);
 
@@ -780,7 +776,6 @@ int vas_reconfig_capabilties(u8 type)
 
 out:
 	mutex_unlock(&vas_pseries_mutex);
-	kfree(hv_caps);
 	return rc;
 }
 /*
@@ -822,9 +817,8 @@ static struct notifier_block pseries_vas_nb = {
 
 static int __init pseries_vas_init(void)
 {
-	struct hv_vas_cop_feat_caps *hv_cop_caps;
 	struct hv_vas_all_caps *hv_caps;
-	int rc;
+	int rc = 0;
 
 	/*
	 * Linux supports user space COPY/PASTE only with Radix
@@ -850,38 +844,37 @@ static int __init pseries_vas_init(void)
 
 	sysfs_pseries_vas_init(&caps_all);
 
-	hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL);
-	if (!hv_cop_caps) {
-		rc = -ENOMEM;
-		goto out;
-	}
 	/*
	 * QOS capabilities available
	 */
 	if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
 		rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
-					  VAS_GZIP_QOS_FEAT_TYPE, hv_cop_caps);
+					  VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps);
 
 		if (rc)
-			goto out_cop;
+			goto out;
 	}
 
 	/*
	 * Default capabilities available
	 */
-	if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) {
+	if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT)
 		rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
-					  VAS_GZIP_DEF_FEAT_TYPE, hv_cop_caps);
-		if (rc)
-			goto out_cop;
+					  VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps);
+
+	if (!rc && copypaste_feat) {
+		if (firmware_has_feature(FW_FEATURE_LPAR))
+			of_reconfig_notifier_register(&pseries_vas_nb);
+
+		pr_info("GZIP feature is available\n");
+	} else {
+		/*
+		 * Should not happen, but only when get default
+		 * capabilities HCALL failed. So disable copy paste
+		 * feature.
+		 */
+		copypaste_feat = false;
 	}
 
-	if (copypaste_feat && firmware_has_feature(FW_FEATURE_LPAR))
-		of_reconfig_notifier_register(&pseries_vas_nb);
-
-	pr_info("GZIP feature is available\n");
-
-out_cop:
-	kfree(hv_cop_caps);
 out:
 	kfree(hv_caps);
 	return rc;
From 716d7a2e3764cb79061371767bff1a691adb4e7f Mon Sep 17 00:00:00 2001
From: Haren Myneni <haren@linux.ibm.com>
Date: Sun, 27 Feb 2022 23:52:08 -0800
Subject: [PATCH 130/179] powerpc/pseries/vas: Modify reconfig open/close
 functions for migration

VAS is a hardware engine that resides on the chip. So when the
partition migrates, all VAS windows on the source system have to be
closed and then reopened on the destination after migration.

The kernel has to consider both DLPAR CPU and migration events to take
action on VAS windows. So use the VAS_WIN_NO_CRED_CLOSE and
VAS_WIN_MIGRATE_CLOSE status bits; windows will be reopened after
migration only once both status bits are cleared.

This patch makes changes to the current reconfig_open/close_windows
functions to support migration:
- Set VAS_WIN_MIGRATE_CLOSE in the window status on close, and reopen
  windows with the same status during resume.
- Continue to close all windows even if the deallocate HCALL failed
  (should not happen) since there is no way to stop migration with the
  current LPM implementation.
- If a DLPAR CPU event happens while migration is in progress, set
  VAS_WIN_NO_CRED_CLOSE in the window status. The window close happens
  with the first event (migration or DLPAR) and the reopen happens
  only with the last event (migration or DLPAR).
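The intended first-close/last-reopen semantics can be summarised with
a small sketch (illustrative only; the demo_* helpers are
hypothetical, the status bits are the ones this series defines in
asm/vas.h):

	#define VAS_WIN_NO_CRED_CLOSE	0x00000001	/* from asm/vas.h */
	#define VAS_WIN_MIGRATE_CLOSE	0x00000002	/* from asm/vas.h */
	#define VAS_WIN_CLOSE_REASONS	(VAS_WIN_NO_CRED_CLOSE | \
					 VAS_WIN_MIGRATE_CLOSE)

	/* Returns 1 if the HW window must actually be closed now,
	 * i.e. this is the first close reason being recorded. */
	static int demo_close(unsigned int *status, unsigned int reason)
	{
		int first = !(*status & VAS_WIN_CLOSE_REASONS);

		*status |= reason;
		return first;
	}

	/* Returns 1 if the HW window may actually be reopened now,
	 * i.e. the last outstanding close reason has been cleared. */
	static int demo_reopen(unsigned int *status, unsigned int reason)
	{
		*status &= ~reason;
		return !(*status & VAS_WIN_CLOSE_REASONS);
	}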
Signed-off-by: Haren Myneni <haren@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/0aad580387cb58379496b4cbbd7c5596e9ea70be.camel@linux.ibm.com --- arch/powerpc/include/asm/vas.h | 2 + arch/powerpc/platforms/pseries/vas.c | 90 ++++++++++++++++++++++------ 2 files changed, 74 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 6baf7b9ffed4..83afcb6c194b 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -36,6 +36,8 @@ /* vas mmap() */ /* Window is closed in the hypervisor due to lost credit */ #define VAS_WIN_NO_CRED_CLOSE 0x00000001 +/* Window is closed due to migration */ +#define VAS_WIN_MIGRATE_CLOSE 0x00000002 /* * Get/Set bit fields diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index 3bb219f54806..fbcf311da0ec 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -457,11 +457,12 @@ static int vas_deallocate_window(struct vas_window *vwin) mutex_lock(&vas_pseries_mutex); /* * VAS window is already closed in the hypervisor when - * lost the credit. So just remove the entry from - * the list, remove task references and free vas_window + * lost the credit or with migration. So just remove the entry + * from the list, remove task references and free vas_window * struct. */ - if (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) { + if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) && + !(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) { rc = deallocate_free_window(win); if (rc) { mutex_unlock(&vas_pseries_mutex); @@ -578,12 +579,14 @@ static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, * by setting the remapping to new paste address if the window is * active. */ -static int reconfig_open_windows(struct vas_caps *vcaps, int creds) +static int reconfig_open_windows(struct vas_caps *vcaps, int creds, + bool migrate) { long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; struct vas_cop_feat_caps *caps = &vcaps->caps; struct pseries_vas_window *win = NULL, *tmp; int rc, mv_ents = 0; + int flag; /* * Nothing to do if there are no closed windows. @@ -602,8 +605,10 @@ static int reconfig_open_windows(struct vas_caps *vcaps, int creds) * (dedicated). If 1 core is added, this LPAR can have 20 more * credits. It means the kernel can reopen 20 windows. So move * 20 entries in the VAS windows lost and reopen next 20 windows. + * For partition migration, reopen all windows that are closed + * during resume. */ - if (vcaps->nr_close_wins > creds) + if ((vcaps->nr_close_wins > creds) && !migrate) mv_ents = vcaps->nr_close_wins - creds; list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) { @@ -613,12 +618,35 @@ static int reconfig_open_windows(struct vas_caps *vcaps, int creds) mv_ents--; } + /* + * Open windows if they are closed only with migration or + * DLPAR (lost credit) before. + */ + if (migrate) + flag = VAS_WIN_MIGRATE_CLOSE; + else + flag = VAS_WIN_NO_CRED_CLOSE; + list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) { /* - * Nothing to do on this window if it is not closed - * with VAS_WIN_NO_CRED_CLOSE + * This window is closed with DLPAR and migration events. + * So reopen the window with the last event. + * The user space is not suspended with the current + * migration notifier. So the user space can issue DLPAR + * CPU hotplug while migration in progress. In this case + * this window will be opened with the last event. 
*/ - if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) + if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) && + (win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) { + win->vas_win.status &= ~flag; + continue; + } + + /* + * Nothing to do on this window if it is not closed + * with this flag + */ + if (!(win->vas_win.status & flag)) continue; rc = allocate_setup_window(win, (u64 *)&domain[0], @@ -634,7 +662,7 @@ static int reconfig_open_windows(struct vas_caps *vcaps, int creds) /* * Set window status to active */ - win->vas_win.status &= ~VAS_WIN_NO_CRED_CLOSE; + win->vas_win.status &= ~flag; mutex_unlock(&win->vas_win.task_ref.mmap_mutex); win->win_type = caps->win_type; if (!--vcaps->nr_close_wins) @@ -661,20 +689,32 @@ out: * the user space to fall back to SW compression and manage with the * existing windows. */ -static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds) +static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds, + bool migrate) { struct pseries_vas_window *win, *tmp; struct vas_user_win_ref *task_ref; struct vm_area_struct *vma; - int rc = 0; + int rc = 0, flag; + + if (migrate) + flag = VAS_WIN_MIGRATE_CLOSE; + else + flag = VAS_WIN_NO_CRED_CLOSE; list_for_each_entry_safe(win, tmp, &vcap->list, win_list) { /* * This window is already closed due to lost credit - * before. Go for next window. + * or for migration before. Go for next window. + * For migration, nothing to do since this window + * closed for DLPAR and will be reopened even on + * the destination system with other DLPAR operation. */ - if (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) + if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) || + (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) { + win->vas_win.status |= flag; continue; + } task_ref = &win->vas_win.task_ref; mutex_lock(&task_ref->mmap_mutex); @@ -683,7 +723,7 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds) * Number of available credits are reduced, So select * and close windows. */ - win->vas_win.status |= VAS_WIN_NO_CRED_CLOSE; + win->vas_win.status |= flag; mmap_write_lock(task_ref->mm); /* @@ -706,12 +746,24 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds) * later when the process issued with close(FD). */ rc = deallocate_free_window(win); - if (rc) + /* + * This failure is from the hypervisor. + * No way to stop migration for these failures. + * So ignore error and continue closing other windows. + */ + if (rc && !migrate) return rc; vcap->nr_close_wins++; - if (!--excess_creds) + /* + * For migration, do not depend on lpar_creds in case if + * mismatch with the hypervisor value (should not happen). + * So close all active windows in the list and will be + * reopened windows based on the new lpar_creds on the + * destination system during resume. + */ + if (!migrate && !--excess_creds) break; } @@ -761,7 +813,8 @@ int vas_reconfig_capabilties(u8 type) * target, reopen windows if they are closed due to * the previous DLPAR (core removal). 
*/ - rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds); + rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds, + false); } else { /* * # active windows is more than new LPAR available @@ -771,7 +824,8 @@ int vas_reconfig_capabilties(u8 type) nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins; if (nr_active_wins > new_nr_creds) rc = reconfig_close_windows(vcaps, - nr_active_wins - new_nr_creds); + nr_active_wins - new_nr_creds, + false); } out: From 37e6764895ef7431f45ff603a548549d409993d2 Mon Sep 17 00:00:00 2001 From: Haren Myneni <haren@linux.ibm.com> Date: Wed, 2 Mar 2022 00:51:58 -0800 Subject: [PATCH 131/179] powerpc/pseries/vas: Add VAS migration handler Since the VAS windows belong to the VAS hardware resource, the hypervisor expects the partition to close them on source partition and reopen them after the partition migrated on the destination machine. This handler is called before pseries_suspend() to close these windows and again invoked after migration. All active windows for both default and QoS types will be closed and mark them inactive and reopened after migration with this handler. During the migration, the user space receives paste instruction failure if it issues copy/paste on these inactive windows. The current migration implementation does not freeze the user space and applications can continue to open VAS windows while migration is in progress. So when the migration_in_progress flag is set, VAS open window API returns -EBUSY. Signed-off-by: Haren Myneni <haren@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/05e45ff4f8babd2490ccb7ae923884f4aa21a7e5.camel@linux.ibm.com --- arch/powerpc/platforms/pseries/mobility.c | 5 ++ arch/powerpc/platforms/pseries/vas.c | 98 ++++++++++++++++++++++- arch/powerpc/platforms/pseries/vas.h | 14 ++++ 3 files changed, 116 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 94077fa91959..78f3f74c7056 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -26,6 +26,7 @@ #include <asm/machdep.h> #include <asm/rtas.h> #include "pseries.h" +#include "vas.h" /* vas_migration_handler() */ #include "../../kernel/cacheinfo.h" static struct kobject *mobility_kobj; @@ -669,12 +670,16 @@ static int pseries_migrate_partition(u64 handle) if (ret) return ret; + vas_migration_handler(VAS_SUSPEND); + ret = pseries_suspend(handle); if (ret == 0) post_mobility_fixup(); else pseries_cancel_migration(handle, ret); + vas_migration_handler(VAS_RESUME); + return ret; } diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index fbcf311da0ec..1f59d78c77a1 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -30,6 +30,7 @@ static struct hv_vas_cop_feat_caps hv_cop_caps; static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE]; static DEFINE_MUTEX(vas_pseries_mutex); +static bool migration_in_progress; static long hcall_return_busy_check(long rc) { @@ -356,7 +357,10 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, * same fault IRQ is not freed by the OS before. 
*/ mutex_lock(&vas_pseries_mutex); - rc = allocate_setup_window(txwin, (u64 *)&domain[0], + if (migration_in_progress) + rc = -EBUSY; + else + rc = allocate_setup_window(txwin, (u64 *)&domain[0], cop_feat_caps->win_type); mutex_unlock(&vas_pseries_mutex); if (rc) @@ -869,6 +873,98 @@ static struct notifier_block pseries_vas_nb = { .notifier_call = pseries_vas_notifier, }; +/* + * For LPM, all windows have to be closed on the source partition + * before migration and reopen them on the destination partition + * after migration. So closing windows during suspend and + * reopen them during resume. + */ +int vas_migration_handler(int action) +{ + struct vas_cop_feat_caps *caps; + int old_nr_creds, new_nr_creds = 0; + struct vas_caps *vcaps; + int i, rc = 0; + + /* + * NX-GZIP is not enabled. Nothing to do for migration. + */ + if (!copypaste_feat) + return rc; + + mutex_lock(&vas_pseries_mutex); + + if (action == VAS_SUSPEND) + migration_in_progress = true; + else + migration_in_progress = false; + + for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) { + vcaps = &vascaps[i]; + caps = &vcaps->caps; + old_nr_creds = atomic_read(&caps->nr_total_credits); + + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, + vcaps->feat, + (u64)virt_to_phys(&hv_cop_caps)); + if (!rc) { + new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); + /* + * Should not happen. But incase print messages, close + * all windows in the list during suspend and reopen + * windows based on new lpar_creds on the destination + * system. + */ + if (old_nr_creds != new_nr_creds) { + pr_err("Target credits mismatch with the hypervisor\n"); + pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n", + action, old_nr_creds, new_nr_creds); + pr_err("Used creds: %d, Active creds: %d\n", + atomic_read(&caps->nr_used_credits), + vcaps->nr_open_windows - vcaps->nr_close_wins); + } + } else { + pr_err("state(%d): Get VAS capabilities failed with %d\n", + action, rc); + /* + * We can not stop migration with the current lpm + * implementation. So continue closing all windows in + * the list (during suspend) and return without + * opening windows (during resume) if VAS capabilities + * HCALL failed. + */ + if (action == VAS_RESUME) + goto out; + } + + switch (action) { + case VAS_SUSPEND: + rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows, + true); + break; + case VAS_RESUME: + atomic_set(&caps->nr_total_credits, new_nr_creds); + rc = reconfig_open_windows(vcaps, new_nr_creds, true); + break; + default: + /* should not happen */ + pr_err("Invalid migration action %d\n", action); + rc = -EINVAL; + goto out; + } + + /* + * Ignore errors during suspend and return for resume. 
+	 */
+		if (rc && (action == VAS_RESUME))
+			goto out;
+	}
+
+out:
+	mutex_unlock(&vas_pseries_mutex);
+	return rc;
+}
+
 static int __init pseries_vas_init(void)
 {
 	struct hv_vas_all_caps *hv_caps;
diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h
index 4ddb1001a0aa..34177881e998 100644
--- a/arch/powerpc/platforms/pseries/vas.h
+++ b/arch/powerpc/platforms/pseries/vas.h
@@ -33,6 +33,11 @@
 #define VAS_GZIP_QOS_CAPABILITIES	0x56516F73477A6970
 #define VAS_GZIP_DEFAULT_CAPABILITIES	0x56446566477A6970
 
+enum vas_migrate_action {
+	VAS_SUSPEND,
+	VAS_RESUME,
+};
+
 /*
 * Co-processor feature - GZIP QoS windows or GZIP default windows
 */
@@ -132,4 +137,13 @@ struct pseries_vas_window {
 int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps);
 int vas_reconfig_capabilties(u8 type);
 int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps);
+
+#ifdef CONFIG_PPC_VAS
+int vas_migration_handler(int action);
+#else
+static inline int vas_migration_handler(int action)
+{
+	return 0;
+}
+#endif
 #endif /* _VAS_H */
From fa1321b11bd01752f5be2415e74a0e1a7c378262 Mon Sep 17 00:00:00 2001
From: Jakob Koschel <jakobkoschel@gmail.com>
Date: Mon, 28 Feb 2022 15:24:33 +0100
Subject: [PATCH 132/179] powerpc/sysdev: fix incorrect use to determine if
 list is empty

'gtm' will *always* be set by list_for_each_entry(). It is incorrect
to assume that the iterator value will be NULL if the list is empty.
Instead of checking the pointer it should be checked if the list is
empty.

Fixes: 83ff9dcf375c ("powerpc/sysdev: implement FSL GTM support")
Signed-off-by: Jakob Koschel <jakobkoschel@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220228142434.576226-1-jakobkoschel@gmail.com
---
 arch/powerpc/sysdev/fsl_gtm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/sysdev/fsl_gtm.c b/arch/powerpc/sysdev/fsl_gtm.c
index 8963eaffb1b7..39186ad6b3c3 100644
--- a/arch/powerpc/sysdev/fsl_gtm.c
+++ b/arch/powerpc/sysdev/fsl_gtm.c
@@ -86,7 +86,7 @@ static LIST_HEAD(gtms);
 */
 struct gtm_timer *gtm_get_timer16(void)
 {
-	struct gtm *gtm = NULL;
+	struct gtm *gtm;
 	int i;
 
 	list_for_each_entry(gtm, &gtms, list_node) {
@@ -103,7 +103,7 @@ struct gtm_timer *gtm_get_timer16(void)
 		spin_unlock_irq(&gtm->lock);
 	}
 
-	if (gtm)
+	if (!list_empty(&gtms))
 		return ERR_PTR(-EBUSY);
 	return ERR_PTR(-ENODEV);
 }
From d4679ac8ea2e5078704aa1c026db36580cc1bf9a Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 22 Feb 2022 22:34:49 +1100
Subject: [PATCH 133/179] powerpc/64s: Don't use DSISR for SLB faults

Since commit 46ddcb3950a2 ("powerpc/mm: Show if a bad page fault on
data is read or write.") we use page_fault_is_write(regs->dsisr) in
__bad_page_fault() to determine if the fault is for a read or write,
and change the message printed accordingly.

But SLB faults, aka Data Segment Interrupts, don't set DSISR (Data
Storage Interrupt Status Register) to a useful value. All ISA versions
from v2.03 through v3.1 specify that the Data Segment Interrupt sets
DSISR "to an undefined value". As far as I can see there's no mention
of SLB faults setting DSISR in any BookIV content either.
This manifests as accesses that should be a read being incorrectly reported as writes, for example, using the xmon "dump" command: 0:mon> d 0x5deadbeef0000000 5deadbeef0000000 [359526.415354][ C6] BUG: Unable to handle kernel data access on write at 0x5deadbeef0000000 [359526.415611][ C6] Faulting instruction address: 0xc00000000010a300 cpu 0x6: Vector: 380 (Data SLB Access) at [c00000000ffbf400] pc: c00000000010a300: mread+0x90/0x190 If we disassemble the PC, we see a load instruction: 0:mon> di c00000000010a300 c00000000010a300 89490000 lbz r10,0(r9) We can also see in exceptions-64s.S that the data_access_slb block doesn't set IDSISR=1, which means it doesn't load DSISR into pt_regs. So the value we're using to determine if the fault is a read/write is some stale value in pt_regs from a previous page fault. Rework the printing logic to separate the SLB fault case out, and only print read/write in the cases where we can determine it. The result looks like eg: 0:mon> d 0x5deadbeef0000000 5deadbeef0000000 [ 721.779525][ C6] BUG: Unable to handle kernel data access at 0x5deadbeef0000000 [ 721.779697][ C6] Faulting instruction address: 0xc00000000014cbe0 cpu 0x6: Vector: 380 (Data SLB Access) at [c00000000ffbf390] 0:mon> d 0 0000000000000000 [ 742.793242][ C6] BUG: Kernel NULL pointer dereference at 0x00000000 [ 742.793316][ C6] Faulting instruction address: 0xc00000000014cbe0 cpu 0x6: Vector: 380 (Data SLB Access) at [c00000000ffbf390] Fixes: 46ddcb3950a2 ("powerpc/mm: Show if a bad page fault on data is read or write.") Reported-by: Nageswara R Sastry <rnsastry@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Link: https://lore.kernel.org/r/20220222113449.319193-1-mpe@ellerman.id.au --- arch/powerpc/mm/fault.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index eb8ecd7343a9..7ba6d3eff636 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -567,18 +567,24 @@ NOKPROBE_SYMBOL(hash__do_page_fault); static void __bad_page_fault(struct pt_regs *regs, int sig) { int is_write = page_fault_is_write(regs->dsisr); + const char *msg; /* kernel has accessed a bad area */ + if (regs->dar < PAGE_SIZE) + msg = "Kernel NULL pointer dereference"; + else + msg = "Unable to handle kernel data access"; + switch (TRAP(regs)) { case INTERRUPT_DATA_STORAGE: - case INTERRUPT_DATA_SEGMENT: case INTERRUPT_H_DATA_STORAGE: - pr_alert("BUG: %s on %s at 0x%08lx\n", - regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" : - "Unable to handle kernel data access", + pr_alert("BUG: %s on %s at 0x%08lx\n", msg, is_write ? "write" : "read", regs->dar); break; + case INTERRUPT_DATA_SEGMENT: + pr_alert("BUG: %s at 0x%08lx\n", msg, regs->dar); + break; case INTERRUPT_INST_STORAGE: case INTERRUPT_INST_SEGMENT: pr_alert("BUG: Unable to handle kernel instruction fetch%s", From 591b4b268435f00d2f0b81f786c2c7bd5ef66416 Mon Sep 17 00:00:00 2001 From: Michael Ellerman <mpe@ellerman.id.au> Date: Wed, 23 Feb 2022 12:58:21 +1100 Subject: [PATCH 134/179] powerpc/code-patching: Pre-map patch area Paul reported a warning with DEBUG_ATOMIC_SLEEP=y: BUG: sleeping function called from invalid context at include/linux/sched/mm.h:256 in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: swapper/0 preempt_count: 0, expected: 0 ... 
Call Trace: dump_stack_lvl+0xa0/0xec (unreliable) __might_resched+0x2f4/0x310 kmem_cache_alloc+0x220/0x4b0 __pud_alloc+0x74/0x1d0 hash__map_kernel_page+0x2cc/0x390 do_patch_instruction+0x134/0x4a0 arch_jump_label_transform+0x64/0x78 __jump_label_update+0x148/0x180 static_key_enable_cpuslocked+0xd0/0x120 static_key_enable+0x30/0x50 check_kvm_guest+0x60/0x88 pSeries_smp_probe+0x54/0xb0 smp_prepare_cpus+0x3e0/0x430 kernel_init_freeable+0x20c/0x43c kernel_init+0x30/0x1a0 ret_from_kernel_thread+0x5c/0x64 Peter pointed out that this is because do_patch_instruction() has disabled interrupts, but then map_patch_area() calls map_kernel_page() then hash__map_kernel_page() which does a sleeping memory allocation. We only see the warning in KVM guests with SMT enabled, which is not particularly common, or on other platforms if CONFIG_KPROBES is disabled, also not common. The reason we don't see it in most configurations is that another path that happens to have interrupts enabled has allocated the required page tables for us, eg. there's a path in kprobes init that does that. That's just pure luck though. As Christophe suggested, the simplest solution is to do a dummy map/unmap when we initialise the patching, so that any required page table levels are pre-allocated before the first call to do_patch_instruction(). This works because the unmap doesn't free any page tables that were allocated by the map, it just clears the PTE, leaving the page table levels there for the next map. Reported-by: Paul Menzel <pmenzel@molgen.mpg.de> Debugged-by: Peter Zijlstra <peterz@infradead.org> Suggested-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220223015821.473097-1-mpe@ellerman.id.au --- arch/powerpc/lib/code-patching.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 906d43463366..00c68e7fb11e 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -43,9 +43,14 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr) #ifdef CONFIG_STRICT_KERNEL_RWX static DEFINE_PER_CPU(struct vm_struct *, text_poke_area); +static int map_patch_area(void *addr, unsigned long text_poke_addr); +static void unmap_patch_area(unsigned long addr); + static int text_area_cpu_up(unsigned int cpu) { struct vm_struct *area; + unsigned long addr; + int err; area = get_vm_area(PAGE_SIZE, VM_ALLOC); if (!area) { @@ -53,6 +58,15 @@ static int text_area_cpu_up(unsigned int cpu) cpu); return -1; } + + // Map/unmap the area to ensure all page tables are pre-allocated + addr = (unsigned long)area->addr; + err = map_patch_area(empty_zero_page, addr); + if (err) + return err; + + unmap_patch_area(addr); + this_cpu_write(text_poke_area, area); return 0; From acd7408d2748533d767387cb4308692fba543658 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com> Date: Mon, 14 Feb 2022 16:11:35 +0530 Subject: [PATCH 135/179] powerpc/bpf: Skip branch range validation during first pass During the first pass, addrs[] is still being populated. So, all branches to following instructions will appear to be going to the start of the JIT program. Ignore branch range validation for such instructions and assume those to be in range. Branch range validation will happen during the second pass after addrs[] is setup properly. Signed-off-by: Naveen N. 
Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/bc517413d11636e20dbfc88503dad14bcbe391e2.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index b75507fc8f6b..25a7190bcee9 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -27,7 +27,7 @@ #define PPC_JMP(dest) \ do { \ long offset = (long)(dest) - (ctx->idx * 4); \ - if (!is_offset_in_branch_range(offset)) { \ + if ((dest) != 0 && !is_offset_in_branch_range(offset)) { \ pr_err_ratelimited("Branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ return -ERANGE; \ } \ @@ -41,7 +41,7 @@ #define PPC_BCC_SHORT(cond, dest) \ do { \ long offset = (long)(dest) - (ctx->idx * 4); \ - if (!is_offset_in_cond_branch_range(offset)) { \ + if ((dest) != 0 && !is_offset_in_cond_branch_range(offset)) { \ pr_err_ratelimited("Conditional branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ return -ERANGE; \ } \ From bafb5898de5d2f15133774cb049fe55720b9c92f Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com> Date: Mon, 14 Feb 2022 16:11:36 +0530 Subject: [PATCH 136/179] powerpc/bpf: Emit a single branch instruction for known short branch ranges PPC_BCC() emits two instructions to accommodate scenarios where we need to branch outside the range of a conditional branch. PPC_BCC_SHORT() emits a single branch instruction and can be used when the branch is known to be within a conditional branch range. Convert some of the uses of PPC_BCC() in the powerpc BPF JIT over to PPC_BCC_SHORT() where we know the branch range. Signed-off-by: Naveen N. 
Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/edbca01377d1d5f472868bf6d8962b0a0d85b96f.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit_comp32.c | 8 ++++---- arch/powerpc/net/bpf_jit_comp64.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 43643f1c1034..5ba5340a6387 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -229,7 +229,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o EMIT(PPC_RAW_LWZ(_R0, b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); EMIT(PPC_RAW_CMPLW(b2p_index, _R0)); EMIT(PPC_RAW_LWZ(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); - PPC_BCC(COND_GE, out); + PPC_BCC_SHORT(COND_GE, out); /* * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) @@ -238,7 +238,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o EMIT(PPC_RAW_CMPLWI(_R0, MAX_TAIL_CALL_CNT)); /* tail_call_cnt++; */ EMIT(PPC_RAW_ADDIC(_R0, _R0, 1)); - PPC_BCC(COND_GE, out); + PPC_BCC_SHORT(COND_GE, out); /* prog = array->ptrs[index]; */ EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29)); @@ -251,7 +251,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * goto out; */ EMIT(PPC_RAW_CMPLWI(_R3, 0)); - PPC_BCC(COND_EQ, out); + PPC_BCC_SHORT(COND_EQ, out); /* goto *(prog->bpf_func + prologue_size); */ EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_prog, bpf_func))); @@ -842,7 +842,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (BPF_MODE(code) == BPF_PROBE_MEM) { PPC_LI32(_R0, TASK_SIZE - off); EMIT(PPC_RAW_CMPLW(src_reg, _R0)); - PPC_BCC(COND_GT, (ctx->idx + 5) * 4); + PPC_BCC_SHORT(COND_GT, (ctx->idx + 4) * 4); EMIT(PPC_RAW_LI(dst_reg, 0)); /* * For BPF_DW case, "li reg_h,0" would be needed when diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index e1e8c934308a..b1ed8611091d 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -225,7 +225,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o EMIT(PPC_RAW_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); EMIT(PPC_RAW_RLWINM(b2p_index, b2p_index, 0, 0, 31)); EMIT(PPC_RAW_CMPLW(b2p_index, b2p[TMP_REG_1])); - PPC_BCC(COND_GE, out); + PPC_BCC_SHORT(COND_GE, out); /* * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) @@ -233,7 +233,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o */ PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); EMIT(PPC_RAW_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT)); - PPC_BCC(COND_GE, out); + PPC_BCC_SHORT(COND_GE, out); /* * tail_call_cnt++; @@ -251,7 +251,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * goto out; */ EMIT(PPC_RAW_CMPLDI(b2p[TMP_REG_1], 0)); - PPC_BCC(COND_EQ, out); + PPC_BCC_SHORT(COND_EQ, out); /* goto *(prog->bpf_func + prologue_size); */ PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func)); @@ -807,7 +807,7 @@ emit_clear: else /* BOOK3S_64 */ PPC_LI64(b2p[TMP_REG_2], PAGE_OFFSET); EMIT(PPC_RAW_CMPLD(b2p[TMP_REG_1], b2p[TMP_REG_2])); - PPC_BCC(COND_GT, (ctx->idx + 4) * 4); + PPC_BCC_SHORT(COND_GT, (ctx->idx + 3) * 4); EMIT(PPC_RAW_LI(dst_reg, 0)); /* * Check if 'off' is word aligned because PPC_BPF_LL() From 
0ffdbce6f4a89bb7c0002904d6438ec83cf05ce7 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com> Date: Mon, 14 Feb 2022 16:11:37 +0530 Subject: [PATCH 137/179] powerpc/bpf: Handle large branch ranges with BPF_EXIT In some scenarios, it is possible that the program epilogue is outside the branch range for a BPF_EXIT instruction. Instead of rejecting such programs, emit epilogue as an alternate exit point from the program. Track the location of the same so that subsequent exits can take either of the two paths. Reported-by: Jordan Niethe <jniethe5@gmail.com> Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/33aa2e92645a92712be23b18035a2c6dcb92ff8d.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit.h | 2 ++ arch/powerpc/net/bpf_jit_comp.c | 22 +++++++++++++++++++++- arch/powerpc/net/bpf_jit_comp32.c | 7 +++++-- arch/powerpc/net/bpf_jit_comp64.c | 7 +++++-- 4 files changed, 33 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 25a7190bcee9..e58cf29bb0cf 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -148,6 +148,7 @@ struct codegen_context { unsigned int stack_size; int b2p[ARRAY_SIZE(b2p)]; unsigned int exentry_idx; + unsigned int alt_exit_addr; }; #ifdef CONFIG_PPC32 @@ -183,6 +184,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx); void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx); void bpf_jit_realloc_regs(struct codegen_context *ctx); +int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr); int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx, int insn_idx, int jmp_off, int dst_reg); diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 56dd1f4e3e44..141e64585b64 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -89,6 +89,22 @@ static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image, return 0; } +int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr) +{ + if (!exit_addr || is_offset_in_branch_range(exit_addr - (ctx->idx * 4))) { + PPC_JMP(exit_addr); + } else if (ctx->alt_exit_addr) { + if (WARN_ON(!is_offset_in_branch_range((long)ctx->alt_exit_addr - (ctx->idx * 4)))) + return -1; + PPC_JMP(ctx->alt_exit_addr); + } else { + ctx->alt_exit_addr = ctx->idx * 4; + bpf_jit_build_epilogue(image, ctx); + } + + return 0; +} + struct powerpc64_jit_data { struct bpf_binary_header *header; u32 *addrs; @@ -177,8 +193,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) * If we have seen a tail call, we need a second pass. * This is because bpf_jit_emit_common_epilogue() is called * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen. + * We also need a second pass if we ended up with too large + * a program so as to ensure BPF_EXIT branches are in range. */ - if (cgctx.seen & SEEN_TAILCALL) { + if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) { cgctx.idx = 0; if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) { fp = org_fp; @@ -193,6 +211,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) * calculate total size from idx. 
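To make the alternate-exit scheme above concrete, here is a rough user-space model of the decision in bpf_jit_emit_exit_insn(); the struct, constants and printf-based "emission" are simplified stand-ins, not the kernel's real interfaces:

#include <stdbool.h>
#include <stdio.h>

#define BRANCH_RANGE 0x2000000L	/* +/- 32MB reach of an unconditional branch */

struct ctx {
	unsigned int idx;		/* next instruction slot, in words */
	unsigned int alt_exit_addr;	/* offset of a mid-image epilogue, 0 if none */
};

static bool in_range(long off)
{
	return off >= -BRANCH_RANGE && off < BRANCH_RANGE;
}

/* Decide how a BPF_EXIT at ctx->idx reaches the epilogue at exit_addr. */
static void emit_exit(struct ctx *ctx, long exit_addr)
{
	long off = exit_addr - (long)ctx->idx * 4;

	if (in_range(off)) {
		printf("%#x: b %+ld\n", ctx->idx * 4, off);
	} else if (ctx->alt_exit_addr) {
		/* reuse the epilogue already dropped into the image */
		printf("%#x: b %+ld\n", ctx->idx * 4,
		       (long)ctx->alt_exit_addr - (long)ctx->idx * 4);
	} else {
		/* first out-of-range exit: place an epilogue right here */
		ctx->alt_exit_addr = ctx->idx * 4;
		printf("%#x: <inline epilogue>\n", ctx->alt_exit_addr);
	}
	ctx->idx++;
}

int main(void)
{
	struct ctx ctx = { .idx = BRANCH_RANGE / 4 + 2 };	/* pretend-huge program */

	emit_exit(&ctx, 4);	/* too far back: inline an epilogue */
	emit_exit(&ctx, 4);	/* still too far: branch to the inline one */
	return 0;
}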
*/ bpf_jit_build_prologue(0, &cgctx); + addrs[fp->len] = cgctx.idx * 4; bpf_jit_build_epilogue(0, &cgctx); fixup_len = fp->aux->num_exentries * BPF_FIXUP_LEN * 4; @@ -233,6 +252,7 @@ skip_init_ctx: for (pass = 1; pass < 3; pass++) { /* Now build the prologue, body code & epilogue for real. */ cgctx.idx = 0; + cgctx.alt_exit_addr = 0; bpf_jit_build_prologue(code_base, &cgctx); if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass)) { bpf_jit_binary_free(bpf_hdr); diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 5ba5340a6387..8e743b7bf8f5 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -937,8 +937,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * the epilogue. If we _are_ the last instruction, * we'll just fall through to the epilogue. */ - if (i != flen - 1) - PPC_JMP(exit_addr); + if (i != flen - 1) { + ret = bpf_jit_emit_exit_insn(image, ctx, _R0, exit_addr); + if (ret) + return ret; + } /* else fall through to the epilogue */ break; diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index b1ed8611091d..371bd5a16859 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -871,8 +871,11 @@ emit_clear: * the epilogue. If we _are_ the last instruction, * we'll just fall through to the epilogue. */ - if (i != flen - 1) - PPC_JMP(exit_addr); + if (i != flen - 1) { + ret = bpf_jit_emit_exit_insn(image, ctx, b2p[TMP_REG_1], exit_addr); + if (ret) + return ret; + } /* else fall through to the epilogue */ break; From c2067f7f88830cdd020c775ffefe84a8177337af Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com> Date: Mon, 14 Feb 2022 16:11:38 +0530 Subject: [PATCH 138/179] powerpc64/bpf: Do not save/restore LR on each call to bpf_stf_barrier() Instead of saving and restoring LR before each invocation to bpf_stf_barrier(), set SEEN_FUNC flag so that we save/restore LR in prologue/epilogue. Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/4446f25478d82a2a4ac9dab2ebdfd88ddf923eb7.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit_comp64.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 371bd5a16859..27ac2fc76702 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -690,11 +690,10 @@ emit_clear: EMIT(PPC_RAW_ORI(_R31, _R31, 0)); break; case STF_BARRIER_FALLBACK: - EMIT(PPC_RAW_MFLR(b2p[TMP_REG_1])); + ctx->seen |= SEEN_FUNC; PPC_LI64(12, dereference_kernel_function_descriptor(bpf_stf_barrier)); EMIT(PPC_RAW_MTCTR(12)); EMIT(PPC_RAW_BCTRL()); - EMIT(PPC_RAW_MTLR(b2p[TMP_REG_1])); break; case STF_BARRIER_NONE: break; From 1d4866d5652f7a19dcbed0c4e366c3402c7775b7 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com> Date: Mon, 14 Feb 2022 16:11:39 +0530 Subject: [PATCH 139/179] powerpc64/bpf: Use r12 for constant blinding In preparation for preserving kernel toc in r2, switch BPF_REG_AX from r2 to r12. r12 is not used by bpf JIT except during external helper/bpf calls, or with BPF_NOSPEC. These sequences aren't emitted when BPF_REG_AX is used for constant blinding and other purposes. Signed-off-by: Naveen N. 
Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/e109f98617eacb4512c17a48525e94eda42889e6.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit64.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h index b63b35e45e55..82cdfee41278 100644 --- a/arch/powerpc/net/bpf_jit64.h +++ b/arch/powerpc/net/bpf_jit64.h @@ -56,7 +56,7 @@ const int b2p[MAX_BPF_JIT_REG + 2] = { /* frame pointer aka BPF_REG_10 */ [BPF_REG_FP] = 31, /* eBPF jit internal registers */ - [BPF_REG_AX] = 2, + [BPF_REG_AX] = 12, [TMP_REG_1] = 9, [TMP_REG_2] = 10 }; From 4eeac2b0aaadc3d1943d348d8565f7cfb93272b9 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com> Date: Mon, 14 Feb 2022 16:11:40 +0530 Subject: [PATCH 140/179] powerpc64: Set PPC64_ELF_ABI_v[1|2] macros to 1 Set macros to 1 so that they can be used with __is_defined(). Suggested-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/abad4868416ddfd42893f99c0cad8e5faf998095.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/include/asm/types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/types.h b/arch/powerpc/include/asm/types.h index 97da77bc48c9..84078c28c1a2 100644 --- a/arch/powerpc/include/asm/types.h +++ b/arch/powerpc/include/asm/types.h @@ -13,9 +13,9 @@ #ifdef __powerpc64__ #if defined(_CALL_ELF) && _CALL_ELF == 2 -#define PPC64_ELF_ABI_v2 +#define PPC64_ELF_ABI_v2 1 #else -#define PPC64_ELF_ABI_v1 +#define PPC64_ELF_ABI_v1 1 #endif #endif /* __powerpc64__ */ From b10cb163c4b31b03ac5014abbfd0b868913fd8e3 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com> Date: Mon, 14 Feb 2022 16:11:41 +0530 Subject: [PATCH 141/179] powerpc64/bpf elfv2: Setup kernel TOC in r2 on entry In preparation for using kernel TOC, load the same in r2 on entry. With elfv1, the kernel TOC is already setup by our caller. We adjust the number of instructions to skip on a tail call accordingly. We get rid of the #ifdef in bpf_jit_emit_tail_call() since FUNCTION_DESCR_SIZE is itself under a #ifdef. Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/18a05a4ceec14a8617c9dd4b7128d0afa83fd14e.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit_comp64.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 27ac2fc76702..44314ee60155 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -73,6 +73,9 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int i; + if (__is_defined(PPC64_ELF_ABI_v2)) + PPC_BPF_LL(_R2, _R13, offsetof(struct paca_struct, kernel_toc)); + /* * Initialize tail_call_cnt if we do tail calls. 
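Since a tail call jumps into another program's image past this setup code, the skip distance has to account for the TOC load that now exists on ELFv2 (the adjustment shows up just below in bpf_jit_emit_tail_call()). A back-of-the-envelope sketch with illustrative constants (24-byte ELFv1 descriptors, 4-byte instructions):

#include <stdio.h>

int main(void)
{
	int elf_abi_v2 = 1;			/* assumption: ELFv2 kernel build */
	int func_descr = elf_abi_v2 ? 0 : 24;	/* ELFv1: entry, toc, env words */
	int skip = 8;				/* li + std initializing tail_call_cnt */

	if (elf_abi_v2)
		skip += 4;			/* the new ld of r2 from the paca */

	printf("tail call lands at bpf_func + %d\n", func_descr + skip);
	return 0;
}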
* Otherwise, put in NOPs so that it can be skipped when we are @@ -87,8 +90,6 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) EMIT(PPC_RAW_NOP()); } -#define BPF_TAILCALL_PROLOGUE_SIZE 8 - if (bpf_has_stack_frame(ctx)) { /* * We need a stack frame, but we don't necessarily need to @@ -217,6 +218,10 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o */ int b2p_bpf_array = b2p[BPF_REG_2]; int b2p_index = b2p[BPF_REG_3]; + int bpf_tailcall_prologue_size = 8; + + if (__is_defined(PPC64_ELF_ABI_v2)) + bpf_tailcall_prologue_size += 4; /* skip past the toc load */ /* * if (index >= array->map.max_entries) @@ -255,13 +260,8 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o /* goto *(prog->bpf_func + prologue_size); */ PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func)); -#ifdef PPC64_ELF_ABI_v1 - /* skip past the function descriptor */ EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], - FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE)); -#else - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE)); -#endif + FUNCTION_DESCR_SIZE + bpf_tailcall_prologue_size)); EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1])); /* tear down stack, restore NVRs, ... */ From 43d636f8b4fd2ee668e75e835fae2fcf4bc0f699 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com> Date: Mon, 14 Feb 2022 16:11:42 +0530 Subject: [PATCH 142/179] powerpc64/bpf elfv1: Do not load TOC before calling functions BPF helpers always reside in core kernel and all BPF programs use the kernel TOC. As such, there is no need to load the TOC before calling helpers or other BPF functions. Drop code to do the same. Add a check to ensure we don't proceed if this assumption ever changes in future. Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/a3cd3da4d24d95d845cd10382b1af083600c9074.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit.h | 2 +- arch/powerpc/net/bpf_jit_comp.c | 4 +++- arch/powerpc/net/bpf_jit_comp32.c | 8 +++++-- arch/powerpc/net/bpf_jit_comp64.c | 39 ++++++++++++++++--------------- 4 files changed, 30 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index e58cf29bb0cf..ea384ae836cc 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -178,7 +178,7 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i) ctx->seen &= ~(1 << (31 - i)); } -void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); +int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, u32 *addrs, int pass); void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx); diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 141e64585b64..635f7448ff79 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -59,7 +59,9 @@ static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image, */ tmp_idx = ctx->idx; ctx->idx = addrs[i] / 4; - bpf_jit_emit_func_call_rel(image, ctx, func_addr); + ret = bpf_jit_emit_func_call_rel(image, ctx, func_addr); + if (ret) + return ret; /* * Restore ctx->idx here. 
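The idiom being hardened here deserves a second look: the fixup pass rewinds ctx->idx to a recorded call site, re-emits the fixed-length call sequence with the now-known address, and restores the index. A minimal model with invented names:

#include <stdio.h>

struct ctx { unsigned int idx; };

static void emit(struct ctx *ctx, unsigned int insn, unsigned int *image)
{
	image[ctx->idx++] = insn;
}

int main(void)
{
	unsigned int image[32] = { 0 };
	struct ctx ctx = { .idx = 20 };		/* current emission point */
	unsigned int call_site = 8, tmp_idx;

	tmp_idx = ctx.idx;			/* remember where we were */
	ctx.idx = call_site;			/* rewind to the recorded call */
	for (int i = 0; i < 5; i++)		/* 5-slot PPC_LI64-style sequence */
		emit(&ctx, 0x60000000, image);	/* 0x60000000 == nop */
	ctx.idx = tmp_idx;			/* safe only because the length is fixed */

	printf("patched slots %u..%u, resumed at %u\n",
	       call_site, call_site + 4, ctx.idx);
	return 0;
}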
This is safe as the length diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 8e743b7bf8f5..511d2a203e7d 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -193,7 +193,7 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) EMIT(PPC_RAW_BLR()); } -void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) +int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) { s32 rel = (s32)func - (s32)(image + ctx->idx); @@ -209,6 +209,8 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun EMIT(PPC_RAW_MTCTR(_R0)); EMIT(PPC_RAW_BCTRL()); } + + return 0; } static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) @@ -961,7 +963,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5), _R1, 12)); } - bpf_jit_emit_func_call_rel(image, ctx, func_addr); + ret = bpf_jit_emit_func_call_rel(image, ctx, func_addr); + if (ret) + return ret; EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0) - 1, _R3)); EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), _R4)); diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 44314ee60155..e9fd4694226f 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -147,9 +147,13 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) EMIT(PPC_RAW_BLR()); } -static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, - u64 func) +static int bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, u64 func) { + unsigned long func_addr = func ? ppc_function_entry((void *)func) : 0; + + if (WARN_ON_ONCE(!core_kernel_text(func_addr))) + return -EINVAL; + #ifdef PPC64_ELF_ABI_v1 /* func points to the function descriptor */ PPC_LI64(b2p[TMP_REG_2], func); @@ -157,25 +161,23 @@ static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0); /* ... and move it to CTR */ EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1])); - /* - * Load TOC from function descriptor at offset 8. - * We can clobber r2 since we get called through a - * function pointer (so caller will save/restore r2) - * and since we don't use a TOC ourself. - */ - PPC_BPF_LL(2, b2p[TMP_REG_2], 8); #else /* We can clobber r12 */ PPC_FUNC_ADDR(12, func); EMIT(PPC_RAW_MTCTR(12)); #endif EMIT(PPC_RAW_BCTRL()); + + return 0; } -void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) +int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) { unsigned int i, ctx_idx = ctx->idx; + if (WARN_ON_ONCE(func && is_module_text_address(func))) + return -EINVAL; + /* Load function address into r12 */ PPC_LI64(12, func); @@ -193,19 +195,14 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun EMIT(PPC_RAW_NOP()); #ifdef PPC64_ELF_ABI_v1 - /* - * Load TOC from function descriptor at offset 8. - * We can clobber r2 since we get called through a - * function pointer (so caller will save/restore r2) - * and since we don't use a TOC ourself. 
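For readers unfamiliar with why a TOC load at offset 8 was there at all: under ELFv1 a function "pointer" addresses a descriptor, not code. A user-space caricature of the layout (addresses invented):

#include <stdio.h>

/* ELFv1 function descriptor: what an ABI v1 function pointer refers to */
struct func_desc {
	unsigned long entry;	/* +0: address of the actual code */
	unsigned long toc;	/* +8: r2 (TOC) value the callee expects */
	unsigned long env;	/* +16: environment pointer, unused by C */
};

int main(void)
{
	struct func_desc d = {
		.entry = 0xc000000000abc000UL,
		.toc = 0xc000000001230000UL,
	};
	unsigned long *fp = (unsigned long *)&d;

	/* the JIT loaded the entry from +0 and, until now, the TOC from +8 */
	printf("entry=%#lx toc=%#lx\n", fp[0], fp[1]);
	return 0;
}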
- */ - PPC_BPF_LL(2, 12, 8); /* Load actual entry point from function descriptor */ PPC_BPF_LL(12, 12, 0); #endif EMIT(PPC_RAW_MTCTR(12)); EMIT(PPC_RAW_BCTRL()); + + return 0; } static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) @@ -890,9 +887,13 @@ emit_clear: return ret; if (func_addr_fixed) - bpf_jit_emit_func_call_hlp(image, ctx, func_addr); + ret = bpf_jit_emit_func_call_hlp(image, ctx, func_addr); else - bpf_jit_emit_func_call_rel(image, ctx, func_addr); + ret = bpf_jit_emit_func_call_rel(image, ctx, func_addr); + + if (ret) + return ret; + /* move return value from r3 to BPF_REG_0 */ EMIT(PPC_RAW_MR(b2p[BPF_REG_0], 3)); break; From feb6307289d85262c5aed04d6f192d38abba7c45 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com> Date: Mon, 14 Feb 2022 16:11:43 +0530 Subject: [PATCH 143/179] powerpc64/bpf: Optimize instruction sequence used for function calls When calling BPF helpers, we load the function address to call into a register. This can result in up to 5 instructions. Optimize this by instead using the kernel TOC in r2 and adjusting the offset to the BPF helper. This works since all BPF helpers are part of kernel text, and all BPF programs/functions utilize the kernel TOC. Furthermore: - load the actual function entry address in elf v1, rather than loading it through the function descriptor address. - load the Local Entry Point (LEP) in elf v2, skipping TOC setup. - consolidate code across elf abi v1 and v2 by using r12 on both. Reported-by: Anton Blanchard <anton@ozlabs.org> Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/1233c7544e60dcb021c52b1f840b0f21a87b33ed.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit_comp64.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index e9fd4694226f..bff200723e72 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -150,22 +150,20 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) static int bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, u64 func) { unsigned long func_addr = func ? ppc_function_entry((void *)func) : 0; + long reladdr; if (WARN_ON_ONCE(!core_kernel_text(func_addr))) return -EINVAL; -#ifdef PPC64_ELF_ABI_v1 - /* func points to the function descriptor */ - PPC_LI64(b2p[TMP_REG_2], func); - /* Load actual entry point from function descriptor */ - PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0); - /* ... 
and move it to CTR */ - EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1])); -#else - /* We can clobber r12 */ - PPC_FUNC_ADDR(12, func); - EMIT(PPC_RAW_MTCTR(12)); -#endif + reladdr = func_addr - kernel_toc_addr(); + if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { + pr_err("eBPF: address of %ps out of range of kernel_toc.\n", (void *)func); + return -ERANGE; + } + + EMIT(PPC_RAW_ADDIS(_R12, _R2, PPC_HA(reladdr))); + EMIT(PPC_RAW_ADDI(_R12, _R12, PPC_LO(reladdr))); + EMIT(PPC_RAW_MTCTR(_R12)); EMIT(PPC_RAW_BCTRL()); return 0; @@ -178,6 +176,9 @@ int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func if (WARN_ON_ONCE(func && is_module_text_address(func))) return -EINVAL; + /* skip past descriptor if elf v1 */ + func += FUNCTION_DESCR_SIZE; + /* Load function address into r12 */ PPC_LI64(12, func); @@ -194,11 +195,6 @@ int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func for (i = ctx->idx - ctx_idx; i < 5; i++) EMIT(PPC_RAW_NOP()); -#ifdef PPC64_ELF_ABI_v1 - /* Load actual entry point from function descriptor */ - PPC_BPF_LL(12, 12, 0); -#endif - EMIT(PPC_RAW_MTCTR(12)); EMIT(PPC_RAW_BCTRL()); From 74bbe3f08463c48a27088be4823a5803b7f7d9a1 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com> Date: Mon, 14 Feb 2022 16:11:44 +0530 Subject: [PATCH 144/179] powerpc/bpf: Rename PPC_BL_ABS() to PPC_BL() PPC_BL_ABS() is just doing a relative branch with link. The name suggests that it is for branching to an absolute address, which is incorrect. Rename the macro to a more appropriate PPC_BL(). Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/f0e57b6c7a6ee40dba645535b70da46f46e8af5e.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit.h | 6 +++--- arch/powerpc/net/bpf_jit_comp32.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index ea384ae836cc..dd1b338ba064 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -34,9 +34,9 @@ EMIT(PPC_RAW_BRANCH(offset)); \ } while (0) -/* blr; (unconditional 'branch' with link) to absolute address */ -#define PPC_BL_ABS(dest) EMIT(PPC_INST_BL | \ - (((dest) - (unsigned long)(image + ctx->idx)) & 0x03fffffc)) +/* bl (unconditional 'branch' with link) */ +#define PPC_BL(dest) EMIT(PPC_INST_BL | (((dest) - (unsigned long)(image + ctx->idx)) & 0x03fffffc)) + /* "cond" here covers BO:BI fields. */ #define PPC_BCC_SHORT(cond, dest) \ do { \ diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 511d2a203e7d..b72fac52c3ca 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -198,7 +198,7 @@ int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func s32 rel = (s32)func - (s32)(image + ctx->idx); if (image && rel < 0x2000000 && rel >= -0x2000000) { - PPC_BL_ABS(func); + PPC_BL(func); EMIT(PPC_RAW_NOP()); EMIT(PPC_RAW_NOP()); EMIT(PPC_RAW_NOP()); From 391c271f4deb7356482d12f962a6fc018b6a3fb0 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com> Date: Mon, 14 Feb 2022 16:11:45 +0530 Subject: [PATCH 145/179] powerpc64/bpf: Convert some of the uses of PPC_BPF_[LL|STL] to PPC_BPF_[LD|STD] PPC_BPF_[LL|STL] are macros meant for scenarios where we may have to deal with a non-word aligned offset. 
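The word-alignment restriction comes from the instruction encoding: ld and std are DS-form, so the displacement is encoded in 14 bits and implicitly scaled by 4, and only offsets that are multiples of 4 fit. A small model (invented emitter) of the two shapes the macros pick between:

#include <stdio.h>

/* Mirror of the PPC_BPF_LL() choice: DS-form ld for aligned offsets,
 * X-form ldx through a scratch register otherwise. */
static void emit_load(int dst, int base, int off, int scratch)
{
	if (off % 4) {
		printf("li r%d, %d\n", scratch, off);	/* clobbers the scratch reg */
		printf("ldx r%d, r%d, r%d\n", dst, base, scratch);
	} else {
		printf("ld r%d, %d(r%d)\n", dst, off, base);
	}
}

int main(void)
{
	emit_load(9, 3, 16, 10);	/* aligned: one instruction */
	emit_load(9, 3, 18, 10);	/* unaligned: two, plus a clobber */
	return 0;
}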
Limit their usage to only those scenarios by converting the rest to just use PPC_BPF_[LD|STD]. Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/0eb472428165a307f6fdaf22b0c33cbf13a9a635.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit_comp64.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index bff200723e72..411ac41dba42 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -74,7 +74,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) int i; if (__is_defined(PPC64_ELF_ABI_v2)) - PPC_BPF_LL(_R2, _R13, offsetof(struct paca_struct, kernel_toc)); + EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc))); /* * Initialize tail_call_cnt if we do tail calls. @@ -84,7 +84,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) if (ctx->seen & SEEN_TAILCALL) { EMIT(PPC_RAW_LI(b2p[TMP_REG_1], 0)); /* this goes in the redzone */ - PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8)); + EMIT(PPC_RAW_STD(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8))); } else { EMIT(PPC_RAW_NOP()); EMIT(PPC_RAW_NOP()); @@ -97,7 +97,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) */ if (ctx->seen & SEEN_FUNC) { EMIT(PPC_RAW_MFLR(_R0)); - PPC_BPF_STL(0, 1, PPC_LR_STKOFF); + EMIT(PPC_RAW_STD(0, 1, PPC_LR_STKOFF)); } PPC_BPF_STLU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size)); @@ -110,7 +110,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) if (bpf_is_seen_register(ctx, b2p[i])) - PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); + EMIT(PPC_RAW_STD(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]))); /* Setup frame pointer to point to the bpf stack area */ if (bpf_is_seen_register(ctx, b2p[BPF_REG_FP])) @@ -125,13 +125,13 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx /* Restore NVRs */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) if (bpf_is_seen_register(ctx, b2p[i])) - PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); + EMIT(PPC_RAW_LD(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]))); /* Tear down our stack frame */ if (bpf_has_stack_frame(ctx)) { EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size)); if (ctx->seen & SEEN_FUNC) { - PPC_BPF_LL(0, 1, PPC_LR_STKOFF); + EMIT(PPC_RAW_LD(0, 1, PPC_LR_STKOFF)); EMIT(PPC_RAW_MTLR(0)); } } @@ -229,7 +229,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) * goto out; */ - PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); + EMIT(PPC_RAW_LD(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx))); EMIT(PPC_RAW_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT)); PPC_BCC_SHORT(COND_GE, out); @@ -237,12 +237,12 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * tail_call_cnt++; */ EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1)); - PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); + EMIT(PPC_RAW_STD(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx))); /* prog = array->ptrs[index]; */ EMIT(PPC_RAW_MULI(b2p[TMP_REG_1], b2p_index, 8)); EMIT(PPC_RAW_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array)); - PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs)); + 
EMIT(PPC_RAW_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs))); /* * if (prog == NULL) @@ -252,7 +252,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o PPC_BCC_SHORT(COND_EQ, out); /* goto *(prog->bpf_func + prologue_size); */ - PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func)); + EMIT(PPC_RAW_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func))); EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], FUNCTION_DESCR_SIZE + bpf_tailcall_prologue_size)); EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1])); @@ -628,7 +628,7 @@ bpf_alu32_trunc: break; case 64: /* Store the value to stack and then use byte-reverse loads */ - PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx)); + EMIT(PPC_RAW_STD(dst_reg, 1, bpf_jit_stack_local(ctx))); EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx))); if (cpu_has_feature(CPU_FTR_ARCH_206)) { EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1])); From 794abc08d75e9f2833f493090af14b748e182c5f Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com> Date: Mon, 14 Feb 2022 16:11:46 +0530 Subject: [PATCH 146/179] powerpc64/bpf: Get rid of PPC_BPF_[LL|STL|STLU] macros All these macros now have a single user. Expand their usage in place. Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/e0526fc7633a34f983a7a330712b55bdfaf20482.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit64.h | 22 ---------------------- arch/powerpc/net/bpf_jit_comp64.c | 21 +++++++++++++++------ 2 files changed, 15 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h index 82cdfee41278..199348b72966 100644 --- a/arch/powerpc/net/bpf_jit64.h +++ b/arch/powerpc/net/bpf_jit64.h @@ -64,28 +64,6 @@ const int b2p[MAX_BPF_JIT_REG + 2] = { /* PPC NVR range -- update this if we ever use NVRs below r27 */ #define BPF_PPC_NVR_MIN 27 -/* - * WARNING: These can use TMP_REG_2 if the offset is not at word boundary, - * so ensure that it isn't in use already. 
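As an aside, the BPF_DW byte-swap converted in this hunk is a nice illustration of the fallback path: without ldbrx (pre-ISA 2.06), the 64-bit swap is built from two byte-reversed word loads. A user-space model, assuming a little-endian host as in the CONFIG_CPU_LITTLE_ENDIAN path:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t lwbrx(const void *p)	/* stand-in for the lwbrx instruction */
{
	const uint8_t *b = p;
	return (uint32_t)b[0] << 24 | (uint32_t)b[1] << 16 |
	       (uint32_t)b[2] << 8 | (uint32_t)b[3];
}

int main(void)
{
	uint64_t v = 0x1122334455667788ULL, swapped;
	uint8_t stack[8];

	memcpy(stack, &v, 8);	/* "store the value to stack" */
	/* little-endian host: the first word supplies the high half (the
	 * sldi in the JIT), the second the low half, or'ed together */
	swapped = (uint64_t)lwbrx(stack) << 32 | lwbrx(stack + 4);
	printf("%016llx -> %016llx\n",
	       (unsigned long long)v, (unsigned long long)swapped);
	return 0;
}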
- */ -#define PPC_BPF_LL(r, base, i) do { \ - if ((i) % 4) { \ - EMIT(PPC_RAW_LI(b2p[TMP_REG_2], (i)));\ - EMIT(PPC_RAW_LDX(r, base, \ - b2p[TMP_REG_2])); \ - } else \ - EMIT(PPC_RAW_LD(r, base, i)); \ - } while(0) -#define PPC_BPF_STL(r, base, i) do { \ - if ((i) % 4) { \ - EMIT(PPC_RAW_LI(b2p[TMP_REG_2], (i)));\ - EMIT(PPC_RAW_STDX(r, base, \ - b2p[TMP_REG_2])); \ - } else \ - EMIT(PPC_RAW_STD(r, base, i)); \ - } while(0) -#define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STDU(r, base, i)); } while(0) - #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 411ac41dba42..eeda636cd7be 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -100,7 +100,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) EMIT(PPC_RAW_STD(0, 1, PPC_LR_STKOFF)); } - PPC_BPF_STLU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size)); + EMIT(PPC_RAW_STDU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size))); } /* @@ -726,7 +726,12 @@ emit_clear: PPC_LI32(b2p[TMP_REG_1], imm); src_reg = b2p[TMP_REG_1]; } - PPC_BPF_STL(src_reg, dst_reg, off); + if (off % 4) { + EMIT(PPC_RAW_LI(b2p[TMP_REG_2], off)); + EMIT(PPC_RAW_STDX(src_reg, dst_reg, b2p[TMP_REG_2])); + } else { + EMIT(PPC_RAW_STD(src_reg, dst_reg, off)); + } break; /* @@ -802,9 +807,8 @@ emit_clear: PPC_BCC_SHORT(COND_GT, (ctx->idx + 3) * 4); EMIT(PPC_RAW_LI(dst_reg, 0)); /* - * Check if 'off' is word aligned because PPC_BPF_LL() - * (BPF_DW case) generates two instructions if 'off' is not - * word-aligned and one instruction otherwise. + * Check if 'off' is word aligned for BPF_DW, because + * we might generate two instructions. */ if (BPF_SIZE(code) == BPF_DW && (off & 3)) PPC_JMP((ctx->idx + 3) * 4); @@ -823,7 +827,12 @@ emit_clear: EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); break; case BPF_DW: - PPC_BPF_LL(dst_reg, src_reg, off); + if (off % 4) { + EMIT(PPC_RAW_LI(b2p[TMP_REG_1], off)); + EMIT(PPC_RAW_LDX(dst_reg, src_reg, b2p[TMP_REG_1])); + } else { + EMIT(PPC_RAW_LD(dst_reg, src_reg, off)); + } break; } From 7b187dcdb5d348aa916dcda769313512c08e85a5 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com> Date: Mon, 14 Feb 2022 16:11:47 +0530 Subject: [PATCH 147/179] powerpc/bpf: Cleanup bpf_jit.h - PPC_EX32() is only used by ppc32 JIT. Move it to bpf_jit_comp32.c - PPC_LI64() is only valid in ppc64. #ifdef it - PPC_FUNC_ADDR() is not used anymore. Remove it. Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/58f5b66b2f8546bbbee620f62103a8e97a63eb7c.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit.h | 10 +--------- arch/powerpc/net/bpf_jit_comp32.c | 2 ++ 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index dd1b338ba064..42a9adda31eb 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -59,10 +59,7 @@ EMIT(PPC_RAW_ORI(d, d, IMM_L(i))); \ } } while(0) -#ifdef CONFIG_PPC32 -#define PPC_EX32(r, i) EMIT(PPC_RAW_LI((r), (i) < 0 ? 
-1 : 0)) -#endif - +#ifdef CONFIG_PPC64 #define PPC_LI64(d, i) do { \ if ((long)(i) >= -2147483648 && \ (long)(i) < 2147483648) \ @@ -85,11 +82,6 @@ EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) & \ 0xffff)); \ } } while (0) - -#ifdef CONFIG_PPC64 -#define PPC_FUNC_ADDR(d,i) do { PPC_LI64(d, i); } while(0) -#else -#define PPC_FUNC_ADDR(d,i) do { PPC_LI32(d, i); } while(0) #endif /* diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index b72fac52c3ca..1dda7e3a3e9b 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -36,6 +36,8 @@ /* BPF register usage */ #define TMP_REG (MAX_BPF_JIT_REG + 0) +#define PPC_EX32(r, i) EMIT(PPC_RAW_LI((r), (i) < 0 ? -1 : 0)) + /* BPF to ppc register mappings */ const int b2p[MAX_BPF_JIT_REG + 1] = { /* function return value */ From 576a6c3a00c1a2a3645e039b126b52f6c7755e54 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com> Date: Mon, 14 Feb 2022 16:11:48 +0530 Subject: [PATCH 148/179] powerpc/bpf: Move bpf_jit64.h into bpf_jit_comp64.c There is no need for a separate header anymore. Move the contents of bpf_jit64.h into bpf_jit_comp64.c Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/b873a8e6eff7d91bf2a2cabdd53082aadfe20761.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit64.h | 69 ------------------------------- arch/powerpc/net/bpf_jit_comp64.c | 54 +++++++++++++++++++++++- 2 files changed, 53 insertions(+), 70 deletions(-) delete mode 100644 arch/powerpc/net/bpf_jit64.h diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h deleted file mode 100644 index 199348b72966..000000000000 --- a/arch/powerpc/net/bpf_jit64.h +++ /dev/null @@ -1,69 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * bpf_jit64.h: BPF JIT compiler for PPC64 - * - * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> - * IBM Corporation - */ -#ifndef _BPF_JIT64_H -#define _BPF_JIT64_H - -#include "bpf_jit.h" - -/* - * Stack layout: - * Ensure the top half (upto local_tmp_var) stays consistent - * with our redzone usage. 
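The diagram that follows is worth sanity-checking against the constants defined beneath it; assuming ELFv2's 32-byte minimum frame header (ELFv1 would be 112), the fixed part of the frame works out to a quadword-aligned 96 bytes:

#include <stdio.h>

#define STACK_FRAME_MIN_SIZE	32	/* assumption: ELFv2 header */
#define BPF_PPC_STACK_SAVE	(5 * 8)	/* nv gpr save area, r27..r31 */
#define BPF_PPC_STACK_LOCALS	24	/* local_tmp_var (16) + tail_call_cnt (8) */

int main(void)
{
	int frame = STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE;

	printf("fixed frame: %d bytes (%% 16 == %d), plus up to 512 bytes of BPF stack\n",
	       frame, frame % 16);
	return 0;
}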
- * - * [ prev sp ] <------------- - * [ nv gpr save area ] 5*8 | - * [ tail_call_cnt ] 8 | - * [ local_tmp_var ] 16 | - * fp (r31) --> [ ebpf stack space ] upto 512 | - * [ frame header ] 32/112 | - * sp (r1) ---> [ stack pointer ] -------------- - */ - -/* for gpr non volatile registers BPG_REG_6 to 10 */ -#define BPF_PPC_STACK_SAVE (5*8) -/* for bpf JIT code internal usage */ -#define BPF_PPC_STACK_LOCALS 24 -/* stack frame excluding BPF stack, ensure this is quadword aligned */ -#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \ - BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE) - -#ifndef __ASSEMBLY__ - -/* BPF register usage */ -#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) -#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) - -/* BPF to ppc register mappings */ -const int b2p[MAX_BPF_JIT_REG + 2] = { - /* function return value */ - [BPF_REG_0] = 8, - /* function arguments */ - [BPF_REG_1] = 3, - [BPF_REG_2] = 4, - [BPF_REG_3] = 5, - [BPF_REG_4] = 6, - [BPF_REG_5] = 7, - /* non volatile registers */ - [BPF_REG_6] = 27, - [BPF_REG_7] = 28, - [BPF_REG_8] = 29, - [BPF_REG_9] = 30, - /* frame pointer aka BPF_REG_10 */ - [BPF_REG_FP] = 31, - /* eBPF jit internal registers */ - [BPF_REG_AX] = 12, - [TMP_REG_1] = 9, - [TMP_REG_2] = 10 -}; - -/* PPC NVR range -- update this if we ever use NVRs below r27 */ -#define BPF_PPC_NVR_MIN 27 - -#endif /* !__ASSEMBLY__ */ - -#endif diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index eeda636cd7be..3e4ed5560947 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -17,7 +17,59 @@ #include <linux/bpf.h> #include <asm/security_features.h> -#include "bpf_jit64.h" +#include "bpf_jit.h" + +/* + * Stack layout: + * Ensure the top half (upto local_tmp_var) stays consistent + * with our redzone usage. + * + * [ prev sp ] <------------- + * [ nv gpr save area ] 5*8 | + * [ tail_call_cnt ] 8 | + * [ local_tmp_var ] 16 | + * fp (r31) --> [ ebpf stack space ] upto 512 | + * [ frame header ] 32/112 | + * sp (r1) ---> [ stack pointer ] -------------- + */ + +/* for gpr non volatile registers BPG_REG_6 to 10 */ +#define BPF_PPC_STACK_SAVE (5*8) +/* for bpf JIT code internal usage */ +#define BPF_PPC_STACK_LOCALS 24 +/* stack frame excluding BPF stack, ensure this is quadword aligned */ +#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \ + BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE) + +/* BPF register usage */ +#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) +#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) + +/* BPF to ppc register mappings */ +const int b2p[MAX_BPF_JIT_REG + 2] = { + /* function return value */ + [BPF_REG_0] = 8, + /* function arguments */ + [BPF_REG_1] = 3, + [BPF_REG_2] = 4, + [BPF_REG_3] = 5, + [BPF_REG_4] = 6, + [BPF_REG_5] = 7, + /* non volatile registers */ + [BPF_REG_6] = 27, + [BPF_REG_7] = 28, + [BPF_REG_8] = 29, + [BPF_REG_9] = 30, + /* frame pointer aka BPF_REG_10 */ + [BPF_REG_FP] = 31, + /* eBPF jit internal registers */ + [BPF_REG_AX] = 12, + [TMP_REG_1] = 9, + [TMP_REG_2] = 10 +}; + +/* PPC NVR range -- update this if we ever use NVRs below r27 */ +#define BPF_PPC_NVR_MIN 27 static inline bool bpf_has_stack_frame(struct codegen_context *ctx) { From 036d559c0bdea75bf4840ba6780790d08572480c Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com> Date: Mon, 14 Feb 2022 16:11:49 +0530 Subject: [PATCH 149/179] powerpc/bpf: Use _Rn macros for GPRs Use _Rn macros to specify register names to make their usage clear. Signed-off-by: Naveen N. 
Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/7df626b8cdc6141d4295ac16137c82ad570b6637.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit_comp32.c | 30 +++++++------- arch/powerpc/net/bpf_jit_comp64.c | 68 +++++++++++++++---------------- 2 files changed, 49 insertions(+), 49 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 1dda7e3a3e9b..1c86b489232a 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -41,23 +41,23 @@ /* BPF to ppc register mappings */ const int b2p[MAX_BPF_JIT_REG + 1] = { /* function return value */ - [BPF_REG_0] = 12, + [BPF_REG_0] = _R12, /* function arguments */ - [BPF_REG_1] = 4, - [BPF_REG_2] = 6, - [BPF_REG_3] = 8, - [BPF_REG_4] = 10, - [BPF_REG_5] = 22, + [BPF_REG_1] = _R4, + [BPF_REG_2] = _R6, + [BPF_REG_3] = _R8, + [BPF_REG_4] = _R10, + [BPF_REG_5] = _R22, /* non volatile registers */ - [BPF_REG_6] = 24, - [BPF_REG_7] = 26, - [BPF_REG_8] = 28, - [BPF_REG_9] = 30, + [BPF_REG_6] = _R24, + [BPF_REG_7] = _R26, + [BPF_REG_8] = _R28, + [BPF_REG_9] = _R30, /* frame pointer aka BPF_REG_10 */ - [BPF_REG_FP] = 18, + [BPF_REG_FP] = _R18, /* eBPF jit internal registers */ - [BPF_REG_AX] = 20, - [TMP_REG] = 31, /* 32 bits */ + [BPF_REG_AX] = _R20, + [TMP_REG] = _R31, /* 32 bits */ }; static int bpf_to_ppc(struct codegen_context *ctx, int reg) @@ -66,8 +66,8 @@ static int bpf_to_ppc(struct codegen_context *ctx, int reg) } /* PPC NVR range -- update this if we ever use NVRs below r17 */ -#define BPF_PPC_NVR_MIN 17 -#define BPF_PPC_TC 16 +#define BPF_PPC_NVR_MIN _R17 +#define BPF_PPC_TC _R16 static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) { diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 3e4ed5560947..ac06efa70223 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -48,28 +48,28 @@ /* BPF to ppc register mappings */ const int b2p[MAX_BPF_JIT_REG + 2] = { /* function return value */ - [BPF_REG_0] = 8, + [BPF_REG_0] = _R8, /* function arguments */ - [BPF_REG_1] = 3, - [BPF_REG_2] = 4, - [BPF_REG_3] = 5, - [BPF_REG_4] = 6, - [BPF_REG_5] = 7, + [BPF_REG_1] = _R3, + [BPF_REG_2] = _R4, + [BPF_REG_3] = _R5, + [BPF_REG_4] = _R6, + [BPF_REG_5] = _R7, /* non volatile registers */ - [BPF_REG_6] = 27, - [BPF_REG_7] = 28, - [BPF_REG_8] = 29, - [BPF_REG_9] = 30, + [BPF_REG_6] = _R27, + [BPF_REG_7] = _R28, + [BPF_REG_8] = _R29, + [BPF_REG_9] = _R30, /* frame pointer aka BPF_REG_10 */ - [BPF_REG_FP] = 31, + [BPF_REG_FP] = _R31, /* eBPF jit internal registers */ - [BPF_REG_AX] = 12, - [TMP_REG_1] = 9, - [TMP_REG_2] = 10 + [BPF_REG_AX] = _R12, + [TMP_REG_1] = _R9, + [TMP_REG_2] = _R10 }; /* PPC NVR range -- update this if we ever use NVRs below r27 */ -#define BPF_PPC_NVR_MIN 27 +#define BPF_PPC_NVR_MIN _R27 static inline bool bpf_has_stack_frame(struct codegen_context *ctx) { @@ -136,7 +136,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) if (ctx->seen & SEEN_TAILCALL) { EMIT(PPC_RAW_LI(b2p[TMP_REG_1], 0)); /* this goes in the redzone */ - EMIT(PPC_RAW_STD(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8))); + EMIT(PPC_RAW_STD(b2p[TMP_REG_1], _R1, -(BPF_PPC_STACK_SAVE + 8))); } else { EMIT(PPC_RAW_NOP()); EMIT(PPC_RAW_NOP()); @@ -149,10 +149,10 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) */ if (ctx->seen & SEEN_FUNC) { EMIT(PPC_RAW_MFLR(_R0)); - 
EMIT(PPC_RAW_STD(0, 1, PPC_LR_STKOFF)); + EMIT(PPC_RAW_STD(_R0, _R1, PPC_LR_STKOFF)); } - EMIT(PPC_RAW_STDU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size))); + EMIT(PPC_RAW_STDU(_R1, _R1, -(BPF_PPC_STACKFRAME + ctx->stack_size))); } /* @@ -162,11 +162,11 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) if (bpf_is_seen_register(ctx, b2p[i])) - EMIT(PPC_RAW_STD(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]))); + EMIT(PPC_RAW_STD(b2p[i], _R1, bpf_jit_stack_offsetof(ctx, b2p[i]))); /* Setup frame pointer to point to the bpf stack area */ if (bpf_is_seen_register(ctx, b2p[BPF_REG_FP])) - EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], 1, + EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], _R1, STACK_FRAME_MIN_SIZE + ctx->stack_size)); } @@ -177,14 +177,14 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx /* Restore NVRs */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) if (bpf_is_seen_register(ctx, b2p[i])) - EMIT(PPC_RAW_LD(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]))); + EMIT(PPC_RAW_LD(b2p[i], _R1, bpf_jit_stack_offsetof(ctx, b2p[i]))); /* Tear down our stack frame */ if (bpf_has_stack_frame(ctx)) { - EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size)); + EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME + ctx->stack_size)); if (ctx->seen & SEEN_FUNC) { - EMIT(PPC_RAW_LD(0, 1, PPC_LR_STKOFF)); - EMIT(PPC_RAW_MTLR(0)); + EMIT(PPC_RAW_LD(_R0, _R1, PPC_LR_STKOFF)); + EMIT(PPC_RAW_MTLR(_R0)); } } } @@ -194,7 +194,7 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) bpf_jit_emit_common_epilogue(image, ctx); /* Move result to r3 */ - EMIT(PPC_RAW_MR(3, b2p[BPF_REG_0])); + EMIT(PPC_RAW_MR(_R3, b2p[BPF_REG_0])); EMIT(PPC_RAW_BLR()); } @@ -232,7 +232,7 @@ int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func func += FUNCTION_DESCR_SIZE; /* Load function address into r12 */ - PPC_LI64(12, func); + PPC_LI64(_R12, func); /* For bpf-to-bpf function calls, the callee's address is unknown * until the last extra pass. 
As seen above, we use PPC_LI64() to @@ -247,7 +247,7 @@ int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func for (i = ctx->idx - ctx_idx; i < 5; i++) EMIT(PPC_RAW_NOP()); - EMIT(PPC_RAW_MTCTR(12)); + EMIT(PPC_RAW_MTCTR(_R12)); EMIT(PPC_RAW_BCTRL()); return 0; @@ -281,7 +281,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) * goto out; */ - EMIT(PPC_RAW_LD(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx))); + EMIT(PPC_RAW_LD(b2p[TMP_REG_1], _R1, bpf_jit_stack_tailcallcnt(ctx))); EMIT(PPC_RAW_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT)); PPC_BCC_SHORT(COND_GE, out); @@ -289,7 +289,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * tail_call_cnt++; */ EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1)); - EMIT(PPC_RAW_STD(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx))); + EMIT(PPC_RAW_STD(b2p[TMP_REG_1], _R1, bpf_jit_stack_tailcallcnt(ctx))); /* prog = array->ptrs[index]; */ EMIT(PPC_RAW_MULI(b2p[TMP_REG_1], b2p_index, 8)); @@ -680,8 +680,8 @@ bpf_alu32_trunc: break; case 64: /* Store the value to stack and then use byte-reverse loads */ - EMIT(PPC_RAW_STD(dst_reg, 1, bpf_jit_stack_local(ctx))); - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx))); + EMIT(PPC_RAW_STD(dst_reg, _R1, bpf_jit_stack_local(ctx))); + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], _R1, bpf_jit_stack_local(ctx))); if (cpu_has_feature(CPU_FTR_ARCH_206)) { EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1])); } else { @@ -736,8 +736,8 @@ emit_clear: break; case STF_BARRIER_FALLBACK: ctx->seen |= SEEN_FUNC; - PPC_LI64(12, dereference_kernel_function_descriptor(bpf_stf_barrier)); - EMIT(PPC_RAW_MTCTR(12)); + PPC_LI64(_R12, dereference_kernel_function_descriptor(bpf_stf_barrier)); + EMIT(PPC_RAW_MTCTR(_R12)); EMIT(PPC_RAW_BCTRL()); break; case STF_BARRIER_NONE: @@ -952,7 +952,7 @@ emit_clear: return ret; /* move return value from r3 to BPF_REG_0 */ - EMIT(PPC_RAW_MR(b2p[BPF_REG_0], 3)); + EMIT(PPC_RAW_MR(b2p[BPF_REG_0], _R3)); break; /* From 3a3fc9bf103974d9a886fa37087d5d491c806e00 Mon Sep 17 00:00:00 2001 From: Jordan Niethe <jniethe5@gmail.com> Date: Mon, 14 Feb 2022 16:11:50 +0530 Subject: [PATCH 150/179] powerpc64/bpf: Store temp registers' bpf to ppc mapping In bpf_jit_build_body(), the mapping of TMP_REG_1 and TMP_REG_2's bpf register to ppc register is evalulated at every use despite not changing. Instead, determine the ppc register once and store the result. Signed-off-by: Jordan Niethe <jniethe5@gmail.com> [Rebased, converted additional usage sites] Signed-off-by: Naveen N. 
Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/0944e2f0fa6dd254ea401f1c946fb6c9a5294278.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit_comp64.c | 197 +++++++++++++----------------- 1 file changed, 86 insertions(+), 111 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index ac06efa70223..b4de0c35c8a4 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -357,6 +357,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * u32 dst_reg = b2p[insn[i].dst_reg]; u32 src_reg = b2p[insn[i].src_reg]; u32 size = BPF_SIZE(code); + u32 tmp1_reg = b2p[TMP_REG_1]; + u32 tmp2_reg = b2p[TMP_REG_2]; s16 off = insn[i].off; s32 imm = insn[i].imm; bool func_addr_fixed; @@ -407,8 +409,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * } else if (imm >= -32768 && imm < 32768) { EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm))); } else { - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_ADD(dst_reg, dst_reg, b2p[TMP_REG_1])); + PPC_LI32(tmp1_reg, imm); + EMIT(PPC_RAW_ADD(dst_reg, dst_reg, tmp1_reg)); } goto bpf_alu32_trunc; case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ @@ -418,8 +420,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * } else if (imm > -32768 && imm <= 32768) { EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(-imm))); } else { - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1])); + PPC_LI32(tmp1_reg, imm); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg)); } goto bpf_alu32_trunc; case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ @@ -434,32 +436,28 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (imm >= -32768 && imm < 32768) EMIT(PPC_RAW_MULI(dst_reg, dst_reg, IMM_L(imm))); else { - PPC_LI32(b2p[TMP_REG_1], imm); + PPC_LI32(tmp1_reg, imm); if (BPF_CLASS(code) == BPF_ALU) - EMIT(PPC_RAW_MULW(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp1_reg)); else - EMIT(PPC_RAW_MULD(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_MULD(dst_reg, dst_reg, tmp1_reg)); } goto bpf_alu32_trunc; case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */ case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */ if (BPF_OP(code) == BPF_MOD) { - EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg)); - EMIT(PPC_RAW_MULW(b2p[TMP_REG_1], src_reg, - b2p[TMP_REG_1])); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVWU(tmp1_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_MULW(tmp1_reg, src_reg, tmp1_reg)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg)); } else EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */ if (BPF_OP(code) == BPF_MOD) { - EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg)); - EMIT(PPC_RAW_MULD(b2p[TMP_REG_1], src_reg, - b2p[TMP_REG_1])); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVDU(tmp1_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_MULD(tmp1_reg, src_reg, tmp1_reg)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg)); } else EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, src_reg)); break; @@ -478,35 +476,23 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * } } - PPC_LI32(b2p[TMP_REG_1], imm); + PPC_LI32(tmp1_reg, 
imm); switch (BPF_CLASS(code)) { case BPF_ALU: if (BPF_OP(code) == BPF_MOD) { - EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_2], - dst_reg, - b2p[TMP_REG_1])); - EMIT(PPC_RAW_MULW(b2p[TMP_REG_1], - b2p[TMP_REG_1], - b2p[TMP_REG_2])); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVWU(tmp2_reg, dst_reg, tmp1_reg)); + EMIT(PPC_RAW_MULW(tmp1_reg, tmp1_reg, tmp2_reg)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg)); } else - EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, tmp1_reg)); break; case BPF_ALU64: if (BPF_OP(code) == BPF_MOD) { - EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_2], - dst_reg, - b2p[TMP_REG_1])); - EMIT(PPC_RAW_MULD(b2p[TMP_REG_1], - b2p[TMP_REG_1], - b2p[TMP_REG_2])); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVDU(tmp2_reg, dst_reg, tmp1_reg)); + EMIT(PPC_RAW_MULD(tmp1_reg, tmp1_reg, tmp2_reg)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg)); } else - EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, tmp1_reg)); break; } goto bpf_alu32_trunc; @@ -528,8 +514,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm))); else { /* Sign-extended */ - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_AND(dst_reg, dst_reg, b2p[TMP_REG_1])); + PPC_LI32(tmp1_reg, imm); + EMIT(PPC_RAW_AND(dst_reg, dst_reg, tmp1_reg)); } goto bpf_alu32_trunc; case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */ @@ -540,8 +526,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */ if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) { /* Sign-extended */ - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_1])); + PPC_LI32(tmp1_reg, imm); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp1_reg)); } else { if (IMM_L(imm)) EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm))); @@ -557,8 +543,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */ if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) { /* Sign-extended */ - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_XOR(dst_reg, dst_reg, b2p[TMP_REG_1])); + PPC_LI32(tmp1_reg, imm); + EMIT(PPC_RAW_XOR(dst_reg, dst_reg, tmp1_reg)); } else { if (IMM_L(imm)) EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm))); @@ -659,11 +645,11 @@ bpf_alu32_trunc: switch (imm) { case 16: /* Rotate 8 bits left & mask with 0x0000ff00 */ - EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23)); + EMIT(PPC_RAW_RLWINM(tmp1_reg, dst_reg, 8, 16, 23)); /* Rotate 8 bits right & insert LSB to reg */ - EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31)); + EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 24, 31)); /* Move result back to dst_reg */ - EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1])); + EMIT(PPC_RAW_MR(dst_reg, tmp1_reg)); break; case 32: /* @@ -671,28 +657,28 @@ bpf_alu32_trunc: * 2 bytes are already in their final position * -- byte 2 and 4 (of bytes 1, 2, 3 and 4) */ - EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31)); + EMIT(PPC_RAW_RLWINM(tmp1_reg, dst_reg, 8, 0, 31)); /* Rotate 24 bits and insert byte 1 */ - EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7)); + EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 0, 7)); /* Rotate 24 bits and insert byte 3 */ - EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23)); - EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1])); + EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 
24, 16, 23)); + EMIT(PPC_RAW_MR(dst_reg, tmp1_reg)); break; case 64: /* Store the value to stack and then use byte-reverse loads */ EMIT(PPC_RAW_STD(dst_reg, _R1, bpf_jit_stack_local(ctx))); - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], _R1, bpf_jit_stack_local(ctx))); + EMIT(PPC_RAW_ADDI(tmp1_reg, _R1, bpf_jit_stack_local(ctx))); if (cpu_has_feature(CPU_FTR_ARCH_206)) { - EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1])); + EMIT(PPC_RAW_LDBRX(dst_reg, 0, tmp1_reg)); } else { - EMIT(PPC_RAW_LWBRX(dst_reg, 0, b2p[TMP_REG_1])); + EMIT(PPC_RAW_LWBRX(dst_reg, 0, tmp1_reg)); if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN)) EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, 32)); - EMIT(PPC_RAW_LI(b2p[TMP_REG_2], 4)); - EMIT(PPC_RAW_LWBRX(b2p[TMP_REG_2], b2p[TMP_REG_2], b2p[TMP_REG_1])); + EMIT(PPC_RAW_LI(tmp2_reg, 4)); + EMIT(PPC_RAW_LWBRX(tmp2_reg, tmp2_reg, tmp1_reg)); if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) - EMIT(PPC_RAW_SLDI(b2p[TMP_REG_2], b2p[TMP_REG_2], 32)); - EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_2])); + EMIT(PPC_RAW_SLDI(tmp2_reg, tmp2_reg, 32)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp2_reg)); } break; } @@ -731,7 +717,7 @@ emit_clear: break; case STF_BARRIER_SYNC_ORI: EMIT(PPC_RAW_SYNC()); - EMIT(PPC_RAW_LD(b2p[TMP_REG_1], _R13, 0)); + EMIT(PPC_RAW_LD(tmp1_reg, _R13, 0)); EMIT(PPC_RAW_ORI(_R31, _R31, 0)); break; case STF_BARRIER_FALLBACK: @@ -751,36 +737,36 @@ emit_clear: case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */ if (BPF_CLASS(code) == BPF_ST) { - EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm)); - src_reg = b2p[TMP_REG_1]; + EMIT(PPC_RAW_LI(tmp1_reg, imm)); + src_reg = tmp1_reg; } EMIT(PPC_RAW_STB(src_reg, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */ if (BPF_CLASS(code) == BPF_ST) { - EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm)); - src_reg = b2p[TMP_REG_1]; + EMIT(PPC_RAW_LI(tmp1_reg, imm)); + src_reg = tmp1_reg; } EMIT(PPC_RAW_STH(src_reg, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */ if (BPF_CLASS(code) == BPF_ST) { - PPC_LI32(b2p[TMP_REG_1], imm); - src_reg = b2p[TMP_REG_1]; + PPC_LI32(tmp1_reg, imm); + src_reg = tmp1_reg; } EMIT(PPC_RAW_STW(src_reg, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */ if (BPF_CLASS(code) == BPF_ST) { - PPC_LI32(b2p[TMP_REG_1], imm); - src_reg = b2p[TMP_REG_1]; + PPC_LI32(tmp1_reg, imm); + src_reg = tmp1_reg; } if (off % 4) { - EMIT(PPC_RAW_LI(b2p[TMP_REG_2], off)); - EMIT(PPC_RAW_STDX(src_reg, dst_reg, b2p[TMP_REG_2])); + EMIT(PPC_RAW_LI(tmp2_reg, off)); + EMIT(PPC_RAW_STDX(src_reg, dst_reg, tmp2_reg)); } else { EMIT(PPC_RAW_STD(src_reg, dst_reg, off)); } @@ -800,14 +786,14 @@ emit_clear: /* *(u32 *)(dst + off) += src */ /* Get EA into TMP_REG_1 */ - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off)); + EMIT(PPC_RAW_ADDI(tmp1_reg, dst_reg, off)); tmp_idx = ctx->idx * 4; /* load value from memory into TMP_REG_2 */ - EMIT(PPC_RAW_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0)); + EMIT(PPC_RAW_LWARX(tmp2_reg, 0, tmp1_reg, 0)); /* add value from src_reg into this */ - EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg)); + EMIT(PPC_RAW_ADD(tmp2_reg, tmp2_reg, src_reg)); /* store result back */ - EMIT(PPC_RAW_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1])); + EMIT(PPC_RAW_STWCX(tmp2_reg, 0, tmp1_reg)); /* we're done 
if this succeeded */ PPC_BCC_SHORT(COND_NE, tmp_idx); break; @@ -820,11 +806,11 @@ emit_clear: } /* *(u64 *)(dst + off) += src */ - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off)); + EMIT(PPC_RAW_ADDI(tmp1_reg, dst_reg, off)); tmp_idx = ctx->idx * 4; - EMIT(PPC_RAW_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0)); - EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg)); - EMIT(PPC_RAW_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1])); + EMIT(PPC_RAW_LDARX(tmp2_reg, 0, tmp1_reg, 0)); + EMIT(PPC_RAW_ADD(tmp2_reg, tmp2_reg, src_reg)); + EMIT(PPC_RAW_STDCX(tmp2_reg, 0, tmp1_reg)); PPC_BCC_SHORT(COND_NE, tmp_idx); break; @@ -850,12 +836,12 @@ emit_clear: * set dst_reg=0 and move on. */ if (BPF_MODE(code) == BPF_PROBE_MEM) { - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], src_reg, off)); + EMIT(PPC_RAW_ADDI(tmp1_reg, src_reg, off)); if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) - PPC_LI64(b2p[TMP_REG_2], 0x8000000000000000ul); + PPC_LI64(tmp2_reg, 0x8000000000000000ul); else /* BOOK3S_64 */ - PPC_LI64(b2p[TMP_REG_2], PAGE_OFFSET); - EMIT(PPC_RAW_CMPLD(b2p[TMP_REG_1], b2p[TMP_REG_2])); + PPC_LI64(tmp2_reg, PAGE_OFFSET); + EMIT(PPC_RAW_CMPLD(tmp1_reg, tmp2_reg)); PPC_BCC_SHORT(COND_GT, (ctx->idx + 3) * 4); EMIT(PPC_RAW_LI(dst_reg, 0)); /* @@ -880,8 +866,8 @@ emit_clear: break; case BPF_DW: if (off % 4) { - EMIT(PPC_RAW_LI(b2p[TMP_REG_1], off)); - EMIT(PPC_RAW_LDX(dst_reg, src_reg, b2p[TMP_REG_1])); + EMIT(PPC_RAW_LI(tmp1_reg, off)); + EMIT(PPC_RAW_LDX(dst_reg, src_reg, tmp1_reg)); } else { EMIT(PPC_RAW_LD(dst_reg, src_reg, off)); } @@ -925,7 +911,7 @@ emit_clear: * we'll just fall through to the epilogue. */ if (i != flen - 1) { - ret = bpf_jit_emit_exit_insn(image, ctx, b2p[TMP_REG_1], exit_addr); + ret = bpf_jit_emit_exit_insn(image, ctx, tmp1_reg, exit_addr); if (ret) return ret; } @@ -1058,14 +1044,10 @@ cond_branch: case BPF_JMP | BPF_JSET | BPF_X: case BPF_JMP32 | BPF_JSET | BPF_X: if (BPF_CLASS(code) == BPF_JMP) { - EMIT(PPC_RAW_AND_DOT(b2p[TMP_REG_1], dst_reg, - src_reg)); + EMIT(PPC_RAW_AND_DOT(tmp1_reg, dst_reg, src_reg)); } else { - int tmp_reg = b2p[TMP_REG_1]; - - EMIT(PPC_RAW_AND(tmp_reg, dst_reg, src_reg)); - EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0, - 31)); + EMIT(PPC_RAW_AND(tmp1_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_RLWINM_DOT(tmp1_reg, tmp1_reg, 0, 0, 31)); } break; case BPF_JMP | BPF_JNE | BPF_K: @@ -1094,14 +1076,12 @@ cond_branch: EMIT(PPC_RAW_CMPLDI(dst_reg, imm)); } else { /* sign-extending load */ - PPC_LI32(b2p[TMP_REG_1], imm); + PPC_LI32(tmp1_reg, imm); /* ... 
but unsigned comparison */ if (is_jmp32) - EMIT(PPC_RAW_CMPLW(dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_CMPLW(dst_reg, tmp1_reg)); else - EMIT(PPC_RAW_CMPLD(dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_CMPLD(dst_reg, tmp1_reg)); } break; } @@ -1126,13 +1106,11 @@ cond_branch: else EMIT(PPC_RAW_CMPDI(dst_reg, imm)); } else { - PPC_LI32(b2p[TMP_REG_1], imm); + PPC_LI32(tmp1_reg, imm); if (is_jmp32) - EMIT(PPC_RAW_CMPW(dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_CMPW(dst_reg, tmp1_reg)); else - EMIT(PPC_RAW_CMPD(dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_CMPD(dst_reg, tmp1_reg)); } break; } @@ -1141,19 +1119,16 @@ cond_branch: /* andi does not sign-extend the immediate */ if (imm >= 0 && imm < 32768) /* PPC_ANDI is _only/always_ dot-form */ - EMIT(PPC_RAW_ANDI(b2p[TMP_REG_1], dst_reg, imm)); + EMIT(PPC_RAW_ANDI(tmp1_reg, dst_reg, imm)); else { - int tmp_reg = b2p[TMP_REG_1]; - - PPC_LI32(tmp_reg, imm); + PPC_LI32(tmp1_reg, imm); if (BPF_CLASS(code) == BPF_JMP) { - EMIT(PPC_RAW_AND_DOT(tmp_reg, dst_reg, - tmp_reg)); + EMIT(PPC_RAW_AND_DOT(tmp1_reg, dst_reg, + tmp1_reg)); } else { - EMIT(PPC_RAW_AND(tmp_reg, dst_reg, - tmp_reg)); - EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg, - 0, 0, 31)); + EMIT(PPC_RAW_AND(tmp1_reg, dst_reg, tmp1_reg)); + EMIT(PPC_RAW_RLWINM_DOT(tmp1_reg, tmp1_reg, + 0, 0, 31)); } } break; From 49c3af43e65fbcc13860e0cf5fb2507b13e9724c Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com> Date: Mon, 14 Feb 2022 16:11:51 +0530 Subject: [PATCH 151/179] powerpc/bpf: Simplify bpf_to_ppc() and adopt it for powerpc64 Convert bpf_to_ppc() to a macro to help simplify its usage since codegen_context is available in all places it is used. Adopt it also for powerpc64 for uniformity and get rid of the global b2p structure. Signed-off-by: Naveen N. 
Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/09f0540ce3e0cd4120b5b33993b5e73b6ef9e979.1644834730.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit.h | 11 ++-- arch/powerpc/net/bpf_jit_comp.c | 8 +-- arch/powerpc/net/bpf_jit_comp32.c | 98 +++++++++++++++---------------- arch/powerpc/net/bpf_jit_comp64.c | 93 ++++++++++++++--------------- 4 files changed, 102 insertions(+), 108 deletions(-) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 42a9adda31eb..979701d360da 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -119,12 +119,6 @@ #define SEEN_FUNC 0x20000000 /* might call external helpers */ #define SEEN_TAILCALL 0x40000000 /* uses tail calls */ -#ifdef CONFIG_PPC64 -extern const int b2p[MAX_BPF_JIT_REG + 2]; -#else -extern const int b2p[MAX_BPF_JIT_REG + 1]; -#endif - struct codegen_context { /* * This is used to track register usage as well @@ -138,11 +132,13 @@ struct codegen_context { unsigned int seen; unsigned int idx; unsigned int stack_size; - int b2p[ARRAY_SIZE(b2p)]; + int b2p[MAX_BPF_JIT_REG + 2]; unsigned int exentry_idx; unsigned int alt_exit_addr; }; +#define bpf_to_ppc(r) (ctx->b2p[r]) + #ifdef CONFIG_PPC32 #define BPF_FIXUP_LEN 3 /* Three instructions => 12 bytes */ #else @@ -170,6 +166,7 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i) ctx->seen &= ~(1 << (31 - i)); } +void bpf_jit_init_reg_mapping(struct codegen_context *ctx); int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, u32 *addrs, int pass); diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 635f7448ff79..fc160d33c839 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -72,13 +72,13 @@ static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image, tmp_idx = ctx->idx; ctx->idx = addrs[i] / 4; #ifdef CONFIG_PPC32 - PPC_LI32(ctx->b2p[insn[i].dst_reg] - 1, (u32)insn[i + 1].imm); - PPC_LI32(ctx->b2p[insn[i].dst_reg], (u32)insn[i].imm); + PPC_LI32(bpf_to_ppc(insn[i].dst_reg) - 1, (u32)insn[i + 1].imm); + PPC_LI32(bpf_to_ppc(insn[i].dst_reg), (u32)insn[i].imm); for (j = ctx->idx - addrs[i] / 4; j < 4; j++) EMIT(PPC_RAW_NOP()); #else func_addr = ((u64)(u32)insn[i].imm) | (((u64)(u32)insn[i + 1].imm) << 32); - PPC_LI64(b2p[insn[i].dst_reg], func_addr); + PPC_LI64(bpf_to_ppc(insn[i].dst_reg), func_addr); /* overwrite rest with nops */ for (j = ctx->idx - addrs[i] / 4; j < 5; j++) EMIT(PPC_RAW_NOP()); @@ -179,7 +179,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) } memset(&cgctx, 0, sizeof(struct codegen_context)); - memcpy(cgctx.b2p, b2p, sizeof(cgctx.b2p)); + bpf_jit_init_reg_mapping(&cgctx); /* Make sure that the stack is quadword aligned. */ cgctx.stack_size = round_up(fp->aux->stack_depth, 16); diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 1c86b489232a..e46ed1e8c6ca 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -33,42 +33,38 @@ /* stack frame, ensure this is quadword aligned */ #define BPF_PPC_STACKFRAME(ctx) (STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_SAVE + (ctx)->stack_size) -/* BPF register usage */ -#define TMP_REG (MAX_BPF_JIT_REG + 0) - #define PPC_EX32(r, i) EMIT(PPC_RAW_LI((r), (i) < 0 ? 
-1 : 0)) -/* BPF to ppc register mappings */ -const int b2p[MAX_BPF_JIT_REG + 1] = { - /* function return value */ - [BPF_REG_0] = _R12, - /* function arguments */ - [BPF_REG_1] = _R4, - [BPF_REG_2] = _R6, - [BPF_REG_3] = _R8, - [BPF_REG_4] = _R10, - [BPF_REG_5] = _R22, - /* non volatile registers */ - [BPF_REG_6] = _R24, - [BPF_REG_7] = _R26, - [BPF_REG_8] = _R28, - [BPF_REG_9] = _R30, - /* frame pointer aka BPF_REG_10 */ - [BPF_REG_FP] = _R18, - /* eBPF jit internal registers */ - [BPF_REG_AX] = _R20, - [TMP_REG] = _R31, /* 32 bits */ -}; - -static int bpf_to_ppc(struct codegen_context *ctx, int reg) -{ - return ctx->b2p[reg]; -} - /* PPC NVR range -- update this if we ever use NVRs below r17 */ #define BPF_PPC_NVR_MIN _R17 #define BPF_PPC_TC _R16 +/* BPF register usage */ +#define TMP_REG (MAX_BPF_JIT_REG + 0) + +/* BPF to ppc register mappings */ +void bpf_jit_init_reg_mapping(struct codegen_context *ctx) +{ + /* function return value */ + ctx->b2p[BPF_REG_0] = _R12; + /* function arguments */ + ctx->b2p[BPF_REG_1] = _R4; + ctx->b2p[BPF_REG_2] = _R6; + ctx->b2p[BPF_REG_3] = _R8; + ctx->b2p[BPF_REG_4] = _R10; + ctx->b2p[BPF_REG_5] = _R22; + /* non volatile registers */ + ctx->b2p[BPF_REG_6] = _R24; + ctx->b2p[BPF_REG_7] = _R26; + ctx->b2p[BPF_REG_8] = _R28; + ctx->b2p[BPF_REG_9] = _R30; + /* frame pointer aka BPF_REG_10 */ + ctx->b2p[BPF_REG_FP] = _R18; + /* eBPF jit internal registers */ + ctx->b2p[BPF_REG_AX] = _R20; + ctx->b2p[TMP_REG] = _R31; /* 32 bits */ +} + static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) { if ((reg >= BPF_PPC_NVR_MIN && reg < 32) || reg == BPF_PPC_TC) @@ -118,8 +114,8 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) int i; /* First arg comes in as a 32 bits pointer. */ - EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_1), _R3)); - EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_1) - 1, 0)); + EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3)); + EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0)); EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx))); /* @@ -128,7 +124,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * invoked through a tail call. 
*/ if (ctx->seen & SEEN_TAILCALL) - EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_1) - 1, _R1, + EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_1) - 1, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); else EMIT(PPC_RAW_NOP()); @@ -150,15 +146,15 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) EMIT(PPC_RAW_STW(i, _R1, bpf_jit_stack_offsetof(ctx, i))); /* If needed retrieve arguments 9 and 10, ie 5th 64 bits arg.*/ - if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) { - EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5) - 1, _R1, BPF_PPC_STACKFRAME(ctx)) + 8); - EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5), _R1, BPF_PPC_STACKFRAME(ctx)) + 12); + if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_5))) { + EMIT(PPC_RAW_LWZ(bpf_to_ppc(BPF_REG_5) - 1, _R1, BPF_PPC_STACKFRAME(ctx)) + 8); + EMIT(PPC_RAW_LWZ(bpf_to_ppc(BPF_REG_5), _R1, BPF_PPC_STACKFRAME(ctx)) + 12); } /* Setup frame pointer to point to the bpf stack area */ - if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_FP))) { - EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_FP) - 1, 0)); - EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, BPF_REG_FP), _R1, + if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP))) { + EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_FP) - 1, 0)); + EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1, STACK_FRAME_MIN_SIZE + ctx->stack_size)); } @@ -178,7 +174,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) { - EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(ctx, BPF_REG_0))); + EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0))); bpf_jit_emit_common_epilogue(image, ctx); @@ -223,8 +219,8 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * r5-r6/BPF_REG_2 - pointer to bpf_array * r7-r8/BPF_REG_3 - index in bpf_array */ - int b2p_bpf_array = bpf_to_ppc(ctx, BPF_REG_2); - int b2p_index = bpf_to_ppc(ctx, BPF_REG_3); + int b2p_bpf_array = bpf_to_ppc(BPF_REG_2); + int b2p_index = bpf_to_ppc(BPF_REG_3); /* * if (index >= array->map.max_entries) @@ -270,7 +266,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o EMIT(PPC_RAW_MTCTR(_R3)); - EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(ctx, BPF_REG_1))); + EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_1))); /* tear restore NVRs, ... 
*/ bpf_jit_emit_common_epilogue(image, ctx); @@ -294,11 +290,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * for (i = 0; i < flen; i++) { u32 code = insn[i].code; - u32 dst_reg = bpf_to_ppc(ctx, insn[i].dst_reg); + u32 dst_reg = bpf_to_ppc(insn[i].dst_reg); u32 dst_reg_h = dst_reg - 1; - u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg); + u32 src_reg = bpf_to_ppc(insn[i].src_reg); u32 src_reg_h = src_reg - 1; - u32 tmp_reg = bpf_to_ppc(ctx, TMP_REG); + u32 tmp_reg = bpf_to_ppc(TMP_REG); u32 size = BPF_SIZE(code); s16 off = insn[i].off; s32 imm = insn[i].imm; @@ -960,17 +956,17 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (ret < 0) return ret; - if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) { - EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5) - 1, _R1, 8)); - EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5), _R1, 12)); + if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_5))) { + EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_5) - 1, _R1, 8)); + EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_5), _R1, 12)); } ret = bpf_jit_emit_func_call_rel(image, ctx, func_addr); if (ret) return ret; - EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0) - 1, _R3)); - EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), _R4)); + EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_0) - 1, _R3)); + EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_0), _R4)); break; /* diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index b4de0c35c8a4..585f257da045 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -46,27 +46,28 @@ #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* BPF to ppc register mappings */ -const int b2p[MAX_BPF_JIT_REG + 2] = { +void bpf_jit_init_reg_mapping(struct codegen_context *ctx) +{ /* function return value */ - [BPF_REG_0] = _R8, + ctx->b2p[BPF_REG_0] = _R8; /* function arguments */ - [BPF_REG_1] = _R3, - [BPF_REG_2] = _R4, - [BPF_REG_3] = _R5, - [BPF_REG_4] = _R6, - [BPF_REG_5] = _R7, + ctx->b2p[BPF_REG_1] = _R3; + ctx->b2p[BPF_REG_2] = _R4; + ctx->b2p[BPF_REG_3] = _R5; + ctx->b2p[BPF_REG_4] = _R6; + ctx->b2p[BPF_REG_5] = _R7; /* non volatile registers */ - [BPF_REG_6] = _R27, - [BPF_REG_7] = _R28, - [BPF_REG_8] = _R29, - [BPF_REG_9] = _R30, + ctx->b2p[BPF_REG_6] = _R27; + ctx->b2p[BPF_REG_7] = _R28; + ctx->b2p[BPF_REG_8] = _R29; + ctx->b2p[BPF_REG_9] = _R30; /* frame pointer aka BPF_REG_10 */ - [BPF_REG_FP] = _R31, + ctx->b2p[BPF_REG_FP] = _R31; /* eBPF jit internal registers */ - [BPF_REG_AX] = _R12, - [TMP_REG_1] = _R9, - [TMP_REG_2] = _R10 -}; + ctx->b2p[BPF_REG_AX] = _R12; + ctx->b2p[TMP_REG_1] = _R9; + ctx->b2p[TMP_REG_2] = _R10; +} /* PPC NVR range -- update this if we ever use NVRs below r27 */ #define BPF_PPC_NVR_MIN _R27 @@ -79,7 +80,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) * - the bpf program uses its stack area * The latter condition is deduced from the usage of BPF_REG_FP */ - return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, b2p[BPF_REG_FP]); + return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP)); } /* @@ -134,9 +135,9 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * invoked through a tail call. 
*/ if (ctx->seen & SEEN_TAILCALL) { - EMIT(PPC_RAW_LI(b2p[TMP_REG_1], 0)); + EMIT(PPC_RAW_LI(bpf_to_ppc(TMP_REG_1), 0)); /* this goes in the redzone */ - EMIT(PPC_RAW_STD(b2p[TMP_REG_1], _R1, -(BPF_PPC_STACK_SAVE + 8))); + EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, -(BPF_PPC_STACK_SAVE + 8))); } else { EMIT(PPC_RAW_NOP()); EMIT(PPC_RAW_NOP()); @@ -161,12 +162,12 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * in the protected zone below the previous stack frame */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) - if (bpf_is_seen_register(ctx, b2p[i])) - EMIT(PPC_RAW_STD(b2p[i], _R1, bpf_jit_stack_offsetof(ctx, b2p[i]))); + if (bpf_is_seen_register(ctx, bpf_to_ppc(i))) + EMIT(PPC_RAW_STD(bpf_to_ppc(i), _R1, bpf_jit_stack_offsetof(ctx, bpf_to_ppc(i)))); /* Setup frame pointer to point to the bpf stack area */ - if (bpf_is_seen_register(ctx, b2p[BPF_REG_FP])) - EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], _R1, + if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP))) + EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1, STACK_FRAME_MIN_SIZE + ctx->stack_size)); } @@ -176,8 +177,8 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx /* Restore NVRs */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) - if (bpf_is_seen_register(ctx, b2p[i])) - EMIT(PPC_RAW_LD(b2p[i], _R1, bpf_jit_stack_offsetof(ctx, b2p[i]))); + if (bpf_is_seen_register(ctx, bpf_to_ppc(i))) + EMIT(PPC_RAW_LD(bpf_to_ppc(i), _R1, bpf_jit_stack_offsetof(ctx, bpf_to_ppc(i)))); /* Tear down our stack frame */ if (bpf_has_stack_frame(ctx)) { @@ -194,7 +195,7 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) bpf_jit_emit_common_epilogue(image, ctx); /* Move result to r3 */ - EMIT(PPC_RAW_MR(_R3, b2p[BPF_REG_0])); + EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0))); EMIT(PPC_RAW_BLR()); } @@ -261,8 +262,8 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * r4/BPF_REG_2 - pointer to bpf_array * r5/BPF_REG_3 - index in bpf_array */ - int b2p_bpf_array = b2p[BPF_REG_2]; - int b2p_index = b2p[BPF_REG_3]; + int b2p_bpf_array = bpf_to_ppc(BPF_REG_2); + int b2p_index = bpf_to_ppc(BPF_REG_3); int bpf_tailcall_prologue_size = 8; if (__is_defined(PPC64_ELF_ABI_v2)) @@ -272,42 +273,42 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * if (index >= array->map.max_entries) * goto out; */ - EMIT(PPC_RAW_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); + EMIT(PPC_RAW_LWZ(bpf_to_ppc(TMP_REG_1), b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); EMIT(PPC_RAW_RLWINM(b2p_index, b2p_index, 0, 0, 31)); - EMIT(PPC_RAW_CMPLW(b2p_index, b2p[TMP_REG_1])); + EMIT(PPC_RAW_CMPLW(b2p_index, bpf_to_ppc(TMP_REG_1))); PPC_BCC_SHORT(COND_GE, out); /* * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) * goto out; */ - EMIT(PPC_RAW_LD(b2p[TMP_REG_1], _R1, bpf_jit_stack_tailcallcnt(ctx))); - EMIT(PPC_RAW_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT)); + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), _R1, bpf_jit_stack_tailcallcnt(ctx))); + EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_1), MAX_TAIL_CALL_CNT)); PPC_BCC_SHORT(COND_GE, out); /* * tail_call_cnt++; */ - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1)); - EMIT(PPC_RAW_STD(b2p[TMP_REG_1], _R1, bpf_jit_stack_tailcallcnt(ctx))); + EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), 1)); + EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, bpf_jit_stack_tailcallcnt(ctx))); /* prog = array->ptrs[index]; */ - EMIT(PPC_RAW_MULI(b2p[TMP_REG_1], b2p_index, 8)); - 
EMIT(PPC_RAW_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array)); - EMIT(PPC_RAW_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs))); + EMIT(PPC_RAW_MULI(bpf_to_ppc(TMP_REG_1), b2p_index, 8)); + EMIT(PPC_RAW_ADD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), b2p_bpf_array)); + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), offsetof(struct bpf_array, ptrs))); /* * if (prog == NULL) * goto out; */ - EMIT(PPC_RAW_CMPLDI(b2p[TMP_REG_1], 0)); + EMIT(PPC_RAW_CMPLDI(bpf_to_ppc(TMP_REG_1), 0)); PPC_BCC_SHORT(COND_EQ, out); /* goto *(prog->bpf_func + prologue_size); */ - EMIT(PPC_RAW_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func))); - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), offsetof(struct bpf_prog, bpf_func))); + EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), FUNCTION_DESCR_SIZE + bpf_tailcall_prologue_size)); - EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1])); + EMIT(PPC_RAW_MTCTR(bpf_to_ppc(TMP_REG_1))); /* tear down stack, restore NVRs, ... */ bpf_jit_emit_common_epilogue(image, ctx); @@ -354,11 +355,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * for (i = 0; i < flen; i++) { u32 code = insn[i].code; - u32 dst_reg = b2p[insn[i].dst_reg]; - u32 src_reg = b2p[insn[i].src_reg]; + u32 dst_reg = bpf_to_ppc(insn[i].dst_reg); + u32 src_reg = bpf_to_ppc(insn[i].src_reg); u32 size = BPF_SIZE(code); - u32 tmp1_reg = b2p[TMP_REG_1]; - u32 tmp2_reg = b2p[TMP_REG_2]; + u32 tmp1_reg = bpf_to_ppc(TMP_REG_1); + u32 tmp2_reg = bpf_to_ppc(TMP_REG_2); s16 off = insn[i].off; s32 imm = insn[i].imm; bool func_addr_fixed; @@ -938,7 +939,7 @@ emit_clear: return ret; /* move return value from r3 to BPF_REG_0 */ - EMIT(PPC_RAW_MR(b2p[BPF_REG_0], _R3)); + EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_0), _R3)); break; /* From 0f54bddefe7f5e4c98bea6f945ebdf85d1c44117 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar <ganeshgr@linux.ibm.com> Date: Fri, 7 Jan 2022 19:44:26 +0530 Subject: [PATCH 152/179] powerpc/pseries: Parse control memory access error Add support to parse and log control memory access error for pseries. These changes are made according to PAPR v2.11 10.3.2.2.12. Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220107141428.67862-1-ganeshgr@linux.ibm.com --- arch/powerpc/platforms/pseries/ras.c | 36 ++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 74c9b1b5bc66..2a158e828c99 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -60,11 +60,17 @@ struct pseries_mc_errorlog { * XX 2: Reserved. * XXX 3: Type of UE error. * - * For error_type != MC_ERROR_TYPE_UE + * For error_type == MC_ERROR_TYPE_SLB/ERAT/TLB * XXXXXXXX * X 1: Effective address provided. * XXXXX 5: Reserved. * XX 2: Type of SLB/ERAT/TLB error. + * + * For error_type == MC_ERROR_TYPE_CTRL_MEM_ACCESS + * XXXXXXXX + * X 1: Error causing address provided. + * XXX 3: Type of error. + * XXXX 4: Reserved. 
*/ u8 sub_err_type; u8 reserved_1[6]; @@ -80,6 +86,7 @@ struct pseries_mc_errorlog { #define MC_ERROR_TYPE_TLB 0x04 #define MC_ERROR_TYPE_D_CACHE 0x05 #define MC_ERROR_TYPE_I_CACHE 0x07 +#define MC_ERROR_TYPE_CTRL_MEM_ACCESS 0x08 /* RTAS pseries MCE error sub types */ #define MC_ERROR_UE_INDETERMINATE 0 @@ -90,6 +97,7 @@ struct pseries_mc_errorlog { #define UE_EFFECTIVE_ADDR_PROVIDED 0x40 #define UE_LOGICAL_ADDR_PROVIDED 0x20 +#define MC_EFFECTIVE_ADDR_PROVIDED 0x80 #define MC_ERROR_SLB_PARITY 0 #define MC_ERROR_SLB_MULTIHIT 1 @@ -103,6 +111,9 @@ struct pseries_mc_errorlog { #define MC_ERROR_TLB_MULTIHIT 2 #define MC_ERROR_TLB_INDETERMINATE 3 +#define MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK 0 +#define MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS 1 + static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog) { switch (mlog->error_type) { @@ -112,6 +123,8 @@ static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog) case MC_ERROR_TYPE_ERAT: case MC_ERROR_TYPE_TLB: return (mlog->sub_err_type & 0x03); + case MC_ERROR_TYPE_CTRL_MEM_ACCESS: + return (mlog->sub_err_type & 0x70) >> 4; default: return 0; } @@ -658,7 +671,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE; break; } - if (mce_log->sub_err_type & 0x80) + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) eaddr = be64_to_cpu(mce_log->effective_address); break; case MC_ERROR_TYPE_ERAT: @@ -675,7 +688,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE; break; } - if (mce_log->sub_err_type & 0x80) + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) eaddr = be64_to_cpu(mce_log->effective_address); break; case MC_ERROR_TYPE_TLB: @@ -692,7 +705,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE; break; } - if (mce_log->sub_err_type & 0x80) + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) eaddr = be64_to_cpu(mce_log->effective_address); break; case MC_ERROR_TYPE_D_CACHE: @@ -701,6 +714,21 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, case MC_ERROR_TYPE_I_CACHE: mce_err.error_type = MCE_ERROR_TYPE_ICACHE; break; + case MC_ERROR_TYPE_CTRL_MEM_ACCESS: + mce_err.error_type = MCE_ERROR_TYPE_RA; + switch (err_sub_type) { + case MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK: + mce_err.u.ra_error_type = + MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN; + break; + case MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS: + mce_err.u.ra_error_type = + MCE_RA_ERROR_LOAD_STORE_FOREIGN; + break; + } + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) + eaddr = be64_to_cpu(mce_log->effective_address); + break; case MC_ERROR_TYPE_UNKNOWN: default: mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; From 0f4ef8a3bf784f250abc7d0155ae4e9fa22d8210 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar <ganeshgr@linux.ibm.com> Date: Fri, 7 Jan 2022 19:44:27 +0530 Subject: [PATCH 153/179] selftests/powerpc: Add test for real address error handling Add a test for real address or control memory address access error handling, using the NX-GZIP engine. The error is injected by accessing the control memory address using an illegal instruction; on successful handling, the process attempting the access receives SIGBUS.
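In isolation, the recovery pattern the test depends on is a SIGBUS handler that notes the fault and then advances the NIP past the faulting 4-byte powerpc instruction. A minimal sketch follows (hypothetical and simplified from the inject-ra-err.c added below; it assumes the powerpc glibc ucontext layout exposing uc_mcontext.regs, and the actual paste-address access is elided):

#include <signal.h>
#include <stdio.h>
#include <ucontext.h>

static volatile int faulted;

static void sigbus_handler(int n, siginfo_t *info, void *ctxt_v)
{
	ucontext_t *ctxt = (ucontext_t *)ctxt_v;
	struct pt_regs *regs = ctxt->uc_mcontext.regs;

	/* Note the fault, then skip the faulting 4-byte instruction. */
	faulted = 1;
	regs->nip += 4;
}

int main(void)
{
	struct sigaction act = {
		.sa_sigaction = sigbus_handler,
		.sa_flags = SA_SIGINFO,
	};

	if (sigaction(SIGBUS, &act, NULL))
		return 1;
	/* ... map the NX-GZIP paste window and store to it here ... */
	printf("faulted: %d\n", faulted);
	return 0;
}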
Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220107141428.67862-2-ganeshgr@linux.ibm.com --- tools/testing/selftests/powerpc/Makefile | 3 +- tools/testing/selftests/powerpc/mce/Makefile | 7 ++ .../selftests/powerpc/mce/inject-ra-err.c | 65 +++++++++++++++++++ tools/testing/selftests/powerpc/mce/vas-api.h | 1 + 4 files changed, 75 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/mce/Makefile create mode 100644 tools/testing/selftests/powerpc/mce/inject-ra-err.c create mode 120000 tools/testing/selftests/powerpc/mce/vas-api.h diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index 0830e63818c1..4830372d7416 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -31,7 +31,8 @@ SUB_DIRS = alignment \ vphn \ math \ ptrace \ - security + security \ + mce endif diff --git a/tools/testing/selftests/powerpc/mce/Makefile b/tools/testing/selftests/powerpc/mce/Makefile new file mode 100644 index 000000000000..2424513982d9 --- /dev/null +++ b/tools/testing/selftests/powerpc/mce/Makefile @@ -0,0 +1,7 @@ +#SPDX-License-Identifier: GPL-2.0-or-later + +TEST_GEN_PROGS := inject-ra-err + +include ../../lib.mk + +$(TEST_GEN_PROGS): ../harness.c diff --git a/tools/testing/selftests/powerpc/mce/inject-ra-err.c b/tools/testing/selftests/powerpc/mce/inject-ra-err.c new file mode 100644 index 000000000000..94323c34d9a6 --- /dev/null +++ b/tools/testing/selftests/powerpc/mce/inject-ra-err.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdio.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "vas-api.h" +#include "utils.h" + +static bool faulted; + +static void sigbus_handler(int n, siginfo_t *info, void *ctxt_v) +{ + ucontext_t *ctxt = (ucontext_t *)ctxt_v; + struct pt_regs *regs = ctxt->uc_mcontext.regs; + + faulted = true; + regs->nip += 4; +} + +static int test_ra_error(void) +{ + struct vas_tx_win_open_attr attr; + int fd, *paste_addr; + char *devname = "/dev/crypto/nx-gzip"; + struct sigaction act = { + .sa_sigaction = sigbus_handler, + .sa_flags = SA_SIGINFO, + }; + + memset(&attr, 0, sizeof(attr)); + attr.version = 1; + attr.vas_id = 0; + + SKIP_IF(access(devname, F_OK)); + + fd = open(devname, O_RDWR); + FAIL_IF(fd < 0); + FAIL_IF(ioctl(fd, VAS_TX_WIN_OPEN, &attr) < 0); + FAIL_IF(sigaction(SIGBUS, &act, NULL) != 0); + + paste_addr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0ULL); + + /* The following assignment triggers exception */ + mb(); + *paste_addr = 1; + mb(); + + FAIL_IF(!faulted); + + return 0; +} + +int main(void) +{ + return test_harness(test_ra_error, "inject-ra-err"); +} + diff --git a/tools/testing/selftests/powerpc/mce/vas-api.h b/tools/testing/selftests/powerpc/mce/vas-api.h new file mode 120000 index 000000000000..1455c1bcd351 --- /dev/null +++ b/tools/testing/selftests/powerpc/mce/vas-api.h @@ -0,0 +1 @@ +../../../../../arch/powerpc/include/uapi/asm/vas-api.h \ No newline at end of file From 0a182611d149b5b747014fbb230ec35b20a45c86 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar <ganeshgr@linux.ibm.com> Date: Fri, 7 Jan 2022 19:44:28 +0530 Subject: [PATCH 154/179] powerpc/mce: Modify the real address error logging messages To avoid ambiguity, modify the strings in real 
address error logging messages to "foreign/control memory" from "foreign", since the error descriptions in the P9 and P10 user manuals differ for the same type of errors. P9 User Manual for MCE: DSISR:59 Host real address to foreign space during translation. DSISR:60 Host real address to foreign space on a load or store access. P10 User Manual for MCE: DSISR:59 D-side tablewalk used a host real address in the control memory address range. DSISR:60 D-side operand access to control memory address space. Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220107141428.67862-3-ganeshgr@linux.ibm.com --- arch/powerpc/kernel/mce.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 2503dd4713b9..811c867ad6c6 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -401,14 +401,14 @@ void machine_check_print_event_info(struct machine_check_event *evt, static const char *mc_ra_types[] = { "Indeterminate", "Instruction fetch (bad)", - "Instruction fetch (foreign)", + "Instruction fetch (foreign/control memory)", "Page table walk ifetch (bad)", - "Page table walk ifetch (foreign)", + "Page table walk ifetch (foreign/control memory)", "Load (bad)", "Store (bad)", "Page table walk Load/Store (bad)", - "Page table walk Load/Store (foreign)", - "Load/Store (foreign)", + "Page table walk Load/Store (foreign/control memory)", + "Load/Store (foreign/control memory)", }; static const char *mc_link_types[] = { "Indeterminate", From cc15ff3275694fedc33cd3d53212a43eec7aa0bc Mon Sep 17 00:00:00 2001 From: Ganesh Goudar <ganeshgr@linux.ibm.com> Date: Thu, 20 Jan 2022 17:49:31 +0530 Subject: [PATCH 155/179] powerpc/mce: Avoid using irq_work_queue() in realmode In the realmode mce handler we use irq_work_queue() to defer the processing of mce events. irq_work_queue() can only be called when translation is enabled because it touches memory outside the RMA, so we enable translation before calling irq_work_queue() and disable it on return, even though doing so is not safe in realmode. To avoid this, program the decrementer and call the event processing functions from the timer handler. Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220120121931.517974-1-ganeshgr@linux.ibm.com --- arch/powerpc/include/asm/machdep.h | 2 + arch/powerpc/include/asm/mce.h | 13 +++++ arch/powerpc/include/asm/paca.h | 1 + arch/powerpc/kernel/mce.c | 60 +++++++++++++----------- arch/powerpc/kernel/time.c | 2 + arch/powerpc/platforms/pseries/pseries.h | 1 + arch/powerpc/platforms/pseries/ras.c | 32 +------------ arch/powerpc/platforms/pseries/setup.c | 1 + 8 files changed, 53 insertions(+), 59 deletions(-) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 06ac7ef07c85..358d171ae8e0 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -94,6 +94,8 @@ struct machdep_calls { /* Called during machine check exception to retrive fixup address. */ bool (*mce_check_early_recovery)(struct pt_regs *regs); + void (*machine_check_log_err)(void); + /* Motherboard/chipset features. This is a kind of general purpose * hook used to control some machine specific features (like reset * lines, chip power control, etc...). 
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 331d944280b8..c9f0936bd3c9 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -235,8 +235,21 @@ extern void machine_check_print_event_info(struct machine_check_event *evt, unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr); extern void mce_common_process_ue(struct pt_regs *regs, struct mce_error_info *mce_err); +void mce_irq_work_queue(void); int mce_register_notifier(struct notifier_block *nb); int mce_unregister_notifier(struct notifier_block *nb); + +#ifdef CONFIG_PPC_BOOK3S_64 +void mce_run_irq_context_handlers(void); +#else +static inline void mce_run_irq_context_handlers(void) { }; +#endif /* CONFIG_PPC_BOOK3S_64 */ + +#ifdef CONFIG_PPC_BOOK3S_64 +void set_mce_pending_irq_work(void); +void clear_mce_pending_irq_work(void); +#endif /* CONFIG_PPC_BOOK3S_64 */ + #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void); void flush_erat(void); diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 295573a82c66..8330968ca346 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -288,6 +288,7 @@ struct paca_struct { #endif #ifdef CONFIG_PPC_BOOK3S_64 struct mce_info *mce_info; + u8 mce_pending_irq_work; #endif /* CONFIG_PPC_BOOK3S_64 */ } ____cacheline_aligned; diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 811c867ad6c6..671d727d06f7 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -28,19 +28,9 @@ #include "setup.h" -static void machine_check_process_queued_event(struct irq_work *work); -static void machine_check_ue_irq_work(struct irq_work *work); static void machine_check_ue_event(struct machine_check_event *evt); static void machine_process_ue_event(struct work_struct *work); -static struct irq_work mce_event_process_work = { - .func = machine_check_process_queued_event, -}; - -static struct irq_work mce_ue_event_irq_work = { - .func = machine_check_ue_irq_work, -}; - static DECLARE_WORK(mce_ue_event_work, machine_process_ue_event); static BLOCKING_NOTIFIER_HEAD(mce_notifier_list); @@ -89,6 +79,13 @@ static void mce_set_error_info(struct machine_check_event *mce, } } +void mce_irq_work_queue(void) +{ + /* Raise decrementer interrupt */ + arch_irq_work_raise(); + set_mce_pending_irq_work(); +} + /* * Decode and save high level MCE information into per cpu buffer which * is an array of machine_check_event structure. @@ -217,7 +214,7 @@ void release_mce_event(void) get_mce_event(NULL, true); } -static void machine_check_ue_irq_work(struct irq_work *work) +static void machine_check_ue_work(void) { schedule_work(&mce_ue_event_work); } @@ -239,7 +236,7 @@ static void machine_check_ue_event(struct machine_check_event *evt) evt, sizeof(*evt)); /* Queue work to process this event later. */ - irq_work_queue(&mce_ue_event_irq_work); + mce_irq_work_queue(); } /* @@ -249,7 +246,6 @@ void machine_check_queue_event(void) { int index; struct machine_check_event evt; - unsigned long msr; if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return; @@ -263,20 +259,7 @@ void machine_check_queue_event(void) memcpy(&local_paca->mce_info->mce_event_queue[index], &evt, sizeof(evt)); - /* - * Queue irq work to process this event later. Before - * queuing the work enable translation for non radix LPAR, - * as irq_work_queue may try to access memory outside RMO - * region. 
- */ - if (!radix_enabled() && firmware_has_feature(FW_FEATURE_LPAR)) { - msr = mfmsr(); - mtmsr(msr | MSR_IR | MSR_DR); - irq_work_queue(&mce_event_process_work); - mtmsr(msr); - } else { - irq_work_queue(&mce_event_process_work); - } + mce_irq_work_queue(); } void mce_common_process_ue(struct pt_regs *regs, @@ -338,7 +321,7 @@ static void machine_process_ue_event(struct work_struct *work) * process pending MCE event from the mce event queue. This function will be * called during syscall exit. */ -static void machine_check_process_queued_event(struct irq_work *work) +static void machine_check_process_queued_event(void) { int index; struct machine_check_event *evt; @@ -363,6 +346,27 @@ static void machine_check_process_queued_event(struct irq_work *work) } } +void set_mce_pending_irq_work(void) +{ + local_paca->mce_pending_irq_work = 1; +} + +void clear_mce_pending_irq_work(void) +{ + local_paca->mce_pending_irq_work = 0; +} + +void mce_run_irq_context_handlers(void) +{ + if (unlikely(local_paca->mce_pending_irq_work)) { + if (ppc_md.machine_check_log_err) + ppc_md.machine_check_log_err(); + machine_check_process_queued_event(); + machine_check_ue_work(); + clear_mce_pending_irq_work(); + } +} + void machine_check_print_event_info(struct machine_check_event *evt, bool user_mode, bool in_guest) { diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index cd0b8b71ecdd..17598bba54eb 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -70,6 +70,7 @@ #include <asm/vdso_datapage.h> #include <asm/firmware.h> #include <asm/asm-prototypes.h> +#include <asm/mce.h> /* powerpc clocksource/clockevent code */ @@ -638,6 +639,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) if (test_irq_work_pending()) { clear_irq_work_pending(); + mce_run_irq_context_handlers(); irq_work_run(); } diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 56c9ef9052e9..af162aeeae86 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -21,6 +21,7 @@ struct pt_regs; extern int pSeries_system_reset_exception(struct pt_regs *regs); extern int pSeries_machine_check_exception(struct pt_regs *regs); extern long pseries_machine_check_realmode(struct pt_regs *regs); +void pSeries_machine_check_log_err(void); #ifdef CONFIG_SMP extern void smp_init_pseries(void); diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 2a158e828c99..f12516c3998c 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -23,11 +23,6 @@ static DEFINE_SPINLOCK(ras_log_buf_lock); static int ras_check_exception_token; -static void mce_process_errlog_event(struct irq_work *work); -static struct irq_work mce_errlog_process_work = { - .func = mce_process_errlog_event, -}; - #define EPOW_SENSOR_TOKEN 9 #define EPOW_SENSOR_INDEX 0 @@ -745,7 +740,6 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) struct pseries_errorlog *pseries_log; struct pseries_mc_errorlog *mce_log = NULL; int disposition = rtas_error_disposition(errp); - unsigned long msr; u8 error_type; if (!rtas_error_extended(errp)) @@ -759,40 +753,16 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) error_type = mce_log->error_type; disposition = mce_handle_err_realmode(disposition, error_type); - - /* - * Enable translation as we will be accessing per-cpu variables - * in save_mce_event() which may fall outside RMO region, 
also - * leave it enabled because subsequently we will be queuing work - * to workqueues where again per-cpu variables accessed, besides - * fwnmi_release_errinfo() crashes when called in realmode on - * pseries. - * Note: All the realmode handling like flushing SLB entries for - * SLB multihit is done by now. - */ out: - msr = mfmsr(); - mtmsr(msr | MSR_IR | MSR_DR); - disposition = mce_handle_err_virtmode(regs, errp, mce_log, disposition); - - /* - * Queue irq work to log this rtas event later. - * irq_work_queue uses per-cpu variables, so do this in virt - * mode as well. - */ - irq_work_queue(&mce_errlog_process_work); - - mtmsr(msr); - return disposition; } /* * Process MCE rtas errlog event. */ -static void mce_process_errlog_event(struct irq_work *work) +void pSeries_machine_check_log_err(void) { struct rtas_error_log *err; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 83a04d967a59..069d7b3bb142 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -1086,6 +1086,7 @@ define_machine(pseries) { .system_reset_exception = pSeries_system_reset_exception, .machine_check_early = pseries_machine_check_realmode, .machine_check_exception = pSeries_machine_check_exception, + .machine_check_log_err = pSeries_machine_check_log_err, #ifdef CONFIG_KEXEC_CORE .machine_kexec = pSeries_machine_kexec, .kexec_cpu_down = pseries_kexec_cpu_down, From 3c14b73454cf9f6e2146443fdfbdfb912c0efed3 Mon Sep 17 00:00:00 2001 From: "Pratik R. Sampat" <psampat@linux.ibm.com> Date: Thu, 17 Feb 2022 16:23:20 +0530 Subject: [PATCH 156/179] powerpc/pseries: Interface to represent PAPR firmware attributes Adds a syscall interface to represent the energy and frequency related PAPR attributes on the system using the new H_CALL "H_GET_ENERGY_SCALE_INFO". H_GET_EM_PARMS H_CALL was previously responsible for exporting this information in the lparcfg, however the H_GET_EM_PARMS H_CALL will be deprecated P10 onwards. The H_GET_ENERGY_SCALE_INFO H_CALL is of the following call format: hcall( uint64 H_GET_ENERGY_SCALE_INFO, // Get energy scale info uint64 flags, // Per the flag request uint64 firstAttributeId,// The attribute id uint64 bufferAddress, // Guest physical address of the output buffer uint64 bufferSize // The size in bytes of the output buffer ); As specified in PAPR+ v2.11, section 14.14.3. This H_CALL can query either all the attributes at once with firstAttributeId = 0, flags = 0 as well as query only one attribute at a time with firstAttributeId = id, flags = 1. The output buffer consists of the following 1. number of attributes - 8 bytes 2. array offset to the data location - 8 bytes 3. version info - 1 byte 4. A data array of size num attributes, which contains the following: a. attribute ID - 8 bytes b. attribute value in number - 8 bytes c. attribute name in string - 64 bytes d. attribute value in string - 64 bytes The new H_CALL exports information in direct string value format, hence a new interface has been introduced in /sys/firmware/papr/energy_scale_info to export this information to userspace so that the firmware can add new values without the need for the kernel to be changed. The H_CALL returns the name, numeric value and string value (if exists) The format of exposing the sysfs information is as follows: /sys/firmware/papr/energy_scale_info/ |-- <id>/ |-- desc |-- value |-- value_desc (if exists) |-- <id>/ |-- desc |-- value |-- value_desc (if exists) ... 
The energy information that is exported is useful for userspace tools such as powerpc-utils. Currently these tools infer the "power_mode_data" value in the lparcfg, which in turn is obtained from the soon-to-be-deprecated H_GET_EM_PARMS H_CALL. On future platforms, such userspace utilities will have to look at the data returned from the new H_CALL being populated in this new sysfs interface and report this information directly without the need for interpretation. Signed-off-by: Pratik R. Sampat <psampat@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220217105321.52941-2-psampat@linux.ibm.com --- .../sysfs-firmware-papr-energy-scale-info | 29 ++ arch/powerpc/include/asm/firmware.h | 4 +- arch/powerpc/include/asm/hvcall.h | 3 +- arch/powerpc/kvm/trace_hv.h | 1 + arch/powerpc/platforms/pseries/Makefile | 3 +- arch/powerpc/platforms/pseries/firmware.c | 1 + .../pseries/papr_platform_attributes.c | 361 ++++++++++++++++++ 7 files changed, 399 insertions(+), 3 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-firmware-papr-energy-scale-info create mode 100644 arch/powerpc/platforms/pseries/papr_platform_attributes.c diff --git a/Documentation/ABI/testing/sysfs-firmware-papr-energy-scale-info b/Documentation/ABI/testing/sysfs-firmware-papr-energy-scale-info new file mode 100644 index 000000000000..141a6b371469 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-firmware-papr-energy-scale-info @@ -0,0 +1,29 @@ +What: /sys/firmware/papr/energy_scale_info +Date: February 2022 +Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org> +Description: Directory hosting a set of platform attributes like + energy/frequency on Linux running as a PAPR guest. + + Each file in a directory contains a platform + attribute hierarchy pertaining to performance/ + energy-savings mode and processor frequency. 
+ +What: /sys/firmware/papr/energy_scale_info/<id> +Date: February 2022 +Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org> +Description: Energy, frequency attributes directory for POWERVM servers + +What: /sys/firmware/papr/energy_scale_info/<id>/desc +Date: February 2022 +Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org> +Description: String description of the energy attribute of <id> + +What: /sys/firmware/papr/energy_scale_info/<id>/value +Date: February 2022 +Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org> +Description: Numeric value of the energy attribute of <id> + +What: /sys/firmware/papr/energy_scale_info/<id>/value_desc +Date: February 2022 +Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org> +Description: String value of the energy attribute of <id> diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 9b702d2b80fb..8dddd34b8ecf 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -54,6 +54,7 @@ #define FW_FEATURE_STUFF_TCE ASM_CONST(0x0000008000000000) #define FW_FEATURE_RPT_INVALIDATE ASM_CONST(0x0000010000000000) #define FW_FEATURE_FORM2_AFFINITY ASM_CONST(0x0000020000000000) +#define FW_FEATURE_ENERGY_SCALE_INFO ASM_CONST(0x0000040000000000) #ifndef __ASSEMBLY__ @@ -74,7 +75,8 @@ enum { FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 | FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE | FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR | - FW_FEATURE_RPT_INVALIDATE | FW_FEATURE_FORM2_AFFINITY, + FW_FEATURE_RPT_INVALIDATE | FW_FEATURE_FORM2_AFFINITY | + FW_FEATURE_ENERGY_SCALE_INFO, FW_FEATURE_PSERIES_ALWAYS = 0, FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_ULTRAVISOR, FW_FEATURE_POWERNV_ALWAYS = 0, diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 9bcf345cb208..48f510ba9f4a 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -323,7 +323,8 @@ #define H_SCM_PERFORMANCE_STATS 0x418 #define H_RPT_INVALIDATE 0x448 #define H_SCM_FLUSH 0x44C -#define MAX_HCALL_OPCODE H_SCM_FLUSH +#define H_GET_ENERGY_SCALE_INFO 0x450 +#define MAX_HCALL_OPCODE H_GET_ENERGY_SCALE_INFO /* Scope args for H_SCM_UNBIND_ALL */ #define H_UNBIND_SCOPE_ALL (0x1) diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index 830a126e095d..38cd0ed0a617 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -115,6 +115,7 @@ {H_VASI_STATE, "H_VASI_STATE"}, \ {H_ENABLE_CRQ, "H_ENABLE_CRQ"}, \ {H_GET_EM_PARMS, "H_GET_EM_PARMS"}, \ + {H_GET_ENERGY_SCALE_INFO, "H_GET_ENERGY_SCALE_INFO"}, \ {H_SET_MPP, "H_SET_MPP"}, \ {H_GET_MPP, "H_GET_MPP"}, \ {H_HOME_NODE_ASSOCIATIVITY, "H_HOME_NODE_ASSOCIATIVITY"}, \ diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 29b522d2c755..9764e1a2ed5c 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -6,7 +6,8 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \ of_helpers.o \ setup.o iommu.o event_sources.o ras.o \ firmware.o power.o dlpar.o mobility.o rng.o \ - pci.o pci_dlpar.o eeh_pseries.o msi.o + pci.o pci_dlpar.o eeh_pseries.o msi.o \ + papr_platform_attributes.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_KEXEC_CORE) += kexec.o obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index f162156b7b68..09c119b2f623 100644 --- 
a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -66,6 +66,7 @@ hypertas_fw_features_table[] = { {FW_FEATURE_BLOCK_REMOVE, "hcall-block-remove"}, {FW_FEATURE_PAPR_SCM, "hcall-scm"}, {FW_FEATURE_RPT_INVALIDATE, "hcall-rpt-invalidate"}, + {FW_FEATURE_ENERGY_SCALE_INFO, "hcall-energy-scale-info"}, }; /* Build up the firmware features bitmask using the contents of diff --git a/arch/powerpc/platforms/pseries/papr_platform_attributes.c b/arch/powerpc/platforms/pseries/papr_platform_attributes.c new file mode 100644 index 000000000000..515150417bb3 --- /dev/null +++ b/arch/powerpc/platforms/pseries/papr_platform_attributes.c @@ -0,0 +1,361 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Platform energy and frequency attributes driver + * + * This driver creates a sys file at /sys/firmware/papr/ which encapsulates a + * directory structure containing files in keyword - value pairs that specify + * energy and frequency configuration of the system. + * + * The format of exposing the sysfs information is as follows: + * /sys/firmware/papr/energy_scale_info/ + * |-- <id>/ + * |-- desc + * |-- value + * |-- value_desc (if exists) + * |-- <id>/ + * |-- desc + * |-- value + * |-- value_desc (if exists) + * + * Copyright 2022 IBM Corp. + */ + +#include <asm/hvcall.h> +#include <asm/machdep.h> + +#include "pseries.h" + +/* + * Flag attributes to fetch either all or one attribute from the HCALL + * flag = BE(0) => fetch all attributes with firstAttributeId = 0 + * flag = BE(1) => fetch a single attribute with firstAttributeId = id + */ +#define ESI_FLAGS_ALL 0 +#define ESI_FLAGS_SINGLE (1ull << 63) + +#define KOBJ_MAX_ATTRS 3 + +#define ESI_HDR_SIZE sizeof(struct h_energy_scale_info_hdr) +#define ESI_ATTR_SIZE sizeof(struct energy_scale_attribute) +#define CURR_MAX_ESI_ATTRS 8 + +struct energy_scale_attribute { + __be64 id; + __be64 val; + u8 desc[64]; + u8 value_desc[64]; +} __packed; + +struct h_energy_scale_info_hdr { + __be64 num_attrs; + __be64 array_offset; + u8 data_header_version; +} __packed; + +struct papr_attr { + u64 id; + struct kobj_attribute kobj_attr; +}; + +struct papr_group { + struct attribute_group pg; + struct papr_attr pgattrs[KOBJ_MAX_ATTRS]; +}; + +static struct papr_group *papr_groups; +/* /sys/firmware/papr */ +static struct kobject *papr_kobj; +/* /sys/firmware/papr/energy_scale_info */ +static struct kobject *esi_kobj; + +/* + * Energy modes can change dynamically hence making a new hcall each time the + * information needs to be retrieved + */ +static int papr_get_attr(u64 id, struct energy_scale_attribute *esi) +{ + int esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * ESI_ATTR_SIZE); + int ret, max_esi_attrs = CURR_MAX_ESI_ATTRS; + struct energy_scale_attribute *curr_esi; + struct h_energy_scale_info_hdr *hdr; + char *buf; + + buf = kmalloc(esi_buf_size, GFP_KERNEL); + if (buf == NULL) + return -ENOMEM; + +retry: + ret = plpar_hcall_norets(H_GET_ENERGY_SCALE_INFO, ESI_FLAGS_SINGLE, + id, virt_to_phys(buf), + esi_buf_size); + + /* + * If the hcall fails with not enough memory for either the + * header or data, attempt to allocate more + */ + if (ret == H_PARTIAL || ret == H_P4) { + char *temp_buf; + + max_esi_attrs += 4; + esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * max_esi_attrs); + + temp_buf = krealloc(buf, esi_buf_size, GFP_KERNEL); + if (temp_buf) + buf = temp_buf; + else + return -ENOMEM; + + goto retry; + } + + if (ret != H_SUCCESS) { + pr_warn("hcall failed: H_GET_ENERGY_SCALE_INFO"); + ret = -EIO; + goto 
out_buf; + } + + hdr = (struct h_energy_scale_info_hdr *) buf; + curr_esi = (struct energy_scale_attribute *) + (buf + be64_to_cpu(hdr->array_offset)); + + if (esi_buf_size < + be64_to_cpu(hdr->array_offset) + (be64_to_cpu(hdr->num_attrs) + * sizeof(struct energy_scale_attribute))) { + ret = -EIO; + goto out_buf; + } + + *esi = *curr_esi; + +out_buf: + kfree(buf); + + return ret; +} + +/* + * Extract and export the description of the energy scale attributes + */ +static ssize_t desc_show(struct kobject *kobj, + struct kobj_attribute *kobj_attr, + char *buf) +{ + struct papr_attr *pattr = container_of(kobj_attr, struct papr_attr, + kobj_attr); + struct energy_scale_attribute esi; + int ret; + + ret = papr_get_attr(pattr->id, &esi); + if (ret) + return ret; + + return sysfs_emit(buf, "%s\n", esi.desc); +} + +/* + * Extract and export the numeric value of the energy scale attributes + */ +static ssize_t val_show(struct kobject *kobj, + struct kobj_attribute *kobj_attr, + char *buf) +{ + struct papr_attr *pattr = container_of(kobj_attr, struct papr_attr, + kobj_attr); + struct energy_scale_attribute esi; + int ret; + + ret = papr_get_attr(pattr->id, &esi); + if (ret) + return ret; + + return sysfs_emit(buf, "%llu\n", be64_to_cpu(esi.val)); +} + +/* + * Extract and export the value description in string format of the energy + * scale attributes + */ +static ssize_t val_desc_show(struct kobject *kobj, + struct kobj_attribute *kobj_attr, + char *buf) +{ + struct papr_attr *pattr = container_of(kobj_attr, struct papr_attr, + kobj_attr); + struct energy_scale_attribute esi; + int ret; + + ret = papr_get_attr(pattr->id, &esi); + if (ret) + return ret; + + return sysfs_emit(buf, "%s\n", esi.value_desc); +} + +static struct papr_ops_info { + const char *attr_name; + ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *kobj_attr, + char *buf); +} ops_info[KOBJ_MAX_ATTRS] = { + { "desc", desc_show }, + { "value", val_show }, + { "value_desc", val_desc_show }, +}; + +static void add_attr(u64 id, int index, struct papr_attr *attr) +{ + attr->id = id; + sysfs_attr_init(&attr->kobj_attr.attr); + attr->kobj_attr.attr.name = ops_info[index].attr_name; + attr->kobj_attr.attr.mode = 0444; + attr->kobj_attr.show = ops_info[index].show; +} + +static int add_attr_group(u64 id, struct papr_group *pg, bool show_val_desc) +{ + int i; + + for (i = 0; i < KOBJ_MAX_ATTRS; i++) { + if (!strcmp(ops_info[i].attr_name, "value_desc") && + !show_val_desc) { + continue; + } + add_attr(id, i, &pg->pgattrs[i]); + pg->pg.attrs[i] = &pg->pgattrs[i].kobj_attr.attr; + } + + return sysfs_create_group(esi_kobj, &pg->pg); +} + + +static int __init papr_init(void) +{ + int esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * ESI_ATTR_SIZE); + int ret, idx, i, max_esi_attrs = CURR_MAX_ESI_ATTRS; + struct h_energy_scale_info_hdr *esi_hdr; + struct energy_scale_attribute *esi_attrs; + uint64_t num_attrs; + char *esi_buf; + + if (!firmware_has_feature(FW_FEATURE_LPAR) || + !firmware_has_feature(FW_FEATURE_ENERGY_SCALE_INFO)) { + return -ENXIO; + } + + esi_buf = kmalloc(esi_buf_size, GFP_KERNEL); + if (esi_buf == NULL) + return -ENOMEM; + /* + * hcall( + * uint64 H_GET_ENERGY_SCALE_INFO, // Get energy scale info + * uint64 flags, // Per the flag request + * uint64 firstAttributeId, // The attribute id + * uint64 bufferAddress, // Guest physical address of the output buffer + * uint64 bufferSize); // The size in bytes of the output buffer + */ +retry: + + ret = plpar_hcall_norets(H_GET_ENERGY_SCALE_INFO, ESI_FLAGS_ALL, 0, + 
virt_to_phys(esi_buf), esi_buf_size); + + /* + * If the hcall fails with not enough memory for either the + * header or data, attempt to allocate more + */ + if (ret == H_PARTIAL || ret == H_P4) { + char *temp_esi_buf; + + max_esi_attrs += 4; + esi_buf_size = ESI_HDR_SIZE + (ESI_ATTR_SIZE * max_esi_attrs); + + temp_esi_buf = krealloc(esi_buf, esi_buf_size, GFP_KERNEL); + if (!temp_esi_buf) { + kfree(esi_buf); + return -ENOMEM; + } + esi_buf = temp_esi_buf; + + goto retry; + } + + if (ret != H_SUCCESS) { + pr_warn("hcall failed: H_GET_ENERGY_SCALE_INFO, ret: %d\n", ret); + goto out_free_esi_buf; + } + + esi_hdr = (struct h_energy_scale_info_hdr *) esi_buf; + num_attrs = be64_to_cpu(esi_hdr->num_attrs); + esi_attrs = (struct energy_scale_attribute *) + (esi_buf + be64_to_cpu(esi_hdr->array_offset)); + + if (esi_buf_size < + be64_to_cpu(esi_hdr->array_offset) + + (num_attrs * sizeof(struct energy_scale_attribute))) { + goto out_free_esi_buf; + } + + papr_groups = kcalloc(num_attrs, sizeof(*papr_groups), GFP_KERNEL); + if (!papr_groups) + goto out_free_esi_buf; + + papr_kobj = kobject_create_and_add("papr", firmware_kobj); + if (!papr_kobj) { + pr_warn("kobject_create_and_add papr failed\n"); + goto out_papr_groups; + } + + esi_kobj = kobject_create_and_add("energy_scale_info", papr_kobj); + if (!esi_kobj) { + pr_warn("kobject_create_and_add energy_scale_info failed\n"); + goto out_kobj; + } + + /* Allocate the groups before registering */ + for (idx = 0; idx < num_attrs; idx++) { + papr_groups[idx].pg.attrs = kcalloc(KOBJ_MAX_ATTRS + 1, + sizeof(*papr_groups[idx].pg.attrs), + GFP_KERNEL); + if (!papr_groups[idx].pg.attrs) + goto out_pgattrs; + + papr_groups[idx].pg.name = kasprintf(GFP_KERNEL, "%lld", + be64_to_cpu(esi_attrs[idx].id)); + if (papr_groups[idx].pg.name == NULL) + goto out_pgattrs; + } + + for (idx = 0; idx < num_attrs; idx++) { + bool show_val_desc = true; + + /* Do not add the value desc attr if it does not exist */ + if (strnlen(esi_attrs[idx].value_desc, + sizeof(esi_attrs[idx].value_desc)) == 0) + show_val_desc = false; + + if (add_attr_group(be64_to_cpu(esi_attrs[idx].id), + &papr_groups[idx], + show_val_desc)) { + pr_warn("Failed to create papr attribute group %s\n", + papr_groups[idx].pg.name); + idx = num_attrs; + goto out_pgattrs; + } + } + + kfree(esi_buf); + return 0; +out_pgattrs: + for (i = 0; i < idx ; i++) { + kfree(papr_groups[i].pg.attrs); + kfree(papr_groups[i].pg.name); + } + kobject_put(esi_kobj); +out_kobj: + kobject_put(papr_kobj); +out_papr_groups: + kfree(papr_groups); +out_free_esi_buf: + kfree(esi_buf); + + return -ENOMEM; +} + +machine_device_initcall(pseries, papr_init); From 57201d657eb76d735298405d3200a3b1f67197e1 Mon Sep 17 00:00:00 2001 From: "Pratik R. Sampat" <psampat@linux.ibm.com> Date: Thu, 17 Feb 2022 16:23:21 +0530 Subject: [PATCH 157/179] selftest/powerpc: Add PAPR sysfs attributes sniff test Include a testcase to check that the sysfs directories for the energy and frequency attributes exist and that their attribute files are present and populated. Signed-off-by: Pratik R.
Sampat <psampat@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220217105321.52941-3-psampat@linux.ibm.com --- tools/testing/selftests/powerpc/Makefile | 1 + .../powerpc/papr_attributes/.gitignore | 2 + .../powerpc/papr_attributes/Makefile | 7 ++ .../powerpc/papr_attributes/attr_test.c | 107 ++++++++++++++++++ 4 files changed, 117 insertions(+) create mode 100644 tools/testing/selftests/powerpc/papr_attributes/.gitignore create mode 100644 tools/testing/selftests/powerpc/papr_attributes/Makefile create mode 100644 tools/testing/selftests/powerpc/papr_attributes/attr_test.c diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index 4830372d7416..6ba95cd19e42 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -30,6 +30,7 @@ SUB_DIRS = alignment \ eeh \ vphn \ math \ + papr_attributes \ ptrace \ security \ mce diff --git a/tools/testing/selftests/powerpc/papr_attributes/.gitignore b/tools/testing/selftests/powerpc/papr_attributes/.gitignore new file mode 100644 index 000000000000..d5f42b6d9e99 --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_attributes/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +attr_test diff --git a/tools/testing/selftests/powerpc/papr_attributes/Makefile b/tools/testing/selftests/powerpc/papr_attributes/Makefile new file mode 100644 index 000000000000..e899712d49db --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_attributes/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +TEST_GEN_PROGS := attr_test + +top_srcdir = ../../../../.. +include ../../lib.mk + +$(TEST_GEN_PROGS): ../harness.c ../utils.c \ No newline at end of file diff --git a/tools/testing/selftests/powerpc/papr_attributes/attr_test.c b/tools/testing/selftests/powerpc/papr_attributes/attr_test.c new file mode 100644 index 000000000000..bab0dc06e90b --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_attributes/attr_test.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PAPR Energy attributes sniff test + * This checks that the papr directories and their contents, relating to + * the energy and frequency attributes, are populated + * + * Copyright 2022, Pratik Rajesh Sampat, IBM Corp.
+ */ + +#include <stdio.h> +#include <string.h> +#include <dirent.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <stdlib.h> + +#include "utils.h" + +enum energy_freq_attrs { + POWER_PERFORMANCE_MODE = 1, + IDLE_POWER_SAVER_STATUS = 2, + MIN_FREQ = 3, + STAT_FREQ = 4, + MAX_FREQ = 6, + PROC_FOLDING_STATUS = 8 +}; + +enum type { + INVALID, + STR_VAL, + NUM_VAL +}; + +int value_type(int id) +{ + int val_type; + + switch (id) { + case POWER_PERFORMANCE_MODE: + case IDLE_POWER_SAVER_STATUS: + val_type = STR_VAL; + break; + case MIN_FREQ: + case STAT_FREQ: + case MAX_FREQ: + case PROC_FOLDING_STATUS: + val_type = NUM_VAL; + break; + default: + val_type = INVALID; + } + + return val_type; +} + +int verify_energy_info(void) +{ + const char *path = "/sys/firmware/papr/energy_scale_info"; + struct dirent *entry; + struct stat s; + DIR *dirp; + + if (stat(path, &s) || !S_ISDIR(s.st_mode)) + return -1; + dirp = opendir(path); + + while ((entry = readdir(dirp)) != NULL) { + char file_name[64]; + int id, attr_type; + FILE *f; + + if (strcmp(entry->d_name, ".") == 0 || + strcmp(entry->d_name, "..") == 0) + continue; + + id = atoi(entry->d_name); + attr_type = value_type(id); + if (attr_type == INVALID) + return -1; + + /* Check if the files exist and have data in them */ + sprintf(file_name, "%s/%d/desc", path, id); + f = fopen(file_name, "r"); + if (!f || fgetc(f) == EOF) + return -1; + + sprintf(file_name, "%s/%d/value", path, id); + f = fopen(file_name, "r"); + if (!f || fgetc(f) == EOF) + return -1; + + if (attr_type == STR_VAL) { + sprintf(file_name, "%s/%d/value_desc", path, id); + f = fopen(file_name, "r"); + if (!f || fgetc(f) == EOF) + return -1; + } + } + + return 0; +} + +int main(void) +{ + return test_harness(verify_energy_info, "papr_attributes"); +} From 9bdb2eec3dde4d66b71cb4bbaebde0caed2cf0e3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Thu, 17 Feb 2022 13:01:56 +0100 Subject: [PATCH 158/179] powerpc/ftrace: Don't use lmw/stmw in ftrace_regs_caller() For the same reason as commit a85c728cb5e1 ("powerpc/32: Don't use lmw/stmw for saving/restoring non volatile regs"), don't use lmw/stmw in ftrace_regs_caller(). Use the same macros for PPC32 and PPC64. Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/ec286d2cc6989668a96f14543275437d2f3f0e3a.1645099283.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/trace/ftrace_mprofile.S | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S index 89639e64acd1..76dab07fd8fd 100644 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S @@ -43,18 +43,16 @@ _GLOBAL(ftrace_regs_caller) /* Save all gprs to pt_regs */ SAVE_GPR(0, r1) -#ifdef CONFIG_PPC64 SAVE_GPRS(2, 11, r1) +#ifdef CONFIG_PPC64 /* Ok to continue? 
*/ lbz r3, PACA_FTRACE_ENABLED(r13) cmpdi r3, 0 beq ftrace_no_trace +#endif SAVE_GPRS(12, 31, r1) -#else - stmw r2, GPR2(r1) -#endif /* Save previous stack pointer (r1) */ addi r8, r1, SWITCH_FRAME_SIZE @@ -120,11 +118,7 @@ ftrace_regs_call: #endif /* Restore gprs */ -#ifdef CONFIG_PPC64 REST_GPRS(2, 31, r1) -#else - lmw r2, GPR2(r1) -#endif /* Restore possibly modified LR */ PPC_LL r0, _LINK(r1) From 228216716cb5f9a19d70bfc40bdc0d6a7fb7e72f Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Thu, 17 Feb 2022 13:01:57 +0100 Subject: [PATCH 159/179] powerpc/ftrace: Refactor ftrace_{regs_}caller ftrace_caller() and ftrace_regs_caller() now have a lot in common. Refactor them using GAS macros. Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/9d7df9e4fc98a86051489f61d3c9bc67f92f7e27.1645099283.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/trace/ftrace_mprofile.S | 147 ++++++-------------- 1 file changed, 45 insertions(+), 102 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S index 76dab07fd8fd..630b2de9957b 100644 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S @@ -32,7 +32,7 @@ * Our job is to save the register state into a struct pt_regs (on the stack) * and then arrange for the ftrace function to be called. */ -_GLOBAL(ftrace_regs_caller) +.macro ftrace_regs_entry allregs /* Save the original return address in A's stack frame */ #ifdef CONFIG_MPROFILE_KERNEL PPC_STL r0,LRSAVE(r1) @@ -43,7 +43,7 @@ _GLOBAL(ftrace_regs_caller) /* Save all gprs to pt_regs */ SAVE_GPR(0, r1) - SAVE_GPRS(2, 11, r1) + SAVE_GPRS(3, 10, r1) #ifdef CONFIG_PPC64 /* Ok to continue?
*/ @@ -52,17 +52,29 @@ _GLOBAL(ftrace_regs_caller) beq ftrace_no_trace #endif - SAVE_GPRS(12, 31, r1) + .if \allregs == 1 + SAVE_GPR(2, r1) + SAVE_GPRS(11, 31, r1) + .else +#ifdef CONFIG_LIVEPATCH_64 + SAVE_GPR(14, r1) +#endif + .endif /* Save previous stack pointer (r1) */ addi r8, r1, SWITCH_FRAME_SIZE PPC_STL r8, GPR1(r1) + .if \allregs == 1 /* Load special regs for save below */ mfmsr r8 mfctr r9 mfxer r10 mfcr r11 + .else + /* Clear MSR to flag as ftrace_caller versus ftrace_regs_caller */ + li r8, 0 + .endif /* Get the _mcount() call site out of LR */ mflr r7 @@ -96,19 +108,17 @@ _GLOBAL(ftrace_regs_caller) /* Save special regs */ PPC_STL r8, _MSR(r1) + .if \allregs == 1 PPC_STL r9, _CTR(r1) PPC_STL r10, _XER(r1) PPC_STL r11, _CCR(r1) + .endif /* Load &pt_regs in r6 for call below */ addi r6, r1, STACK_FRAME_OVERHEAD +.endm - /* ftrace_call(r3, r4, r5, r6) */ -.globl ftrace_regs_call -ftrace_regs_call: - bl ftrace_stub - nop - +.macro ftrace_regs_exit allregs /* Load ctr with the possibly modified NIP */ PPC_LL r3, _NIP(r1) mtctr r3 @@ -118,7 +128,14 @@ ftrace_regs_call: #endif /* Restore gprs */ + .if \allregs == 1 REST_GPRS(2, 31, r1) + .else + REST_GPRS(3, 10, r1) +#ifdef CONFIG_LIVEPATCH_64 + REST_GPR(14, r1) +#endif + .endif /* Restore possibly modified LR */ PPC_LL r0, _LINK(r1) @@ -137,6 +154,25 @@ ftrace_regs_call: bne- livepatch_handler #endif bctr /* jump after _mcount site */ +.endm + +_GLOBAL(ftrace_regs_caller) + ftrace_regs_entry 1 + /* ftrace_call(r3, r4, r5, r6) */ +.globl ftrace_regs_call +ftrace_regs_call: + bl ftrace_stub + nop + ftrace_regs_exit 1 + +_GLOBAL(ftrace_caller) + ftrace_regs_entry 0 + /* ftrace_call(r3, r4, r5, r6) */ +.globl ftrace_call +ftrace_call: + bl ftrace_stub + nop + ftrace_regs_exit 0 _GLOBAL(ftrace_stub) blr @@ -151,99 +187,6 @@ ftrace_no_trace: bctr #endif -_GLOBAL(ftrace_caller) - /* Save the original return address in A's stack frame */ -#ifdef CONFIG_MPROFILE_KERNEL - PPC_STL r0, LRSAVE(r1) -#endif - - /* Create our stack frame + pt_regs */ - PPC_STLU r1, -SWITCH_FRAME_SIZE(r1) - - /* Save all gprs to pt_regs */ - SAVE_GPRS(3, 10, r1) - -#ifdef CONFIG_PPC64 - lbz r3, PACA_FTRACE_ENABLED(r13) - cmpdi r3, 0 - beq ftrace_no_trace -#endif - - /* Save previous stack pointer (r1) */ - addi r8, r1, SWITCH_FRAME_SIZE - PPC_STL r8, GPR1(r1) - - /* Get the _mcount() call site out of LR */ - mflr r7 - PPC_STL r7, _NIP(r1) - -#ifdef CONFIG_PPC64 - /* Save callee's TOC in the ABI compliant location */ - std r2, 24(r1) - ld r2, PACATOC(r13) /* get kernel TOC in r2 */ - - addis r3, r2, function_trace_op@toc@ha - addi r3, r3, function_trace_op@toc@l - ld r5, 0(r3) -#else - lis r3,function_trace_op@ha - lwz r5,function_trace_op@l(r3) -#endif - -#ifdef CONFIG_LIVEPATCH_64 - SAVE_GPR(14, r1) - mr r14, r7 /* remember old NIP */ -#endif - /* Calculate ip from nip-4 into r3 for call below */ - subi r3, r7, MCOUNT_INSN_SIZE - - /* Put the original return address in r4 as parent_ip */ - PPC_STL r0, _LINK(r1) - mr r4, r0 - - /* Clear MSR to flag as ftrace_caller versus frace_regs_caller */ - li r8, 0 - PPC_STL r8, _MSR(r1) - - /* Load &pt_regs in r6 for call below */ - addi r6, r1, STACK_FRAME_OVERHEAD - - /* ftrace_call(r3, r4, r5, r6) */ -.globl ftrace_call -ftrace_call: - bl ftrace_stub - nop - - PPC_LL r3, _NIP(r1) - mtctr r3 - -#ifdef CONFIG_LIVEPATCH_64 - cmpd r14, r3 /* has NIP been altered?
*/ - REST_GPR(14, r1) -#endif - - /* Restore gprs */ - REST_GPRS(3, 10, r1) - -#ifdef CONFIG_PPC64 - /* Restore callee's TOC */ - ld r2, 24(r1) -#endif - - /* Restore possibly modified LR */ - PPC_LL r0, _LINK(r1) - mtlr r0 - - /* Pop our stack frame */ - addi r1, r1, SWITCH_FRAME_SIZE - -#ifdef CONFIG_LIVEPATCH_64 - /* Based on the cmpd above, if the NIP was altered handle livepatch */ - bne- livepatch_handler -#endif - - bctr /* jump after _mcount site */ - #ifdef CONFIG_LIVEPATCH_64 /* * This function runs in the mcount context, between two functions. As From a5f04d1f2724db036ba4087873c0691881932bc9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Thu, 17 Feb 2022 13:01:58 +0100 Subject: [PATCH 160/179] powerpc/ftrace: Regroup PPC64 specific operations in ftrace_mprofile.S CONFIG_MPROFILE_KERNEL is only for PPC64, and ftrace_mprofile.o is built on PPC64 only when CONFIG_MPROFILE_KERNEL is defined. Move the saving of r0 inside #ifdef PPC64. Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/619dfb672bf4f1b777a4b3f8b4f14e637fea2716.1645099283.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/trace/ftrace_mprofile.S | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S index 630b2de9957b..f5d31c458e6b 100644 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S @@ -33,11 +33,6 @@ * and then arrange for the ftrace function to be called. */ .macro ftrace_regs_entry allregs - /* Save the original return address in A's stack frame */ -#ifdef CONFIG_MPROFILE_KERNEL - PPC_STL r0,LRSAVE(r1) -#endif - /* Create our stack frame + pt_regs */ PPC_STLU r1,-SWITCH_FRAME_SIZE(r1) @@ -46,6 +41,8 @@ SAVE_GPRS(3, 10, r1) #ifdef CONFIG_PPC64 + /* Save the original return address in A's stack frame */ + std r0, LRSAVE+SWITCH_FRAME_SIZE(r1) /* Ok to continue? */ lbz r3, PACA_FTRACE_ENABLED(r13) cmpdi r3, 0 From 2ca48dbb210767b9e7166d7d47febc8fcd1da6e1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Thu, 17 Feb 2022 13:01:59 +0100 Subject: [PATCH 161/179] powerpc/ftrace: Use STK_GOT in ftrace_mprofile.S Instead of open coding the offset value 24, use STK_GOT when accessing the GOT register's save slot on the stack.
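For background, here is a minimal sketch of what a named offset like STK_GOT buys over a bare 24. The guard and values below follow the ELF ABI convention (TOC save doubleword at offset 24 for ELFv2, 40 for ELFv1) but are illustrative rather than copied from ppc_asm.h:

/*
 * Illustrative only: r2 (the TOC/GOT pointer) has an ABI-defined save
 * slot in the stack frame. Naming the offset documents intent at each
 * use site and gives a single point of change across ABIs.
 */
#ifdef PPC64_ELF_ABI_v2
#define STK_GOT		24	/* ELFv2: TOC pointer doubleword */
#else
#define STK_GOT		40	/* ELFv1 */
#endif

/*
 * std r2, 24(r1)        <- magic number, ABI knowledge left implicit
 * std r2, STK_GOT(r1)   <- self-documenting
 */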
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/9042bb30fa972056715fe5b6598a7c8049681293.1645099283.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/trace/ftrace_mprofile.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S index f5d31c458e6b..4fa23e260cab 100644 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S @@ -82,7 +82,7 @@ #ifdef CONFIG_PPC64 /* Save callee's TOC in the ABI compliant location */ - std r2, 24(r1) + std r2, STK_GOT(r1) ld r2,PACATOC(r13) /* get kernel TOC in r2 */ addis r3,r2,function_trace_op@toc@ha @@ -140,7 +140,7 @@ #ifdef CONFIG_PPC64 /* Restore callee's TOC */ - ld r2, 24(r1) + ld r2, STK_GOT(r1) #endif /* Pop our stack frame */ From e86debbbb5f89c2575110cfdce89d1820577aa94 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Fri, 4 Mar 2022 18:04:02 +0100 Subject: [PATCH 162/179] powerpc: Cleanup asm-prototypes.c The last call to sys_swapcontext() from ASM was removed by commit fbcee2ebe8ed ("powerpc/32: Always save non volatile GPRs at syscall entry"). The sys_debug_setcontext() prototype is not needed anymore since commit f3675644e172 ("powerpc/syscalls: signal_{32, 64} - switch to SYSCALL_DEFINE"). The sys_switch_endian() prototype is not needed anymore since commit 81dac8177862 ("powerpc/64: Make sys_switch_endian() traceable"). Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> [mpe: Keep _mcount() prototype to avoid modpost errors] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/3ed660a585df2080ea8412ec20fbf652f5bf013a.1646413435.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/asm-prototypes.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 41b8a1e1144a..cca69093f03d 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -57,12 +57,7 @@ int enter_vmx_ops(void); void *exit_vmx_ops(void *dest); /* signals, syscalls and interrupts */ -long sys_swapcontext(struct ucontext __user *old_ctx, - struct ucontext __user *new_ctx, - long ctx_size); #ifdef CONFIG_PPC32 -long sys_debug_setcontext(struct ucontext __user *ctx, - int ndbg, struct sig_dbg_op __user *dbg); int ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct __kernel_old_timeval __user *tvp); @@ -81,7 +76,6 @@ unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs); long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, u32 len_high, u32 len_low); -long sys_switch_endian(void); /* prom_init (OpenFirmware) */ unsigned long __init prom_init(unsigned long r3, unsigned long r4, From e15c703be48edc3b2f96b66d4f548dc88b44266c Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Fri, 4 Mar 2022 18:04:03 +0100 Subject: [PATCH 163/179] powerpc/smp: Declare current_set static The current_set extern is not needed anymore since commit eafd825ed710 ("powerpc/64: Simplify __secondary_start paca->kstack handling"). Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/a55eb65c9d7319f0af3c31e3f6ba36522f10003d.1646413435.git.christophe.leroy@csgroup.eu ---
arch/powerpc/include/asm/asm-prototypes.h | 1 - arch/powerpc/kernel/smp.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index cca69093f03d..ce4e355c96a2 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -20,7 +20,6 @@ #include <uapi/asm/ucontext.h> /* SMP */ -extern struct task_struct *current_set[NR_CPUS]; extern struct task_struct *secondary_current; void start_secondary(void *unused); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index b7fd6a72aa76..7e30a6fe5adf 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -716,7 +716,7 @@ void smp_send_stop(void) } #endif /* CONFIG_NMI_IPI */ -struct task_struct *current_set[NR_CPUS]; +static struct task_struct *current_set[NR_CPUS]; static void smp_store_cpu_info(int id) { From a4abd55a2490fd6407ddb6810e41f64ebd66d3af Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Fri, 4 Mar 2022 18:04:04 +0100 Subject: [PATCH 164/179] powerpc/kexec: Declare kexec_paca static kexec_paca is used exclusively in kexec/core_64.c. Declare it static. Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/094983ee851644165b7700c73cac63cfe20596cd.1646413435.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/asm-prototypes.h | 2 -- arch/powerpc/kexec/core_64.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index ce4e355c96a2..eede369ba94f 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -24,9 +24,7 @@ extern struct task_struct *secondary_current; void start_secondary(void *unused); /* kexec */ -struct paca_struct; struct kimage; -extern struct paca_struct kexec_paca; void kexec_copy_flush(struct kimage *image); /* pseries hcall tracing */ diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index 635b5fc30b53..2d49dce129f2 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -291,7 +291,7 @@ static union thread_union kexec_stack __init_task_data = * For similar reasons to the stack above, the kexecing CPU needs to be on a * static PACA; we switch to kexec_paca. */ -struct paca_struct kexec_paca; +static struct paca_struct kexec_paca; /* Our assembly helper, in misc_64.S */ extern void kexec_sequence(void *newstack, unsigned long start, From 76222808fc253cb9ea66c3e0e8d1946933f25b70 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Fri, 4 Mar 2022 18:04:05 +0100 Subject: [PATCH 165/179] powerpc: Move C prototypes out of asm-prototypes.h We originally added asm-prototypes.h in commit 42f5b4cacd78 ("powerpc: Introduce asm-prototypes.h"). Its purpose was for prototypes of C functions that are only called from asm, in order to fix sparse warnings about missing prototypes. A few months later Nick added a different use case in commit 4efca4ed05cb ("kbuild: modversions for EXPORT_SYMBOL() for asm") for C prototypes for exported asm functions. This is basically the inverse of our original usage. Since then we've added various prototypes to asm-prototypes.h for both reasons, meaning we now need to unstitch it all.
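The two use cases pull in opposite directions, as a short sketch with hypothetical names shows:

/*
 * Use case 1: a C function called only from assembly. The prototype
 * exists solely so sparse and -Wmissing-prototypes are satisfied; no
 * C caller ever includes it.
 */
void asm_only_entry(struct pt_regs *regs);

/*
 * Use case 2: a function implemented in assembly and exported to
 * modules. The C prototype lets genksyms compute a modversions CRC
 * to match the EXPORT_SYMBOL() emitted from the .S file.
 */
long exported_asm_helper(unsigned long arg);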
Dispatch prototypes of C functions into relevant headers and keep only the prototypes for functions defined in assembly. For the time being, leave prom_init() there because moving it into asm/prom.h or asm/setup.h conflicts with drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowrom.o. This will be fixed later by untangling asm/pci.h and asm/prom.h, or by renaming the function in shadowrom.c. Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/62d46904eca74042097acf4cb12c175e3067f3d1.1646413435.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/asm-prototypes.h | 49 ------------------- arch/powerpc/include/asm/ftrace.h | 3 ++ arch/powerpc/include/asm/hvcall.h | 5 ++ arch/powerpc/include/asm/interrupt.h | 11 +++++ arch/powerpc/include/asm/kexec.h | 2 + arch/powerpc/include/asm/processor.h | 8 +++ arch/powerpc/include/asm/setup.h | 7 +++ arch/powerpc/include/asm/smp.h | 3 ++ arch/powerpc/include/asm/syscalls.h | 4 ++ arch/powerpc/kernel/early_32.c | 1 - arch/powerpc/kernel/interrupt.c | 1 - arch/powerpc/kernel/irq.c | 1 - arch/powerpc/kernel/mce.c | 1 - arch/powerpc/kernel/ptrace/ptrace.c | 1 - arch/powerpc/kernel/setup_64.c | 1 - arch/powerpc/kernel/smp.c | 1 - arch/powerpc/kernel/syscalls.c | 1 - arch/powerpc/kernel/tau_6xx.c | 1 - arch/powerpc/kernel/time.c | 1 - arch/powerpc/kernel/trace/ftrace.c | 1 - arch/powerpc/kexec/core_64.c | 1 - arch/powerpc/kvm/book3s_hv_builtin.c | 1 - arch/powerpc/kvm/book3s_hv_rm_xive.c | 1 - arch/powerpc/lib/vmx-helper.c | 1 - arch/powerpc/mm/book3s64/slb.c | 1 - arch/powerpc/mm/fault.c | 1 - arch/powerpc/platforms/powernv/idle.c | 1 - .../platforms/powernv/opal-tracepoints.c | 1 - arch/powerpc/platforms/pseries/lpar.c | 1 - 29 files changed, 43 insertions(+), 69 deletions(-) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index eede369ba94f..d995c65d18ab 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -19,19 +19,6 @@ #include <uapi/asm/ucontext.h> -/* SMP */ -extern struct task_struct *secondary_current; -void start_secondary(void *unused); - -/* kexec */ -struct kimage; -void kexec_copy_flush(struct kimage *image); - -/* pseries hcall tracing */ -extern struct static_key hcall_tracepoint_key; -void __trace_hcall_entry(unsigned long opcode, unsigned long *args); -void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf); - /* Ultravisor */ #if defined(CONFIG_PPC_POWERNV) || defined(CONFIG_PPC_SVM) long ucall_norets(unsigned long opcode, ...); @@ -47,43 +34,12 @@ int64_t __opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3, int64_t a4, int64_t a5, int64_t a6, int64_t a7, int64_t opcode, uint64_t msr); -/* VMX copying */ -int enter_vmx_usercopy(void); -int exit_vmx_usercopy(void); -int enter_vmx_ops(void); -void *exit_vmx_ops(void *dest); - -/* signals, syscalls and interrupts */ -#ifdef CONFIG_PPC32 -int -ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct __kernel_old_timeval __user *tvp); -unsigned long __init early_init(unsigned long dt_ptr); -void __init machine_init(u64 dt_ptr); -#endif -long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, unsigned long r0, struct pt_regs *regs); -notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv); -notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs); -notrace unsigned
long interrupt_exit_kernel_prepare(struct pt_regs *regs); -#ifdef CONFIG_PPC64 -unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs); -unsigned long interrupt_exit_user_restart(struct pt_regs *regs); -unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs); -#endif - -long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, - u32 len_high, u32 len_low); - /* prom_init (OpenFirmware) */ unsigned long __init prom_init(unsigned long r3, unsigned long r4, unsigned long pp, unsigned long r6, unsigned long r7, unsigned long kbase); -/* setup */ -void __init early_setup(unsigned long dt_ptr); -void early_setup_secondary(void); - /* misc runtime */ extern u64 __bswapdi2(u64); extern s64 __lshrdi3(s64, int); @@ -94,11 +50,6 @@ extern int __ucmpdi2(u64, u64); /* tracing */ void _mcount(void); -unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, - unsigned long sp); - -void pnv_power9_force_smt4_catch(void); -void pnv_power9_force_smt4_release(void); /* Transaction memory related */ void tm_enable(void); diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index ff034ae4e472..d83758acd1c7 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -19,6 +19,9 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } +unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, + unsigned long sp); + struct dyn_arch_ftrace { struct module *mod; }; diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 48f510ba9f4a..d92a20a85395 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -501,6 +501,11 @@ long plpar_hcall_raw(unsigned long opcode, unsigned long *retbuf, ...); long plpar_hcall9(unsigned long opcode, unsigned long *retbuf, ...); long plpar_hcall9_raw(unsigned long opcode, unsigned long *retbuf, ...); +/* pseries hcall tracing */ +extern struct static_key hcall_tracepoint_key; +void __trace_hcall_entry(unsigned long opcode, unsigned long *args); +void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf); + struct hvcall_mpp_data { unsigned long entitled_mem; unsigned long mapped_mem; diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index f3b2c93a5db1..f964ef5c57d8 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -636,6 +636,17 @@ static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs) local_irq_enable(); } +long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, + unsigned long r0, struct pt_regs *regs); +notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv); +notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs); +notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs); +#ifdef CONFIG_PPC64 +unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs); +unsigned long interrupt_exit_user_restart(struct pt_regs *regs); +unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs); +#endif + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_INTERRUPT_H */ diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 8ebdd23d987c..2aefe14e1442 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -96,6 +96,8 @@ static inline bool kdump_in_progress(void) void 
relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_code_buffer, unsigned long start_address) __noreturn; +void kexec_copy_flush(struct kimage *image); + #ifdef CONFIG_KEXEC_FILE extern const struct kexec_file_ops kexec_elf64_ops; diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 2c8686d9e964..39c25021030f 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -411,6 +411,8 @@ extern int powersave_nap; /* set if nap mode can be used in idle loop */ extern void power7_idle_type(unsigned long type); extern void arch300_idle_type(unsigned long stop_psscr_val, unsigned long stop_psscr_mask); +void pnv_power9_force_smt4_catch(void); +void pnv_power9_force_smt4_release(void); extern int fix_alignment(struct pt_regs *); @@ -427,6 +429,12 @@ extern int fix_alignment(struct pt_regs *); int do_mathemu(struct pt_regs *regs); +/* VMX copying */ +int enter_vmx_usercopy(void); +int exit_vmx_usercopy(void); +int enter_vmx_ops(void); +void *exit_vmx_ops(void *dest); + #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PROCESSOR_H */ diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index d0d3dd531c7f..049ca26893e6 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -76,6 +76,13 @@ static inline void setup_spectre_v2(void) {} #endif void __init do_btb_flush_fixups(void); +#ifdef CONFIG_PPC32 +unsigned long __init early_init(unsigned long dt_ptr); +void __init machine_init(u64 dt_ptr); +#endif +void __init early_setup(unsigned long dt_ptr); +void early_setup_secondary(void); + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_SETUP_H */ diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 007332a4a732..60ab739a5e3b 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -60,6 +60,9 @@ struct smp_ops_t { #endif }; +extern struct task_struct *secondary_current; + +void start_secondary(void *unused); extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us); extern int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us); extern void smp_send_debugger_break(void); diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h index 7ee66ae5444d..a2b13e55254f 100644 --- a/arch/powerpc/include/asm/syscalls.h +++ b/arch/powerpc/include/asm/syscalls.h @@ -18,6 +18,10 @@ asmlinkage long sys_mmap2(unsigned long addr, size_t len, unsigned long fd, unsigned long pgoff); asmlinkage long ppc64_personality(unsigned long personality); asmlinkage long sys_rtas(struct rtas_args __user *uargs); +int ppc_select(int n, fd_set __user *inp, fd_set __user *outp, + fd_set __user *exp, struct __kernel_old_timeval __user *tvp); +long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, + u32 len_high, u32 len_low); #ifdef CONFIG_COMPAT unsigned long compat_sys_mmap2(unsigned long addr, size_t len, diff --git a/arch/powerpc/kernel/early_32.c b/arch/powerpc/kernel/early_32.c index ef2ad4945904..03f1135ef64f 100644 --- a/arch/powerpc/kernel/early_32.c +++ b/arch/powerpc/kernel/early_32.c @@ -8,7 +8,6 @@ #include <linux/kernel.h> #include <asm/setup.h> #include <asm/sections.h> -#include <asm/asm-prototypes.h> /* * We're called here very early in the boot. 
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 7cd6ce3ec423..784ea3289c84 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -5,7 +5,6 @@ #include <linux/compat.h> #include <linux/sched/debug.h> /* for show_regs */ -#include <asm/asm-prototypes.h> #include <asm/kup.h> #include <asm/cputime.h> #include <asm/hw_irq.h> diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 2cf31a97126c..752fb182eacb 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -64,7 +64,6 @@ #include <asm/udbg.h> #include <asm/smp.h> #include <asm/livepatch.h> -#include <asm/asm-prototypes.h> #include <asm/hw_irq.h> #include <asm/softirq_stack.h> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 671d727d06f7..18173199b79d 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -24,7 +24,6 @@ #include <asm/machdep.h> #include <asm/mce.h> #include <asm/nmi.h> -#include <asm/asm-prototypes.h> #include "setup.h" diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 1212a812a7ab..55742ef1f991 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -22,7 +22,6 @@ #include <linux/syscalls.h> #include <asm/switch_to.h> -#include <asm/asm-prototypes.h> #include <asm/debug.h> #define CREATE_TRACE_POINTS diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index be8577ac9397..e547066a06aa 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -67,7 +67,6 @@ #include <asm/kup.h> #include <asm/early_ioremap.h> #include <asm/pgalloc.h> -#include <asm/asm-prototypes.h> #include "setup.h" diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 7e30a6fe5adf..de0f6f09a5dd 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -57,7 +57,6 @@ #include <asm/vdso.h> #include <asm/debug.h> #include <asm/kexec.h> -#include <asm/asm-prototypes.h> #include <asm/cpu_has_feature.h> #include <asm/ftrace.h> #include <asm/kup.h> diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 825931e400df..c4f5b4ce926f 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -35,7 +35,6 @@ #include <asm/syscalls.h> #include <asm/time.h> #include <asm/unistd.h> -#include <asm/asm-prototypes.h> static inline long do_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 8e83d19fe8fa..828d0f4106d2 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -29,7 +29,6 @@ #include <asm/cache.h> #include <asm/8xx_immap.h> #include <asm/machdep.h> -#include <asm/asm-prototypes.h> #include "setup.h" diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 17598bba54eb..958e2929776f 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -69,7 +69,6 @@ #include <asm/smp.h> #include <asm/vdso_datapage.h> #include <asm/firmware.h> -#include <asm/asm-prototypes.h> #include <asm/mce.h> /* powerpc clocksource/clockevent code */ diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index f21b8fbd418e..4ee04aacf9f1 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -22,7 +22,6 @@ #include <linux/init.h> #include <linux/list.h> -#include <asm/asm-prototypes.h> #include 
<asm/cacheflush.h> #include <asm/code-patching.h> #include <asm/ftrace.h> diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index 2d49dce129f2..6cc7793b8420 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -28,7 +28,6 @@ #include <asm/prom.h> #include <asm/smp.h> #include <asm/hw_breakpoint.h> -#include <asm/asm-prototypes.h> #include <asm/svm.h> #include <asm/ultravisor.h> diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7d6d91338c3f..7e52d0beee77 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -15,7 +15,6 @@ #include <linux/cma.h> #include <linux/bitops.h> -#include <asm/asm-prototypes.h> #include <asm/cputable.h> #include <asm/interrupt.h> #include <asm/kvm_ppc.h> diff --git a/arch/powerpc/kvm/book3s_hv_rm_xive.c b/arch/powerpc/kvm/book3s_hv_rm_xive.c index 6f18632e30e9..dd9880731bd6 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xive.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xive.c @@ -16,7 +16,6 @@ #include <asm/pnv-pci.h> #include <asm/opal.h> #include <asm/smp.h> -#include <asm/asm-prototypes.h> #include <asm/xive.h> #include <asm/xive-regs.h> diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c index 62e6c3045252..f76a50291fd7 100644 --- a/arch/powerpc/lib/vmx-helper.c +++ b/arch/powerpc/lib/vmx-helper.c @@ -9,7 +9,6 @@ #include <linux/uaccess.h> #include <linux/hardirq.h> #include <asm/switch_to.h> -#include <asm/asm-prototypes.h> int enter_vmx_usercopy(void) { diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index 31f4cef3adac..81091b9587f6 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -9,7 +9,6 @@ * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM */ -#include <asm/asm-prototypes.h> #include <asm/interrupt.h> #include <asm/mmu.h> #include <asm/mmu_context.h> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 7ba6d3eff636..d53fed4eccbd 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -35,7 +35,6 @@ #include <linux/kfence.h> #include <linux/pkeys.h> -#include <asm/asm-prototypes.h> #include <asm/firmware.h> #include <asm/interrupt.h> #include <asm/page.h> diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 9942289f379b..a6677a111aca 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -12,7 +12,6 @@ #include <linux/device.h> #include <linux/cpu.h> -#include <asm/asm-prototypes.h> #include <asm/firmware.h> #include <asm/interrupt.h> #include <asm/machdep.h> diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c index f16a43540e30..91b36541b9e5 100644 --- a/arch/powerpc/platforms/powernv/opal-tracepoints.c +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -2,7 +2,6 @@ #include <linux/percpu.h> #include <linux/jump_label.h> #include <asm/trace.h> -#include <asm/asm-prototypes.h> #ifdef CONFIG_JUMP_LABEL struct static_key opal_tracepoint_key = STATIC_KEY_INIT; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index f8899d506ea4..760581c5752f 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -40,7 +40,6 @@ #include <asm/plpar_wrappers.h> #include <asm/kexec.h> #include <asm/fadump.h> -#include <asm/asm-prototypes.h> #include <asm/dtl.h> #include "pseries.h" From 
1a76e520ee1831a81dabf8a9a58c6453f700026e Mon Sep 17 00:00:00 2001 From: Michael Ellerman <mpe@ellerman.id.au> Date: Fri, 4 Mar 2022 17:12:22 +1100 Subject: [PATCH 166/179] powerpc/64e: Tie PPC_BOOK3E_64 to PPC_FSL_BOOK3E Since the IBM A2 CPU support was removed, see commit fb5a515704d7 ("powerpc: Remove platforms/wsp and associated pieces"), the only 64-bit Book3E CPUs we support are Freescale (NXP) ones. However our Kconfig still allows configuring a kernel that has 64-bit Book3E support, but no Freescale CPU support enabled. Such a kernel would never boot; it doesn't know about any CPUs. It also causes build errors, as reported by lkp, because PPC_BARRIER_NOSPEC is not enabled in such a configuration: powerpc64-linux-ld: arch/powerpc/net/bpf_jit_comp64.o:(.toc+0x0): undefined reference to `powerpc_security_features' To fix this, force PPC_FSL_BOOK3E to be selected whenever we are building a 64-bit Book3E kernel. Reported-by: kernel test robot <lkp@intel.com> Reported-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Suggested-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220304061222.2478720-1-mpe@ellerman.id.au --- arch/powerpc/platforms/Kconfig.cputype | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 87bc1929ee5a..e2e1fec91c6e 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -107,6 +107,7 @@ config PPC_BOOK3S_64 config PPC_BOOK3E_64 bool "Embedded processors" + select PPC_FSL_BOOK3E select PPC_FPU # Make it a choice ? select PPC_SMP_MUXED_IPI select PPC_DOORBELL @@ -295,7 +296,7 @@ config FSL_BOOKE config PPC_FSL_BOOK3E bool select ARCH_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64 - select FSL_EMB_PERFMON + imply FSL_EMB_PERFMON select PPC_SMP_MUXED_IPI select PPC_DOORBELL select PPC_KUEP From d601fd24e6964967f115f036a840f4f28488f63f Mon Sep 17 00:00:00 2001 From: Hangyu Hua <hbh25y@gmail.com> Date: Wed, 2 Mar 2022 10:19:59 +0800 Subject: [PATCH 167/179] powerpc/secvar: fix refcount leak in format_show() A refcount leak will happen when format_show() returns a failure in any of several paths. Unified management of of_node_put() fixes this problem.
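The shape of the fix is the classic single-exit pattern: take the node reference once and funnel every return through one of_node_put(). A self-contained sketch with hypothetical names (the real change is to format_show() in secvar-sysfs.c):

#include <linux/of.h>
#include <linux/kernel.h>

static ssize_t backend_format_show(char *buf)
{
	struct device_node *node;
	const char *format;
	ssize_t rc;

	node = of_find_compatible_node(NULL, NULL, "example,backend");
	if (!of_device_is_available(node)) {
		rc = -ENODEV;
		goto out;
	}

	rc = of_property_read_string(node, "format", &format);
	if (rc)
		goto out;

	rc = sprintf(buf, "%s\n", format);
out:
	of_node_put(node);	/* of_node_put(NULL) is a safe no-op */
	return rc;
}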
Signed-off-by: Hangyu Hua <hbh25y@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220302021959.10959-1-hbh25y@gmail.com --- arch/powerpc/kernel/secvar-sysfs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index a0a78aba2083..1ee4640a2641 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -26,15 +26,18 @@ static ssize_t format_show(struct kobject *kobj, struct kobj_attribute *attr, const char *format; node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend"); - if (!of_device_is_available(node)) - return -ENODEV; + if (!of_device_is_available(node)) { + rc = -ENODEV; + goto out; + } rc = of_property_read_string(node, "format", &format); if (rc) - return rc; + goto out; rc = sprintf(buf, "%s\n", format); +out: of_node_put(node); return rc; From 5986f6b6575ac830ede9648cfb64353c58067a9f Mon Sep 17 00:00:00 2001 From: YueHaibing <yuehaibing@huawei.com> Date: Tue, 8 Mar 2022 18:09:28 +0800 Subject: [PATCH 168/179] powerpc/spufs: Fix build warning when CONFIG_PROC_FS=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch/powerpc/platforms/cell/spufs/sched.c:1055:12: warning: ‘show_spu_loadavg’ defined but not used [-Wunused-function] static int show_spu_loadavg(struct seq_file *s, void *private) ^~~~~~~~~~~~~~~~ Move it into the #ifdef block to fix this, and also remove an unneeded semicolon. Signed-off-by: YueHaibing <yuehaibing@huawei.com> Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220308100928.23540-1-yuehaibing@huawei.com --- arch/powerpc/platforms/cell/spufs/sched.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index d058f6233e66..99bd027a7f7c 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -1052,6 +1052,7 @@ void spuctx_switch_state(struct spu_context *ctx, } } +#ifdef CONFIG_PROC_FS static int show_spu_loadavg(struct seq_file *s, void *private) { int a, b, c; @@ -1073,7 +1074,8 @@ static int show_spu_loadavg(struct seq_file *s, void *private) atomic_read(&nr_spu_contexts), idr_get_cursor(&task_active_pid_ns(current)->idr) - 1); return 0; -}; +} +#endif int __init spu_sched_init(void) { From 6b3a3e12f8e6eea47428bb39aaf58832b50bb379 Mon Sep 17 00:00:00 2001 From: Rohan McLure <rmclure@linux.ibm.com> Date: Tue, 8 Mar 2022 10:14:14 +1100 Subject: [PATCH 169/179] powerpc: declare unmodified attribute_group usages const Inspired by commit bd75b4ef4977 ("Constify static attribute_group structs"), accepted by linux-next, reported at: https://patchwork.ozlabs.org/project/linuxppc-dev/patch/20220210202805.7750-4-rikard.falkeborn@gmail.com/ Nearly all singletons of type struct attribute_group are never modified, and so are candidates for being const. Declare them as const.
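To make the constification concrete, a minimal sketch with hypothetical names: a group that is only ever handed to sysfs_create_group() is never written to by the kernel, so it can be marked const and placed in rodata:

#include <linux/kobject.h>
#include <linux/sysfs.h>

static ssize_t example_show(struct kobject *kobj,
			    struct kobj_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "example\n");
}

static struct kobj_attribute example_attr = __ATTR_RO(example);

static struct attribute *example_attrs[] = {
	&example_attr.attr,
	NULL,
};

/* const: only read after registration, so it can live in rodata */
static const struct attribute_group example_group = {
	.name  = "example",
	.attrs = example_attrs,
};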
Signed-off-by: Rohan McLure <rmclure@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220307231414.86560-1-rmclure@linux.ibm.com --- arch/powerpc/include/asm/spu.h | 4 ++-- arch/powerpc/perf/generic-compat-pmu.c | 4 ++-- arch/powerpc/perf/hv-24x7.c | 6 +++--- arch/powerpc/perf/hv-gpci.c | 8 ++++---- arch/powerpc/perf/imc-pmu.c | 6 +++--- arch/powerpc/perf/isa207-common.c | 2 +- arch/powerpc/perf/power10-pmu.c | 6 +++--- arch/powerpc/perf/power7-pmu.c | 4 ++-- arch/powerpc/perf/power8-pmu.c | 4 ++-- arch/powerpc/perf/power9-pmu.c | 6 +++--- arch/powerpc/platforms/cell/cbe_thermal.c | 2 +- arch/powerpc/platforms/cell/spu_base.c | 4 ++-- arch/powerpc/platforms/powernv/opal-core.c | 2 +- arch/powerpc/platforms/powernv/opal-dump.c | 2 +- arch/powerpc/platforms/powernv/opal-flash.c | 2 +- arch/powerpc/platforms/pseries/papr_scm.c | 2 +- arch/powerpc/platforms/pseries/power.c | 2 +- 17 files changed, 33 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/include/asm/spu.h b/arch/powerpc/include/asm/spu.h index 8a2d11ba0dae..96ad4510c895 100644 --- a/arch/powerpc/include/asm/spu.h +++ b/arch/powerpc/include/asm/spu.h @@ -249,8 +249,8 @@ void unregister_spu_syscalls(struct spufs_calls *calls); int spu_add_dev_attr(struct device_attribute *attr); void spu_remove_dev_attr(struct device_attribute *attr); -int spu_add_dev_attr_group(struct attribute_group *attrs); -void spu_remove_dev_attr_group(struct attribute_group *attrs); +int spu_add_dev_attr_group(const struct attribute_group *attrs); +void spu_remove_dev_attr_group(const struct attribute_group *attrs); extern void notify_spus_active(void); extern void do_notify_spus_active(void); diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c index b6e25f75109d..f3db88aee4dd 100644 --- a/arch/powerpc/perf/generic-compat-pmu.c +++ b/arch/powerpc/perf/generic-compat-pmu.c @@ -130,7 +130,7 @@ static struct attribute *generic_compat_events_attr[] = { NULL }; -static struct attribute_group generic_compat_pmu_events_group = { +static const struct attribute_group generic_compat_pmu_events_group = { .name = "events", .attrs = generic_compat_events_attr, }; @@ -146,7 +146,7 @@ static struct attribute *generic_compat_pmu_format_attr[] = { NULL, }; -static struct attribute_group generic_compat_pmu_format_group = { +static const struct attribute_group generic_compat_pmu_format_group = { .name = "format", .attrs = generic_compat_pmu_format_attr, }; diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 1e8aa934e37e..12c1777187fc 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -204,7 +204,7 @@ static struct attribute *format_attrs[] = { NULL, }; -static struct attribute_group format_group = { +static const struct attribute_group format_group = { .name = "format", .attrs = format_attrs, }; @@ -1148,7 +1148,7 @@ static struct attribute *cpumask_attrs[] = { NULL, }; -static struct attribute_group cpumask_attr_group = { +static const struct attribute_group cpumask_attr_group = { .attrs = cpumask_attrs, }; @@ -1162,7 +1162,7 @@ static struct attribute *if_attrs[] = { NULL, }; -static struct attribute_group if_group = { +static const struct attribute_group if_group = { .name = "interface", .bin_attrs = if_bin_attrs, .attrs = if_attrs, diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index c756228a081f..5eb60ed5b5e8 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -65,12 +65,12 
@@ static struct attribute *format_attrs[] = { NULL, }; -static struct attribute_group format_group = { +static const struct attribute_group format_group = { .name = "format", .attrs = format_attrs, }; -static struct attribute_group event_group = { +static const struct attribute_group event_group = { .name = "events", .attrs = hv_gpci_event_attrs, }; @@ -126,11 +126,11 @@ static struct attribute *cpumask_attrs[] = { NULL, }; -static struct attribute_group cpumask_attr_group = { +static const struct attribute_group cpumask_attr_group = { .attrs = cpumask_attrs, }; -static struct attribute_group interface_group = { +static const struct attribute_group interface_group = { .name = "interface", .attrs = interface_attrs, }; diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index e7583fbcc8fa..526d4b767534 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -71,7 +71,7 @@ static struct attribute *imc_format_attrs[] = { NULL, }; -static struct attribute_group imc_format_group = { +static const struct attribute_group imc_format_group = { .name = "format", .attrs = imc_format_attrs, }; @@ -90,7 +90,7 @@ static struct attribute *trace_imc_format_attrs[] = { NULL, }; -static struct attribute_group trace_imc_format_group = { +static const struct attribute_group trace_imc_format_group = { .name = "format", .attrs = trace_imc_format_attrs, }; @@ -125,7 +125,7 @@ static struct attribute *imc_pmu_cpumask_attrs[] = { NULL, }; -static struct attribute_group imc_pmu_cpumask_attr_group = { +static const struct attribute_group imc_pmu_cpumask_attr_group = { .attrs = imc_pmu_cpumask_attrs, }; diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 4037ea652522..a74d382ecbb7 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -37,7 +37,7 @@ static struct attribute *isa207_pmu_format_attr[] = { NULL, }; -struct attribute_group isa207_pmu_format_group = { +const struct attribute_group isa207_pmu_format_group = { .name = "format", .attrs = isa207_pmu_format_attr, }; diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index 0975ad0b42c4..d3398100a60f 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -200,12 +200,12 @@ static struct attribute *power10_events_attr[] = { NULL }; -static struct attribute_group power10_pmu_events_group_dd1 = { +static const struct attribute_group power10_pmu_events_group_dd1 = { .name = "events", .attrs = power10_events_attr_dd1, }; -static struct attribute_group power10_pmu_events_group = { +static const struct attribute_group power10_pmu_events_group = { .name = "events", .attrs = power10_events_attr, }; @@ -253,7 +253,7 @@ static struct attribute *power10_pmu_format_attr[] = { NULL, }; -static struct attribute_group power10_pmu_format_group = { +static const struct attribute_group power10_pmu_format_group = { .name = "format", .attrs = power10_pmu_format_attr, }; diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index 99b5ba314ea7..a74211410b8d 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -405,7 +405,7 @@ static struct attribute *power7_events_attr[] = { NULL }; -static struct attribute_group power7_pmu_events_group = { +static const struct attribute_group power7_pmu_events_group = { .name = "events", .attrs = power7_events_attr, }; @@ -417,7 +417,7 @@ static struct attribute *power7_pmu_format_attr[] = { NULL, }; -static struct 
attribute_group power7_pmu_format_group = { +static const struct attribute_group power7_pmu_format_group = { .name = "format", .attrs = power7_pmu_format_attr, }; diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index f21194b5604a..e37b1e714d2b 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -92,7 +92,7 @@ enum { */ /* PowerISA v2.07 format attribute structure*/ -extern struct attribute_group isa207_pmu_format_group; +extern const struct attribute_group isa207_pmu_format_group; /* Table of alternatives, sorted by column 0 */ static const unsigned int event_alternatives[][MAX_ALT] = { @@ -182,7 +182,7 @@ static struct attribute *power8_events_attr[] = { NULL }; -static struct attribute_group power8_pmu_events_group = { +static const struct attribute_group power8_pmu_events_group = { .name = "events", .attrs = power8_events_attr, }; diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 4b7c17e36100..c9eb5232e68b 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -96,7 +96,7 @@ extern u64 PERF_REG_EXTENDED_MASK; #define PVR_POWER9_CUMULUS 0x00002000 /* PowerISA v2.07 format attribute structure*/ -extern struct attribute_group isa207_pmu_format_group; +extern const struct attribute_group isa207_pmu_format_group; int p9_dd21_bl_ev[] = { PM_MRK_ST_DONE_L2, @@ -217,7 +217,7 @@ static struct attribute *power9_events_attr[] = { NULL }; -static struct attribute_group power9_pmu_events_group = { +static const struct attribute_group power9_pmu_events_group = { .name = "events", .attrs = power9_events_attr, }; @@ -253,7 +253,7 @@ static struct attribute *power9_pmu_format_attr[] = { NULL, }; -static struct attribute_group power9_pmu_format_group = { +static const struct attribute_group power9_pmu_format_group = { .name = "format", .attrs = power9_pmu_format_attr, }; diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c index 2ece77f49bc3..abb5e527b4db 100644 --- a/arch/powerpc/platforms/cell/cbe_thermal.c +++ b/arch/powerpc/platforms/cell/cbe_thermal.c @@ -255,7 +255,7 @@ static struct attribute *spu_attributes[] = { NULL, }; -static struct attribute_group spu_attribute_group = { +static const struct attribute_group spu_attribute_group = { .name = "thermal", .attrs = spu_attributes, }; diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 83cea9e7ee72..2eecba3345c3 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -490,7 +490,7 @@ int spu_add_dev_attr(struct device_attribute *attr) } EXPORT_SYMBOL_GPL(spu_add_dev_attr); -int spu_add_dev_attr_group(struct attribute_group *attrs) +int spu_add_dev_attr_group(const struct attribute_group *attrs) { struct spu *spu; int rc = 0; @@ -529,7 +529,7 @@ void spu_remove_dev_attr(struct device_attribute *attr) } EXPORT_SYMBOL_GPL(spu_remove_dev_attr); -void spu_remove_dev_attr_group(struct attribute_group *attrs) +void spu_remove_dev_attr_group(const struct attribute_group *attrs) { struct spu *spu; diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c index 0331f1973f0e..b97bc179f65a 100644 --- a/arch/powerpc/platforms/powernv/opal-core.c +++ b/arch/powerpc/platforms/powernv/opal-core.c @@ -603,7 +603,7 @@ static struct bin_attribute *mpipl_bin_attr[] = { }; -static struct attribute_group mpipl_group = { +static const struct attribute_group mpipl_group = { 
.attrs = mpipl_attr, .bin_attrs = mpipl_bin_attr, }; diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 410ed5b9de29..16c5860f1372 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -150,7 +150,7 @@ static struct attribute *initiate_attrs[] = { NULL, }; -static struct attribute_group initiate_attr_group = { +static const struct attribute_group initiate_attr_group = { .attrs = initiate_attrs, }; diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c index 7e7d38b17420..18481a8c52fa 100644 --- a/arch/powerpc/platforms/powernv/opal-flash.c +++ b/arch/powerpc/platforms/powernv/opal-flash.c @@ -512,7 +512,7 @@ static struct attribute *image_op_attrs[] = { NULL /* need to NULL terminate the list of attributes */ }; -static struct attribute_group image_op_attr_group = { +static const struct attribute_group image_op_attr_group = { .attrs = image_op_attrs, }; diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 20aafd387840..1238b94b3cc1 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -1039,7 +1039,7 @@ static struct attribute *papr_nd_attributes[] = { NULL, }; -static struct attribute_group papr_nd_attribute_group = { +static const struct attribute_group papr_nd_attribute_group = { .name = "papr", .is_visible = papr_nd_attribute_visible, .attrs = papr_nd_attributes, diff --git a/arch/powerpc/platforms/pseries/power.c b/arch/powerpc/platforms/pseries/power.c index ee343ec6ab94..3676cb297767 100644 --- a/arch/powerpc/platforms/pseries/power.c +++ b/arch/powerpc/platforms/pseries/power.c @@ -51,7 +51,7 @@ static struct attribute *g[] = { NULL, }; -static struct attribute_group attr_group = { +static const struct attribute_group attr_group = { .attrs = g, }; From 0b0057cc4193c7cd9c0829a440e4901b29ce4ff8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Fri, 11 Feb 2022 09:51:32 +0100 Subject: [PATCH 170/179] powerpc/bitops: Force inlining of fls() Building a kernel with CONFIG_CC_OPTIMISE_FOR_SIZE leads to the following functions being copied several times in vmlinux: 31 times __ilog2_u32() 34 times fls() Disassembly follows: c00f476c <fls>: c00f476c: 7c 63 00 34 cntlzw r3,r3 c00f4770: 20 63 00 20 subfic r3,r3,32 c00f4774: 4e 80 00 20 blr c00f4778 <__ilog2_u32>: c00f4778: 94 21 ff f0 stwu r1,-16(r1) c00f477c: 7c 08 02 a6 mflr r0 c00f4780: 90 01 00 14 stw r0,20(r1) c00f4784: 4b ff ff e9 bl c00f476c <fls> c00f4788: 80 01 00 14 lwz r0,20(r1) c00f478c: 38 63 ff ff addi r3,r3,-1 c00f4790: 7c 08 03 a6 mtlr r0 c00f4794: 38 21 00 10 addi r1,r1,16 c00f4798: 4e 80 00 20 blr When forcing inlining of fls(), we get c0008b80 <__ilog2_u32>: c0008b80: 7c 63 00 34 cntlzw r3,r3 c0008b84: 20 63 00 1f subfic r3,r3,31 c0008b88: 4e 80 00 20 blr vmlinux size gets reduced by 1 kbyte with that change. 
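The mechanics, as a sketch: under -Os the compiler may decide that even a two-instruction static inline is not worth inlining and emit an out-of-line copy per object file; __always_inline removes that freedom. The body below mirrors the cntlzw idiom in the disassembly above but is illustrative, not a copy of the kernel's fls():

#include <linux/compiler.h>	/* for __always_inline */

/* Illustrative: fls(x) computed as 32 - count-leading-zeros(x) */
static __always_inline int fls_sketch(unsigned int x)
{
	int lz;

	asm("cntlzw %0,%1" : "=r" (lz) : "r" (x));
	return 32 - lz;
}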
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/adc9c9d6378f6b5008246ca717993d7870188efb.1644569473.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/bitops.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index ea5d27dda8cf..344fba3b16eb 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -287,7 +287,7 @@ static inline void arch___clear_bit_unlock(int nr, volatile unsigned long *addr) * fls: find last (most-significant) bit set. * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. */ -static inline int fls(unsigned int x) +static __always_inline int fls(unsigned int x) { int lz; @@ -305,7 +305,7 @@ static inline int fls(unsigned int x) * 32-bit fls calls. */ #ifdef CONFIG_PPC64 -static inline int fls64(__u64 x) +static __always_inline int fls64(__u64 x) { int lz; From 792993919349fefba20f58ae4843c80e8b01f518 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Fri, 11 Feb 2022 15:16:51 +0100 Subject: [PATCH 171/179] powerpc/64: Force inlining of prevent_user_access() and set_kuap() A ppc64_defconfig build exhibits about 10 copies of prevent_user_access(). It also has one copy of set_kuap(). c000000000017340 <.prevent_user_access.constprop.0>: c00000000001a038: 4b ff d3 09 bl c000000000017340 <.prevent_user_access.constprop.0> c00000000001aabc: 4b ff c8 85 bl c000000000017340 <.prevent_user_access.constprop.0> c00000000001ab38: 4b ff c8 09 bl c000000000017340 <.prevent_user_access.constprop.0> c00000000001ade0: 4b ff c5 61 bl c000000000017340 <.prevent_user_access.constprop.0> c000000000039b90 <.prevent_user_access.constprop.0>: c00000000003ac08: 4b ff ef 89 bl c000000000039b90 <.prevent_user_access.constprop.0> c00000000003b9d0: 4b ff e1 c1 bl c000000000039b90 <.prevent_user_access.constprop.0> c00000000003ba54: 4b ff e1 3d bl c000000000039b90 <.prevent_user_access.constprop.0> c00000000003bbfc: 4b ff df 95 bl c000000000039b90 <.prevent_user_access.constprop.0> c00000000015dde0 <.prevent_user_access.constprop.0>: c0000000001612c0: 4b ff cb 21 bl c00000000015dde0 <.prevent_user_access.constprop.0> c000000000161b54: 4b ff c2 8d bl c00000000015dde0 <.prevent_user_access.constprop.0> c000000000188cf0 <.prevent_user_access.constprop.0>: c00000000018d658: 4b ff b6 99 bl c000000000188cf0 <.prevent_user_access.constprop.0> c00000000030fe20 <.prevent_user_access.constprop.0>: c0000000003123d4: 4b ff da 4d bl c00000000030fe20 <.prevent_user_access.constprop.0> c000000000313970: 4b ff c4 b1 bl c00000000030fe20 <.prevent_user_access.constprop.0> c0000000005e6bd0 <.prevent_user_access.constprop.0>: c0000000005e7d8c: 4b ff ee 45 bl c0000000005e6bd0 <.prevent_user_access.constprop.0> c0000000007bcae0 <.prevent_user_access.constprop.0>: c0000000007bda10: 4b ff f0 d1 bl c0000000007bcae0 <.prevent_user_access.constprop.0> c0000000007bda54: 4b ff f0 8d bl c0000000007bcae0 <.prevent_user_access.constprop.0> c0000000007bdd28: 4b ff ed b9 bl c0000000007bcae0 <.prevent_user_access.constprop.0> c0000000007c0390: 4b ff c7 51 bl c0000000007bcae0 <.prevent_user_access.constprop.0> c00000000094e4f0 <.prevent_user_access.constprop.0>: c000000000950e40: 4b ff d6 b1 bl c00000000094e4f0 <.prevent_user_access.constprop.0> c00000000097d2d0 <.prevent_user_access.constprop.0>: c0000000009813fc: 4b ff be d5 bl c00000000097d2d0
<.prevent_user_access.constprop.0> c000000000acd540 <.prevent_user_access.constprop.0>: c000000000ad1d60: 4b ff b7 e1 bl c000000000acd540 <.prevent_user_access.constprop.0> c000000000e5d680 <.prevent_user_access.constprop.0>: c000000000e64b60: 4b ff 8b 21 bl c000000000e5d680 <.prevent_user_access.constprop.0> c000000000e64b6c: 4b ff 8b 15 bl c000000000e5d680 <.prevent_user_access.constprop.0> c000000000e64c38: 4b ff 8a 49 bl c000000000e5d680 <.prevent_user_access.constprop.0> When building signal_64.c with -Winline the following messages appear: ./arch/powerpc/include/asm/book3s/64/kup.h:331:20: error: inlining failed in call to 'set_kuap': call is unlikely and code size would grow [-Werror=inline] ./arch/powerpc/include/asm/book3s/64/kup.h:401:20: error: inlining failed in call to 'prevent_user_access.constprop': call is unlikely and code size would grow [-Werror=inline] Those functions are used on hot paths and have been expected to be inlined at all times. Force them inline. This patch reduces the kernel text size by 700 bytes, confirming that not inlining those functions is not worth it. Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/eff9b2b211957fa2e8707e46f31674097fd563a3.1644588972.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/64/kup.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index 69fcf63eec94..54cf46808157 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -328,7 +328,7 @@ static inline unsigned long get_kuap(void) return mfspr(SPRN_AMR); } -static inline void set_kuap(unsigned long value) +static __always_inline void set_kuap(unsigned long value) { if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) return; @@ -398,7 +398,7 @@ static __always_inline void allow_user_access(void __user *to, const void __user #endif /* !CONFIG_PPC_KUAP */ -static inline void prevent_user_access(unsigned long dir) +static __always_inline void prevent_user_access(unsigned long dir) { set_kuap(AMR_KUAP_BLOCKED); if (static_branch_unlikely(&uaccess_flush_key)) From 9f5196065eeb96fee1a15f2eae31fe1fc7623ade Mon Sep 17 00:00:00 2001 From: jing yangyang <cgel.zte@gmail.com> Date: Thu, 19 Aug 2021 19:49:01 -0700 Subject: [PATCH 172/179] powerpc/ps3: remove unneeded semicolons Eliminate the following coccicheck warnings: ./arch/powerpc/platforms/ps3/system-bus.c:606:2-3: Unneeded semicolon ./arch/powerpc/platforms/ps3/system-bus.c:765:2-3: Unneeded semicolon Reported-by: Zeal Robot <zealci@zte.com.cn> Signed-off-by: jing yangyang <jing.yangyang@zte.com.cn> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/01647102607ce9640c9f27786d976109a3c4ea7e.1629197153.git.jing.yangyang@zte.com.cn --- arch/powerpc/platforms/ps3/system-bus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index c8b50fec56bf..b637bf292047 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -603,7 +603,7 @@ static dma_addr_t ps3_ioc0_map_page(struct device *_dev, struct page *page, default: /* not happned */ BUG(); - }; + } result = ps3_dma_map(dev->d_region, (unsigned long)ptr, size, &bus_addr, iopte_flag); @@ -762,7 +762,7 @@ int ps3_system_bus_device_register(struct
ps3_system_bus_device *dev) break; default: BUG(); - }; + } dev->core.of_node = NULL; set_dev_node(&dev->core, 0); From 3fd46e551f67f4303c3276a0d6cd20baf2d192c4 Mon Sep 17 00:00:00 2001 From: Hangyu Hua <hbh25y@gmail.com> Date: Wed, 23 Feb 2022 15:02:23 +0800 Subject: [PATCH 173/179] powerpc: 8xx: fix a return value error in mpc8xx_pic_init mpc8xx_pic_init() should return -ENOMEM instead of 0 when irq_domain_add_linear() returns NULL. This causes mpc8xx_pics_init to continue executing even if mpc8xx_pic_host is NULL. Fixes: cc76404feaed ("powerpc/8xx: Fix possible device node reference leak") Signed-off-by: Hangyu Hua <hbh25y@gmail.com> Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220223070223.26845-1-hbh25y@gmail.com --- arch/powerpc/platforms/8xx/pic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c index f2ba837249d6..04a6abf14c29 100644 --- a/arch/powerpc/platforms/8xx/pic.c +++ b/arch/powerpc/platforms/8xx/pic.c @@ -153,6 +153,7 @@ int __init mpc8xx_pic_init(void) if (mpc8xx_pic_host == NULL) { printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n"); ret = -ENOMEM; + goto out; } ret = 0; From d799769188529abc6cbf035a10087a51f7832b6b Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy <aik@ozlabs.ru> Date: Wed, 9 Mar 2022 17:18:22 +1100 Subject: [PATCH 174/179] powerpc/64: Add UADDR64 relocation support When ld detects unaligned relocations, it emits R_PPC64_UADDR64 relocations instead of R_PPC64_RELATIVE. Currently R_PPC64_UADDR64 relocations are detected by arch/powerpc/tools/relocs_check.sh and expected not to work. Below is a simple chunk to trigger this behaviour (this disables optimization for demonstration purposes only; it also happens with -O1/-O2 when CONFIG_PRINTK_INDEX=y, for example): \#pragma GCC push_options \#pragma GCC optimize ("O0") struct entry { const char *file; int line; } __attribute__((packed)); static const struct entry e1 = { .file = __FILE__, .line = __LINE__ }; static const struct entry e2 = { .file = __FILE__, .line = __LINE__ }; ... prom_printf("e1=%s %lx %lx\n", e1.file, (unsigned long) e1.file, mfmsr()); prom_printf("e2=%s %lx\n", e2.file, (unsigned long) e2.file); \#pragma GCC pop_options This adds support for UADDR64 on 64-bit. This reuses __dynamic_symtab from the 32-bit code, which supports more relocation types already. Because RELACOUNT includes only R_PPC64_RELATIVE, this replaces it with RELASZ, which is the size of all relocation records.
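In C terms, the relocation loop that the assembly below implements is roughly the following. This is an illustrative sketch only (made-up function and parameter names; the runtime/linktime address arithmetic is simplified to a single 'delta'), not kernel code:

#include <elf.h>
#include <string.h>

static void apply_relas(Elf64_Rela *rela, unsigned long relasz,
			unsigned long relaent, const Elf64_Sym *dynsym,
			char *stext, unsigned long delta)
{
	unsigned long i, n = relasz / relaent;	/* RELASZ / RELAENT */

	for (i = 0; i < n; i++, rela = (Elf64_Rela *)((char *)rela + relaent)) {
		char *where = stext + rela->r_offset;
		unsigned long val;

		switch (ELF64_R_TYPE(rela->r_info)) {
		case R_PPC64_RELATIVE:
			val = rela->r_addend + delta;
			break;
		case R_PPC64_UADDR64:
			/* needs .dynsym, hence __dynamic_symtab on 64-bit too */
			val = dynsym[ELF64_R_SYM(rela->r_info)].st_value +
			      rela->r_addend + delta;
			break;
		default:
			continue;
		}
		/* UADDR64 targets are unaligned by definition, so use memcpy */
		memcpy(where, &val, sizeof(val));
	}
}

Walking the table in RELAENT-sized strides and dividing RELASZ by RELAENT is what lets the loop handle both relocation types without relying on RELACOUNT.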
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220309061822.168173-1-aik@ozlabs.ru --- arch/powerpc/kernel/reloc_64.S | 67 +++++++++++++++++++++--------- arch/powerpc/kernel/vmlinux.lds.S | 2 - arch/powerpc/tools/relocs_check.sh | 7 +--- 3 files changed, 48 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S index 02d4719bf43a..232e4549defe 100644 --- a/arch/powerpc/kernel/reloc_64.S +++ b/arch/powerpc/kernel/reloc_64.S @@ -8,8 +8,10 @@ #include <asm/ppc_asm.h> RELA = 7 -RELACOUNT = 0x6ffffff9 +RELASZ = 8 +RELAENT = 9 R_PPC64_RELATIVE = 22 +R_PPC64_UADDR64 = 43 /* * r3 = desired final address of kernel @@ -25,29 +27,38 @@ _GLOBAL(relocate) add r9,r9,r12 /* r9 has runtime addr of .rela.dyn section */ ld r10,(p_st - 0b)(r12) add r10,r10,r12 /* r10 has runtime addr of _stext */ + ld r13,(p_sym - 0b)(r12) + add r13,r13,r12 /* r13 has runtime addr of .dynsym */ /* - * Scan the dynamic section for the RELA and RELACOUNT entries. + * Scan the dynamic section for the RELA, RELASZ and RELAENT entries. */ li r7,0 li r8,0 -1: ld r6,0(r11) /* get tag */ +.Ltags: + ld r6,0(r11) /* get tag */ cmpdi r6,0 - beq 4f /* end of list */ + beq .Lend_of_list /* end of list */ cmpdi r6,RELA bne 2f ld r7,8(r11) /* get RELA pointer in r7 */ - b 3f -2: addis r6,r6,(-RELACOUNT)@ha - cmpdi r6,RELACOUNT@l + b 4f +2: cmpdi r6,RELASZ bne 3f - ld r8,8(r11) /* get RELACOUNT value in r8 */ -3: addi r11,r11,16 - b 1b -4: cmpdi r7,0 /* check we have both RELA and RELACOUNT */ + ld r8,8(r11) /* get RELASZ value in r8 */ + b 4f +3: cmpdi r6,RELAENT + bne 4f + ld r12,8(r11) /* get RELAENT value in r12 */ +4: addi r11,r11,16 + b .Ltags +.Lend_of_list: + cmpdi r7,0 /* check we have RELA, RELASZ, RELAENT */ cmpdi cr1,r8,0 - beq 6f - beq cr1,6f + beq .Lout + beq cr1,.Lout + cmpdi r12,0 + beq .Lout /* * Work out linktime address of _stext and hence the @@ -62,23 +73,39 @@ _GLOBAL(relocate) /* * Run through the list of relocations and process the - * R_PPC64_RELATIVE ones. + * R_PPC64_RELATIVE and R_PPC64_UADDR64 ones. */ + divd r8,r8,r12 /* RELASZ / RELAENT */ mtctr r8 -5: ld r0,8(9) /* ELF64_R_TYPE(reloc->r_info) */ +.Lrels: ld r0,8(r9) /* ELF64_R_TYPE(reloc->r_info) */ cmpdi r0,R_PPC64_RELATIVE - bne 6f + bne .Luaddr64 ld r6,0(r9) /* reloc->r_offset */ ld r0,16(r9) /* reloc->r_addend */ + b .Lstore +.Luaddr64: + srdi r14,r0,32 /* ELF64_R_SYM(reloc->r_info) */ + clrldi r0,r0,32 + cmpdi r0,R_PPC64_UADDR64 + bne .Lnext + ld r6,0(r9) + ld r0,16(r9) + mulli r14,r14,24 /* 24 == sizeof(elf64_sym) */ + add r14,r14,r13 /* elf64_sym[ELF64_R_SYM] */ + ld r14,8(r14) + add r0,r0,r14 +.Lstore: add r0,r0,r3 stdx r0,r7,r6 - addi r9,r9,24 - bdnz 5b - -6: blr +.Lnext: + add r9,r9,r12 + bdnz .Lrels +.Lout: + blr .balign 8 p_dyn: .8byte __dynamic_start - 0b p_rela: .8byte __rela_dyn_start - 0b +p_sym: .8byte __dynamic_symtab - 0b p_st: .8byte _stext - 0b diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 2bcca818136a..fe22d940412f 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -281,9 +281,7 @@ SECTIONS . 
= ALIGN(8); .dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET) { -#ifdef CONFIG_PPC32 __dynamic_symtab = .; -#endif *(.dynsym) } .dynstr : AT(ADDR(.dynstr) - LOAD_OFFSET) { *(.dynstr) } diff --git a/arch/powerpc/tools/relocs_check.sh b/arch/powerpc/tools/relocs_check.sh index 014e00e74d2b..63792af00417 100755 --- a/arch/powerpc/tools/relocs_check.sh +++ b/arch/powerpc/tools/relocs_check.sh @@ -39,6 +39,7 @@ $objdump -R "$vmlinux" | # R_PPC_NONE grep -F -w -v 'R_PPC64_RELATIVE R_PPC64_NONE +R_PPC64_UADDR64 R_PPC_ADDR16_LO R_PPC_ADDR16_HI R_PPC_ADDR16_HA @@ -54,9 +55,3 @@ fi num_bad=$(echo "$bad_relocs" | wc -l) echo "WARNING: $num_bad bad relocations" echo "$bad_relocs" - -# If we see this type of relocation it's an idication that -# we /may/ be using an old version of binutils. -if echo "$bad_relocs" | grep -q -F -w R_PPC64_UADDR64; then - echo "WARNING: You need at least binutils >= 2.19 to build a CONFIG_RELOCATABLE kernel" -fi From d64e3eab75a8e1e900c0fda2410a2df8893d8f85 Mon Sep 17 00:00:00 2001 From: Randy Dunlap <rdunlap@infradead.org> Date: Sat, 12 Mar 2022 22:59:36 -0800 Subject: [PATCH 175/179] powerpc/xive: fix return value of __setup handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit __setup() handlers should return 1 to obsolete_checksetup() in init/main.c to indicate that the boot option has been handled. A return of 0 causes the boot option/value to be listed as an Unknown kernel parameter and added to init's (limited) argument or environment strings. Also, error return codes don't mean anything to obsolete_checksetup() -- only non-zero (usually 1) or zero. So return 1 from xive_off() and xive_store_eoi_cmdline(). Fixes: 243e25112d06 ("powerpc/xive: Native exploitation of the XIVE interrupt controller") Fixes: c21ee04f11ae ("powerpc/xive: Add a kernel parameter for StoreEOI") [lore.kernel.org/r/64644a2f-4a20-bab3-1e15-3b2cdd0defe3@omprussia.ru] Reported-by: Igor Zhbanov <i.zhbanov@omprussia.ru> Signed-off-by: Randy Dunlap <rdunlap@infradead.org> Reviewed-by: Cédric Le Goater <clg@kaod.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220313065936.4363-1-rdunlap@infradead.org --- arch/powerpc/sysdev/xive/common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 32863b4daf72..bb5bda6b2357 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1708,20 +1708,20 @@ __be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift) static int __init xive_off(char *arg) { xive_cmdline_disabled = true; - return 0; + return 1; } __setup("xive=off", xive_off); static int __init xive_store_eoi_cmdline(char *arg) { if (!arg) - return -EINVAL; + return 1; if (strncmp(arg, "off", 3) == 0) { pr_info("StoreEOI disabled on kernel command line\n"); xive_store_eoi = false; } - return 0; + return 1; } __setup("xive.store-eoi=", xive_store_eoi_cmdline); From 9d71165d3934e607070c4e48458c0cf161b1baea Mon Sep 17 00:00:00 2001 From: Nicholas Piggin <npiggin@gmail.com> Date: Fri, 11 Mar 2022 12:47:33 +1000 Subject: [PATCH 176/179] powerpc/tm: Fix more userspace r13 corruption Commit cf13435b730a ("powerpc/tm: Fix userspace r13 corruption") fixes a problem in treclaim where a SLB miss can occur on the thread_struct->ckpt_regs while SCRATCH0 is live with the saved user r13 value, clobbering it with the kernel r13 and ultimately resulting in kernel r13 being stored in ckpt_regs.
There is an equivalent problem in trechkpt where the user r13 value is loaded into r13 from ckpt_regs to be recheckpointed, but a SLB miss could occur on ckpt_regs accesses after that, which will result in r13 being clobbered with a kernel value and that will get recheckpointed and then restored to user registers. The same memory page is accessed right before this critical window where a SLB miss could cause corruption, so hitting the bug requires the SLB entry be removed within a small window of instructions, which is possible if a SLB related MCE hits there. PAPR also permits the hypervisor to discard this SLB entry (because slb_shadow->persistent is only set to SLB_NUM_BOLTED) although it's not known whether any implementations would do this (KVM does not). So this is an extremely unlikely bug, only found by inspection. Fix this by also storing user r13 in a temporary location on the kernel stack and by not changing the r13 register from kernel r13 until the RI=0 critical section that does not fault. The SCRATCH0 change is not strictly part of the fix; it's only used in the RI=0 section, so it does not have the same problem as the previous SCRATCH0 bug. Fixes: 98ae22e15b43 ("powerpc: Add helper functions for transactional memory context switching") Cc: stable@vger.kernel.org # v3.9+ Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Acked-by: Michael Neuling <mikey@neuling.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220311024733.48926-1-npiggin@gmail.com --- arch/powerpc/kernel/tm.S | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 3beecc32940b..5a0f023a26e9 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -443,7 +443,8 @@ restore_gprs: REST_GPR(0, r7) /* GPR0 */ REST_GPRS(2, 4, r7) /* GPR2-4 */ - REST_GPRS(8, 31, r7) /* GPR8-31 */ + REST_GPRS(8, 12, r7) /* GPR8-12 */ + REST_GPRS(14, 31, r7) /* GPR14-31 */ /* Load up PPR and DSCR here so we don't run with user values for long */ mtspr SPRN_DSCR, r5 @@ -479,18 +480,24 @@ restore_gprs: REST_GPR(6, r7) /* - * Store r1 and r5 on the stack so that we can access them after we - * clear MSR RI. + * Store user r1 and r5 and r13 on the stack (in the unused save + * areas / compiler reserved areas), so that we can access them after + * we clear MSR RI. */ REST_GPR(5, r7) std r5, -8(r1) - ld r5, GPR1(r7) + ld r5, GPR13(r7) std r5, -16(r1) + ld r5, GPR1(r7) + std r5, -24(r1) REST_GPR(7, r7) - /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */ + /* Stash the stack pointer away for use after recheckpoint */ + std r1, PACAR1(r13) + + /* Clear MSR RI since we are about to clobber r13. EE is already off */ li r5, 0 mtmsrd r5, 1 @@ -501,9 +508,9 @@ restore_gprs: * until we turn MSR RI back on. */ - SET_SCRATCH0(r1) ld r5, -8(r1) - ld r1, -16(r1) + ld r13, -16(r1) + ld r1, -24(r1) /* Commit register state as checkpointed state: */ TRECHKPT @@ -519,9 +526,9 @@ restore_gprs: */ GET_PACA(r13) - GET_SCRATCH0(r1) + ld r1, PACAR1(r13) - /* R1 is restored, so we are recoverable again. EE is still off */ + /* R13, R1 is restored, so we are recoverable again.
EE is still off */ li r4, MSR_RI mtmsrd r4, 1 From cf74ff52e352112be78c4c4c3637a37ec36a6608 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin <npiggin@gmail.com> Date: Tue, 25 Jan 2022 00:39:29 +1000 Subject: [PATCH 177/179] powerpc/time: Fix KVM host re-arming a timer beyond decrementer range If the next host timer is beyond decrementer range, timer_rearm_host_dec will leave the decrementer not programmed. This will not cause a problem for the host, since the decrementer will simply be set correctly when the decrementer interrupt hits, but it seems safer not to leave the timing of the next host decrementer interrupt able to be influenced by a guest. This code is only used in the P9 KVM paths so it's unlikely to be hit practically unless the large decrementer is force-disabled in the host. Fixes: 25aa145856cd ("powerpc/time: add API for KVM to re-arm the host timer/decrementer") Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220124143930.3923442-2-npiggin@gmail.com --- arch/powerpc/kernel/time.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 958e2929776f..0bb80f0bd0a6 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -582,8 +582,9 @@ void timer_rearm_host_dec(u64 now) local_paca->irq_happened |= PACA_IRQ_DEC; } else { now = *next_tb - now; - if (now <= decrementer_max) - set_dec_or_work(now); + if (now > decrementer_max) + now = decrementer_max; + set_dec_or_work(now); } } EXPORT_SYMBOL_GPL(timer_rearm_host_dec); From 35de589cb8793573ed56a915af9cb4b5f15ad7d7 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin <npiggin@gmail.com> Date: Tue, 25 Jan 2022 00:39:30 +1000 Subject: [PATCH 178/179] powerpc/time: improve decrementer clockevent processing The stop/shutdown op should not use decrementer_set_next_event because that sets decrementers_next_tb to now + decrementer_max, which means a decrementer interrupt that occurs after that time will call the clockevent event handler unexpectedly. Set next_tb to ~0 here to prevent any clock event call. Init all clockevents to stopped. Then the decrementer clockevent device always has event_handler set and applicable, because we know the clock event device was not stopped. So make this call unconditional to show that it is always called. next_tb need not be set to ~0 before the event handler is called, because the handler will stop the clockevent device if there is no other timer. Finally, the timer broadcast interrupt should not modify next_tb because it is not involved with the local decrementer clockevent on this CPU. This doesn't fix a known bug, it just tidies the code. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220124143930.3923442-3-npiggin@gmail.com --- arch/powerpc/kernel/time.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 0bb80f0bd0a6..f5cbfe5efd25 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -107,7 +107,12 @@ struct clock_event_device decrementer_clockevent = { }; EXPORT_SYMBOL(decrementer_clockevent); -DEFINE_PER_CPU(u64, decrementers_next_tb); +/* + * This always puts next_tb beyond now, so the clock event will never fire + * with the usual comparison, no need for a separate test for stopped.
+ */ +#define DEC_CLOCKEVENT_STOPPED ~0ULL +DEFINE_PER_CPU(u64, decrementers_next_tb) = DEC_CLOCKEVENT_STOPPED; EXPORT_SYMBOL_GPL(decrementers_next_tb); static DEFINE_PER_CPU(struct clock_event_device, decrementers); @@ -645,9 +650,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) now = get_tb(); if (now >= *next_tb) { - *next_tb = ~(u64)0; - if (evt->event_handler) - evt->event_handler(evt); + evt->event_handler(evt); __this_cpu_inc(irq_stat.timer_irqs_event); } else { now = *next_tb - now; @@ -666,9 +669,6 @@ EXPORT_SYMBOL(timer_interrupt); #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST void timer_broadcast_interrupt(void) { - u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); - - *next_tb = ~(u64)0; tick_receive_broadcast(); __this_cpu_inc(irq_stat.broadcast_irqs_event); } @@ -894,7 +894,9 @@ static int decrementer_set_next_event(unsigned long evt, static int decrementer_shutdown(struct clock_event_device *dev) { - decrementer_set_next_event(decrementer_max, dev); + __this_cpu_write(decrementers_next_tb, DEC_CLOCKEVENT_STOPPED); + set_dec_or_work(decrementer_max); + return 0; } From fe2640bd7a62f1f7c3f55fbda31084085075bc30 Mon Sep 17 00:00:00 2001 From: Michael Ellerman <mpe@ellerman.id.au> Date: Fri, 18 Mar 2022 14:42:19 +1100 Subject: [PATCH 179/179] powerpc/pseries: Fix use after free in remove_phb_dynamic() In remove_phb_dynamic() we use &phb->io_resource, after we've called device_unregister(&host_bridge->dev). But the unregister may have freed phb, because pcibios_free_controller_deferred() is the release function for the host_bridge. If there are no outstanding references when we call device_unregister() then phb will be freed out from under us. This has gone mainly unnoticed, but with slub_debug and page_poison enabled it can lead to a crash: PID: 7574 TASK: c0000000d492cb80 CPU: 13 COMMAND: "drmgr" #0 [c0000000e4f075a0] crash_kexec at c00000000027d7dc #1 [c0000000e4f075d0] oops_end at c000000000029608 #2 [c0000000e4f07650] __bad_page_fault at c0000000000904b4 #3 [c0000000e4f076c0] do_bad_slb_fault at c00000000009a5a8 #4 [c0000000e4f076f0] data_access_slb_common_virt at c000000000008b30 Data SLB Access [380] exception frame: R0: c000000000167250 R1: c0000000e4f07a00 R2: c000000002a46100 R3: c000000002b39ce8 R4: 00000000000000c0 R5: 00000000000000a9 R6: 3894674d000000c0 R7: 0000000000000000 R8: 00000000000000ff R9: 0000000000000100 R10: 6b6b6b6b6b6b6b6b R11: 0000000000008000 R12: c00000000023da80 R13: c0000009ffd38b00 R14: 0000000000000000 R15: 000000011c87f0f0 R16: 0000000000000006 R17: 0000000000000003 R18: 0000000000000002 R19: 0000000000000004 R20: 0000000000000005 R21: 000000011c87ede8 R22: 000000011c87c5a8 R23: 000000011c87d3a0 R24: 0000000000000000 R25: 0000000000000001 R26: c0000000e4f07cc8 R27: c00000004d1cc400 R28: c0080000031d00e8 R29: c00000004d23d800 R30: c00000004d1d2400 R31: c00000004d1d2540 NIP: c000000000167258 MSR: 8000000000009033 OR3: c000000000e9f474 CTR: 0000000000000000 LR: c000000000167250 XER: 0000000020040003 CCR: 0000000024088420 MQ: 0000000000000000 DAR: 6b6b6b6b6b6b6ba3 DSISR: c0000000e4f07920 Syscall Result: fffffffffffffff2 [NIP : release_resource+56] [LR : release_resource+48] #5 [c0000000e4f07a00] release_resource at c000000000167258 (unreliable) #6 [c0000000e4f07a30] remove_phb_dynamic at c000000000105648 #7 [c0000000e4f07ab0] dlpar_remove_slot at c0080000031a09e8 [rpadlpar_io] #8 [c0000000e4f07b50] remove_slot_store at c0080000031a0b9c [rpadlpar_io] #9 [c0000000e4f07be0] kobj_attr_store at c000000000817d8c #10 [c0000000e4f07c00] sysfs_kf_write 
at c00000000063e504 #11 [c0000000e4f07c20] kernfs_fop_write_iter at c00000000063d868 #12 [c0000000e4f07c70] new_sync_write at c00000000054339c #13 [c0000000e4f07d10] vfs_write at c000000000546624 #14 [c0000000e4f07d60] ksys_write at c0000000005469f4 #15 [c0000000e4f07db0] system_call_exception at c000000000030840 #16 [c0000000e4f07e10] system_call_vectored_common at c00000000000c168 To avoid it, we can take a reference to host_bridge->dev until we're done using phb. Then, when we drop the reference, phb will be freed. Fixes: 2dd9c11b9d4d ("powerpc/pseries: use pci_host_bridge.release_fn() to kfree(phb)") Reported-by: David Dai <zdai@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Tested-by: Sachin Sant <sachinp@linux.ibm.com> Link: https://lore.kernel.org/r/20220318034219.1188008-1-mpe@ellerman.id.au --- arch/powerpc/platforms/pseries/pci_dlpar.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 90c9d3531694..4ba824568119 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -78,6 +78,9 @@ int remove_phb_dynamic(struct pci_controller *phb) pseries_msi_free_domains(phb); + /* Keep a reference so phb isn't freed yet */ + get_device(&host_bridge->dev); + /* Remove the PCI bus and unregister the bridge device from sysfs */ phb->bus = NULL; pci_remove_bus(b); @@ -101,6 +104,7 @@ int remove_phb_dynamic(struct pci_controller *phb) * the pcibios_free_controller_deferred() callback; * see pseries_root_bridge_prepare(). */ + put_device(&host_bridge->dev); return 0; }
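As a closing note, the shape of this fix is a general pattern for teardown paths that must touch a structure owned by a device's release function. A minimal sketch, with illustrative names only (struct foo stands in for the pci_controller/pci_host_bridge pair), not the actual pseries code:

#include <linux/device.h>
#include <linux/ioport.h>

/* Hypothetical object whose device release function frees the containing foo */
struct foo {
	struct device dev;
	struct resource io_resource;
};

static void remove_foo(struct foo *foo)
{
	get_device(&foo->dev);			/* pin: foo cannot be freed yet */
	device_unregister(&foo->dev);		/* may drop the last other reference */
	release_resource(&foo->io_resource);	/* still safe to touch foo */
	put_device(&foo->dev);			/* release function may now run */
}

The pin-before-unregister, release-after-last-use ordering is what remove_phb_dynamic() now follows with get_device()/put_device() around its final uses of phb.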