From edce10ee21f3916f5da34e55bbc03103c604ba70 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Wed, 8 Dec 2021 14:07:40 +0100 Subject: [PATCH 1/7] s390/kexec_file: print some more error messages Be kind and give some more information on what went wrong. Signed-off-by: Philipp Rudo Link: https://lore.kernel.org/r/20211208130741.5821-2-prudo@redhat.com Signed-off-by: Heiko Carstens --- arch/s390/kernel/machine_kexec_file.c | 29 +++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index 9975ad200d74..a8bfa7c8cbba 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -7,6 +7,8 @@ * Author(s): Philipp Rudo */ +#define pr_fmt(fmt) "kexec: " fmt + #include #include #include @@ -290,9 +292,16 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, const Elf_Shdr *relsec, const Elf_Shdr *symtab) { + const char *strtab, *name, *shstrtab; + const Elf_Shdr *sechdrs; Elf_Rela *relas; int i, r_type; + /* String & section header string table */ + sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff; + strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset; + shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset; + relas = (void *)pi->ehdr + relsec->sh_offset; for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) { @@ -304,15 +313,27 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, sym = (void *)pi->ehdr + symtab->sh_offset; sym += ELF64_R_SYM(relas[i].r_info); - if (sym->st_shndx == SHN_UNDEF) - return -ENOEXEC; + if (sym->st_name) + name = strtab + sym->st_name; + else + name = shstrtab + sechdrs[sym->st_shndx].sh_name; - if (sym->st_shndx == SHN_COMMON) + if (sym->st_shndx == SHN_UNDEF) { + pr_err("Undefined symbol: %s\n", name); return -ENOEXEC; + } + + if (sym->st_shndx == SHN_COMMON) { + pr_err("symbol '%s' in common section\n", name); + return -ENOEXEC; + } if (sym->st_shndx >= pi->ehdr->e_shnum && - sym->st_shndx != SHN_ABS) + sym->st_shndx != SHN_ABS) { + pr_err("Invalid section %d for symbol %s\n", + sym->st_shndx, name); return -ENOEXEC; + } loc = pi->purgatory_buf; loc += section->sh_offset; From 41967a37b8eedfee15b81406a9f3015be90d3980 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Wed, 8 Dec 2021 14:07:41 +0100 Subject: [PATCH 2/7] s390/kexec_file: fix error handling when applying relocations arch_kexec_apply_relocations_add currently ignores all errors returned by arch_kexec_do_relocs. This means that every unknown relocation is silently skipped causing unpredictable behavior while the relocated code runs. Fix this by checking for errors and fail kexec_file_load if an unknown relocation type is encountered. The problem was found after gcc changed its behavior and used R_390_PLT32DBL relocations for brasl instruction and relied on ld to resolve the relocations in the final link in case direct calls are possible. As the purgatory code is only linked partially (option -r) ld didn't resolve the relocations leaving them for arch_kexec_do_relocs. But arch_kexec_do_relocs doesn't know how to handle R_390_PLT32DBL relocations so they were silently skipped. This ultimately caused an endless loop in the purgatory as the brasl instructions kept branching to itself. Fixes: 71406883fd35 ("s390/kexec_file: Add kexec_file_load system call") Reported-by: Tao Liu Signed-off-by: Philipp Rudo Link: https://lore.kernel.org/r/20211208130741.5821-3-prudo@redhat.com Signed-off-by: Heiko Carstens --- arch/s390/kernel/machine_kexec_file.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index a8bfa7c8cbba..876cdd3c994e 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -296,6 +296,7 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, const Elf_Shdr *sechdrs; Elf_Rela *relas; int i, r_type; + int ret; /* String & section header string table */ sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff; @@ -347,7 +348,11 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, addr = section->sh_addr + relas[i].r_offset; r_type = ELF64_R_TYPE(relas[i].r_info); - arch_kexec_do_relocs(r_type, loc, val, addr); + ret = arch_kexec_do_relocs(r_type, loc, val, addr); + if (ret) { + pr_err("Unknown rela relocation: %d\n", r_type); + return -ENOEXEC; + } } return 0; } From ac8fc6af1ab62b2e5d57ddadc8bd4c9433c49a72 Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Wed, 8 Dec 2021 16:15:03 +0100 Subject: [PATCH 3/7] s390/ftrace: remove preempt_disable()/preempt_enable() pair It looks like commit ce5e48036c9e76a2 ("ftrace: disable preemption when recursion locked") missed a spot in kprobe_ftrace_handler() in arch/s390/kernel/ftrace.c. Remove the superfluous preempt_disable/enable_notrace() there too. Fixes: ce5e48036c9e76a2 ("ftrace: disable preemption when recursion locked") Signed-off-by: Jerome Marchand Link: https://lore.kernel.org/r/20211208151503.1510381-1-jmarchan@redhat.com Signed-off-by: Heiko Carstens --- arch/s390/kernel/ftrace.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 5510c7d10ddc..21d62d8b6b9a 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -290,7 +290,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, return; regs = ftrace_get_regs(fregs); - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ -318,7 +317,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, } __this_cpu_write(current_kprobe, NULL); out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); From abf0e8e4ef25478a4390115e6a953d589d1f9ffd Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Thu, 9 Dec 2021 08:38:17 +0100 Subject: [PATCH 4/7] s390/kexec: handle R_390_PLT32DBL rela in arch_kexec_apply_relocations_add() Starting with gcc 11.3, the C compiler will generate PLT-relative function calls even if they are local and do not require it. Later on during linking, the linker will replace all PLT-relative calls to local functions with PC-relative ones. Unfortunately, the purgatory code of kexec/kdump is not being linked as a regular executable or shared library would have been, and therefore, all PLT-relative addresses remain in the generated purgatory object code unresolved. This leads to the situation where the purgatory code is being executed during kdump with all PLT-relative addresses unresolved. And this results in endless loops within the purgatory code. Furthermore, the clang C compiler has always behaved like described above and this commit should fix kdump for kernels built with the latter. Because the purgatory code is no regular executable or shared library, contains only calls to local functions and has no PLT, all R_390_PLT32DBL relocation entries can be resolved just like a R_390_PC32DBL one. * https://refspecs.linuxfoundation.org/ELF/zSeries/lzsabi0_zSeries/x1633.html#AEN1699 Relocation entries of purgatory code generated with gcc 11.3 ------------------------------------------------------------ $ readelf -r linux/arch/s390/purgatory/purgatory.o Relocation section '.rela.text' at offset 0x370 contains 5 entries: Offset Info Type Sym. Value Sym. Name + Addend 00000000005c 000c00000013 R_390_PC32DBL 0000000000000000 purgatory_sha_regions + 2 00000000007a 000d00000014 R_390_PLT32DBL 0000000000000000 sha256_update + 2 00000000008c 000e00000014 R_390_PLT32DBL 0000000000000000 sha256_final + 2 000000000092 000800000013 R_390_PC32DBL 0000000000000000 .LC0 + 2 0000000000a0 000f00000014 R_390_PLT32DBL 0000000000000000 memcmp + 2 Relocation entries of purgatory code generated with gcc 11.2 ------------------------------------------------------------ $ readelf -r linux/arch/s390/purgatory/purgatory.o Relocation section '.rela.text' at offset 0x368 contains 5 entries: Offset Info Type Sym. Value Sym. Name + Addend 00000000005c 000c00000013 R_390_PC32DBL 0000000000000000 purgatory_sha_regions + 2 00000000007a 000d00000013 R_390_PC32DBL 0000000000000000 sha256_update + 2 00000000008c 000e00000013 R_390_PC32DBL 0000000000000000 sha256_final + 2 000000000092 000800000013 R_390_PC32DBL 0000000000000000 .LC0 + 2 0000000000a0 000f00000013 R_390_PC32DBL 0000000000000000 memcmp + 2 Signed-off-by: Alexander Egorenkov Reported-by: Tao Liu Suggested-by: Philipp Rudo Reviewed-by: Philipp Rudo Cc: Link: https://lore.kernel.org/r/20211209073817.82196-1-egorenar@linux.ibm.com Signed-off-by: Heiko Carstens --- arch/s390/kernel/machine_kexec_file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index 876cdd3c994e..8f43575a4dd3 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -348,6 +348,10 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, addr = section->sh_addr + relas[i].r_offset; r_type = ELF64_R_TYPE(relas[i].r_info); + + if (r_type == R_390_PLT32DBL) + r_type = R_390_PC32DBL; + ret = arch_kexec_do_relocs(r_type, loc, val, addr); if (ret) { pr_err("Unknown rela relocation: %d\n", r_type); From 5dcf0c3084eb098bbb702f2f5ee55666047997d4 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 9 Dec 2021 15:21:06 +0100 Subject: [PATCH 5/7] s390: enable switchdev support in defconfig The HiperSockets Converged Interface (HSCI) introduced with commit 4e20e73e631a ("s390/qeth: Switchdev event handler") requires CONFIG_SWITCHDEV=y to be usable. Similarly when using Linux controlled SR-IOV capable PF devices with the mlx5_core driver CONFIG_SWITCHDEV=y as well as CONFIG_MLX5_ESWITCH=y are necessary to actually get link on the created VFs. So let's add these to the defconfig to make both types of devices usable. Note also that these options are already enabled in most current distribution kernels. Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 2 ++ arch/s390/configs/defconfig | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index b626bc6e0eaf..e45cc27716de 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -117,6 +117,7 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m +CONFIG_NET_SWITCHDEV=y CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y @@ -511,6 +512,7 @@ CONFIG_NLMON=m CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m CONFIG_MLX5_CORE_EN=y +CONFIG_MLX5_ESWITCH=y # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI is not set diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 0056cab27372..1c750bfca2d8 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -109,6 +109,7 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m +CONFIG_NET_SWITCHDEV=y CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y @@ -502,6 +503,7 @@ CONFIG_NLMON=m CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m CONFIG_MLX5_CORE_EN=y +CONFIG_MLX5_ESWITCH=y # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI is not set From c9b12b59e2ea4c3c7cedec7efb071b649652f3a9 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 6 Dec 2021 11:50:16 +0100 Subject: [PATCH 6/7] s390/entry: fix duplicate tracking of irq nesting level In the current code, when exiting from idle, rcu_irq_enter() is called twice during irq entry: irq_entry_enter()-> rcu_irq_enter() irq_enter() -> rcu_irq_enter() This may lead to wrong results from rcu_is_cpu_rrupt_from_idle() because of a wrong dynticks nmi nesting count. Fix this by only calling irq_enter_rcu(). Cc: # 5.12+ Reported-by: Mark Rutland Fixes: 56e62a737028 ("s390: convert to generic entry") Signed-off-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/kernel/irq.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 0df83ecaa2e0..cb7099682340 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -138,7 +138,7 @@ void noinstr do_io_irq(struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); int from_idle; - irq_enter(); + irq_enter_rcu(); if (user_mode(regs)) { update_timer_sys(); @@ -158,7 +158,8 @@ void noinstr do_io_irq(struct pt_regs *regs) do_irq_async(regs, IO_INTERRUPT); } while (MACHINE_IS_LPAR && irq_pending(regs)); - irq_exit(); + irq_exit_rcu(); + set_irq_regs(old_regs); irqentry_exit(regs, state); @@ -172,7 +173,7 @@ void noinstr do_ext_irq(struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); int from_idle; - irq_enter(); + irq_enter_rcu(); if (user_mode(regs)) { update_timer_sys(); @@ -190,7 +191,7 @@ void noinstr do_ext_irq(struct pt_regs *regs) do_irq_async(regs, EXT_INTERRUPT); - irq_exit(); + irq_exit_rcu(); set_irq_regs(old_regs); irqentry_exit(regs, state); From 85bf17b28f97ca2749968d8786dc423db320d9c2 Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Fri, 10 Dec 2021 10:38:27 +0100 Subject: [PATCH 7/7] recordmcount.pl: look for jgnop instruction as well as bcrl on s390 On s390, recordmcount.pl is looking for "bcrl 0," instructions in the objdump -d outpout. However since binutils 2.37, objdump -d display "jgnop " for the same instruction. Update the mcount_regex so that it accepts both. Signed-off-by: Jerome Marchand Reviewed-by: Miroslav Benes Acked-by: Steven Rostedt (VMware) Cc: Link: https://lore.kernel.org/r/20211210093827.1623286-1-jmarchan@redhat.com Signed-off-by: Heiko Carstens --- scripts/recordmcount.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 7d631aaa0ae1..52a000b057a5 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -219,7 +219,7 @@ if ($arch eq "x86_64") { } elsif ($arch eq "s390" && $bits == 64) { if ($cc =~ /-DCC_USING_HOTPATCH/) { - $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*c0 04 00 00 00 00\\s*brcl\\s*0,[0-9a-f]+ <([^\+]*)>\$"; + $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*c0 04 00 00 00 00\\s*(bcrl\\s*0,|jgnop\\s*)[0-9a-f]+ <([^\+]*)>\$"; $mcount_adjust = 0; } $alignment = 8;