From e8fbc0d9cab6c1ee6403f42c0991b0c1d5dbc092 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 9 Oct 2024 18:04:40 +0200 Subject: [PATCH 1/6] x86/pvh: Call C code via the kernel virtual mapping Calling C code via a different mapping than it was linked at is problematic, because the compiler assumes that RIP-relative and absolute symbol references are interchangeable. GCC in particular may use RIP-relative per-CPU variable references even when not using -fpic. So call xen_prepare_pvh() via its kernel virtual mapping on x86_64, so that those RIP-relative references produce the correct values. This matches the pre-existing behavior for i386, which also invokes xen_prepare_pvh() via the kernel virtual mapping before invoking startup_32 with paging disabled again. Fixes: 7243b93345f7 ("xen/pvh: Bootstrap PVH guest") Tested-by: Jason Andryuk Reviewed-by: Jason Andryuk Signed-off-by: Ard Biesheuvel Message-ID: <20241009160438.3884381-8-ardb+git@google.com> Signed-off-by: Juergen Gross --- arch/x86/platform/pvh/head.S | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S index 64fca49cd88f..ce4fd8d33da4 100644 --- a/arch/x86/platform/pvh/head.S +++ b/arch/x86/platform/pvh/head.S @@ -172,7 +172,14 @@ SYM_CODE_START_LOCAL(pvh_start_xen) movq %rbp, %rbx subq $_pa(pvh_start_xen), %rbx movq %rbx, phys_base(%rip) - call xen_prepare_pvh + + /* Call xen_prepare_pvh() via the kernel virtual mapping */ + leaq xen_prepare_pvh(%rip), %rax + subq phys_base(%rip), %rax + addq $__START_KERNEL_map, %rax + ANNOTATE_RETPOLINE_SAFE + call *%rax + /* * Clear phys_base. __startup_64 will *add* to its value, * so reset to 0. From bb12f48cd150666b96858443e0fc26e92bb7e498 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 9 Oct 2024 18:04:41 +0200 Subject: [PATCH 2/6] x86/pvh: Use correct size value in GDT descriptor The limit field in a GDT descriptor is an inclusive bound, and therefore one less than the size of the covered range. Reviewed-by: Jason Andryuk Tested-by: Jason Andryuk Signed-off-by: Ard Biesheuvel Message-ID: <20241009160438.3884381-9-ardb+git@google.com> Signed-off-by: Juergen Gross --- arch/x86/platform/pvh/head.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S index ce4fd8d33da4..5a196fb3ebd8 100644 --- a/arch/x86/platform/pvh/head.S +++ b/arch/x86/platform/pvh/head.S @@ -224,7 +224,7 @@ SYM_CODE_END(pvh_start_xen) .section ".init.data","aw" .balign 8 SYM_DATA_START_LOCAL(gdt) - .word gdt_end - gdt_start + .word gdt_end - gdt_start - 1 .long _pa(gdt_start) /* x86-64 will overwrite if relocated. */ .word 0 SYM_DATA_END(gdt) From d5835423046c504d2b3c32cb9284d4465a7f28b1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 9 Oct 2024 18:04:42 +0200 Subject: [PATCH 3/6] x86/pvh: Omit needless clearing of phys_base Since commit d9ec1158056b ("x86/boot/64: Use RIP_REL_REF() to assign 'phys_base'") phys_base is assigned directly rather than added to, so it is no longer necessary to clear it after use. Reviewed-by: Jason Andryuk Tested-by: Jason Andryuk Signed-off-by: Ard Biesheuvel Message-ID: <20241009160438.3884381-10-ardb+git@google.com> Signed-off-by: Juergen Gross --- arch/x86/platform/pvh/head.S | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S index 5a196fb3ebd8..7ca51a4da217 100644 --- a/arch/x86/platform/pvh/head.S +++ b/arch/x86/platform/pvh/head.S @@ -180,13 +180,6 @@ SYM_CODE_START_LOCAL(pvh_start_xen) ANNOTATE_RETPOLINE_SAFE call *%rax - /* - * Clear phys_base. __startup_64 will *add* to its value, - * so reset to 0. - */ - xor %rbx, %rbx - movq %rbx, phys_base(%rip) - /* startup_64 expects boot_params in %rsi. */ lea pvh_bootparams(%rip), %rsi jmp startup_64 From 223abe96ac0d227b22d48ab447dd9384b7a6c9fa Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 9 Oct 2024 18:04:43 +0200 Subject: [PATCH 4/6] x86/xen: Avoid relocatable quantities in Xen ELF notes Xen puts virtual and physical addresses into ELF notes that are treated by the linker as relocatable by default. Doing so is not only pointless, given that the ELF notes are only intended for consumption by Xen before the kernel boots. It is also a KASLR leak, given that the kernel's ELF notes are exposed via the world readable /sys/kernel/notes. So emit these constants in a way that prevents the linker from marking them as relocatable. This involves place-relative relocations (which subtract their own virtual address from the symbol value) and linker provided absolute symbols that add the address of the place to the desired value. Tested-by: Jason Andryuk Signed-off-by: Ard Biesheuvel Reviewed-by: Jason Andryuk Message-ID: <20241009160438.3884381-11-ardb+git@google.com> Signed-off-by: Juergen Gross --- arch/x86/kernel/vmlinux.lds.S | 19 +++++++++++++++++++ arch/x86/platform/pvh/head.S | 6 +++--- arch/x86/tools/relocs.c | 1 + arch/x86/xen/xen-head.S | 6 ++++-- 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index b8c5741d2fb4..60e5ac9b64e6 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -528,3 +528,22 @@ INIT_PER_CPU(irq_stack_backing_store); #endif #endif /* CONFIG_X86_64 */ + +/* + * The symbols below are referenced using relative relocations in the + * respective ELF notes. This produces build time constants that the + * linker will never mark as relocatable. (Using just ABSOLUTE() is not + * sufficient for that). + */ +#ifdef CONFIG_XEN +#ifdef CONFIG_XEN_PV +xen_elfnote_entry_value = + ABSOLUTE(xen_elfnote_entry) + ABSOLUTE(startup_xen); +#endif +xen_elfnote_hypercall_page_value = + ABSOLUTE(xen_elfnote_hypercall_page) + ABSOLUTE(hypercall_page); +#endif +#ifdef CONFIG_PVH +xen_elfnote_phys32_entry_value = + ABSOLUTE(xen_elfnote_phys32_entry) + ABSOLUTE(pvh_start_xen - LOAD_OFFSET); +#endif diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S index 7ca51a4da217..e6f39d77f0b4 100644 --- a/arch/x86/platform/pvh/head.S +++ b/arch/x86/platform/pvh/head.S @@ -52,7 +52,7 @@ #define PVH_CS_SEL (PVH_GDT_ENTRY_CS * 8) #define PVH_DS_SEL (PVH_GDT_ENTRY_DS * 8) -SYM_CODE_START_LOCAL(pvh_start_xen) +SYM_CODE_START(pvh_start_xen) UNWIND_HINT_END_OF_STACK cld @@ -300,5 +300,5 @@ SYM_DATA_END(pvh_level2_kernel_pgt) .long KERNEL_IMAGE_SIZE - 1) #endif - ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, - _ASM_PTR (pvh_start_xen - __START_KERNEL_map)) + ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .global xen_elfnote_phys32_entry; + xen_elfnote_phys32_entry: _ASM_PTR xen_elfnote_phys32_entry_value - .) diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index c101bed61940..3ede19ca8432 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -56,6 +56,7 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = { [S_ABS] = "^(xen_irq_disable_direct_reloc$|" "xen_save_fl_direct_reloc$|" + "xen_elfnote_.+_offset$|" "VDSO|" "__kcfi_typeid_|" "__crc_)", diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 758bcd47b72d..7f6c69dbb816 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -94,7 +94,8 @@ SYM_CODE_END(xen_cpu_bringup_again) ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __START_KERNEL_map) /* Map the p2m table to a 512GB-aligned user address. */ ELFNOTE(Xen, XEN_ELFNOTE_INIT_P2M, .quad (PUD_SIZE * PTRS_PER_PUD)) - ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) + ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .globl xen_elfnote_entry; + xen_elfnote_entry: _ASM_PTR xen_elfnote_entry_value - .) ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .ascii "!writable_page_tables") ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, @@ -115,7 +116,8 @@ SYM_CODE_END(xen_cpu_bringup_again) #else # define FEATURES_DOM0 0 #endif - ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page) + ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .globl xen_elfnote_hypercall_page; + xen_elfnote_hypercall_page: _ASM_PTR xen_elfnote_hypercall_page_value - .) ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES, .long FEATURES_PV | FEATURES_PVH | FEATURES_DOM0) ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") From 5c6808d1a9dd83853ff39684aeb812be39e108e8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 9 Oct 2024 18:04:44 +0200 Subject: [PATCH 5/6] x86/pvh: Avoid absolute symbol references in .head.text The .head.text section contains code that may execute from a different address than it was linked at. This is fragile, given that the x86 ABI can refer to global symbols via absolute or relative references, and the toolchain assumes that these are interchangeable, which they are not in this particular case. For this reason, all absolute symbol references are being removed from code that is emitted into .head.text. Subsequently, build time validation may be added that ensures that no absolute ELF relocations exist at all in that ELF section. In the case of the PVH code, the absolute references are in 32-bit code, which gets emitted with R_X86_64_32 relocations, and these are even more problematic going forward, as it prevents running the linker in PIE mode. So update the 64-bit code to avoid _pa(), and to only rely on relative symbol references: these are always 32-bits wide, even in 64-bit code, and are resolved by the linker at build time. Reviewed-by: Jason Andryuk Tested-by: Jason Andryuk Signed-off-by: Ard Biesheuvel Message-ID: <20241009160438.3884381-12-ardb+git@google.com> Signed-off-by: Juergen Gross --- arch/x86/platform/pvh/head.S | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S index e6f39d77f0b4..4733a5f467b8 100644 --- a/arch/x86/platform/pvh/head.S +++ b/arch/x86/platform/pvh/head.S @@ -6,7 +6,9 @@ .code32 .text +#ifdef CONFIG_X86_32 #define _pa(x) ((x) - __START_KERNEL_map) +#endif #define rva(x) ((x) - pvh_start_xen) #include @@ -72,8 +74,7 @@ SYM_CODE_START(pvh_start_xen) movl $0, %esp leal rva(gdt)(%ebp), %eax - leal rva(gdt_start)(%ebp), %ecx - movl %ecx, 2(%eax) + addl %eax, 2(%eax) lgdt (%eax) mov $PVH_DS_SEL,%eax @@ -103,10 +104,23 @@ SYM_CODE_START(pvh_start_xen) btsl $_EFER_LME, %eax wrmsr + /* + * Reuse the non-relocatable symbol emitted for the ELF note to + * subtract the build time physical address of pvh_start_xen() from + * its actual runtime address, without relying on absolute 32-bit ELF + * relocations, as these are not supported by the linker when running + * in -pie mode, and should be avoided in .head.text in general. + */ mov %ebp, %ebx - subl $_pa(pvh_start_xen), %ebx /* offset */ + subl rva(xen_elfnote_phys32_entry)(%ebp), %ebx jz .Lpagetable_done + /* + * Store the resulting load offset in phys_base. __pa() needs + * phys_base set to calculate the hypercall page in xen_pvh_init(). + */ + movl %ebx, rva(phys_base)(%ebp) + /* Fixup page-tables for relocation. */ leal rva(pvh_init_top_pgt)(%ebp), %edi movl $PTRS_PER_PGD, %ecx @@ -165,14 +179,6 @@ SYM_CODE_START(pvh_start_xen) xor %edx, %edx wrmsr - /* - * Calculate load offset and store in phys_base. __pa() needs - * phys_base set to calculate the hypercall page in xen_pvh_init(). - */ - movq %rbp, %rbx - subq $_pa(pvh_start_xen), %rbx - movq %rbx, phys_base(%rip) - /* Call xen_prepare_pvh() via the kernel virtual mapping */ leaq xen_prepare_pvh(%rip), %rax subq phys_base(%rip), %rax @@ -218,7 +224,7 @@ SYM_CODE_END(pvh_start_xen) .balign 8 SYM_DATA_START_LOCAL(gdt) .word gdt_end - gdt_start - 1 - .long _pa(gdt_start) /* x86-64 will overwrite if relocated. */ + .long gdt_start - gdt .word 0 SYM_DATA_END(gdt) SYM_DATA_START_LOCAL(gdt_start) From afc545da381ba0c651b2658966ac737032676f01 Mon Sep 17 00:00:00 2001 From: Qiu-ji Chen Date: Tue, 5 Nov 2024 21:09:19 +0800 Subject: [PATCH 6/6] xen: Fix the issue of resource not being properly released in xenbus_dev_probe() This patch fixes an issue in the function xenbus_dev_probe(). In the xenbus_dev_probe() function, within the if (err) branch at line 313, the program incorrectly returns err directly without releasing the resources allocated by err = drv->probe(dev, id). As the return value is non-zero, the upper layers assume the processing logic has failed. However, the probe operation was performed earlier without a corresponding remove operation. Since the probe actually allocates resources, failing to perform the remove operation could lead to problems. To fix this issue, we followed the resource release logic of the xenbus_dev_remove() function by adding a new block fail_remove before the fail_put block. After entering the branch if (err) at line 313, the function will use a goto statement to jump to the fail_remove block, ensuring that the previously acquired resources are correctly released, thus preventing the reference count leak. This bug was identified by an experimental static analysis tool developed by our team. The tool specializes in analyzing reference count operations and detecting potential issues where resources are not properly managed. In this case, the tool flagged the missing release operation as a potential problem, which led to the development of this patch. Fixes: 4bac07c993d0 ("xen: add the Xenbus sysfs and virtual device hotplug driver") Cc: stable@vger.kernel.org Signed-off-by: Qiu-ji Chen Reviewed-by: Juergen Gross Message-ID: <20241105130919.4621-1-chenqiuji666@gmail.com> Signed-off-by: Juergen Gross --- drivers/xen/xenbus/xenbus_probe.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 9f097f1f4a4c..6d32ffb01136 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -313,7 +313,7 @@ int xenbus_dev_probe(struct device *_dev) if (err) { dev_warn(&dev->dev, "watch_otherend on %s failed.\n", dev->nodename); - return err; + goto fail_remove; } dev->spurious_threshold = 1; @@ -322,6 +322,12 @@ int xenbus_dev_probe(struct device *_dev) dev->nodename); return 0; +fail_remove: + if (drv->remove) { + down(&dev->reclaim_sem); + drv->remove(dev); + up(&dev->reclaim_sem); + } fail_put: module_put(drv->driver.owner); fail: