From f9005571701920551bcf54a500973fb61f2e1eda Mon Sep 17 00:00:00 2001
From: Stefano Stabellini
Date: Wed, 31 Oct 2018 16:11:49 -0700
Subject: [PATCH 1/6] CONFIG_XEN_PV breaks xen_create_contiguous_region on ARM

xen_create_contiguous_region now only has an implementation if
CONFIG_XEN_PV is defined. However, on ARM we never set CONFIG_XEN_PV,
yet we do have an implementation of xen_create_contiguous_region, which
is required for swiotlb-xen to work correctly (although it just sets
*dma_handle).

Cc: stable@vger.kernel.org # 4.12
Fixes: 16624390816c ("xen: create xen_create/destroy_contiguous_region() stubs for PVHVM only builds")
Signed-off-by: Stefano Stabellini
Reviewed-by: Juergen Gross
CC: Jeff.Kubascik@dornerworks.com
CC: Jarvis.Roach@dornerworks.com
CC: Nathan.Studer@dornerworks.com
CC: vkuznets@redhat.com
CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
CC: julien.grall@arm.com
Signed-off-by: Juergen Gross
---
 include/xen/xen-ops.h | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 18803ff76e27..4969817124a8 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -42,16 +42,12 @@ int xen_setup_shutdown_event(void);
 
 extern unsigned long *xen_contiguous_bitmap;
 
-#ifdef CONFIG_XEN_PV
+#if defined(CONFIG_XEN_PV) || defined(CONFIG_ARM) || defined(CONFIG_ARM64)
 int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
 				unsigned int address_bits,
 				dma_addr_t *dma_handle);
 
 void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order);
-
-int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
-		  xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot,
-		  unsigned int domid, bool no_translate, struct page **pages);
 #else
 static inline int xen_create_contiguous_region(phys_addr_t pstart,
 					       unsigned int order,
@@ -63,7 +59,13 @@ static inline int xen_create_contiguous_region(phys_addr_t pstart,
 static inline void xen_destroy_contiguous_region(phys_addr_t pstart,
 						unsigned int order) { }
+#endif
 
+#if defined(CONFIG_XEN_PV)
+int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
+		  xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot,
+		  unsigned int domid, bool no_translate, struct page **pages);
+#else
 static inline int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
 				xen_pfn_t *pfn, int nr, int *err_ptr,
 				pgprot_t prot, unsigned int domid,
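As background for the hunk above: the header now chooses between a real
implementation and inline stubs purely at preprocessor level. Below is a
minimal standalone sketch of that selection pattern, assuming nothing
beyond standard C; contig_make() is a hypothetical stand-in for
xen_create_contiguous_region(), not kernel code. Build with -DCONFIG_ARM
(or -DCONFIG_XEN_PV / -DCONFIG_ARM64) to get the real implementation,
without any define to get the stub.

#include <stdio.h>

#if defined(CONFIG_XEN_PV) || defined(CONFIG_ARM) || defined(CONFIG_ARM64)
/* Real implementation: on ARM the region is already physically
 * contiguous, so it only has to report the DMA address. */
int contig_make(unsigned long pstart, unsigned int order,
		unsigned long *dma_handle)
{
	(void)order;
	*dma_handle = pstart;
	return 0;
}
#else
/* Stub for configurations that never create contiguous regions. */
static inline int contig_make(unsigned long pstart, unsigned int order,
			      unsigned long *dma_handle)
{
	(void)pstart; (void)order; (void)dma_handle;
	return -1;	/* the kernel stub returns -ENOMEM */
}
#endif

int main(void)
{
	unsigned long handle = 0;

	if (contig_make(0x100000, 4, &handle) == 0)
		printf("dma_handle = %#lx\n", handle);
	else
		printf("no implementation in this configuration\n");
	return 0;
}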
From d9cccfa7c4d1d9ef967ec9308df7304a18609b30 Mon Sep 17 00:00:00 2001
From: Liam Merwick
Date: Fri, 2 Nov 2018 14:04:23 +0000
Subject: [PATCH 2/6] xen/grant-table: Fix incorrect gnttab_dma_free_pages()
 pr_debug message

If a call to xenmem_reservation_increase() in gnttab_dma_free_pages()
fails, it triggers the message "Failed to decrease reservation..."
which should be "Failed to increase reservation..."

Fixes: 9bdc7304f536 ('xen/grant-table: Allow allocating buffers suitable for DMA')
Reported-by: Ross Philipson
Signed-off-by: Liam Merwick
Reviewed-by: Mark Kanda
Reviewed-by: Juergen Gross
Signed-off-by: Juergen Gross
---
 drivers/xen/grant-table.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 84575baceebc..97341fa75458 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -914,7 +914,7 @@ int gnttab_dma_free_pages(struct gnttab_dma_alloc_args *args)
 
 	ret = xenmem_reservation_increase(args->nr_pages, args->frames);
 	if (ret != args->nr_pages) {
-		pr_debug("Failed to decrease reservation for DMA buffer\n");
+		pr_debug("Failed to increase reservation for DMA buffer\n");
 		ret = -EFAULT;
 	} else {
 		ret = 0;

From 6cc4a0863c9709c512280c64e698d68443ac8053 Mon Sep 17 00:00:00 2001
From: Manjunath Patil
Date: Tue, 30 Oct 2018 09:49:21 -0700
Subject: [PATCH 3/6] xen-blkfront: fix kernel panic with negotiate_mq error
 path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

info->nr_rings isn't adjusted in case of an ENOMEM error from
negotiate_mq(). This leads to a kernel panic in the error path.

Typical call stack involving the panic:

 #8 page_fault at ffffffff8175936f
    [exception RIP: blkif_free_ring+33]
    RIP: ffffffffa0149491  RSP: ffff8804f7673c08  RFLAGS: 00010292
    ...
 #9 blkif_free at ffffffffa0149aaa [xen_blkfront]
#10 talk_to_blkback at ffffffffa014c8cd [xen_blkfront]
#11 blkback_changed at ffffffffa014ea8b [xen_blkfront]
#12 xenbus_otherend_changed at ffffffff81424670
#13 backend_changed at ffffffff81426dc3
#14 xenwatch_thread at ffffffff81422f29
#15 kthread at ffffffff810abe6a
#16 ret_from_fork at ffffffff81754078

Cc: stable@vger.kernel.org
Fixes: 7ed8ce1c5fc7 ("xen-blkfront: move negotiate_mq to cover all cases of new VBDs")
Signed-off-by: Manjunath Patil
Acked-by: Roger Pau Monné
Signed-off-by: Juergen Gross
---
 drivers/block/xen-blkfront.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 9eea83ae01c6..9ac3999a55b7 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1919,6 +1919,7 @@ static int negotiate_mq(struct blkfront_info *info)
 			      GFP_KERNEL);
 	if (!info->rinfo) {
 		xenbus_dev_fatal(info->xbdev, -ENOMEM, "allocating ring_info structure");
+		info->nr_rings = 0;
 		return -ENOMEM;
 	}
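The xen-blkfront fix above boils down to one invariant: a count field
must never claim more elements than were actually allocated, because the
teardown path trusts it. A standalone sketch of that rule follows, with
illustrative names rather than the driver's real structures.

#include <stdio.h>
#include <stdlib.h>

struct ring_set {
	unsigned int nr_rings;
	int *rinfo;
};

static int ring_set_alloc(struct ring_set *s, unsigned int want)
{
	s->nr_rings = want;
	s->rinfo = calloc(want, sizeof(*s->rinfo));
	if (!s->rinfo) {
		s->nr_rings = 0;	/* the fix: keep count and pointer consistent */
		return -1;
	}
	return 0;
}

static void ring_set_free(struct ring_set *s)
{
	unsigned int i;

	/* Mirrors blkif_free(): with nr_rings forced to 0 after a failed
	 * allocation, this loop never touches the NULL rinfo pointer. */
	for (i = 0; i < s->nr_rings; i++)
		s->rinfo[i] = 0;	/* stands in for per-ring teardown */
	free(s->rinfo);
	s->rinfo = NULL;
	s->nr_rings = 0;
}

int main(void)
{
	struct ring_set s = { 0, NULL };

	if (ring_set_alloc(&s, 4) == 0)
		printf("allocated %u rings\n", s.nr_rings);
	ring_set_free(&s);	/* safe in both success and failure cases */
	return 0;
}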
From 1457d8cf7664f34c4ba534c1073821a559a2f6f9 Mon Sep 17 00:00:00 2001
From: Juergen Gross
Date: Wed, 7 Nov 2018 18:01:00 +0100
Subject: [PATCH 4/6] x86/xen: fix pv boot

Commit 9da3f2b7405440 ("x86/fault: BUG() when uaccess helpers fault on
kernel addresses") introduced a regression for booting Xen PV guests.

Xen PV guests are using __put_user() and __get_user() for accessing the
p2m map (physical to machine frame number map), as accesses might fail
in case of not populated areas of the map.

With the above commit, using __put_user() and __get_user() for accessing
kernel pages is no longer valid. So replace the Xen hack by adding
appropriate p2m access functions using the default fixup handler.

Fixes: 9da3f2b7405440 ("x86/fault: BUG() when uaccess helpers fault on kernel addresses")
Signed-off-by: Juergen Gross
Reviewed-by: Andrew Cooper
Signed-off-by: Juergen Gross
---
 arch/x86/include/asm/xen/page.h | 35 +++++++++++++++++++++++++++++----
 arch/x86/xen/p2m.c              |  3 +--
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 123e669bf363..790ce08e41f2 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -9,7 +9,7 @@
 #include
 #include
-#include
+#include
 #include
 #include
@@ -93,12 +93,39 @@ clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
  */
 static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val)
 {
-	return __put_user(val, (unsigned long __user *)addr);
+	int ret = 0;
+
+	asm volatile("1: mov %[val], %[ptr]\n"
+		     "2:\n"
+		     ".section .fixup, \"ax\"\n"
+		     "3: sub $1, %[ret]\n"
+		     "   jmp 2b\n"
+		     ".previous\n"
+		     _ASM_EXTABLE(1b, 3b)
+		     : [ret] "+r" (ret), [ptr] "=m" (*addr)
+		     : [val] "r" (val));
+
+	return ret;
 }
 
-static inline int xen_safe_read_ulong(unsigned long *addr, unsigned long *val)
+static inline int xen_safe_read_ulong(const unsigned long *addr,
+				      unsigned long *val)
 {
-	return __get_user(*val, (unsigned long __user *)addr);
+	int ret = 0;
+	unsigned long rval = ~0ul;
+
+	asm volatile("1: mov %[ptr], %[rval]\n"
+		     "2:\n"
+		     ".section .fixup, \"ax\"\n"
+		     "3: sub $1, %[ret]\n"
+		     "   jmp 2b\n"
+		     ".previous\n"
+		     _ASM_EXTABLE(1b, 3b)
+		     : [ret] "+r" (ret), [rval] "+r" (rval)
+		     : [ptr] "m" (*addr));
+	*val = rval;
+
+	return ret;
 }
 
 #ifdef CONFIG_XEN_PV
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index d6d74efd8912..4fe84436d5a7 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -656,8 +656,7 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 
 	/*
 	 * The interface requires atomic updates on p2m elements.
-	 * xen_safe_write_ulong() is using __put_user which does an atomic
-	 * store via asm().
+	 * xen_safe_write_ulong() is using an atomic store via asm().
 	 */
 	if (likely(!xen_safe_write_ulong(xen_p2m_addr + pfn, mfn)))
 		return true;
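The new accessors above change the calling convention from "trust the
access" to "report the fault via the return value". The following
standalone C sketch models only that convention; safe_read_ulong() here
is a plain-C stand-in for the asm/fixup version in the patch, using a
NULL check in place of a real page fault on an unpopulated p2m area.

#include <stdio.h>

/* Returns 0 on success, negative on "fault"; the value read travels
 * through the out-parameter, exactly like xen_safe_read_ulong(). */
static int safe_read_ulong(const unsigned long *addr, unsigned long *val)
{
	if (!addr)		/* stand-in for the fault the fixup handler catches */
		return -1;
	*val = *addr;
	return 0;
}

int main(void)
{
	unsigned long entry = 42, out;

	if (safe_read_ulong(&entry, &out) == 0)
		printf("read %lu\n", out);
	if (safe_read_ulong(NULL, &out) != 0)
		printf("fault reported via return value, not a crash\n");
	return 0;
}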
From d3132b3860f6cf35ff7609a76bbcdbb814bd027c Mon Sep 17 00:00:00 2001
From: Juergen Gross
Date: Thu, 8 Nov 2018 08:35:06 +0100
Subject: [PATCH 5/6] xen: fix xen_qlock_wait()

Commit a856531951dc80 ("xen: make xen_qlock_wait() nestable") introduced
a regression for Xen guests running fully virtualized (HVM or PVH mode).
The Xen hypervisor wouldn't return from the poll hypercall with
interrupts disabled in case of an interrupt (for PV guests it does).

So instead of disabling interrupts in xen_qlock_wait() use a nesting
counter to avoid calling xen_clear_irq_pending() in case xen_qlock_wait()
is nested.

Fixes: a856531951dc80 ("xen: make xen_qlock_wait() nestable")
Cc: stable@vger.kernel.org
Reported-by: Sander Eikelenboom
Signed-off-by: Juergen Gross
Reviewed-by: Boris Ostrovsky
Tested-by: Sander Eikelenboom
Signed-off-by: Juergen Gross
---
 arch/x86/xen/spinlock.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 441c88262169..1c8a8816a402 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include
 
 #include
 #include
@@ -21,6 +22,7 @@
 
 static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
 static DEFINE_PER_CPU(char *, irq_name);
+static DEFINE_PER_CPU(atomic_t, xen_qlock_wait_nest);
 static bool xen_pvspin = true;
 
 static void xen_qlock_kick(int cpu)
@@ -39,25 +41,25 @@ static void xen_qlock_kick(int cpu)
  */
 static void xen_qlock_wait(u8 *byte, u8 val)
 {
-	unsigned long flags;
 	int irq = __this_cpu_read(lock_kicker_irq);
+	atomic_t *nest_cnt = this_cpu_ptr(&xen_qlock_wait_nest);
 
 	/* If kicker interrupts not initialized yet, just spin */
 	if (irq == -1 || in_nmi())
 		return;
 
-	/* Guard against reentry. */
-	local_irq_save(flags);
+	/* Detect reentry. */
+	atomic_inc(nest_cnt);
 
-	/* If irq pending already clear it. */
-	if (xen_test_irq_pending(irq)) {
+	/* If irq pending already and no nested call clear it. */
+	if (atomic_read(nest_cnt) == 1 && xen_test_irq_pending(irq)) {
 		xen_clear_irq_pending(irq);
 	} else if (READ_ONCE(*byte) == val) {
 		/* Block until irq becomes pending (or a spurious wakeup) */
 		xen_poll_irq(irq);
 	}
 
-	local_irq_restore(flags);
+	atomic_dec(nest_cnt);
 }
 
 static irqreturn_t dummy_handler(int irq, void *dev_id)
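The core of the spinlock fix is that only the outermost invocation may
consume the pending-event state; a nested call that cleared it would
lose the wakeup of the outer one. A standalone sketch of that
nesting-counter logic follows, stripped of the per-CPU machinery and
hypercalls, with plain ints standing in for atomics.

#include <stdio.h>

static int nest_cnt;		/* per-CPU atomic_t in the real code */
static int irq_pending;

static void qlock_wait_sketch(void)
{
	nest_cnt++;		/* atomic_inc(nest_cnt) */

	/* Only the outermost call (depth 1) may clear the pending flag. */
	if (nest_cnt == 1 && irq_pending) {
		irq_pending = 0;
		printf("outermost call cleared pending irq\n");
	} else {
		printf("nested call (depth %d): would poll instead\n", nest_cnt);
	}

	nest_cnt--;		/* atomic_dec(nest_cnt) */
}

int main(void)
{
	irq_pending = 1;
	qlock_wait_sketch();	/* depth 1: clears the flag */

	irq_pending = 1;
	nest_cnt = 1;		/* simulate being interrupted inside the outer call */
	qlock_wait_sketch();	/* depth 2: leaves the flag for the outer call */
	return 0;
}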
From 3941552aec1e04d63999988a057ae09a1c56ebeb Mon Sep 17 00:00:00 2001
From: Juergen Gross
Date: Thu, 1 Nov 2018 13:33:07 +0100
Subject: [PATCH 6/6] xen: remove size limit of privcmd-buf mapping interface

Currently the size of hypercall buffers allocated via /dev/xen/hypercall
is limited to a default of 64 memory pages. For live migration of guests
this might be too small, as the page dirty bitmask needs to be sized
according to the size of the guest. This means that migrating an 8GB
guest already exhausts the default buffer size for the dirty bitmap.

There is no sensible way to set a sane limit, so just remove it
completely. The device node's usage is limited to root anyway, so there
is no additional DoS scenario added by allowing unlimited buffers.

While at it, make the error path for the -ENOMEM case a little bit
cleaner by setting n_pages to the number of successfully allocated pages
instead of the target size.

Fixes: c51b3c639e01f2 ("xen: add new hypercall buffer mapping device")
Cc: stable@vger.kernel.org # 4.18
Signed-off-by: Juergen Gross
Reviewed-by: Boris Ostrovsky
Signed-off-by: Juergen Gross
---
 drivers/xen/privcmd-buf.c | 22 ++++------------------
 1 file changed, 4 insertions(+), 18 deletions(-)

diff --git a/drivers/xen/privcmd-buf.c b/drivers/xen/privcmd-buf.c
index df1ed37c3269..de01a6d0059d 100644
--- a/drivers/xen/privcmd-buf.c
+++ b/drivers/xen/privcmd-buf.c
@@ -21,15 +21,9 @@
 
 MODULE_LICENSE("GPL");
 
-static unsigned int limit = 64;
-module_param(limit, uint, 0644);
-MODULE_PARM_DESC(limit, "Maximum number of pages that may be allocated by "
-		 "the privcmd-buf device per open file");
-
 struct privcmd_buf_private {
 	struct mutex lock;
 	struct list_head list;
-	unsigned int allocated;
 };
 
 struct privcmd_buf_vma_private {
@@ -60,13 +54,10 @@ static void privcmd_buf_vmapriv_free(struct privcmd_buf_vma_private *vma_priv)
 {
 	unsigned int i;
 
-	vma_priv->file_priv->allocated -= vma_priv->n_pages;
-
 	list_del(&vma_priv->list);
 
 	for (i = 0; i < vma_priv->n_pages; i++)
-		if (vma_priv->pages[i])
-			__free_page(vma_priv->pages[i]);
+		__free_page(vma_priv->pages[i]);
 
 	kfree(vma_priv);
 }
@@ -146,8 +137,7 @@ static int privcmd_buf_mmap(struct file *file, struct vm_area_struct *vma)
 	unsigned int i;
 	int ret = 0;
 
-	if (!(vma->vm_flags & VM_SHARED) || count > limit ||
-	    file_priv->allocated + count > limit)
+	if (!(vma->vm_flags & VM_SHARED))
 		return -EINVAL;
 
 	vma_priv = kzalloc(sizeof(*vma_priv) + count * sizeof(void *),
@@ -155,19 +145,15 @@ static int privcmd_buf_mmap(struct file *file, struct vm_area_struct *vma)
 	if (!vma_priv)
 		return -ENOMEM;
 
-	vma_priv->n_pages = count;
-	count = 0;
-	for (i = 0; i < vma_priv->n_pages; i++) {
+	for (i = 0; i < count; i++) {
 		vma_priv->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
 		if (!vma_priv->pages[i])
 			break;
-		count++;
+		vma_priv->n_pages++;
 	}
 
 	mutex_lock(&file_priv->lock);
 
-	file_priv->allocated += count;
-
 	vma_priv->file_priv = file_priv;
 	vma_priv->users = 1;
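The allocation-loop change in this patch establishes a simple
convention: n_pages counts only pages that were actually allocated, so
the free path can unconditionally free exactly n_pages entries with no
NULL checks. A standalone sketch of that convention follows, with
illustrative types in place of the driver's structures and calloc()
standing in for alloc_page().

#include <stdlib.h>

struct buf {
	unsigned int n_pages;
	void *pages[16];
};

static int buf_alloc(struct buf *b, unsigned int count)
{
	unsigned int i;

	b->n_pages = 0;
	for (i = 0; i < count; i++) {
		b->pages[i] = calloc(1, 4096);
		if (!b->pages[i])
			break;		/* n_pages already reflects reality */
		b->n_pages++;
	}
	return b->n_pages == count ? 0 : -1;
}

static void buf_free(struct buf *b)
{
	unsigned int i;

	/* No "if (b->pages[i])" needed: every slot below n_pages is valid. */
	for (i = 0; i < b->n_pages; i++)
		free(b->pages[i]);
	b->n_pages = 0;
}

int main(void)
{
	struct buf b;

	buf_alloc(&b, 8);	/* even a partial success leaves b consistent */
	buf_free(&b);
	return 0;
}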