From b537bf429a682131f94df166e6daf39ec48fac03 Mon Sep 17 00:00:00 2001 From: Wang Qing Date: Sun, 27 Feb 2022 19:15:03 -0800 Subject: [PATCH 1/9] xen: use time_is_before_eq_jiffies() instead of open coding it Use the helper function time_is_{before,after}_jiffies() to improve code readability. Signed-off-by: Wang Qing Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/1646018104-61415-1-git-send-email-wangqing@vivo.com Signed-off-by: Boris Ostrovsky --- drivers/xen/balloon.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index a2c4fc49c483..dfe26fa17e95 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -59,6 +59,7 @@ #include #include #include +#include #include #include @@ -794,7 +795,7 @@ static int __init balloon_wait_finish(void) if (balloon_state == BP_ECANCELED) { pr_warn_once("Initial ballooning failed, %ld pages need to be freed.\n", -credit); - if (jiffies - last_changed >= HZ * balloon_boot_timeout) + if (time_is_before_eq_jiffies(last_changed + HZ * balloon_boot_timeout)) panic("Initial ballooning failed!\n"); } From eed05744322da07dd7e419432dcedf3c2e017179 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Wed, 2 Mar 2022 08:40:32 -0800 Subject: [PATCH 2/9] xen: delay xen_hvm_init_time_ops() if kdump is boot on vcpu>=32 The sched_clock() can be used very early since commit 857baa87b642 ("sched/clock: Enable sched clock early"). 
In addition, with commit 38669ba205d1 ("x86/xen/time: Output xen sched_clock time from 0"), the kdump kernel in a Xen HVM guest may panic at a very early stage when accessing &__this_cpu_read(xen_vcpu)->time as shown below: setup_arch() -> init_hypervisor_platform() -> x86_init.hyper.init_platform = xen_hvm_guest_init() -> xen_hvm_init_time_ops() -> xen_clocksource_read() -> src = &__this_cpu_read(xen_vcpu)->time; This is because Xen HVM supports at most MAX_VIRT_CPUS=32 'vcpu_info' embedded inside 'shared_info' during the early stage until xen_vcpu_setup() is used to allocate/relocate 'vcpu_info' for the boot cpu at an arbitrary address. However, when a Xen HVM guest panics on vcpu >= 32, since xen_vcpu_info_reset(0) would set per_cpu(xen_vcpu, cpu) = NULL when vcpu >= 32, xen_clocksource_read() on vcpu >= 32 would panic. This patch calls xen_hvm_init_time_ops() again later in xen_hvm_smp_prepare_boot_cpu() after the 'vcpu_info' for boot vcpu is registered when the boot vcpu is >= 32. This issue can be reproduced on purpose via the command below at the guest side when kdump/kexec is enabled: "taskset -c 33 echo c > /proc/sysrq-trigger" The bugfix for PVM is not implemented due to the lack of a testing environment. [boris: xen_hvm_init_time_ops() returns on errors instead of jumping to end] Cc: Joe Jin Signed-off-by: Dongli Zhang Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20220302164032.14569-3-dongli.zhang@oracle.com Signed-off-by: Boris Ostrovsky --- arch/x86/xen/smp_hvm.c | 6 ++++++ arch/x86/xen/time.c | 24 +++++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c index 6ff3c887e0b9..b70afdff419c 100644 --- a/arch/x86/xen/smp_hvm.c +++ b/arch/x86/xen/smp_hvm.c @@ -19,6 +19,12 @@ static void __init xen_hvm_smp_prepare_boot_cpu(void) */ xen_vcpu_setup(0); + /* + * Called again in case the kernel boots on vcpu >= MAX_VIRT_CPUS. + * Refer to comments in xen_hvm_init_time_ops(). 
+ */ + xen_hvm_init_time_ops(); + /* * The alternative logic (which patches the unlock/lock) runs before * the smp bootup up code is activated. Hence we need to set this up diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index d9c945ee1100..9ef0a5cca96e 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -558,6 +558,11 @@ static void xen_hvm_setup_cpu_clockevents(void) void __init xen_hvm_init_time_ops(void) { + static bool hvm_time_initialized; + + if (hvm_time_initialized) + return; + /* * vector callback is needed otherwise we cannot receive interrupts * on cpu > 0 and at this point we don't know how many cpus are @@ -567,7 +572,22 @@ void __init xen_hvm_init_time_ops(void) return; if (!xen_feature(XENFEAT_hvm_safe_pvclock)) { - pr_info("Xen doesn't support pvclock on HVM, disable pv timer"); + pr_info_once("Xen doesn't support pvclock on HVM, disable pv timer"); + return; + } + + /* + * Only MAX_VIRT_CPUS 'vcpu_info' are embedded inside 'shared_info'. + * The __this_cpu_read(xen_vcpu) is still NULL when Xen HVM guest + * boots on vcpu >= MAX_VIRT_CPUS (e.g., kexec), To access + * __this_cpu_read(xen_vcpu) via xen_clocksource_read() will panic. + * + * The xen_hvm_init_time_ops() should be called again later after + * __this_cpu_read(xen_vcpu) is available. + */ + if (!__this_cpu_read(xen_vcpu)) { + pr_info("Delay xen_init_time_common() as kernel is running on vcpu=%d\n", + xen_vcpu_nr(0)); return; } @@ -577,6 +597,8 @@ void __init xen_hvm_init_time_ops(void) x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents; x86_platform.set_wallclock = xen_set_wallclock; + + hvm_time_initialized = true; } #endif From b359b3a0296a8a9c853c7ee98f9728942d7f0f4e Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Mon, 7 Mar 2022 14:25:54 +0800 Subject: [PATCH 3/9] x86/xen: Fix kerneldoc warning Fix the following W=1 kernel warnings: arch/x86/xen/setup.c:725: warning: expecting prototype for machine_specific_memory_setup(). 
Prototype was for xen_memory_setup() instead. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20220307062554.8334-1-jiapeng.chong@linux.alibaba.com Signed-off-by: Boris Ostrovsky --- arch/x86/xen/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index af216feb63d9..81aa46f770c5 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -719,7 +719,7 @@ static void __init xen_reserve_xen_mfnlist(void) } /** - * machine_specific_memory_setup - Hook for machine specific memory setup. + * xen_memory_setup - Hook for machine specific memory setup. **/ char * __init xen_memory_setup(void) { From 982e4430beb94e65c482d36a11dbb779f20c38a7 Mon Sep 17 00:00:00 2001 From: zhanglianjie Date: Sat, 5 Mar 2022 21:38:23 +0800 Subject: [PATCH 4/9] drivers/xen: use helper macro __ATTR_RW Use helper macro __ATTR_RW to define HYPERVISOR_ATTR_RW to make code more clear. Minor readability improvement. 
Remove extra whitespace [boris: added this comment] Signed-off-by: zhanglianjie Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20220305133823.158961-1-zhanglianjie@uniontech.com Signed-off-by: Boris Ostrovsky --- drivers/xen/sys-hypervisor.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c index feb1d16252e7..fcb0792f090e 100644 --- a/drivers/xen/sys-hypervisor.c +++ b/drivers/xen/sys-hypervisor.c @@ -22,11 +22,10 @@ #endif #define HYPERVISOR_ATTR_RO(_name) \ -static struct hyp_sysfs_attr _name##_attr = __ATTR_RO(_name) +static struct hyp_sysfs_attr _name##_attr = __ATTR_RO(_name) #define HYPERVISOR_ATTR_RW(_name) \ -static struct hyp_sysfs_attr _name##_attr = \ - __ATTR(_name, 0644, _name##_show, _name##_store) +static struct hyp_sysfs_attr _name##_attr = __ATTR_RW(_name) struct hyp_sysfs_attr { struct attribute attr; From b0f212633b31ddca99c76aa38f812fe492e8410a Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 11 Mar 2022 11:34:28 +0100 Subject: [PATCH 5/9] xen/grant-table: remove gnttab_*transfer*() functions All grant table operations related to the "transfer" functionality are unused currently. There have been users in the old days of the "Xen-o-Linux" kernel, but those didn't make it upstream. So remove the "transfer" related functions. 
Signed-off-by: Juergen Gross Link: https://lore.kernel.org/r/20220311103429.12845-2-jgross@suse.com Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- drivers/xen/grant-table.c | 113 +------------------------------------- include/xen/grant_table.h | 8 --- 2 files changed, 2 insertions(+), 119 deletions(-) diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 5c83d41766c8..8963af8ec764 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -109,7 +109,7 @@ struct gnttab_ops { void (*unmap_frames)(void); /* * Introducing a valid entry into the grant table, granting the frame of - * this grant entry to domain for accessing or transfering. Ref + * this grant entry to domain for accessing. Ref * parameter is reference of this introduced grant entry, domid is id of * granted domain, frame is the page frame to be granted, and flags is * status of the grant entry to be updated. @@ -125,14 +125,6 @@ struct gnttab_ops { * access for this entry and return success(==1). */ int (*end_foreign_access_ref)(grant_ref_t ref, int readonly); - /* - * Stop granting a grant entry to domain for transfer. Ref parameter is - * reference of a grant entry whose grant transfer will be stopped. If - * tranfer has not started, just reclaim the grant entry and return - * failure(==0). Otherwise, wait for the transfer to complete and then - * return the frame. - */ - unsigned long (*end_foreign_transfer_ref)(grant_ref_t ref); /* * Read the frame number related to a given grant reference. */ @@ -230,10 +222,7 @@ static void put_free_entry(grant_ref_t ref) * Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2. * Introducing a valid entry into the grant table: * 1. Write ent->domid. - * 2. Write ent->frame: - * GTF_permit_access: Frame to which access is permitted. - * GTF_accept_transfer: Pseudo-phys frame slot being filled by new - * frame, or zero if none. + * 2. Write ent->frame: Frame to which access is permitted. * 3. 
Write memory barrier (WMB). * 4. Write ent->flags, inc. valid type. */ @@ -455,102 +444,6 @@ void gnttab_end_foreign_access(grant_ref_t ref, int readonly, } EXPORT_SYMBOL_GPL(gnttab_end_foreign_access); -int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn) -{ - int ref; - - ref = get_free_entries(1); - if (unlikely(ref < 0)) - return -ENOSPC; - gnttab_grant_foreign_transfer_ref(ref, domid, pfn); - - return ref; -} -EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer); - -void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, - unsigned long pfn) -{ - gnttab_interface->update_entry(ref, domid, pfn, GTF_accept_transfer); -} -EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref); - -static unsigned long gnttab_end_foreign_transfer_ref_v1(grant_ref_t ref) -{ - unsigned long frame; - u16 flags; - u16 *pflags; - - pflags = &gnttab_shared.v1[ref].flags; - - /* - * If a transfer is not even yet started, try to reclaim the grant - * reference and return failure (== 0). - */ - while (!((flags = *pflags) & GTF_transfer_committed)) { - if (sync_cmpxchg(pflags, flags, 0) == flags) - return 0; - cpu_relax(); - } - - /* If a transfer is in progress then wait until it is completed. */ - while (!(flags & GTF_transfer_completed)) { - flags = *pflags; - cpu_relax(); - } - - rmb(); /* Read the frame number /after/ reading completion status. */ - frame = gnttab_shared.v1[ref].frame; - BUG_ON(frame == 0); - - return frame; -} - -static unsigned long gnttab_end_foreign_transfer_ref_v2(grant_ref_t ref) -{ - unsigned long frame; - u16 flags; - u16 *pflags; - - pflags = &gnttab_shared.v2[ref].hdr.flags; - - /* - * If a transfer is not even yet started, try to reclaim the grant - * reference and return failure (== 0). - */ - while (!((flags = *pflags) & GTF_transfer_committed)) { - if (sync_cmpxchg(pflags, flags, 0) == flags) - return 0; - cpu_relax(); - } - - /* If a transfer is in progress then wait until it is completed. 
*/ - while (!(flags & GTF_transfer_completed)) { - flags = *pflags; - cpu_relax(); - } - - rmb(); /* Read the frame number /after/ reading completion status. */ - frame = gnttab_shared.v2[ref].full_page.frame; - BUG_ON(frame == 0); - - return frame; -} - -unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref) -{ - return gnttab_interface->end_foreign_transfer_ref(ref); -} -EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref); - -unsigned long gnttab_end_foreign_transfer(grant_ref_t ref) -{ - unsigned long frame = gnttab_end_foreign_transfer_ref(ref); - put_free_entry(ref); - return frame; -} -EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer); - void gnttab_free_grant_reference(grant_ref_t ref) { put_free_entry(ref); @@ -1423,7 +1316,6 @@ static const struct gnttab_ops gnttab_v1_ops = { .unmap_frames = gnttab_unmap_frames_v1, .update_entry = gnttab_update_entry_v1, .end_foreign_access_ref = gnttab_end_foreign_access_ref_v1, - .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v1, .read_frame = gnttab_read_frame_v1, }; @@ -1435,7 +1327,6 @@ static const struct gnttab_ops gnttab_v2_ops = { .unmap_frames = gnttab_unmap_frames_v2, .update_entry = gnttab_update_entry_v2, .end_foreign_access_ref = gnttab_end_foreign_access_ref_v2, - .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v2, .read_frame = gnttab_read_frame_v2, }; diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index c9fea9389ebe..9f9b1a297f0d 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -125,11 +125,6 @@ void gnttab_end_foreign_access(grant_ref_t ref, int readonly, */ int gnttab_try_end_foreign_access(grant_ref_t ref); -int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn); - -unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref); -unsigned long gnttab_end_foreign_transfer(grant_ref_t ref); - /* * operations on reserved batches of grant references */ @@ -162,9 +157,6 @@ static inline void 
gnttab_page_grant_foreign_access_ref_one( readonly); } -void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid, - unsigned long pfn); - static inline void gnttab_set_map_op(struct gnttab_map_grant_ref *map, phys_addr_t addr, uint32_t flags, grant_ref_t ref, domid_t domid) From c94b731da21f10086a9e52d63c21c730e3f6c939 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 11 Mar 2022 11:34:29 +0100 Subject: [PATCH 6/9] xen/grant-table: remove readonly parameter from functions The gnttab_end_foreign_access() family of functions is taking a "readonly" parameter, which isn't used. Remove it from the function parameters. Signed-off-by: Juergen Gross Link: https://lore.kernel.org/r/20220311103429.12845-3-jgross@suse.com Reviewed-by: Jan Beulich Acked-by: Christian Schoenebeck Signed-off-by: Boris Ostrovsky --- drivers/block/xen-blkfront.c | 8 ++--- drivers/char/tpm/xen-tpmfront.c | 2 +- drivers/gpu/drm/xen/xen_drm_front_evtchnl.c | 2 +- drivers/input/misc/xen-kbdfront.c | 4 +-- drivers/net/xen-netfront.c | 13 ++++--- drivers/pci/xen-pcifront.c | 2 +- drivers/scsi/xen-scsifront.c | 4 +-- drivers/usb/host/xen-hcd.c | 4 +-- drivers/xen/gntalloc.c | 2 +- drivers/xen/gntdev-dmabuf.c | 2 +- drivers/xen/grant-table.c | 38 +++++++++------------ drivers/xen/pvcalls-front.c | 6 ++-- drivers/xen/xen-front-pgdir-shbuf.c | 3 +- include/xen/grant_table.h | 5 ++- net/9p/trans_xen.c | 8 ++--- sound/xen/xen_snd_front_evtchnl.c | 2 +- 16 files changed, 48 insertions(+), 57 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 03b5fb341e58..aa996b637d0b 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1223,7 +1223,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) list_del(&persistent_gnt->node); if (persistent_gnt->gref != GRANT_INVALID_REF) { gnttab_end_foreign_access(persistent_gnt->gref, - 0, 0UL); + 0UL); rinfo->persistent_gnts_c--; } if (info->feature_persistent) @@ -1246,7 +1246,7 @@ 
static void blkif_free_ring(struct blkfront_ring_info *rinfo) rinfo->shadow[i].req.u.rw.nr_segments; for (j = 0; j < segs; j++) { persistent_gnt = rinfo->shadow[i].grants_used[j]; - gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); + gnttab_end_foreign_access(persistent_gnt->gref, 0UL); if (info->feature_persistent) __free_page(persistent_gnt->page); kfree(persistent_gnt); @@ -1261,7 +1261,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) for (j = 0; j < INDIRECT_GREFS(segs); j++) { persistent_gnt = rinfo->shadow[i].indirect_grants[j]; - gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); + gnttab_end_foreign_access(persistent_gnt->gref, 0UL); __free_page(persistent_gnt->page); kfree(persistent_gnt); } @@ -1284,7 +1284,7 @@ free_shadow: /* Free resources associated with old device channel. */ for (i = 0; i < info->nr_ring_pages; i++) { if (rinfo->ring_ref[i] != GRANT_INVALID_REF) { - gnttab_end_foreign_access(rinfo->ring_ref[i], 0, 0); + gnttab_end_foreign_access(rinfo->ring_ref[i], 0); rinfo->ring_ref[i] = GRANT_INVALID_REF; } } diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c index da5b30771418..ad0675f23e6e 100644 --- a/drivers/char/tpm/xen-tpmfront.c +++ b/drivers/char/tpm/xen-tpmfront.c @@ -332,7 +332,7 @@ static void ring_free(struct tpm_private *priv) return; if (priv->ring_ref) - gnttab_end_foreign_access(priv->ring_ref, 0, + gnttab_end_foreign_access(priv->ring_ref, (unsigned long)priv->shr); else free_page((unsigned long)priv->shr); diff --git a/drivers/gpu/drm/xen/xen_drm_front_evtchnl.c b/drivers/gpu/drm/xen/xen_drm_front_evtchnl.c index e10d95dddb99..08b526eeec16 100644 --- a/drivers/gpu/drm/xen/xen_drm_front_evtchnl.c +++ b/drivers/gpu/drm/xen/xen_drm_front_evtchnl.c @@ -148,7 +148,7 @@ static void evtchnl_free(struct xen_drm_front_info *front_info, /* end access and free the page */ if (evtchnl->gref != GRANT_INVALID_REF) - gnttab_end_foreign_access(evtchnl->gref, 0, page); + 
gnttab_end_foreign_access(evtchnl->gref, page); memset(evtchnl, 0, sizeof(*evtchnl)); } diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c index 3d17a0b3fe51..1fc9b3e7007f 100644 --- a/drivers/input/misc/xen-kbdfront.c +++ b/drivers/input/misc/xen-kbdfront.c @@ -481,7 +481,7 @@ static int xenkbd_connect_backend(struct xenbus_device *dev, error_evtchan: xenbus_free_evtchn(dev, evtchn); error_grant: - gnttab_end_foreign_access(info->gref, 0, 0UL); + gnttab_end_foreign_access(info->gref, 0UL); info->gref = -1; return ret; } @@ -492,7 +492,7 @@ static void xenkbd_disconnect_backend(struct xenkbd_info *info) unbind_from_irqhandler(info->irq, info); info->irq = -1; if (info->gref >= 0) - gnttab_end_foreign_access(info->gref, 0, 0UL); + gnttab_end_foreign_access(info->gref, 0UL); info->gref = -1; } diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index daa4e6106aac..e2b4a1893a13 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -425,7 +425,7 @@ static bool xennet_tx_buf_gc(struct netfront_queue *queue) skb = queue->tx_skbs[id]; queue->tx_skbs[id] = NULL; if (unlikely(!gnttab_end_foreign_access_ref( - queue->grant_tx_ref[id], GNTMAP_readonly))) { + queue->grant_tx_ref[id]))) { dev_alert(dev, "Grant still in use by backend domain\n"); goto err; @@ -1029,7 +1029,7 @@ static int xennet_get_responses(struct netfront_queue *queue, goto next; } - if (!gnttab_end_foreign_access_ref(ref, 0)) { + if (!gnttab_end_foreign_access_ref(ref)) { dev_alert(dev, "Grant still in use by backend domain\n"); queue->info->broken = true; @@ -1388,7 +1388,6 @@ static void xennet_release_tx_bufs(struct netfront_queue *queue) queue->tx_skbs[i] = NULL; get_page(queue->grant_tx_page[i]); gnttab_end_foreign_access(queue->grant_tx_ref[i], - GNTMAP_readonly, (unsigned long)page_address(queue->grant_tx_page[i])); queue->grant_tx_page[i] = NULL; queue->grant_tx_ref[i] = GRANT_INVALID_REF; @@ -1421,7 +1420,7 @@ static void 
xennet_release_rx_bufs(struct netfront_queue *queue) * foreign access is ended (which may be deferred). */ get_page(page); - gnttab_end_foreign_access(ref, 0, + gnttab_end_foreign_access(ref, (unsigned long)page_address(page)); queue->grant_rx_ref[id] = GRANT_INVALID_REF; @@ -1763,7 +1762,7 @@ static void xennet_end_access(int ref, void *page) { /* This frees the page as a side-effect */ if (ref != GRANT_INVALID_REF) - gnttab_end_foreign_access(ref, 0, (unsigned long)page); + gnttab_end_foreign_access(ref, (unsigned long)page); } static void xennet_disconnect_backend(struct netfront_info *info) @@ -1980,14 +1979,14 @@ static int setup_netfront(struct xenbus_device *dev, */ fail: if (queue->rx_ring_ref != GRANT_INVALID_REF) { - gnttab_end_foreign_access(queue->rx_ring_ref, 0, + gnttab_end_foreign_access(queue->rx_ring_ref, (unsigned long)rxs); queue->rx_ring_ref = GRANT_INVALID_REF; } else { free_page((unsigned long)rxs); } if (queue->tx_ring_ref != GRANT_INVALID_REF) { - gnttab_end_foreign_access(queue->tx_ring_ref, 0, + gnttab_end_foreign_access(queue->tx_ring_ref, (unsigned long)txs); queue->tx_ring_ref = GRANT_INVALID_REF; } else { diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index d2a7b9fd678b..3edc1565a27c 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -755,7 +755,7 @@ static void free_pdev(struct pcifront_device *pdev) xenbus_free_evtchn(pdev->xdev, pdev->evtchn); if (pdev->gnt_ref != INVALID_GRANT_REF) - gnttab_end_foreign_access(pdev->gnt_ref, 0 /* r/w page */, + gnttab_end_foreign_access(pdev->gnt_ref, (unsigned long)pdev->sh_info); else free_page((unsigned long)pdev->sh_info); diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c index 7f421600cb66..12109e4c73d4 100644 --- a/drivers/scsi/xen-scsifront.c +++ b/drivers/scsi/xen-scsifront.c @@ -757,7 +757,7 @@ static int scsifront_alloc_ring(struct vscsifrnt_info *info) free_irq: unbind_from_irqhandler(info->irq, info); free_gnttab: - 
gnttab_end_foreign_access(info->ring_ref, 0, + gnttab_end_foreign_access(info->ring_ref, (unsigned long)info->ring.sring); return err; @@ -766,7 +766,7 @@ free_gnttab: static void scsifront_free_ring(struct vscsifrnt_info *info) { unbind_from_irqhandler(info->irq, info); - gnttab_end_foreign_access(info->ring_ref, 0, + gnttab_end_foreign_access(info->ring_ref, (unsigned long)info->ring.sring); } diff --git a/drivers/usb/host/xen-hcd.c b/drivers/usb/host/xen-hcd.c index 19b8c7ed74cb..5f4a00df4f1c 100644 --- a/drivers/usb/host/xen-hcd.c +++ b/drivers/usb/host/xen-hcd.c @@ -1075,14 +1075,14 @@ static void xenhcd_destroy_rings(struct xenhcd_info *info) info->irq = 0; if (info->urb_ring_ref != GRANT_INVALID_REF) { - gnttab_end_foreign_access(info->urb_ring_ref, 0, + gnttab_end_foreign_access(info->urb_ring_ref, (unsigned long)info->urb_ring.sring); info->urb_ring_ref = GRANT_INVALID_REF; } info->urb_ring.sring = NULL; if (info->conn_ring_ref != GRANT_INVALID_REF) { - gnttab_end_foreign_access(info->conn_ring_ref, 0, + gnttab_end_foreign_access(info->conn_ring_ref, (unsigned long)info->conn_ring.sring); info->conn_ring_ref = GRANT_INVALID_REF; } diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index edb0acd0b832..4849f94372a4 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c @@ -192,7 +192,7 @@ static void __del_gref(struct gntalloc_gref *gref) if (gref->gref_id) { if (gref->page) { addr = (unsigned long)page_to_virt(gref->page); - gnttab_end_foreign_access(gref->gref_id, 0, addr); + gnttab_end_foreign_access(gref->gref_id, addr); } else gnttab_free_grant_reference(gref->gref_id); } diff --git a/drivers/xen/gntdev-dmabuf.c b/drivers/xen/gntdev-dmabuf.c index 12e380db7f55..d5bfd7b867fc 100644 --- a/drivers/xen/gntdev-dmabuf.c +++ b/drivers/xen/gntdev-dmabuf.c @@ -533,7 +533,7 @@ static void dmabuf_imp_end_foreign_access(u32 *refs, int count) for (i = 0; i < count; i++) if (refs[i] != GRANT_INVALID_REF) - gnttab_end_foreign_access(refs[i], 0, 
0UL); + gnttab_end_foreign_access(refs[i], 0UL); } static void dmabuf_imp_free_storage(struct gntdev_dmabuf *gntdev_dmabuf) diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 8963af8ec764..8ccccace2a4f 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -118,13 +118,12 @@ struct gnttab_ops { unsigned long frame, unsigned flags); /* * Stop granting a grant entry to domain for accessing. Ref parameter is - * reference of a grant entry whose grant access will be stopped, - * readonly is not in use in this function. If the grant entry is - * currently mapped for reading or writing, just return failure(==0) - * directly and don't tear down the grant access. Otherwise, stop grant - * access for this entry and return success(==1). + * reference of a grant entry whose grant access will be stopped. + * If the grant entry is currently mapped for reading or writing, just + * return failure(==0) directly and don't tear down the grant access. + * Otherwise, stop grant access for this entry and return success(==1). */ - int (*end_foreign_access_ref)(grant_ref_t ref, int readonly); + int (*end_foreign_access_ref)(grant_ref_t ref); /* * Read the frame number related to a given grant reference. */ @@ -270,7 +269,7 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, } EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); -static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly) +static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref) { u16 flags, nflags; u16 *pflags; @@ -286,7 +285,7 @@ static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly) return 1; } -static int gnttab_end_foreign_access_ref_v2(grant_ref_t ref, int readonly) +static int gnttab_end_foreign_access_ref_v2(grant_ref_t ref) { gnttab_shared.v2[ref].hdr.flags = 0; mb(); /* Concurrent access by hypervisor. 
*/ @@ -309,14 +308,14 @@ static int gnttab_end_foreign_access_ref_v2(grant_ref_t ref, int readonly) return 1; } -static inline int _gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) +static inline int _gnttab_end_foreign_access_ref(grant_ref_t ref) { - return gnttab_interface->end_foreign_access_ref(ref, readonly); + return gnttab_interface->end_foreign_access_ref(ref); } -int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) +int gnttab_end_foreign_access_ref(grant_ref_t ref) { - if (_gnttab_end_foreign_access_ref(ref, readonly)) + if (_gnttab_end_foreign_access_ref(ref)) return 1; pr_warn("WARNING: g.e. %#x still in use!\n", ref); return 0; @@ -336,7 +335,6 @@ static unsigned long gnttab_read_frame_v2(grant_ref_t ref) struct deferred_entry { struct list_head list; grant_ref_t ref; - bool ro; uint16_t warn_delay; struct page *page; }; @@ -360,7 +358,7 @@ static void gnttab_handle_deferred(struct timer_list *unused) break; list_del(&entry->list); spin_unlock_irqrestore(&gnttab_list_lock, flags); - if (_gnttab_end_foreign_access_ref(entry->ref, entry->ro)) { + if (_gnttab_end_foreign_access_ref(entry->ref)) { put_free_entry(entry->ref); pr_debug("freeing g.e. %#x (pfn %#lx)\n", entry->ref, page_to_pfn(entry->page)); @@ -386,8 +384,7 @@ static void gnttab_handle_deferred(struct timer_list *unused) spin_unlock_irqrestore(&gnttab_list_lock, flags); } -static void gnttab_add_deferred(grant_ref_t ref, bool readonly, - struct page *page) +static void gnttab_add_deferred(grant_ref_t ref, struct page *page) { struct deferred_entry *entry; gfp_t gfp = (in_atomic() || irqs_disabled()) ? 
GFP_ATOMIC : GFP_KERNEL; @@ -405,7 +402,6 @@ static void gnttab_add_deferred(grant_ref_t ref, bool readonly, unsigned long flags; entry->ref = ref; - entry->ro = readonly; entry->page = page; entry->warn_delay = 60; spin_lock_irqsave(&gnttab_list_lock, flags); @@ -423,7 +419,7 @@ static void gnttab_add_deferred(grant_ref_t ref, bool readonly, int gnttab_try_end_foreign_access(grant_ref_t ref) { - int ret = _gnttab_end_foreign_access_ref(ref, 0); + int ret = _gnttab_end_foreign_access_ref(ref); if (ret) put_free_entry(ref); @@ -432,15 +428,13 @@ int gnttab_try_end_foreign_access(grant_ref_t ref) } EXPORT_SYMBOL_GPL(gnttab_try_end_foreign_access); -void gnttab_end_foreign_access(grant_ref_t ref, int readonly, - unsigned long page) +void gnttab_end_foreign_access(grant_ref_t ref, unsigned long page) { if (gnttab_try_end_foreign_access(ref)) { if (page != 0) put_page(virt_to_page(page)); } else - gnttab_add_deferred(ref, readonly, - page ? virt_to_page(page) : NULL); + gnttab_add_deferred(ref, page ? 
virt_to_page(page) : NULL); } EXPORT_SYMBOL_GPL(gnttab_end_foreign_access); diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c index 0ca351f30a6d..e254ed19488f 100644 --- a/drivers/xen/pvcalls-front.c +++ b/drivers/xen/pvcalls-front.c @@ -238,8 +238,8 @@ static void pvcalls_front_free_map(struct pvcalls_bedata *bedata, spin_unlock(&bedata->socket_lock); for (i = 0; i < (1 << PVCALLS_RING_ORDER); i++) - gnttab_end_foreign_access(map->active.ring->ref[i], 0, 0); - gnttab_end_foreign_access(map->active.ref, 0, 0); + gnttab_end_foreign_access(map->active.ring->ref[i], 0); + gnttab_end_foreign_access(map->active.ref, 0); free_page((unsigned long)map->active.ring); kfree(map); @@ -1117,7 +1117,7 @@ static int pvcalls_front_remove(struct xenbus_device *dev) } } if (bedata->ref != -1) - gnttab_end_foreign_access(bedata->ref, 0, 0); + gnttab_end_foreign_access(bedata->ref, 0); kfree(bedata->ring.sring); kfree(bedata); xenbus_switch_state(dev, XenbusStateClosed); diff --git a/drivers/xen/xen-front-pgdir-shbuf.c b/drivers/xen/xen-front-pgdir-shbuf.c index 81b6e13fa5ec..a959dee21134 100644 --- a/drivers/xen/xen-front-pgdir-shbuf.c +++ b/drivers/xen/xen-front-pgdir-shbuf.c @@ -143,8 +143,7 @@ void xen_front_pgdir_shbuf_free(struct xen_front_pgdir_shbuf *buf) for (i = 0; i < buf->num_grefs; i++) if (buf->grefs[i] != GRANT_INVALID_REF) - gnttab_end_foreign_access(buf->grefs[i], - 0, 0UL); + gnttab_end_foreign_access(buf->grefs[i], 0UL); } kfree(buf->grefs); kfree(buf->directory); diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 9f9b1a297f0d..dfd5bf31cfb9 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -97,7 +97,7 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, * longer in use. Return 1 if the grant entry was freed, 0 if it is still in * use. 
*/ -int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly); +int gnttab_end_foreign_access_ref(grant_ref_t ref); /* * Eventually end access through the given grant reference, and once that @@ -114,8 +114,7 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly); * gnttab_end_foreign_access() are done via alloc_pages_exact() (and freeing * via free_pages_exact()) in order to avoid high order pages. */ -void gnttab_end_foreign_access(grant_ref_t ref, int readonly, - unsigned long page); +void gnttab_end_foreign_access(grant_ref_t ref, unsigned long page); /* * End access through the given grant reference, iff the grant entry is diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 01f8067994d6..77883b6788cd 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -279,13 +279,13 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv) grant_ref_t ref; ref = priv->rings[i].intf->ref[j]; - gnttab_end_foreign_access(ref, 0, 0); + gnttab_end_foreign_access(ref, 0); } free_pages_exact(priv->rings[i].data.in, 1UL << (priv->rings[i].intf->ring_order + XEN_PAGE_SHIFT)); } - gnttab_end_foreign_access(priv->rings[i].ref, 0, 0); + gnttab_end_foreign_access(priv->rings[i].ref, 0); free_page((unsigned long)priv->rings[i].intf); } kfree(priv->rings); @@ -353,10 +353,10 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev, out: if (bytes) { for (i--; i >= 0; i--) - gnttab_end_foreign_access(ring->intf->ref[i], 0, 0); + gnttab_end_foreign_access(ring->intf->ref[i], 0); free_pages_exact(bytes, 1UL << (order + XEN_PAGE_SHIFT)); } - gnttab_end_foreign_access(ring->ref, 0, 0); + gnttab_end_foreign_access(ring->ref, 0); free_page((unsigned long)ring->intf); return ret; } diff --git a/sound/xen/xen_snd_front_evtchnl.c b/sound/xen/xen_snd_front_evtchnl.c index 29e0f0ea67eb..ecbc294fc59a 100644 --- a/sound/xen/xen_snd_front_evtchnl.c +++ b/sound/xen/xen_snd_front_evtchnl.c @@ -168,7 +168,7 @@ static void evtchnl_free(struct 
xen_snd_front_info *front_info, /* End access and free the page. */ if (channel->gref != GRANT_INVALID_REF) - gnttab_end_foreign_access(channel->gref, 0, page); + gnttab_end_foreign_access(channel->gref, page); else free_page(page); From 309b517276f21dc7e6315c6637792f8bbfdf7ec4 Mon Sep 17 00:00:00 2001 From: jianchunfu Date: Mon, 14 Mar 2022 15:05:14 +0800 Subject: [PATCH 7/9] arch:x86:xen: Remove unnecessary assignment in xen_apic_read() In the function xen_apic_read(), the initialized value of 'ret' is unused because it will be assigned by the function HYPERVISOR_platform_op(), thus remove it. Signed-off-by: jianchunfu Link: https://lore.kernel.org/r/20220314070514.2602-1-jianchunfu@cmss.chinamobile.com Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- arch/x86/xen/apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index 0d46cc283cf5..62d34b6611c5 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -51,7 +51,7 @@ static u32 xen_apic_read(u32 reg) .interface_version = XENPF_INTERFACE_VERSION, .u.pcpu_info.xen_cpuid = 0, }; - int ret = 0; + int ret; /* Shouldn't need this as APIC is turned off for PV, and we only * get called on the bootup processor. But just in case. */ From ff32baa1f39b1adb519479a51e7acbcbfdd2206c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20K=C4=85dzio=C5=82ka?= Date: Wed, 23 Mar 2022 02:21:03 +0100 Subject: [PATCH 8/9] xen: don't hang when resuming PCI device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a xen domain with at least two VCPUs has a PCI device attached which enters the D3hot state during suspend, the kernel may hang while resuming, depending on the core on which an async resume task gets scheduled. 
The bug occurs because xen's do_suspend calls dpm_resume_start while only the timer of the boot CPU has been resumed (when xen_suspend called syscore_resume), before calling xen_arch_resume to resume the timers of the other CPUs. This breaks pci_dev_d3_sleep. Thus this patch moves the call to xen_arch_resume before the call to dpm_resume_start, eliminating the hangs and restoring the stack-like structure of the suspend/restore procedure. Signed-off-by: Jakub Kądziołka Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20220323012103.2537-1-niedzejkob@invisiblethingslab.com Signed-off-by: Boris Ostrovsky --- drivers/xen/manage.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 374d36de7f5a..3d5a384d65f7 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -141,6 +141,8 @@ static void do_suspend(void) raw_notifier_call_chain(&xen_resume_notifier, 0, NULL); + xen_arch_resume(); + dpm_resume_start(si.cancelled ? PMSG_THAW : PMSG_RESTORE); if (err) { @@ -148,8 +150,6 @@ static void do_suspend(void) si.cancelled = 1; } - xen_arch_resume(); - out_resume: if (!si.cancelled) xs_resume(); From de2ae403b4c0e79a3410e63bc448542fbb9f9bfc Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 25 Mar 2022 15:20:02 +0100 Subject: [PATCH 9/9] xen: fix is_xen_pmu() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit is_xen_pmu() is taking the cpu number as parameter, but it is not using it. Instead it just tests whether the Xen PMU initialization on the current cpu did succeed. As this test is done by checking a percpu pointer, preemption needs to be disabled in order to avoid switching the cpu while doing the test.
While resuming from suspend() this seems not to be the case: [ 88.082751] ACPI: PM: Low-level resume complete [ 88.087933] ACPI: EC: EC started [ 88.091464] ACPI: PM: Restoring platform NVS memory [ 88.097166] xen_acpi_processor: Uploading Xen processor PM info [ 88.103850] Enabling non-boot CPUs ... [ 88.108128] installing Xen timer for CPU 1 [ 88.112763] BUG: using smp_processor_id() in preemptible [00000000] code: systemd-sleep/7138 [ 88.122256] caller is is_xen_pmu+0x12/0x30 [ 88.126937] CPU: 0 PID: 7138 Comm: systemd-sleep Tainted: G W 5.16.13-2.fc32.qubes.x86_64 #1 [ 88.137939] Hardware name: Star Labs StarBook/StarBook, BIOS 7.97 03/21/2022 [ 88.145930] Call Trace: [ 88.148757] [ 88.151193] dump_stack_lvl+0x48/0x5e [ 88.155381] check_preemption_disabled+0xde/0xe0 [ 88.160641] is_xen_pmu+0x12/0x30 [ 88.164441] xen_smp_intr_init_pv+0x75/0x100 Fix that by replacing is_xen_pmu() by a simple boolean variable which reflects the Xen PMU initialization state on cpu 0. Modify xen_pmu_init() to return early in case it is being called for a cpu other than cpu 0 and the boolean variable not being set. 
Fixes: bf6dfb154d93 ("xen/PMU: PMU emulation code") Reported-by: Marek Marczykowski-Górecki Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20220325142002.31789-1-jgross@suse.com Signed-off-by: Boris Ostrovsky --- arch/x86/xen/pmu.c | 10 ++++------ arch/x86/xen/pmu.h | 3 ++- arch/x86/xen/smp_pv.c | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index 89dd6b1708b0..21ecbe754cb2 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c @@ -506,10 +506,7 @@ irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id) return ret; } -bool is_xen_pmu(int cpu) -{ - return (get_xenpmu_data() != NULL); -} +bool is_xen_pmu; void xen_pmu_init(int cpu) { @@ -520,7 +517,7 @@ void xen_pmu_init(int cpu) BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE); - if (xen_hvm_domain()) + if (xen_hvm_domain() || (cpu != 0 && !is_xen_pmu)) return; xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL); @@ -541,7 +538,8 @@ void xen_pmu_init(int cpu) per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data; per_cpu(xenpmu_shared, cpu).flags = 0; - if (cpu == 0) { + if (!is_xen_pmu) { + is_xen_pmu = true; perf_register_guest_info_callbacks(&xen_guest_cbs); xen_pmu_arch_init(); } diff --git a/arch/x86/xen/pmu.h b/arch/x86/xen/pmu.h index 0e83a160589b..65c58894fc79 100644 --- a/arch/x86/xen/pmu.h +++ b/arch/x86/xen/pmu.h @@ -4,6 +4,8 @@ #include +extern bool is_xen_pmu; + irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id); #ifdef CONFIG_XEN_HAVE_VPMU void xen_pmu_init(int cpu); @@ -12,7 +14,6 @@ void xen_pmu_finish(int cpu); static inline void xen_pmu_init(int cpu) {} static inline void xen_pmu_finish(int cpu) {} #endif -bool is_xen_pmu(int cpu); bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err); bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err); int pmu_apic_update(uint32_t reg); diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 
4a6019238ee7..688aa8b6ae29 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -129,7 +129,7 @@ int xen_smp_intr_init_pv(unsigned int cpu) per_cpu(xen_irq_work, cpu).irq = rc; per_cpu(xen_irq_work, cpu).name = callfunc_name; - if (is_xen_pmu(cpu)) { + if (is_xen_pmu) { pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu); rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu, xen_pmu_irq_handler,