From 52be93558a9b32f5294750c1394d81e31fe11d6d Mon Sep 17 00:00:00 2001 From: Shradha Gupta Date: Sun, 10 Jul 2022 21:11:47 -0700 Subject: [PATCH 1/4] Drivers: hv: vm_bus: Handle vmbus rescind calls after vmbus is suspended Add a flag to indicate that the vmbus is suspended so we should ignore any offer message. Add a new work_queue for rescind msg, so we could drain it along with other offer work_queues upon suspension. It was observed that in some hibernation related scenario testing, after vmbus_bus_suspend() we get rescind offer message for the vmbus. This would lead to processing of a rescind message for a channel that has already been suspended. Signed-off-by: Shradha Gupta Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20220711041147.GA5569@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net Signed-off-by: Wei Liu --- drivers/hv/connection.c | 11 +++++++++++ drivers/hv/hyperv_vmbus.h | 7 +++++++ drivers/hv/vmbus_drv.c | 27 +++++++++++++++++++-------- 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 6218bbf6863a..eca7afd366d6 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -171,6 +171,14 @@ int vmbus_connect(void) goto cleanup; } + vmbus_connection.rescind_work_queue = + create_workqueue("hv_vmbus_rescind"); + if (!vmbus_connection.rescind_work_queue) { + ret = -ENOMEM; + goto cleanup; + } + vmbus_connection.ignore_any_offer_msg = false; + vmbus_connection.handle_primary_chan_wq = create_workqueue("hv_pri_chan"); if (!vmbus_connection.handle_primary_chan_wq) { @@ -357,6 +365,9 @@ void vmbus_disconnect(void) if (vmbus_connection.handle_primary_chan_wq) destroy_workqueue(vmbus_connection.handle_primary_chan_wq); + if (vmbus_connection.rescind_work_queue) + destroy_workqueue(vmbus_connection.rescind_work_queue); + if (vmbus_connection.work_queue) destroy_workqueue(vmbus_connection.work_queue); diff --git a/drivers/hv/hyperv_vmbus.h 
b/drivers/hv/hyperv_vmbus.h index 4f5b824b16cf..dc673edf053c 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -261,6 +261,13 @@ struct vmbus_connection { struct workqueue_struct *work_queue; struct workqueue_struct *handle_primary_chan_wq; struct workqueue_struct *handle_sub_chan_wq; + struct workqueue_struct *rescind_work_queue; + + /* + * On suspension of the vmbus, the accumulated offer messages + * must be dropped. + */ + bool ignore_any_offer_msg; /* * The number of sub-channels and hv_sock channels that should be diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 547ae334e5cd..23c680d1a0f5 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1160,7 +1160,9 @@ void vmbus_on_msg_dpc(unsigned long data) * work queue: the RESCIND handler can not start to * run before the OFFER handler finishes. */ - schedule_work(&ctx->work); + if (vmbus_connection.ignore_any_offer_msg) + break; + queue_work(vmbus_connection.rescind_work_queue, &ctx->work); break; case CHANNELMSG_OFFERCHANNEL: @@ -1186,6 +1188,8 @@ void vmbus_on_msg_dpc(unsigned long data) * to the CPUs which will execute the offer & rescind * works by the time these works will start execution. */ + if (vmbus_connection.ignore_any_offer_msg) + break; atomic_inc(&vmbus_connection.offer_in_progress); fallthrough; @@ -2446,15 +2450,20 @@ acpi_walk_err: #ifdef CONFIG_PM_SLEEP static int vmbus_bus_suspend(struct device *dev) { + struct hv_per_cpu_context *hv_cpu = per_cpu_ptr( + hv_context.cpu_context, VMBUS_CONNECT_CPU); struct vmbus_channel *channel, *sc; - while (atomic_read(&vmbus_connection.offer_in_progress) != 0) { - /* - * We wait here until the completion of any channel - * offers that are currently in progress. 
- */ - usleep_range(1000, 2000); - } + tasklet_disable(&hv_cpu->msg_dpc); + vmbus_connection.ignore_any_offer_msg = true; + /* The tasklet_enable() takes care of providing a memory barrier */ + tasklet_enable(&hv_cpu->msg_dpc); + + /* Drain all the workqueues as we are in suspend */ + drain_workqueue(vmbus_connection.rescind_work_queue); + drain_workqueue(vmbus_connection.work_queue); + drain_workqueue(vmbus_connection.handle_primary_chan_wq); + drain_workqueue(vmbus_connection.handle_sub_chan_wq); mutex_lock(&vmbus_connection.channel_mutex); list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { @@ -2531,6 +2540,8 @@ static int vmbus_bus_resume(struct device *dev) size_t msgsize; int ret; + vmbus_connection.ignore_any_offer_msg = false; + /* * We only use the 'vmbus_proto_version', which was in use before * hibernation, to re-negotiate with the host. From 5182fecc4be8e4ae2e3a3d744b5562a3e74bf2b4 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Thu, 7 Jul 2022 19:49:31 -0500 Subject: [PATCH 2/4] PCI: hv: Take a const cpumask in hv_compose_msi_req_get_cpu() The cpumask that is passed to this function ultimately comes from irq_data_get_effective_affinity_mask(), which was recently changed to return a const cpumask pointer. The first level of functions handling the affinity mask were updated, but not this helper function. 
Fixes: 4d0b8298818b ("genirq: Return a const cpumask from irq_data_get_affinity_mask") Reported-by: kernel test robot Signed-off-by: Samuel Holland Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20220708004931.1672-1-samuel@sholland.org Signed-off-by: Wei Liu --- drivers/pci/controller/pci-hyperv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index db814f7b93ba..31508d32098c 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -1635,7 +1635,7 @@ static u32 hv_compose_msi_req_v1( * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten * by subsequent retarget in hv_irq_unmask(). */ -static int hv_compose_msi_req_get_cpu(struct cpumask *affinity) +static int hv_compose_msi_req_get_cpu(const struct cpumask *affinity) { return cpumask_first_and(affinity, cpu_online_mask); } From bf28462e20b56c1d7c8dbf82367cd43ffbc8a1f1 Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Sat, 21 May 2022 07:23:39 -0700 Subject: [PATCH 3/4] drm/hyperv : Removing the restruction of VRAM allocation with PCI bar size There were two different approaches getting used in this driver to allocate vram: 1. VRAM allocation from PCI region for Gen1 2. VRAM allocation from MMIO region for Gen2 The first approach limits the vram to PCI BAR size, which is 64 MB in most legacy systems. This limits the maximum resolution to be restricted to 64 MB size, and with recent conclusion on fbdev issue it was concluded to have a similar allocation strategy for both Gen1 and Gen2. This patch unifies the Gen1 and Gen2 vram allocation strategy.
Signed-off-by: Saurabh Sengar Reviewed-by: Deepak Rawat Link: https://lore.kernel.org/r/1653143019-20032-1-git-send-email-ssengar@linux.microsoft.com Signed-off-by: Wei Liu --- drivers/gpu/drm/hyperv/hyperv_drm_drv.c | 74 +------------------------ 1 file changed, 3 insertions(+), 71 deletions(-) diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c index 4a8941fa0815..6d11e7938c83 100644 --- a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c +++ b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c @@ -69,56 +69,7 @@ static struct pci_driver hyperv_pci_driver = { .remove = hyperv_pci_remove, }; -static int hyperv_setup_gen1(struct hyperv_drm_device *hv) -{ - struct drm_device *dev = &hv->dev; - struct pci_dev *pdev; - int ret; - - pdev = pci_get_device(PCI_VENDOR_ID_MICROSOFT, - PCI_DEVICE_ID_HYPERV_VIDEO, NULL); - if (!pdev) { - drm_err(dev, "Unable to find PCI Hyper-V video\n"); - return -ENODEV; - } - - ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &hyperv_driver); - if (ret) { - drm_err(dev, "Not able to remove boot fb\n"); - return ret; - } - - if (pci_request_region(pdev, 0, DRIVER_NAME) != 0) - drm_warn(dev, "Cannot request framebuffer, boot fb still active?\n"); - - if ((pdev->resource[0].flags & IORESOURCE_MEM) == 0) { - drm_err(dev, "Resource at bar 0 is not IORESOURCE_MEM\n"); - ret = -ENODEV; - goto error; - } - - hv->fb_base = pci_resource_start(pdev, 0); - hv->fb_size = pci_resource_len(pdev, 0); - if (!hv->fb_base) { - drm_err(dev, "Resource not available\n"); - ret = -ENODEV; - goto error; - } - - hv->fb_size = min(hv->fb_size, - (unsigned long)(hv->mmio_megabytes * 1024 * 1024)); - hv->vram = devm_ioremap(&pdev->dev, hv->fb_base, hv->fb_size); - if (!hv->vram) { - drm_err(dev, "Failed to map vram\n"); - ret = -ENOMEM; - } - -error: - pci_dev_put(pdev); - return ret; -} - -static int hyperv_setup_gen2(struct hyperv_drm_device *hv, +static int hyperv_setup_vram(struct hyperv_drm_device *hv, struct hv_device *hdev) { 
struct drm_device *dev = &hv->dev; @@ -181,10 +132,7 @@ static int hyperv_vmbus_probe(struct hv_device *hdev, goto err_hv_set_drv_data; } - if (efi_enabled(EFI_BOOT)) - ret = hyperv_setup_gen2(hv, hdev); - else - ret = hyperv_setup_gen1(hv); + ret = hyperv_setup_vram(hv, hdev); if (ret) goto err_vmbus_close; @@ -225,29 +173,13 @@ static int hyperv_vmbus_remove(struct hv_device *hdev) { struct drm_device *dev = hv_get_drvdata(hdev); struct hyperv_drm_device *hv = to_hv(dev); - struct pci_dev *pdev; drm_dev_unplug(dev); drm_atomic_helper_shutdown(dev); vmbus_close(hdev->channel); hv_set_drvdata(hdev, NULL); - /* - * Free allocated MMIO memory only on Gen2 VMs. - * On Gen1 VMs, release the PCI device - */ - if (efi_enabled(EFI_BOOT)) { - vmbus_free_mmio(hv->mem->start, hv->fb_size); - } else { - pdev = pci_get_device(PCI_VENDOR_ID_MICROSOFT, - PCI_DEVICE_ID_HYPERV_VIDEO, NULL); - if (!pdev) { - drm_err(dev, "Unable to find PCI Hyper-V video\n"); - return -ENODEV; - } - pci_release_region(pdev, 0); - pci_dev_put(pdev); - } + vmbus_free_mmio(hv->mem->start, hv->fb_size); return 0; } From d180e0a1be6cea2b7436fadbd1c96aecdf3c46c7 Mon Sep 17 00:00:00 2001 From: Alexander Atanasov Date: Mon, 11 Jul 2022 18:18:22 +0000 Subject: [PATCH 4/4] Drivers: hv: Create debugfs file with hyper-v balloon usage information Allow the guest to know how much it is ballooned by the host. It is useful when debugging out of memory conditions. When host gets back memory from the guest it is accounted as used memory in the guest but the guest has no way to know how much it is actually ballooned. Expose current state, flags and max possible memory to the guest. While at it - fix a 10+ years old typo.
Signed-off-by: Alexander Atanasov Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20220711181825.52318-1-alexander.atanasov@virtuozzo.com Signed-off-by: Wei Liu --- drivers/hv/hv_balloon.c | 135 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 129 insertions(+), 6 deletions(-) diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 91e8a72eee14..fdf6decacf06 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -248,7 +249,7 @@ struct dm_capabilities_resp_msg { * num_committed: Committed memory in pages. * page_file_size: The accumulated size of all page files * in the system in pages. - * zero_free: The nunber of zero and free pages. + * zero_free: The number of zero and free pages. * page_file_writes: The writes to the page file in pages. * io_diff: An indicator of file cache efficiency or page file activity, * calculated as File Cache Page Fault Count - Page Read Count. @@ -567,6 +568,11 @@ struct hv_dynmem_device { __u32 version; struct page_reporting_dev_info pr_dev_info; + + /* + * Maximum number of pages that can be hot_add-ed + */ + __u64 max_dynamic_page_count; }; static struct hv_dynmem_device dm_device; @@ -1078,6 +1084,7 @@ static void process_info(struct hv_dynmem_device *dm, struct dm_info_msg *msg) pr_info("Max. dynamic memory size: %llu MB\n", (*max_page_count) >> (20 - HV_HYP_PAGE_SHIFT)); + dm->max_dynamic_page_count = *max_page_count; } break; @@ -1116,6 +1123,19 @@ static unsigned long compute_balloon_floor(void) return min_pages; } +/* + * Compute total committed memory pages + */ + +static unsigned long get_pages_committed(struct hv_dynmem_device *dm) +{ + return vm_memory_committed() + + dm->num_pages_ballooned + + (dm->num_pages_added > dm->num_pages_onlined ? + dm->num_pages_added - dm->num_pages_onlined : 0) + + compute_balloon_floor(); +} + /* * Post our status as it relates memory pressure to the * host. 
Host expects the guests to post this status @@ -1157,11 +1177,7 @@ static void post_status(struct hv_dynmem_device *dm) * asking us to balloon them out. */ num_pages_avail = si_mem_available(); - num_pages_committed = vm_memory_committed() + - dm->num_pages_ballooned + - (dm->num_pages_added > dm->num_pages_onlined ? - dm->num_pages_added - dm->num_pages_onlined : 0) + - compute_balloon_floor(); + num_pages_committed = get_pages_committed(dm); trace_balloon_status(num_pages_avail, num_pages_committed, vm_memory_committed(), dm->num_pages_ballooned, @@ -1807,6 +1823,109 @@ out: return ret; } +/* + * DEBUGFS Interface + */ +#ifdef CONFIG_DEBUG_FS + +/** + * hv_balloon_debug_show - shows statistics of balloon operations. + * @f: pointer to the &struct seq_file. + * @offset: ignored. + * + * Provides the statistics that can be accessed in hv-balloon in the debugfs. + * + * Return: zero on success or an error code. + */ +static int hv_balloon_debug_show(struct seq_file *f, void *offset) +{ + struct hv_dynmem_device *dm = f->private; + char *sname; + + seq_printf(f, "%-22s: %u.%u\n", "host_version", + DYNMEM_MAJOR_VERSION(dm->version), + DYNMEM_MINOR_VERSION(dm->version)); + + seq_printf(f, "%-22s:", "capabilities"); + if (ballooning_enabled()) + seq_puts(f, " enabled"); + + if (hot_add_enabled()) + seq_puts(f, " hot_add"); + + seq_puts(f, "\n"); + + seq_printf(f, "%-22s: %u", "state", dm->state); + switch (dm->state) { + case DM_INITIALIZING: + sname = "Initializing"; + break; + case DM_INITIALIZED: + sname = "Initialized"; + break; + case DM_BALLOON_UP: + sname = "Balloon Up"; + break; + case DM_BALLOON_DOWN: + sname = "Balloon Down"; + break; + case DM_HOT_ADD: + sname = "Hot Add"; + break; + case DM_INIT_ERROR: + sname = "Error"; + break; + default: + sname = "Unknown"; + } + seq_printf(f, " (%s)\n", sname); + + /* HV Page Size */ + seq_printf(f, "%-22s: %ld\n", "page_size", HV_HYP_PAGE_SIZE); + + /* Pages added with hot_add */ + seq_printf(f, "%-22s: %u\n", 
"pages_added", dm->num_pages_added); + + /* pages that are "onlined"/used from pages_added */ + seq_printf(f, "%-22s: %u\n", "pages_onlined", dm->num_pages_onlined); + + /* pages we have given back to host */ + seq_printf(f, "%-22s: %u\n", "pages_ballooned", dm->num_pages_ballooned); + + seq_printf(f, "%-22s: %lu\n", "total_pages_committed", + get_pages_committed(dm)); + + seq_printf(f, "%-22s: %llu\n", "max_dynamic_page_count", + dm->max_dynamic_page_count); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(hv_balloon_debug); + +static void hv_balloon_debugfs_init(struct hv_dynmem_device *b) +{ + debugfs_create_file("hv-balloon", 0444, NULL, b, + &hv_balloon_debug_fops); +} + +static void hv_balloon_debugfs_exit(struct hv_dynmem_device *b) +{ + debugfs_remove(debugfs_lookup("hv-balloon", NULL)); +} + +#else + +static inline void hv_balloon_debugfs_init(struct hv_dynmem_device *b) +{ +} + +static inline void hv_balloon_debugfs_exit(struct hv_dynmem_device *b) +{ +} + +#endif /* CONFIG_DEBUG_FS */ + static int balloon_probe(struct hv_device *dev, const struct hv_vmbus_device_id *dev_id) { @@ -1854,6 +1973,8 @@ static int balloon_probe(struct hv_device *dev, goto probe_error; } + hv_balloon_debugfs_init(&dm_device); + return 0; probe_error: @@ -1879,6 +2000,8 @@ static int balloon_remove(struct hv_device *dev) if (dm->num_pages_ballooned != 0) pr_warn("Ballooned pages: %d\n", dm->num_pages_ballooned); + hv_balloon_debugfs_exit(dm); + cancel_work_sync(&dm->balloon_wrk.wrk); cancel_work_sync(&dm->ha_wrk.wrk);