commit 267560874c
Merge tag 'stable/for-linus-3.6-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen

Pull three xen bug-fixes from Konrad Rzeszutek Wilk:

 - Revert the kexec fix, which caused a race on non-kexec shutdowns.

 - Reuse existing P2M leafs instead of requiring the allocation of a
   large area of bootup virtual address space.

 - Fix an off-by-one error when adding PFNs for balloon pages.

* tag 'stable/for-linus-3.6-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
  xen/setup: Fix one-off error when adding for-balloon PFNs to the P2M.
  xen/p2m: Reuse existing P2M leafs if they are filled with 1:1 PFNs or INVALID.
  Revert "xen PVonHVM: move shared_info to MMIO before kexec"
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -31,7 +31,6 @@
 #include <linux/pci.h>
 #include <linux/gfp.h>
 #include <linux/memblock.h>
-#include <linux/syscore_ops.h>
 
 #include <xen/xen.h>
 #include <xen/interface/xen.h>
@@ -1470,130 +1469,38 @@ asmlinkage void __init xen_start_kernel(void)
 #endif
 }
 
-#ifdef CONFIG_XEN_PVHVM
-/*
- * The pfn containing the shared_info is located somewhere in RAM. This
- * will cause trouble if the current kernel is doing a kexec boot into a
- * new kernel. The new kernel (and its startup code) can not know where
- * the pfn is, so it can not reserve the page. The hypervisor will
- * continue to update the pfn, and as a result memory corruption occours
- * in the new kernel.
- *
- * One way to work around this issue is to allocate a page in the
- * xen-platform pci device's BAR memory range. But pci init is done very
- * late and the shared_info page is already in use very early to read
- * the pvclock. So moving the pfn from RAM to MMIO is racy because some
- * code paths on other vcpus could access the pfn during the small
- * window when the old pfn is moved to the new pfn. There is even a
- * small window were the old pfn is not backed by a mfn, and during that
- * time all reads return -1.
- *
- * Because it is not known upfront where the MMIO region is located it
- * can not be used right from the start in xen_hvm_init_shared_info.
- *
- * To minimise trouble the move of the pfn is done shortly before kexec.
- * This does not eliminate the race because all vcpus are still online
- * when the syscore_ops will be called. But hopefully there is no work
- * pending at this point in time. Also the syscore_op is run last which
- * reduces the risk further.
- */
-
-static struct shared_info *xen_hvm_shared_info;
-
-static void xen_hvm_connect_shared_info(unsigned long pfn)
+void __ref xen_hvm_init_shared_info(void)
 {
+        int cpu;
         struct xen_add_to_physmap xatp;
+        static struct shared_info *shared_info_page = 0;
 
+        if (!shared_info_page)
+                shared_info_page = (struct shared_info *)
+                        extend_brk(PAGE_SIZE, PAGE_SIZE);
         xatp.domid = DOMID_SELF;
         xatp.idx = 0;
         xatp.space = XENMAPSPACE_shared_info;
-        xatp.gpfn = pfn;
+        xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
         if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
                 BUG();
 
-}
-static void xen_hvm_set_shared_info(struct shared_info *sip)
-{
-        int cpu;
-
-        HYPERVISOR_shared_info = sip;
+        HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
 
         /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
          * page, we use it in the event channel upcall and in some pvclock
          * related functions. We don't need the vcpu_info placement
          * optimizations because we don't use any pv_mmu or pv_irq op on
          * HVM.
-         * When xen_hvm_set_shared_info is run at boot time only vcpu 0 is
-         * online but xen_hvm_set_shared_info is run at resume time too and
+         * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
+         * online but xen_hvm_init_shared_info is run at resume time too and
          * in that case multiple vcpus might be online. */
         for_each_online_cpu(cpu) {
                 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
         }
 }
 
-/* Reconnect the shared_info pfn to a mfn */
-void xen_hvm_resume_shared_info(void)
-{
-        xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT);
-}
-
-#ifdef CONFIG_KEXEC
-static struct shared_info *xen_hvm_shared_info_kexec;
-static unsigned long xen_hvm_shared_info_pfn_kexec;
-
-/* Remember a pfn in MMIO space for kexec reboot */
-void __devinit xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn)
-{
-        xen_hvm_shared_info_kexec = sip;
-        xen_hvm_shared_info_pfn_kexec = pfn;
-}
-
-static void xen_hvm_syscore_shutdown(void)
-{
-        struct xen_memory_reservation reservation = {
-                .domid = DOMID_SELF,
-                .nr_extents = 1,
-        };
-        unsigned long prev_pfn;
-        int rc;
-
-        if (!xen_hvm_shared_info_kexec)
-                return;
-
-        prev_pfn = __pa(xen_hvm_shared_info) >> PAGE_SHIFT;
-        set_xen_guest_handle(reservation.extent_start, &prev_pfn);
-
-        /* Move pfn to MMIO, disconnects previous pfn from mfn */
-        xen_hvm_connect_shared_info(xen_hvm_shared_info_pfn_kexec);
-
-        /* Update pointers, following hypercall is also a memory barrier */
-        xen_hvm_set_shared_info(xen_hvm_shared_info_kexec);
-
-        /* Allocate new mfn for previous pfn */
-        do {
-                rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
-                if (rc == 0)
-                        msleep(123);
-        } while (rc == 0);
-
-        /* Make sure the previous pfn is really connected to a (new) mfn */
-        BUG_ON(rc != 1);
-}
-
-static struct syscore_ops xen_hvm_syscore_ops = {
-        .shutdown = xen_hvm_syscore_shutdown,
-};
-#endif
-
-/* Use a pfn in RAM, may move to MMIO before kexec. */
-static void __init xen_hvm_init_shared_info(void)
-{
-        /* Remember pointer for resume */
-        xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE);
-        xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT);
-        xen_hvm_set_shared_info(xen_hvm_shared_info);
-}
-
+#ifdef CONFIG_XEN_PVHVM
 static void __init init_hvm_pv_info(void)
 {
         int major, minor;
@@ -1644,9 +1551,6 @@ static void __init xen_hvm_guest_init(void)
         init_hvm_pv_info();
 
         xen_hvm_init_shared_info();
-#ifdef CONFIG_KEXEC
-        register_syscore_ops(&xen_hvm_syscore_ops);
-#endif
 
         if (xen_feature(XENFEAT_hvm_callback_vector))
                 xen_have_vector_callback = 1;
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -196,9 +196,11 @@ RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3);
 
 /* When we populate back during bootup, the amount of pages can vary. The
  * max we have is seen is 395979, but that does not mean it can't be more.
- * But some machines can have 3GB I/O holes even. So lets reserve enough
- * for 4GB of I/O and E820 holes. */
-RESERVE_BRK(p2m_populated, PMD_SIZE * 4);
+ * Some machines can have 3GB I/O holes even. With early_can_reuse_p2m_middle
+ * it can re-use Xen provided mfn_list array, so we only need to allocate at
+ * most three P2M top nodes. */
+RESERVE_BRK(p2m_populated, PAGE_SIZE * 3);
 
 static inline unsigned p2m_top_index(unsigned long pfn)
 {
         BUG_ON(pfn >= MAX_P2M_PFN);
@@ -575,12 +577,99 @@ static bool __init early_alloc_p2m(unsigned long pfn)
         }
         return true;
 }
 
+/*
+ * Skim over the P2M tree looking at pages that are either filled with
+ * INVALID_P2M_ENTRY or with 1:1 PFNs. If found, re-use that page and
+ * replace the P2M leaf with a p2m_missing or p2m_identity.
+ * Stick the old page in the new P2M tree location.
+ */
+bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_mfn)
+{
+        unsigned topidx;
+        unsigned mididx;
+        unsigned ident_pfns;
+        unsigned inv_pfns;
+        unsigned long *p2m;
+        unsigned long *mid_mfn_p;
+        unsigned idx;
+        unsigned long pfn;
+
+        /* We only look when this entails a P2M middle layer */
+        if (p2m_index(set_pfn))
+                return false;
+
+        for (pfn = 0; pfn <= MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) {
+                topidx = p2m_top_index(pfn);
+
+                if (!p2m_top[topidx])
+                        continue;
+
+                if (p2m_top[topidx] == p2m_mid_missing)
+                        continue;
+
+                mididx = p2m_mid_index(pfn);
+                p2m = p2m_top[topidx][mididx];
+                if (!p2m)
+                        continue;
+
+                if ((p2m == p2m_missing) || (p2m == p2m_identity))
+                        continue;
+
+                if ((unsigned long)p2m == INVALID_P2M_ENTRY)
+                        continue;
+
+                ident_pfns = 0;
+                inv_pfns = 0;
+                for (idx = 0; idx < P2M_PER_PAGE; idx++) {
+                        /* IDENTITY_PFNs are 1:1 */
+                        if (p2m[idx] == IDENTITY_FRAME(pfn + idx))
+                                ident_pfns++;
+                        else if (p2m[idx] == INVALID_P2M_ENTRY)
+                                inv_pfns++;
+                        else
+                                break;
+                }
+                if ((ident_pfns == P2M_PER_PAGE) || (inv_pfns == P2M_PER_PAGE))
+                        goto found;
+        }
+        return false;
+found:
+        /* Found one, replace old with p2m_identity or p2m_missing */
+        p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing);
+        /* And the other for save/restore.. */
+        mid_mfn_p = p2m_top_mfn_p[topidx];
+        /* NOTE: Even if it is a p2m_identity it should still be point to
+         * a page filled with INVALID_P2M_ENTRY entries. */
+        mid_mfn_p[mididx] = virt_to_mfn(p2m_missing);
+
+        /* Reset where we want to stick the old page in. */
+        topidx = p2m_top_index(set_pfn);
+        mididx = p2m_mid_index(set_pfn);
+
+        /* This shouldn't happen */
+        if (WARN_ON(p2m_top[topidx] == p2m_mid_missing))
+                early_alloc_p2m(set_pfn);
+
+        if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing))
+                return false;
+
+        p2m_init(p2m);
+        p2m_top[topidx][mididx] = p2m;
+        mid_mfn_p = p2m_top_mfn_p[topidx];
+        mid_mfn_p[mididx] = virt_to_mfn(p2m);
+
+        return true;
+}
 bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 {
         if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
                 if (!early_alloc_p2m(pfn))
                         return false;
 
+                if (early_can_reuse_p2m_middle(pfn, mfn))
+                        return __set_phys_to_machine(pfn, mfn);
+
                 if (!early_alloc_p2m_middle(pfn, false /* boundary crossover OK!*/))
                         return false;
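The new early_can_reuse_p2m_middle() above rests on one observation: a P2M leaf page whose entries are all 1:1 identity frames, or all INVALID_P2M_ENTRY, encodes nothing beyond its kind, so the tree can point at the shared read-only p2m_identity or p2m_missing page instead and the backing page can be recycled for the populated-back region. A minimal standalone sketch of that uniformity test follows; P2M_PER_PAGE, INVALID_ENTRY and IDENTITY() here are simplified stand-ins, not the kernel's definitions, and this is an illustration rather than the kernel code:

#include <stdbool.h>
#include <stddef.h>

#define P2M_PER_PAGE    512             /* 4K leaf / 8-byte entries (stand-in) */
#define INVALID_ENTRY   (~0UL)          /* stand-in for INVALID_P2M_ENTRY */
#define IDENTITY(pfn)   ((pfn) | (1UL << 62))   /* stand-in for IDENTITY_FRAME() */

/* True when every slot of the leaf covering base_pfn is either the
 * identity mapping of its own pfn or the invalid marker; such a leaf
 * is redundant and its page may be reused for another P2M range. */
static bool leaf_is_reusable(const unsigned long *leaf,
                             unsigned long base_pfn, bool *all_identity)
{
        size_t ident = 0, inv = 0, idx;

        for (idx = 0; idx < P2M_PER_PAGE; idx++) {
                if (leaf[idx] == IDENTITY(base_pfn + idx))
                        ident++;
                else if (leaf[idx] == INVALID_ENTRY)
                        inv++;
                else
                        return false;   /* leaf holds a real translation */
        }
        *all_identity = (ident == P2M_PER_PAGE);
        /* A mix of identity and invalid slots is not uniform enough. */
        return ident == P2M_PER_PAGE || inv == P2M_PER_PAGE;
}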
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -78,9 +78,16 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
         memblock_reserve(start, size);
 
         xen_max_p2m_pfn = PFN_DOWN(start + size);
+        for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
+                unsigned long mfn = pfn_to_mfn(pfn);
+
+                if (WARN(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn))
+                        continue;
+                WARN(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n",
+                        pfn, mfn);
 
-        for (pfn = PFN_DOWN(start); pfn <= xen_max_p2m_pfn; pfn++)
                 __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+        }
 }
 
 static unsigned long __init xen_do_chunk(unsigned long start,
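The setup.c hunk above carries the one-off (off-by-one) fix itself: xen_max_p2m_pfn is PFN_DOWN(start + size), i.e. already the first frame past the ballooned-out region, so the old "<=" bound invalidated one PFN too many. A small self-contained check of the boundary arithmetic, with illustrative values rather than kernel code:

#include <assert.h>

#define PAGE_SHIFT      12
#define PFN_DOWN(x)     ((x) >> PAGE_SHIFT)

int main(void)
{
        /* Hypothetical region: four 4K pages starting at 1 MiB. */
        unsigned long start = 0x100000, size = 4 * 4096;
        unsigned long first = PFN_DOWN(start);          /* 0x100 */
        unsigned long end = PFN_DOWN(start + size);     /* 0x104, first pfn past the region */
        unsigned long pfn, n = 0;

        /* Fixed bound: '<' visits exactly pfns 0x100..0x103. */
        for (pfn = first; pfn < end; pfn++)
                n++;
        assert(n == 4);

        /* The old '<=' bound would also have touched pfn 0x104 and
         * clobbered the P2M entry of a frame outside the region. */
        return 0;
}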
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled)
 {
 #ifdef CONFIG_XEN_PVHVM
         int cpu;
-        xen_hvm_resume_shared_info();
+        xen_hvm_init_shared_info();
         xen_callback_vector();
         xen_unplug_emulated_devices();
         if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -41,7 +41,7 @@ void xen_enable_syscall(void);
 void xen_vcpu_restore(void);
 
 void xen_callback_vector(void);
-void xen_hvm_resume_shared_info(void);
+void xen_hvm_init_shared_info(void);
 void xen_unplug_emulated_devices(void);
 
 void __init xen_build_dynamic_phys_to_machine(void);
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -101,19 +101,6 @@ static int platform_pci_resume(struct pci_dev *pdev)
         return 0;
 }
 
-static void __devinit prepare_shared_info(void)
-{
-#ifdef CONFIG_KEXEC
-        unsigned long addr;
-        struct shared_info *hvm_shared_info;
-
-        addr = alloc_xen_mmio(PAGE_SIZE);
-        hvm_shared_info = ioremap(addr, PAGE_SIZE);
-        memset(hvm_shared_info, 0, PAGE_SIZE);
-        xen_hvm_prepare_kexec(hvm_shared_info, addr >> PAGE_SHIFT);
-#endif
-}
-
 static int __devinit platform_pci_init(struct pci_dev *pdev,
                                        const struct pci_device_id *ent)
 {
@@ -151,8 +138,6 @@ static int __devinit platform_pci_init(struct pci_dev *pdev,
         platform_mmio = mmio_addr;
         platform_mmiolen = mmio_len;
 
-        prepare_shared_info();
-
         if (!xen_have_vector_callback) {
                 ret = xen_allocate_irq(pdev);
                 if (ret) {
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -58,8 +58,6 @@ void notify_remote_via_irq(int irq);
 
 void xen_irq_resume(void);
 
-void xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn);
-
 /* Clear an irq's pending state, in preparation for polling on it */
 void xen_clear_irq_pending(int irq);
 void xen_set_irq_pending(int irq);