forked from Minki/linux
powerpc fixes for 4.13 #4
The highlight is Ben's patch to work around a host killing bug when running KVM guests with the Radix MMU on Power9. See the long change log of that commit for more detail. And then three fairly minor fixes: - Fix of_node_put() underflow during reconfig remove, using old DLPAR tools. - Fix recently introduced ld version check with 64-bit LE-only toolchain. - Free the subpage_prot_table correctly, avoiding a memory leak. Thanks to: Aneesh Kumar K.V, Benjamin Herrenschmidt, Laurent Vivier. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQIcBAABAgAGBQJZezIGAAoJEFHr6jzI4aWAyukP/3mxlQ3WdQlYPByZ18cj6YL5 L0kbRxDgAosD9HzcOPqku1um1l7D6Gk5KZFZfsol7SasSmCZEwV4MbdTrAiRxS6K tF0V14hP/BDQeIKfxlnUepzfL8PY7CkO6sDAa6BjHXvBk4+POI+37uw9+2GEV8DY tA45fHjA/Zq3eUXsK0WTHIcd09lJXXarf9Tlx+YNZ+3yJ1OMfOji3CXgTkjtwYM9 XTtsKzsagY1zLwr5gXJu1P05+OGna2VmY6+Tn2lnf7scTFW3qYGF3eWRx71diiKS PpZCjqfzWF4+TDIGPoYIrkTE+ZKR0lyo6F38GYwae0cYZMs9pGPEpeNahd8Nun+v MLU6TnhNfOI40GEYgmOMNKHPJJLSx59Qr/GnrAi/h2nUEocuN76jzNbaeFBtj3jD /vrRTmVUtt1wGqORX7BK4YZFHqcHmZBCM7bQnxibJtLv7fMue0sk58fs2jAaZ1iD NacpzsXG7CWYgj6ApclVCYuF99dXTpjrw/WPxilXDg84Pxb7Dv1SpIvLb2T5Guq+ iqqavViRHP1ng+5/giIsOvF9CnsCzbRYLb0zZTP91nckMmYI6wX2zc56lofjcI5j Qc5o/aJvBk4vSM9sibBGEdrZJ1Vt16gGorQ5NZUurZund/cVqvQFhm/4Tvnc0cVN yvLNZI8am35pI9CCJ2im =Z6uF -----END PGP SIGNATURE----- Merge tag 'powerpc-4.13-4' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux Pull powerpc fixes from Michael Ellerman: "The highlight is Ben's patch to work around a host killing bug when running KVM guests with the Radix MMU on Power9. See the long change log of that commit for more detail. And then three fairly minor fixes: - fix of_node_put() underflow during reconfig remove, using old DLPAR tools. - fix recently introduced ld version check with 64-bit LE-only toolchain. - free the subpage_prot_table correctly, avoiding a memory leak. Thanks to: Aneesh Kumar K.V, Benjamin Herrenschmidt, Laurent Vivier" * tag 'powerpc-4.13-4' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: powerpc/mm/hash: Free the subpage_prot_table correctly powerpc/Makefile: Fix ld version check with 64-bit LE-only toolchain powerpc/pseries: Fix of_node_put() underflow during reconfig remove powerpc/mm/radix: Workaround prefetch issue with KVM
This commit is contained in:
commit
080012bad6
@ -59,6 +59,19 @@ machine-$(CONFIG_PPC64) += 64
|
||||
machine-$(CONFIG_CPU_LITTLE_ENDIAN) += le
|
||||
UTS_MACHINE := $(subst $(space),,$(machine-y))
|
||||
|
||||
# XXX This needs to be before we override LD below
|
||||
ifdef CONFIG_PPC32
|
||||
KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
|
||||
else
|
||||
ifeq ($(call ld-ifversion, -ge, 225000000, y),y)
|
||||
# Have the linker provide sfpr if possible.
|
||||
# There is a corresponding test in arch/powerpc/lib/Makefile
|
||||
KBUILD_LDFLAGS_MODULE += --save-restore-funcs
|
||||
else
|
||||
KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
|
||||
override LD += -EL
|
||||
LDEMULATION := lppc
|
||||
@ -190,18 +203,6 @@ else
|
||||
CHECKFLAGS += -D__LITTLE_ENDIAN__
|
||||
endif
|
||||
|
||||
ifdef CONFIG_PPC32
|
||||
KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
|
||||
else
|
||||
ifeq ($(call ld-ifversion, -ge, 225000000, y),y)
|
||||
# Have the linker provide sfpr if possible.
|
||||
# There is a corresponding test in arch/powerpc/lib/Makefile
|
||||
KBUILD_LDFLAGS_MODULE += --save-restore-funcs
|
||||
else
|
||||
KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_476FPE_ERR46),y)
|
||||
KBUILD_LDFLAGS_MODULE += --ppc476-workaround \
|
||||
-T $(srctree)/arch/powerpc/platforms/44x/ppc476_modules.lds
|
||||
|
@ -59,13 +59,14 @@ extern struct patb_entry *partition_tb;
|
||||
#define PRTS_MASK 0x1f /* process table size field */
|
||||
#define PRTB_MASK 0x0ffffffffffff000UL
|
||||
|
||||
/*
|
||||
* Limit process table to PAGE_SIZE table. This
|
||||
* also limit the max pid we can support.
|
||||
* MAX_USER_CONTEXT * 16 bytes of space.
|
||||
*/
|
||||
#define PRTB_SIZE_SHIFT (CONTEXT_BITS + 4)
|
||||
#define PRTB_ENTRIES (1ul << CONTEXT_BITS)
|
||||
/* Number of supported PID bits */
|
||||
extern unsigned int mmu_pid_bits;
|
||||
|
||||
/* Base PID to allocate from */
|
||||
extern unsigned int mmu_base_pid;
|
||||
|
||||
#define PRTB_SIZE_SHIFT (mmu_pid_bits + 4)
|
||||
#define PRTB_ENTRIES (1ul << mmu_pid_bits)
|
||||
|
||||
/*
|
||||
* Power9 currently only support 64K partition table size.
|
||||
|
@ -67,6 +67,12 @@ extern void __destroy_context(unsigned long context_id);
|
||||
extern void mmu_context_init(void);
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
|
||||
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm);
|
||||
#else
|
||||
static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { }
|
||||
#endif
|
||||
|
||||
extern void switch_cop(struct mm_struct *next);
|
||||
extern int use_cop(unsigned long acop, struct mm_struct *mm);
|
||||
extern void drop_cop(unsigned long acop, struct mm_struct *mm);
|
||||
@ -79,9 +85,13 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev,
|
||||
struct mm_struct *next,
|
||||
struct task_struct *tsk)
|
||||
{
|
||||
bool new_on_cpu = false;
|
||||
|
||||
/* Mark this context has been used on the new CPU */
|
||||
if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next)))
|
||||
if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) {
|
||||
cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
|
||||
new_on_cpu = true;
|
||||
}
|
||||
|
||||
/* 32-bit keeps track of the current PGDIR in the thread struct */
|
||||
#ifdef CONFIG_PPC32
|
||||
@ -109,6 +119,10 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev,
|
||||
if (cpu_has_feature(CPU_FTR_ALTIVEC))
|
||||
asm volatile ("dssall");
|
||||
#endif /* CONFIG_ALTIVEC */
|
||||
|
||||
if (new_on_cpu)
|
||||
radix_kvm_prefetch_workaround(next);
|
||||
|
||||
/*
|
||||
* The actual HW switching method differs between the various
|
||||
* sub architectures. Out of line for now
|
||||
|
@ -1443,12 +1443,14 @@ mc_cont:
|
||||
ori r6,r6,1
|
||||
mtspr SPRN_CTRLT,r6
|
||||
4:
|
||||
/* Read the guest SLB and save it away */
|
||||
/* Check if we are running hash or radix and store it in cr2 */
|
||||
ld r5, VCPU_KVM(r9)
|
||||
lbz r0, KVM_RADIX(r5)
|
||||
cmpwi r0, 0
|
||||
cmpwi cr2,r0,0
|
||||
|
||||
/* Read the guest SLB and save it away */
|
||||
li r5, 0
|
||||
bne 3f /* for radix, save 0 entries */
|
||||
bne cr2, 3f /* for radix, save 0 entries */
|
||||
lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
|
||||
mtctr r0
|
||||
li r6,0
|
||||
@ -1712,11 +1714,6 @@ BEGIN_FTR_SECTION_NESTED(96)
|
||||
END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
|
||||
22:
|
||||
/* Clear out SLB */
|
||||
li r5,0
|
||||
slbmte r5,r5
|
||||
slbia
|
||||
ptesync
|
||||
|
||||
/* Restore host values of some registers */
|
||||
BEGIN_FTR_SECTION
|
||||
@ -1737,10 +1734,56 @@ BEGIN_FTR_SECTION
|
||||
mtspr SPRN_PID, r7
|
||||
mtspr SPRN_IAMR, r8
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
||||
|
||||
#ifdef CONFIG_PPC_RADIX_MMU
|
||||
/*
|
||||
* Are we running hash or radix ?
|
||||
*/
|
||||
beq cr2,3f
|
||||
|
||||
/* Radix: Handle the case where the guest used an illegal PID */
|
||||
LOAD_REG_ADDR(r4, mmu_base_pid)
|
||||
lwz r3, VCPU_GUEST_PID(r9)
|
||||
lwz r5, 0(r4)
|
||||
cmpw cr0,r3,r5
|
||||
blt 2f
|
||||
|
||||
/*
|
||||
* Illegal PID, the HW might have prefetched and cached in the TLB
|
||||
* some translations for the LPID 0 / guest PID combination which
|
||||
* Linux doesn't know about, so we need to flush that PID out of
|
||||
* the TLB. First we need to set LPIDR to 0 so tlbiel applies to
|
||||
* the right context.
|
||||
*/
|
||||
li r0,0
|
||||
mtspr SPRN_LPID,r0
|
||||
isync
|
||||
|
||||
/* Then do a congruence class local flush */
|
||||
ld r6,VCPU_KVM(r9)
|
||||
lwz r0,KVM_TLB_SETS(r6)
|
||||
mtctr r0
|
||||
li r7,0x400 /* IS field = 0b01 */
|
||||
ptesync
|
||||
sldi r0,r3,32 /* RS has PID */
|
||||
1: PPC_TLBIEL(7,0,2,1,1) /* RIC=2, PRS=1, R=1 */
|
||||
addi r7,r7,0x1000
|
||||
bdnz 1b
|
||||
ptesync
|
||||
|
||||
2: /* Flush the ERAT on radix P9 DD1 guest exit */
|
||||
BEGIN_FTR_SECTION
|
||||
PPC_INVALIDATE_ERAT
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
|
||||
b 4f
|
||||
#endif /* CONFIG_PPC_RADIX_MMU */
|
||||
|
||||
/* Hash: clear out SLB */
|
||||
3: li r5,0
|
||||
slbmte r5,r5
|
||||
slbia
|
||||
ptesync
|
||||
4:
|
||||
/*
|
||||
* POWER7/POWER8 guest -> host partition switch code.
|
||||
* We don't have to lock against tlbies but we do
|
||||
|
@ -126,9 +126,10 @@ static int hash__init_new_context(struct mm_struct *mm)
|
||||
static int radix__init_new_context(struct mm_struct *mm)
|
||||
{
|
||||
unsigned long rts_field;
|
||||
int index;
|
||||
int index, max_id;
|
||||
|
||||
index = alloc_context_id(1, PRTB_ENTRIES - 1);
|
||||
max_id = (1 << mmu_pid_bits) - 1;
|
||||
index = alloc_context_id(mmu_base_pid, max_id);
|
||||
if (index < 0)
|
||||
return index;
|
||||
|
||||
|
@ -25,6 +25,9 @@
|
||||
|
||||
#include <trace/events/thp.h>
|
||||
|
||||
unsigned int mmu_pid_bits;
|
||||
unsigned int mmu_base_pid;
|
||||
|
||||
static int native_register_process_table(unsigned long base, unsigned long pg_sz,
|
||||
unsigned long table_size)
|
||||
{
|
||||
@ -261,11 +264,34 @@ static void __init radix_init_pgtable(void)
|
||||
for_each_memblock(memory, reg)
|
||||
WARN_ON(create_physical_mapping(reg->base,
|
||||
reg->base + reg->size));
|
||||
|
||||
/* Find out how many PID bits are supported */
|
||||
if (cpu_has_feature(CPU_FTR_HVMODE)) {
|
||||
if (!mmu_pid_bits)
|
||||
mmu_pid_bits = 20;
|
||||
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
||||
/*
|
||||
* When KVM is possible, we only use the top half of the
|
||||
* PID space to avoid collisions between host and guest PIDs
|
||||
* which can cause problems due to prefetch when exiting the
|
||||
* guest with AIL=3
|
||||
*/
|
||||
mmu_base_pid = 1 << (mmu_pid_bits - 1);
|
||||
#else
|
||||
mmu_base_pid = 1;
|
||||
#endif
|
||||
} else {
|
||||
/* The guest uses the bottom half of the PID space */
|
||||
if (!mmu_pid_bits)
|
||||
mmu_pid_bits = 19;
|
||||
mmu_base_pid = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate Partition table and process table for the
|
||||
* host.
|
||||
*/
|
||||
BUILD_BUG_ON_MSG((PRTB_SIZE_SHIFT > 36), "Process table size too large.");
|
||||
BUG_ON(PRTB_SIZE_SHIFT > 36);
|
||||
process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
|
||||
/*
|
||||
* Fill in the process table.
|
||||
@ -339,6 +365,12 @@ static int __init radix_dt_scan_page_sizes(unsigned long node,
|
||||
if (type == NULL || strcmp(type, "cpu") != 0)
|
||||
return 0;
|
||||
|
||||
/* Find MMU PID size */
|
||||
prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size);
|
||||
if (prop && size == 4)
|
||||
mmu_pid_bits = be32_to_cpup(prop);
|
||||
|
||||
/* Grab page size encodings */
|
||||
prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size);
|
||||
if (!prop)
|
||||
return 0;
|
||||
|
@ -36,7 +36,7 @@ void subpage_prot_free(struct mm_struct *mm)
|
||||
}
|
||||
}
|
||||
addr = 0;
|
||||
for (i = 0; i < 2; ++i) {
|
||||
for (i = 0; i < (TASK_SIZE_USER64 >> 43); ++i) {
|
||||
p = spt->protptrs[i];
|
||||
if (!p)
|
||||
continue;
|
||||
|
@ -12,12 +12,12 @@
|
||||
#include <linux/mm.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <asm/ppc-opcode.h>
|
||||
|
||||
#include <asm/ppc-opcode.h>
|
||||
#include <asm/tlb.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/trace.h>
|
||||
|
||||
#include <asm/cputhreads.h>
|
||||
|
||||
#define RIC_FLUSH_TLB 0
|
||||
#define RIC_FLUSH_PWC 1
|
||||
@ -454,3 +454,44 @@ void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
|
||||
else
|
||||
radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
||||
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
|
||||
{
|
||||
unsigned int pid = mm->context.id;
|
||||
|
||||
if (unlikely(pid == MMU_NO_CONTEXT))
|
||||
return;
|
||||
|
||||
/*
|
||||
* If this context hasn't run on that CPU before and KVM is
|
||||
* around, there's a slim chance that the guest on another
|
||||
* CPU just brought in obsolete translation into the TLB of
|
||||
* this CPU due to a bad prefetch using the guest PID on
|
||||
* the way into the hypervisor.
|
||||
*
|
||||
* We work around this here. If KVM is possible, we check if
|
||||
* any sibling thread is in KVM. If it is, the window may exist
|
||||
* and thus we flush that PID from the core.
|
||||
*
|
||||
* A potential future improvement would be to mark which PIDs
|
||||
* have never been used on the system and avoid it if the PID
|
||||
* is new and the process has no other cpumask bit set.
|
||||
*/
|
||||
if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
|
||||
int cpu = smp_processor_id();
|
||||
int sib = cpu_first_thread_sibling(cpu);
|
||||
bool flush = false;
|
||||
|
||||
for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
|
||||
if (sib == cpu)
|
||||
continue;
|
||||
if (paca[sib].kvm_hstate.kvm_vcpu)
|
||||
flush = true;
|
||||
}
|
||||
if (flush)
|
||||
_tlbiel_pid(pid, RIC_FLUSH_ALL);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
|
||||
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
|
||||
|
@ -82,7 +82,6 @@ static int pSeries_reconfig_remove_node(struct device_node *np)
|
||||
|
||||
of_detach_node(np);
|
||||
of_node_put(parent);
|
||||
of_node_put(np); /* Must decrement the refcount */
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user