diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index c43f5bb55ac8..8f460bdd4be1 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1698,6 +1698,7 @@ config ARCH_WANT_GENERAL_HUGETLB
 config ARM_MODULE_PLTS
 	bool "Use PLTs to allow module memory to spill over into vmalloc area"
 	depends on MODULES
+	default y
 	help
 	  Allocate PLTs when loading modules so that jumps and calls whose
 	  targets are too far away for their relative offsets to be encoded
@@ -1708,7 +1709,8 @@ config ARM_MODULE_PLTS
 	  rounding up to page size, the actual memory footprint is usually
 	  the same.
 
-	  Say y if you are getting out of memory errors while loading modules
+	  Disabling this is usually safe for small single-platform
+	  configurations. If unsure, say y.
 
 source "mm/Kconfig"
 
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index f32a5468d79f..1dc4045e1af6 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -106,7 +106,7 @@ tune-$(CONFIG_CPU_V6K)		=$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm)
 tune-y := $(tune-y)
 
 ifeq ($(CONFIG_AEABI),y)
-CFLAGS_ABI	:=-mabi=aapcs-linux -mno-thumb-interwork -mfpu=vfp
+CFLAGS_ABI	:=-mabi=aapcs-linux -mfpu=vfp
 else
 CFLAGS_ABI	:=$(call cc-option,-mapcs-32,-mabi=apcs-gnu) $(call cc-option,-mno-thumb-interwork,)
 endif
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 6a4e7341ecd3..a3c5fbcad4ab 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -113,7 +113,7 @@ CFLAGS_fdt_ro.o := $(nossp_flags)
 CFLAGS_fdt_rw.o := $(nossp_flags)
 CFLAGS_fdt_wip.o := $(nossp_flags)
 
-ccflags-y := -fpic -mno-single-pic-base -fno-builtin -I$(obj)
+ccflags-y := -fpic $(call cc-option,-mno-single-pic-base,) -fno-builtin -I$(obj)
 asflags-y := -DZIMAGE
 
 # Supply kernel BSS size to the decompressor via a linker symbol.
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 9342904cccca..0cd4dccbae78 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -447,6 +447,14 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 	.size \name , . - \name
 	.endm
 
+	.macro	csdb
+#ifdef CONFIG_THUMB2_KERNEL
+	.inst.w	0xf3af8014
+#else
+	.inst	0xe320f014
+#endif
+	.endm
+
 	.macro check_uaccess, addr:req, size:req, limit:req, tmp:req, bad:req
 #ifndef CONFIG_CPU_USE_DOMAINS
 	adds	\tmp, \addr, #\size - 1
diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index 40f5c410fd8c..69772e742a0a 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -17,6 +17,12 @@
 #define isb(option) __asm__ __volatile__ ("isb " #option : : : "memory")
 #define dsb(option) __asm__ __volatile__ ("dsb " #option : : : "memory")
 #define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory")
+#ifdef CONFIG_THUMB2_KERNEL
+#define CSDB	".inst.w 0xf3af8014"
+#else
+#define CSDB	".inst	0xe320f014"
+#endif
+#define csdb() __asm__ __volatile__(CSDB : : : "memory")
 #elif defined(CONFIG_CPU_XSC3) || __LINUX_ARM_ARCH__ == 6
 #define isb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \
 				    : : "r" (0) : "memory")
@@ -37,6 +43,13 @@
 #define dmb(x) __asm__ __volatile__ ("" : : : "memory")
 #endif
 
+#ifndef CSDB
+#define CSDB
+#endif
+#ifndef csdb
+#define csdb()
+#endif
+
 #ifdef CONFIG_ARM_HEAVY_MB
 extern void (*soc_mb)(void);
 extern void arm_heavy_mb(void);
@@ -63,6 +76,25 @@ extern void arm_heavy_mb(void);
 #define __smp_rmb()	__smp_mb()
 #define __smp_wmb()	dmb(ishst)
 
+#ifdef CONFIG_CPU_SPECTRE
+static inline unsigned long array_index_mask_nospec(unsigned long idx,
+						    unsigned long sz)
+{
+	unsigned long mask;
+
+	asm volatile(
+		"cmp	%1, %2\n"
+	"	sbc	%0, %1, %1\n"
+	CSDB
+	: "=r" (mask)
+	: "r" (idx), "Ir" (sz)
+	: "cc");
+
+	return mask;
+}
+#define array_index_mask_nospec array_index_mask_nospec
+#endif
+
 #include <asm-generic/barrier.h>
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/arm/include/asm/bugs.h b/arch/arm/include/asm/bugs.h
index a97f1ea708d1..73a99c72a930 100644
--- a/arch/arm/include/asm/bugs.h
+++ b/arch/arm/include/asm/bugs.h
@@ -10,12 +10,14 @@
 #ifndef __ASM_BUGS_H
 #define __ASM_BUGS_H
 
-#ifdef CONFIG_MMU
 extern void check_writebuffer_bugs(void);
 
-#define check_bugs() check_writebuffer_bugs()
+#ifdef CONFIG_MMU
+extern void check_bugs(void);
+extern void check_other_bugs(void);
 #else
 #define check_bugs() do { } while (0)
+#define check_other_bugs() do { } while (0)
 #endif
 
 #endif
diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h
index 4c9fa72b59f5..07e27f212dc7 100644
--- a/arch/arm/include/asm/cp15.h
+++ b/arch/arm/include/asm/cp15.h
@@ -65,6 +65,9 @@
 #define __write_sysreg(v, r, w, c, t)	asm volatile(w " " c : : "r" ((t)(v)))
 #define write_sysreg(v, ...)		__write_sysreg(v, __VA_ARGS__)
 
+#define BPIALL				__ACCESS_CP15(c7, 0, c5, 6)
+#define ICIALLU				__ACCESS_CP15(c7, 0, c5, 0)
+
 extern unsigned long cr_alignment;	/* defined in entry-armv.S */
 
 static inline unsigned long get_cr(void)
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index cb546425da8a..26021980504d 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -77,8 +77,16 @@
 #define ARM_CPU_PART_CORTEX_A12		0x4100c0d0
 #define ARM_CPU_PART_CORTEX_A17		0x4100c0e0
 #define ARM_CPU_PART_CORTEX_A15		0x4100c0f0
+#define ARM_CPU_PART_CORTEX_A53		0x4100d030
+#define ARM_CPU_PART_CORTEX_A57		0x4100d070
+#define ARM_CPU_PART_CORTEX_A72		0x4100d080
+#define ARM_CPU_PART_CORTEX_A73		0x4100d090
+#define ARM_CPU_PART_CORTEX_A75		0x4100d0a0
 #define ARM_CPU_PART_MASK		0xff00fff0
 
+/* Broadcom cores */
+#define ARM_CPU_PART_BRAHMA_B15		0x420000f0
+
 /* DEC implemented cores */
 #define ARM_CPU_PART_SA1100		0x4400a110
 
diff --git a/arch/arm/include/asm/kgdb.h b/arch/arm/include/asm/kgdb.h
index 3b73fdcf3627..8de1100d1067 100644
--- a/arch/arm/include/asm/kgdb.h
+++ b/arch/arm/include/asm/kgdb.h
@@ -77,7 +77,7 @@ extern int kgdb_fault_expected;
 
 #define KGDB_MAX_NO_CPUS	1
 #define BUFMAX			400
-#define NUMREGBYTES		(DBG_MAX_REG_NUM << 2)
+#define NUMREGBYTES		(GDB_MAX_REGS << 2)
 #define NUMCRITREGBYTES		(32 << 2)
 
 #define _R0			0
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 5a953ecb0d78..231e87ad45d5 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -61,8 +61,6 @@ struct kvm_vcpu;
 extern char __kvm_hyp_init[];
 extern char __kvm_hyp_init_end[];
 
-extern char __kvm_hyp_vector[];
-
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index c7c28c885a19..343fc9e6f78d 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -21,6 +21,7 @@
 
 #include <linux/types.h>
 #include <linux/kvm_types.h>
+#include <asm/cputype.h>
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
@@ -311,8 +312,17 @@ static inline void kvm_arm_vhe_guest_exit(void) {}
 
 static inline bool kvm_arm_harden_branch_predictor(void)
 {
-	/* No way to detect it yet, pretend it is not there. */
-	return false;
+	switch(read_cpuid_part()) {
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+	case ARM_CPU_PART_BRAHMA_B15:
+	case ARM_CPU_PART_CORTEX_A12:
+	case ARM_CPU_PART_CORTEX_A15:
+	case ARM_CPU_PART_CORTEX_A17:
+		return true;
+#endif
+	default:
+		return false;
+	}
 }
 
 static inline void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index f675162663f0..c94d291fd1a8 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -327,7 +327,28 @@ static inline int kvm_read_guest_lock(struct kvm *kvm,
 
 static inline void *kvm_get_hyp_vector(void)
 {
-	return kvm_ksym_ref(__kvm_hyp_vector);
+	switch(read_cpuid_part()) {
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+	case ARM_CPU_PART_CORTEX_A12:
+	case ARM_CPU_PART_CORTEX_A17:
+	{
+		extern char __kvm_hyp_vector_bp_inv[];
+		return kvm_ksym_ref(__kvm_hyp_vector_bp_inv);
+	}
+
+	case ARM_CPU_PART_BRAHMA_B15:
+	case ARM_CPU_PART_CORTEX_A15:
+	{
+		extern char __kvm_hyp_vector_ic_inv[];
+		return kvm_ksym_ref(__kvm_hyp_vector_ic_inv);
+	}
+#endif
+	default:
+	{
+		extern char __kvm_hyp_vector[];
+		return kvm_ksym_ref(__kvm_hyp_vector);
+	}
+	}
 }
 
 static inline int kvm_map_vectors(void)
diff --git a/arch/arm/include/asm/mpu.h b/arch/arm/include/asm/mpu.h
index 6d1491c8ee22..5e088c83d3d8 100644
--- a/arch/arm/include/asm/mpu.h
+++ b/arch/arm/include/asm/mpu.h
@@ -12,60 +12,101 @@
 /* ID_MMFR0 data relevant to MPU */
 #define MMFR0_PMSA		(0xF << 4)
 #define MMFR0_PMSAv7		(3 << 4)
+#define MMFR0_PMSAv8		(4 << 4)
 
 /* MPU D/I Size Register fields */
-#define MPU_RSR_SZ		1
-#define MPU_RSR_EN		0
-#define MPU_RSR_SD		8
+#define PMSAv7_RSR_SZ		1
+#define PMSAv7_RSR_EN		0
+#define PMSAv7_RSR_SD		8
 
 /* Number of subregions (SD) */
-#define MPU_NR_SUBREGS		8
-#define MPU_MIN_SUBREG_SIZE	256
+#define PMSAv7_NR_SUBREGS	8
+#define PMSAv7_MIN_SUBREG_SIZE	256
 
 /* The D/I RSR value for an enabled region spanning the whole of memory */
-#define MPU_RSR_ALL_MEM		63
+#define PMSAv7_RSR_ALL_MEM	63
 
 /* Individual bits in the DR/IR ACR */
-#define MPU_ACR_XN		(1 << 12)
-#define MPU_ACR_SHARED		(1 << 2)
+#define PMSAv7_ACR_XN		(1 << 12)
+#define PMSAv7_ACR_SHARED	(1 << 2)
 
 /* C, B and TEX[2:0] bits only have semantic meanings when grouped */
-#define MPU_RGN_CACHEABLE	0xB
-#define MPU_RGN_SHARED_CACHEABLE (MPU_RGN_CACHEABLE | MPU_ACR_SHARED)
-#define MPU_RGN_STRONGLY_ORDERED 0
+#define PMSAv7_RGN_CACHEABLE		0xB
+#define PMSAv7_RGN_SHARED_CACHEABLE	(PMSAv7_RGN_CACHEABLE | PMSAv7_ACR_SHARED)
+#define PMSAv7_RGN_STRONGLY_ORDERED	0
 
 /* Main region should only be shared for SMP */
 #ifdef CONFIG_SMP
-#define MPU_RGN_NORMAL		(MPU_RGN_CACHEABLE | MPU_ACR_SHARED)
+#define PMSAv7_RGN_NORMAL	(PMSAv7_RGN_CACHEABLE | PMSAv7_ACR_SHARED)
 #else
-#define MPU_RGN_NORMAL		MPU_RGN_CACHEABLE
+#define PMSAv7_RGN_NORMAL	PMSAv7_RGN_CACHEABLE
 #endif
 
 /* Access permission bits of ACR (only define those that we use)*/
-#define MPU_AP_PL1RO_PL0NA	(0x5 << 8)
-#define MPU_AP_PL1RW_PL0RW	(0x3 << 8)
-#define MPU_AP_PL1RW_PL0R0	(0x2 << 8)
-#define MPU_AP_PL1RW_PL0NA	(0x1 << 8)
+#define PMSAv7_AP_PL1RO_PL0NA	(0x5 << 8)
+#define PMSAv7_AP_PL1RW_PL0RW	(0x3 << 8)
+#define PMSAv7_AP_PL1RW_PL0R0	(0x2 << 8)
+#define PMSAv7_AP_PL1RW_PL0NA	(0x1 << 8)
+
+#define PMSAv8_BAR_XN		1
+
+#define PMSAv8_LAR_EN		1
+#define PMSAv8_LAR_IDX(n)	(((n) & 0x7) << 1)
+
+
+#define PMSAv8_AP_PL1RW_PL0NA	(0 << 1)
+#define PMSAv8_AP_PL1RW_PL0RW	(1 << 1)
+#define PMSAv8_AP_PL1RO_PL0RO	(3 << 1)
+
+#ifdef CONFIG_SMP
+#define PMSAv8_RGN_SHARED	(3 << 3) // inner sharable
+#else
+#define PMSAv8_RGN_SHARED	(0 << 3)
+#endif
+
+#define PMSAv8_RGN_DEVICE_nGnRnE	0
+#define PMSAv8_RGN_NORMAL		1
+
+#define PMSAv8_MAIR(attr, mt)	((attr) << ((mt) * 8))
+
+#ifdef CONFIG_CPU_V7M
+#define PMSAv8_MINALIGN		32
+#else
+#define PMSAv8_MINALIGN		64
+#endif
 
 /* For minimal static MPU region configurations */
-#define MPU_PROBE_REGION	0
-#define MPU_BG_REGION		1
-#define MPU_RAM_REGION		2
-#define MPU_ROM_REGION		3
+#define PMSAv7_PROBE_REGION	0
+#define PMSAv7_BG_REGION	1
+#define PMSAv7_RAM_REGION	2
+#define PMSAv7_ROM_REGION	3
+
+/* Fixed for PMSAv8 only */
+#define PMSAv8_XIP_REGION	0
+#define PMSAv8_KERNEL_REGION	1
 
 /* Maximum number of regions Linux is interested in */
-#define MPU_MAX_REGIONS		16
+#define MPU_MAX_REGIONS	16
 
-#define MPU_DATA_SIDE		0
-#define MPU_INSTR_SIDE		1
+#define PMSAv7_DATA_SIDE	0
+#define PMSAv7_INSTR_SIDE	1
 
 #ifndef __ASSEMBLY__
 
 struct mpu_rgn {
 	/* Assume same attributes for d/i-side  */
-	u32 drbar;
-	u32 drsr;
-	u32 dracr;
+	union {
+		u32 drbar;   /* PMSAv7 */
+		u32 prbar;   /* PMSAv8 */
+	};
+	union {
+		u32 drsr;   /* PMSAv7 */
+		u32 prlar;  /* PMSAv8 */
+	};
+	union {
+		u32 dracr;  /* PMSAv7 */
+		u32 unused; /* not used in PMSAv8 */
+	};
 };
 
 struct mpu_rgn_info {
@@ -75,16 +116,17 @@ struct mpu_rgn_info {
 extern struct mpu_rgn_info mpu_rgn_info;
 
 #ifdef CONFIG_ARM_MPU
+extern void __init pmsav7_adjust_lowmem_bounds(void);
+extern void __init pmsav8_adjust_lowmem_bounds(void);
 
-extern void __init adjust_lowmem_bounds_mpu(void);
-extern void __init mpu_setup(void);
-
+extern void __init pmsav7_setup(void);
+extern void __init pmsav8_setup(void);
 #else
-
-static inline void adjust_lowmem_bounds_mpu(void) {}
-static inline void mpu_setup(void) {}
-
-#endif /* !CONFIG_ARM_MPU */
+static inline void pmsav7_adjust_lowmem_bounds(void) {};
+static inline void pmsav8_adjust_lowmem_bounds(void) {};
+static inline void pmsav7_setup(void) {};
+static inline void pmsav8_setup(void) {};
+#endif
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
index f2e1af45bd6f..e25f4392e1b2 100644
--- a/arch/arm/include/asm/proc-fns.h
+++ b/arch/arm/include/asm/proc-fns.h
@@ -36,6 +36,10 @@ extern struct processor {
 	 * Set up any processor specifics
 	 */
 	void (*_proc_init)(void);
+	/*
+	 * Check for processor bugs
+	 */
+	void (*check_bugs)(void);
 	/*
 	 * Disable any processor specifics
 	 */
diff --git a/arch/arm/include/asm/system_misc.h b/arch/arm/include/asm/system_misc.h
index 78f6db114faf..8e76db83c498 100644
--- a/arch/arm/include/asm/system_misc.h
+++ b/arch/arm/include/asm/system_misc.h
@@ -8,6 +8,7 @@
 #include <linux/linkage.h>
 #include <linux/irqflags.h>
 #include <linux/reboot.h>
+#include <linux/percpu.h>
 
 extern void cpu_init(void);
 
@@ -15,6 +16,20 @@ void soft_restart(unsigned long);
 extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
 extern void (*arm_pm_idle)(void);
 
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+typedef void (*harden_branch_predictor_fn_t)(void);
+DECLARE_PER_CPU(harden_branch_predictor_fn_t, harden_branch_predictor_fn);
+static inline void harden_branch_predictor(void)
+{
+	harden_branch_predictor_fn_t fn = per_cpu(harden_branch_predictor_fn,
+						  smp_processor_id());
+	if (fn)
+		fn();
+}
+#else
+#define harden_branch_predictor() do { } while (0)
+#endif
+
 #define UDBG_UNDEFINED	(1 << 0)
 #define UDBG_SYSCALL	(1 << 1)
 #define UDBG_BADABORT	(1 << 2)
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 0bf2347495f1..3d614e90c19f 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -152,7 +152,7 @@ extern int __get_user_64t_4(void *);
 #define __get_user_check(x, p)						\
 	({								\
 		unsigned long __limit = current_thread_info()->addr_limit - 1; \
-		register const typeof(*(p)) __user *__p asm("r0") = (p);\
+		register typeof(*(p)) __user *__p asm("r0") = (p);	\
 		register typeof(x) __r2 asm("r2");			\
 		register unsigned long __l asm("r1") = __limit;		\
 		register int __e asm("r0");				\
diff --git a/arch/arm/include/asm/v7m.h b/arch/arm/include/asm/v7m.h
index 634e77107425..187ccf6496ad 100644
--- a/arch/arm/include/asm/v7m.h
+++ b/arch/arm/include/asm/v7m.h
@@ -64,9 +64,17 @@
 #define MPU_CTRL_ENABLE		1
 #define MPU_CTRL_PRIVDEFENA	(1 << 2)
 
-#define MPU_RNR			0x98
-#define MPU_RBAR		0x9c
-#define MPU_RASR		0xa0
+#define PMSAv7_RNR		0x98
+#define PMSAv7_RBAR		0x9c
+#define PMSAv7_RASR		0xa0
+
+#define PMSAv8_RNR		0x98
+#define PMSAv8_RBAR		0x9c
+#define PMSAv8_RLAR		0xa0
+#define PMSAv8_RBAR_A(n)	(PMSAv8_RBAR + 8*(n))
+#define PMSAv8_RLAR_A(n)	(PMSAv8_RLAR + 8*(n))
+#define PMSAv8_MAIR0		0xc0
+#define PMSAv8_MAIR1		0xc4
 
 /* Cache opeartions */
 #define	V7M_SCB_ICIALLU		0x250	/* I-cache invalidate all to PoU */
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index b59ac4bf82b8..8cad59465af3 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -31,6 +31,7 @@ else
 obj-y		+= entry-armv.o
 endif
 
+obj-$(CONFIG_MMU)		+= bugs.o
 obj-$(CONFIG_CPU_IDLE)		+= cpuidle.o
 obj-$(CONFIG_ISA_DMA_API)	+= dma.o
 obj-$(CONFIG_FIQ)		+= fiq.o fiqasm.o
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index f369ece99958..27c5381518d8 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -194,9 +194,11 @@ int main(void)
   DEFINE(MPU_RNG_INFO_USED,	offsetof(struct mpu_rgn_info, used));
 
   DEFINE(MPU_RNG_SIZE,		sizeof(struct mpu_rgn));
-  DEFINE(MPU_RGN_DRBAR,		offsetof(struct mpu_rgn, drbar));
-  DEFINE(MPU_RGN_DRSR,		offsetof(struct mpu_rgn, drsr));
-  DEFINE(MPU_RGN_DRACR,		offsetof(struct mpu_rgn, dracr));
+  DEFINE(MPU_RGN_DRBAR,	offsetof(struct mpu_rgn, drbar));
+  DEFINE(MPU_RGN_DRSR,	offsetof(struct mpu_rgn, drsr));
+  DEFINE(MPU_RGN_DRACR,	offsetof(struct mpu_rgn, dracr));
+  DEFINE(MPU_RGN_PRBAR,	offsetof(struct mpu_rgn, prbar));
+  DEFINE(MPU_RGN_PRLAR,	offsetof(struct mpu_rgn, prlar));
 #endif
   return 0; 
 }
diff --git a/arch/arm/kernel/bugs.c b/arch/arm/kernel/bugs.c
new file mode 100644
index 000000000000..7be511310191
--- /dev/null
+++ b/arch/arm/kernel/bugs.c
@@ -0,0 +1,18 @@
+// SPDX-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <asm/bugs.h>
+#include <asm/proc-fns.h>
+
+void check_other_bugs(void)
+{
+#ifdef MULTI_CPU
+	if (processor.check_bugs)
+		processor.check_bugs();
+#endif
+}
+
+void __init check_bugs(void)
+{
+	check_writebuffer_bugs();
+	check_other_bugs();
+}
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 3c4f88701f22..20df608bf343 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -242,9 +242,7 @@ local_restart:
 	tst	r10, #_TIF_SYSCALL_WORK		@ are we tracing syscalls?
 	bne	__sys_trace
 
-	cmp	scno, #NR_syscalls		@ check upper syscall limit
-	badr	lr, ret_fast_syscall		@ return address
-	ldrcc	pc, [tbl, scno, lsl #2]		@ call sys_* routine
+	invoke_syscall tbl, scno, r10, ret_fast_syscall
 
 	add	r1, sp, #S_OFF
 2:	cmp	scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE)
@@ -278,14 +276,8 @@ __sys_trace:
 	mov	r1, scno
 	add	r0, sp, #S_OFF
 	bl	syscall_trace_enter
-
-	badr	lr, __sys_trace_return		@ return address
-	mov	scno, r0			@ syscall number (possibly new)
-	add	r1, sp, #S_R0 + S_OFF		@ pointer to regs
-	cmp	scno, #NR_syscalls		@ check upper syscall limit
-	ldmccia	r1, {r0 - r6}			@ have to reload r0 - r6
-	stmccia	sp, {r4, r5}			@ and update the stack args
-	ldrcc	pc, [tbl, scno, lsl #2]		@ call sys_* routine
+	mov	scno, r0
+	invoke_syscall tbl, scno, r10, __sys_trace_return, reload=1
 	cmp	scno, #-1			@ skip the syscall?
 	bne	2b
 	add	sp, sp, #S_OFF			@ restore stack
@@ -363,6 +355,10 @@ sys_syscall:
 		bic	scno, r0, #__NR_OABI_SYSCALL_BASE
 		cmp	scno, #__NR_syscall - __NR_SYSCALL_BASE
 		cmpne	scno, #NR_syscalls	@ check range
+#ifdef CONFIG_CPU_SPECTRE
+		movhs	scno, #0
+		csdb
+#endif
 		stmloia	sp, {r5, r6}		@ shuffle args
 		movlo	r0, r1
 		movlo	r1, r2
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 0f07579af472..773424843d6e 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -378,6 +378,31 @@
 #endif
 	.endm
 
+	.macro	invoke_syscall, table, nr, tmp, ret, reload=0
+#ifdef CONFIG_CPU_SPECTRE
+	mov	\tmp, \nr
+	cmp	\tmp, #NR_syscalls		@ check upper syscall limit
+	movcs	\tmp, #0
+	csdb
+	badr	lr, \ret			@ return address
+	.if	\reload
+	add	r1, sp, #S_R0 + S_OFF		@ pointer to regs
+	ldmccia	r1, {r0 - r6}			@ reload r0-r6
+	stmccia	sp, {r4, r5}			@ update stack arguments
+	.endif
+	ldrcc	pc, [\table, \tmp, lsl #2]	@ call sys_* routine
+#else
+	cmp	\nr, #NR_syscalls		@ check upper syscall limit
+	badr	lr, \ret			@ return address
+	.if	\reload
+	add	r1, sp, #S_R0 + S_OFF		@ pointer to regs
+	ldmccia	r1, {r0 - r6}			@ reload r0-r6
+	stmccia	sp, {r4, r5}			@ update stack arguments
+	.endif
+	ldrcc	pc, [\table, \nr, lsl #2]	@ call sys_* routine
+#endif
+	.endm
+
 /*
  * These are the registers used in the syscall handler, and allow us to
  * have in theory up to 7 arguments to a function - r0 to r6.
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index 2e38f85b757a..dd546d65a383 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -68,14 +68,6 @@ ENTRY(stext)
 	beq	__error_p				@ yes, error 'p'
 
 #ifdef CONFIG_ARM_MPU
-	/* Calculate the size of a region covering just the kernel */
-	ldr	r5, =PLAT_PHYS_OFFSET		@ Region start: PHYS_OFFSET
-	ldr     r6, =(_end)			@ Cover whole kernel
-	sub	r6, r6, r5			@ Minimum size of region to map
-	clz	r6, r6				@ Region size must be 2^N...
-	rsb	r6, r6, #31			@ ...so round up region size
-	lsl	r6, r6, #MPU_RSR_SZ		@ Put size in right field
-	orr	r6, r6, #(1 << MPU_RSR_EN)	@ Set region enabled bit
 	bl	__setup_mpu
 #endif
 
@@ -83,8 +75,8 @@ ENTRY(stext)
 	ldr	r12, [r10, #PROCINFO_INITFUNC]
 	add	r12, r12, r10
 	ret	r12
-1:	bl	__after_proc_init
-	b	__mmap_switched
+1:	ldr	lr, =__mmap_switched
+	b	__after_proc_init
 ENDPROC(stext)
 
 #ifdef CONFIG_SMP
@@ -110,8 +102,6 @@ ENTRY(secondary_startup)
 	ldr	r7, __secondary_data
 
 #ifdef CONFIG_ARM_MPU
-	/* Use MPU region info supplied by __cpu_up */
-	ldr	r6, [r7]			@ get secondary_data.mpu_rgn_info
 	bl      __secondary_setup_mpu		@ Initialize the MPU
 #endif
 
@@ -133,12 +123,45 @@ __secondary_data:
 /*
  * Set the Control Register and Read the process ID.
  */
+	.text
 __after_proc_init:
+#ifdef CONFIG_ARM_MPU
+M_CLASS(movw	r12, #:lower16:BASEADDR_V7M_SCB)
+M_CLASS(movt	r12, #:upper16:BASEADDR_V7M_SCB)
+M_CLASS(ldr	r3, [r12, 0x50])
+AR_CLASS(mrc	p15, 0, r3, c0, c1, 4)          @ Read ID_MMFR0
+	and	r3, r3, #(MMFR0_PMSA)           @ PMSA field
+	teq	r3, #(MMFR0_PMSAv7)             @ PMSA v7
+	beq	1f
+	teq	r3, #(MMFR0_PMSAv8)		@ PMSA v8
+	/*
+	 * Memory region attributes for PMSAv8:
+	 *
+	 *   n = AttrIndx[2:0]
+	 *                      n       MAIR
+	 *   DEVICE_nGnRnE      000     00000000
+	 *   NORMAL             001     11111111
+	 */
+	ldreq	r3, =PMSAv8_MAIR(0x00, PMSAv8_RGN_DEVICE_nGnRnE) | \
+		     PMSAv8_MAIR(0xff, PMSAv8_RGN_NORMAL)
+AR_CLASS(mcreq	p15, 0, r3, c10, c2, 0)		@ MAIR 0
+M_CLASS(streq	r3, [r12, #PMSAv8_MAIR0])
+	moveq	r3, #0
+AR_CLASS(mcreq	p15, 0, r3, c10, c2, 1)		@ MAIR 1
+M_CLASS(streq	r3, [r12, #PMSAv8_MAIR1])
+
+1:
+#endif
 #ifdef CONFIG_CPU_CP15
 	/*
 	 * CP15 system control register value returned in r0 from
 	 * the CPU init function.
 	 */
+
+#ifdef CONFIG_ARM_MPU
+	biceq	r0, r0, #CR_BR			@ Disable the 'default mem-map'
+	orreq	r0, r0, #CR_M			@ Set SCTRL.M (MPU on)
+#endif
 #if defined(CONFIG_ALIGNMENT_TRAP) && __LINUX_ARM_ARCH__ < 6
 	orr	r0, r0, #CR_A
 #else
@@ -154,7 +177,15 @@ __after_proc_init:
 	bic	r0, r0, #CR_I
 #endif
 	mcr	p15, 0, r0, c1, c0, 0		@ write control reg
+	isb
 #elif defined (CONFIG_CPU_V7M)
+#ifdef CONFIG_ARM_MPU
+	ldreq	r3, [r12, MPU_CTRL]
+	biceq	r3, #MPU_CTRL_PRIVDEFENA
+	orreq	r3, #MPU_CTRL_ENABLE
+	streq	r3, [r12, MPU_CTRL]
+	isb
+#endif
 	/* For V7M systems we want to modify the CCR similarly to the SCTLR */
 #ifdef CONFIG_CPU_DCACHE_DISABLE
 	bic	r0, r0, #V7M_SCB_CCR_DC
@@ -165,9 +196,7 @@ __after_proc_init:
 #ifdef CONFIG_CPU_ICACHE_DISABLE
 	bic	r0, r0, #V7M_SCB_CCR_IC
 #endif
-	movw	r3, #:lower16:(BASEADDR_V7M_SCB + V7M_SCB_CCR)
-	movt	r3, #:upper16:(BASEADDR_V7M_SCB + V7M_SCB_CCR)
-	str	r0, [r3]
+	str	r0, [r12, V7M_SCB_CCR]
 #endif /* CONFIG_CPU_CP15 elif CONFIG_CPU_V7M */
 	ret	lr
 ENDPROC(__after_proc_init)
@@ -184,7 +213,7 @@ ENDPROC(__after_proc_init)
 .endm
 
 /* Setup a single MPU region, either D or I side (D-side for unified) */
-.macro setup_region bar, acr, sr, side = MPU_DATA_SIDE, unused
+.macro setup_region bar, acr, sr, side = PMSAv7_DATA_SIDE, unused
 	mcr	p15, 0, \bar, c6, c1, (0 + \side)	@ I/DRBAR
 	mcr	p15, 0, \acr, c6, c1, (4 + \side)	@ I/DRACR
 	mcr	p15, 0, \sr, c6, c1, (2 + \side)		@ I/DRSR
@@ -192,14 +221,14 @@ ENDPROC(__after_proc_init)
 #else
 .macro set_region_nr tmp, rgnr, base
 	mov	\tmp, \rgnr
-	str     \tmp, [\base, #MPU_RNR]
+	str     \tmp, [\base, #PMSAv7_RNR]
 .endm
 
 .macro setup_region bar, acr, sr, unused, base
 	lsl     \acr, \acr, #16
 	orr     \acr, \acr, \sr
-	str     \bar, [\base, #MPU_RBAR]
-	str     \acr, [\base, #MPU_RASR]
+	str     \bar, [\base, #PMSAv7_RBAR]
+	str     \acr, [\base, #PMSAv7_RASR]
 .endm
 
 #endif
@@ -210,8 +239,9 @@ ENDPROC(__after_proc_init)
  * Region 2: Normal, Shared, cacheable for RAM. From PHYS_OFFSET, size from r6
  * Region 3: Normal, shared, inaccessible from PL0 to protect the vectors page
  *
- * r6: Value to be written to DRSR (and IRSR if required) for MPU_RAM_REGION
+ * r6: Value to be written to DRSR (and IRSR if required) for PMSAv7_RAM_REGION
 */
+	__HEAD
 
 ENTRY(__setup_mpu)
 
@@ -223,7 +253,22 @@ AR_CLASS(mrc	p15, 0, r0, c0, c1, 4)		@ Read ID_MMFR0
 M_CLASS(ldr	r0, [r12, 0x50])
 	and	r0, r0, #(MMFR0_PMSA)		@ PMSA field
 	teq	r0, #(MMFR0_PMSAv7)		@ PMSA v7
-	bxne	lr
+	beq	__setup_pmsa_v7
+	teq	r0, #(MMFR0_PMSAv8)		@ PMSA v8
+	beq	__setup_pmsa_v8
+
+	ret	lr
+ENDPROC(__setup_mpu)
+
+ENTRY(__setup_pmsa_v7)
+	/* Calculate the size of a region covering just the kernel */
+	ldr	r5, =PLAT_PHYS_OFFSET		@ Region start: PHYS_OFFSET
+	ldr     r6, =(_end)			@ Cover whole kernel
+	sub	r6, r6, r5			@ Minimum size of region to map
+	clz	r6, r6				@ Region size must be 2^N...
+	rsb	r6, r6, #31			@ ...so round up region size
+	lsl	r6, r6, #PMSAv7_RSR_SZ		@ Put size in right field
+	orr	r6, r6, #(1 << PMSAv7_RSR_EN)	@ Set region enabled bit
 
 	/* Determine whether the D/I-side memory map is unified. We set the
 	 * flags here and continue to use them for the rest of this function */
@@ -234,77 +279,189 @@ M_CLASS(ldr    r0, [r12, #MPU_TYPE])
 	tst	r0, #MPUIR_nU			@ MPUIR_nU = 0 for unified
 
 	/* Setup second region first to free up r6 */
-	set_region_nr r0, #MPU_RAM_REGION, r12
+	set_region_nr r0, #PMSAv7_RAM_REGION, r12
 	isb
 	/* Full access from PL0, PL1, shared for CONFIG_SMP, cacheable */
 	ldr	r0, =PLAT_PHYS_OFFSET		@ RAM starts at PHYS_OFFSET
-	ldr	r5,=(MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL)
+	ldr	r5,=(PMSAv7_AP_PL1RW_PL0RW | PMSAv7_RGN_NORMAL)
 
-	setup_region r0, r5, r6, MPU_DATA_SIDE, r12	@ PHYS_OFFSET, shared, enabled
+	setup_region r0, r5, r6, PMSAv7_DATA_SIDE, r12	@ PHYS_OFFSET, shared, enabled
 	beq	1f					@ Memory-map not unified
-	setup_region r0, r5, r6, MPU_INSTR_SIDE, r12	@ PHYS_OFFSET, shared, enabled
+	setup_region r0, r5, r6, PMSAv7_INSTR_SIDE, r12	@ PHYS_OFFSET, shared, enabled
 1:	isb
 
 	/* First/background region */
-	set_region_nr r0, #MPU_BG_REGION, r12
+	set_region_nr r0, #PMSAv7_BG_REGION, r12
 	isb
 	/* Execute Never,  strongly ordered, inaccessible to PL0, rw PL1  */
 	mov	r0, #0				@ BG region starts at 0x0
-	ldr	r5,=(MPU_ACR_XN | MPU_RGN_STRONGLY_ORDERED | MPU_AP_PL1RW_PL0NA)
-	mov	r6, #MPU_RSR_ALL_MEM		@ 4GB region, enabled
+	ldr	r5,=(PMSAv7_ACR_XN | PMSAv7_RGN_STRONGLY_ORDERED | PMSAv7_AP_PL1RW_PL0NA)
+	mov	r6, #PMSAv7_RSR_ALL_MEM		@ 4GB region, enabled
 
-	setup_region r0, r5, r6, MPU_DATA_SIDE, r12	@ 0x0, BG region, enabled
+	setup_region r0, r5, r6, PMSAv7_DATA_SIDE, r12	@ 0x0, BG region, enabled
 	beq	2f					@ Memory-map not unified
-	setup_region r0, r5, r6, MPU_INSTR_SIDE r12	@ 0x0, BG region, enabled
+	setup_region r0, r5, r6, PMSAv7_INSTR_SIDE r12	@ 0x0, BG region, enabled
 2:	isb
 
 #ifdef CONFIG_XIP_KERNEL
-	set_region_nr r0, #MPU_ROM_REGION, r12
+	set_region_nr r0, #PMSAv7_ROM_REGION, r12
 	isb
 
-	ldr	r5,=(MPU_AP_PL1RO_PL0NA | MPU_RGN_NORMAL)
+	ldr	r5,=(PMSAv7_AP_PL1RO_PL0NA | PMSAv7_RGN_NORMAL)
 
 	ldr	r0, =CONFIG_XIP_PHYS_ADDR		@ ROM start
 	ldr     r6, =(_exiprom)				@ ROM end
 	sub	r6, r6, r0				@ Minimum size of region to map
 	clz	r6, r6					@ Region size must be 2^N...
 	rsb	r6, r6, #31				@ ...so round up region size
-	lsl	r6, r6, #MPU_RSR_SZ			@ Put size in right field
-	orr	r6, r6, #(1 << MPU_RSR_EN)		@ Set region enabled bit
+	lsl	r6, r6, #PMSAv7_RSR_SZ			@ Put size in right field
+	orr	r6, r6, #(1 << PMSAv7_RSR_EN)		@ Set region enabled bit
 
-	setup_region r0, r5, r6, MPU_DATA_SIDE, r12	@ XIP_PHYS_ADDR, shared, enabled
+	setup_region r0, r5, r6, PMSAv7_DATA_SIDE, r12	@ XIP_PHYS_ADDR, shared, enabled
 	beq	3f					@ Memory-map not unified
-	setup_region r0, r5, r6, MPU_INSTR_SIDE, r12	@ XIP_PHYS_ADDR, shared, enabled
+	setup_region r0, r5, r6, PMSAv7_INSTR_SIDE, r12	@ XIP_PHYS_ADDR, shared, enabled
 3:	isb
 #endif
+	ret	lr
+ENDPROC(__setup_pmsa_v7)
 
-	/* Enable the MPU */
-AR_CLASS(mrc	p15, 0, r0, c1, c0, 0)		@ Read SCTLR
-AR_CLASS(bic	r0, r0, #CR_BR)			@ Disable the 'default mem-map'
-AR_CLASS(orr	r0, r0, #CR_M)			@ Set SCTRL.M (MPU on)
-AR_CLASS(mcr	p15, 0, r0, c1, c0, 0)		@ Enable MPU
-
-M_CLASS(ldr	r0, [r12, #MPU_CTRL])
-M_CLASS(bic	r0, #MPU_CTRL_PRIVDEFENA)
-M_CLASS(orr	r0, #MPU_CTRL_ENABLE)
-M_CLASS(str	r0, [r12, #MPU_CTRL])
+ENTRY(__setup_pmsa_v8)
+	mov	r0, #0
+AR_CLASS(mcr	p15, 0, r0, c6, c2, 1)		@ PRSEL
+M_CLASS(str	r0, [r12, #PMSAv8_RNR])
 	isb
 
+#ifdef CONFIG_XIP_KERNEL
+	ldr	r5, =CONFIG_XIP_PHYS_ADDR		@ ROM start
+	ldr     r6, =(_exiprom)				@ ROM end
+	sub	r6, r6, #1
+	bic	r6, r6, #(PMSAv8_MINALIGN - 1)
+
+	orr	r5, r5, #(PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED)
+	orr	r6, r6, #(PMSAv8_LAR_IDX(PMSAv8_RGN_NORMAL) | PMSAv8_LAR_EN)
+
+AR_CLASS(mcr	p15, 0, r5, c6, c8, 0)			@ PRBAR0
+AR_CLASS(mcr	p15, 0, r6, c6, c8, 1)			@ PRLAR0
+M_CLASS(str	r5, [r12, #PMSAv8_RBAR_A(0)])
+M_CLASS(str	r6, [r12, #PMSAv8_RLAR_A(0)])
+#endif
+
+	ldr	r5, =KERNEL_START
+	ldr	r6, =KERNEL_END
+	sub	r6, r6, #1
+	bic	r6, r6, #(PMSAv8_MINALIGN - 1)
+
+	orr	r5, r5, #(PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED)
+	orr	r6, r6, #(PMSAv8_LAR_IDX(PMSAv8_RGN_NORMAL) | PMSAv8_LAR_EN)
+
+AR_CLASS(mcr	p15, 0, r5, c6, c8, 4)			@ PRBAR1
+AR_CLASS(mcr	p15, 0, r6, c6, c8, 5)			@ PRLAR1
+M_CLASS(str	r5, [r12, #PMSAv8_RBAR_A(1)])
+M_CLASS(str	r6, [r12, #PMSAv8_RLAR_A(1)])
+
+	/* Setup Background: 0x0 - min(KERNEL_START, XIP_PHYS_ADDR) */
+#ifdef CONFIG_XIP_KERNEL
+	ldr	r6, =KERNEL_START
+	ldr	r5, =CONFIG_XIP_PHYS_ADDR
+	cmp	r6, r5
+	movcs	r6, r5
+#else
+	ldr	r6, =KERNEL_START
+#endif
+	cmp	r6, #0
+	beq	1f
+
+	mov	r5, #0
+	sub	r6, r6, #1
+	bic	r6, r6, #(PMSAv8_MINALIGN - 1)
+
+	orr	r5, r5, #(PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED | PMSAv8_BAR_XN)
+	orr	r6, r6, #(PMSAv8_LAR_IDX(PMSAv8_RGN_DEVICE_nGnRnE) | PMSAv8_LAR_EN)
+
+AR_CLASS(mcr	p15, 0, r5, c6, c9, 0)			@ PRBAR2
+AR_CLASS(mcr	p15, 0, r6, c6, c9, 1)			@ PRLAR2
+M_CLASS(str	r5, [r12, #PMSAv8_RBAR_A(2)])
+M_CLASS(str	r6, [r12, #PMSAv8_RLAR_A(2)])
+
+1:
+	/* Setup Background: max(KERNEL_END, _exiprom) - 0xffffffff */
+#ifdef CONFIG_XIP_KERNEL
+	ldr	r5, =KERNEL_END
+	ldr	r6, =(_exiprom)
+	cmp	r5, r6
+	movcc	r5, r6
+#else
+	ldr	r5, =KERNEL_END
+#endif
+	mov	r6, #0xffffffff
+	bic	r6, r6, #(PMSAv8_MINALIGN - 1)
+
+	orr	r5, r5, #(PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED | PMSAv8_BAR_XN)
+	orr	r6, r6, #(PMSAv8_LAR_IDX(PMSAv8_RGN_DEVICE_nGnRnE) | PMSAv8_LAR_EN)
+
+AR_CLASS(mcr	p15, 0, r5, c6, c9, 4)			@ PRBAR3
+AR_CLASS(mcr	p15, 0, r6, c6, c9, 5)			@ PRLAR3
+M_CLASS(str	r5, [r12, #PMSAv8_RBAR_A(3)])
+M_CLASS(str	r6, [r12, #PMSAv8_RLAR_A(3)])
+
+#ifdef CONFIG_XIP_KERNEL
+	/* Setup Background: min(_exiprom, KERNEL_END) - max(KERNEL_START, XIP_PHYS_ADDR) */
+	ldr	r5, =(_exiprom)
+	ldr	r6, =KERNEL_END
+	cmp	r5, r6
+	movcs	r5, r6
+
+	ldr	r6, =KERNEL_START
+	ldr	r0, =CONFIG_XIP_PHYS_ADDR
+	cmp	r6, r0
+	movcc	r6, r0
+
+	sub	r6, r6, #1
+	bic	r6, r6, #(PMSAv8_MINALIGN - 1)
+
+	orr	r5, r5, #(PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED | PMSAv8_BAR_XN)
+	orr	r6, r6, #(PMSAv8_LAR_IDX(PMSAv8_RGN_DEVICE_nGnRnE) | PMSAv8_LAR_EN)
+
+#ifdef CONFIG_CPU_V7M
+	/* There is no alias for n == 4 */
+	mov	r0, #4
+	str	r0, [r12, #PMSAv8_RNR]			@ PRSEL
+	isb
+
+	str	r5, [r12, #PMSAv8_RBAR_A(0)]
+	str	r6, [r12, #PMSAv8_RLAR_A(0)]
+#else
+	mcr	p15, 0, r5, c6, c10, 1			@ PRBAR4
+	mcr	p15, 0, r6, c6, c10, 2			@ PRLAR4
+#endif
+#endif
 	ret	lr
-ENDPROC(__setup_mpu)
+ENDPROC(__setup_pmsa_v8)
 
 #ifdef CONFIG_SMP
 /*
  * r6: pointer at mpu_rgn_info
  */
 
+	.text
 ENTRY(__secondary_setup_mpu)
+	/* Use MPU region info supplied by __cpu_up */
+	ldr	r6, [r7]			@ get secondary_data.mpu_rgn_info
+
 	/* Probe for v7 PMSA compliance */
 	mrc	p15, 0, r0, c0, c1, 4		@ Read ID_MMFR0
 	and	r0, r0, #(MMFR0_PMSA)		@ PMSA field
 	teq	r0, #(MMFR0_PMSAv7)		@ PMSA v7
-	bne	__error_p
+	beq	__secondary_setup_pmsa_v7
+	teq	r0, #(MMFR0_PMSAv8)		@ PMSA v8
+	beq	__secondary_setup_pmsa_v8
+	b	__error_p
+ENDPROC(__secondary_setup_mpu)
 
+/*
+ * r6: pointer at mpu_rgn_info
+ */
+ENTRY(__secondary_setup_pmsa_v7)
 	/* Determine whether the D/I-side memory map is unified. We set the
 	 * flags here and continue to use them for the rest of this function */
 	mrc	p15, 0, r0, c0, c0, 4		@ MPUIR
@@ -328,25 +485,45 @@ ENTRY(__secondary_setup_mpu)
 	ldr	r6, [r3, #MPU_RGN_DRSR]
 	ldr	r5, [r3, #MPU_RGN_DRACR]
 
-	setup_region r0, r5, r6, MPU_DATA_SIDE
+	setup_region r0, r5, r6, PMSAv7_DATA_SIDE
 	beq	2f
-	setup_region r0, r5, r6, MPU_INSTR_SIDE
+	setup_region r0, r5, r6, PMSAv7_INSTR_SIDE
 2:	isb
 
 	mrc	p15, 0, r0, c0, c0, 4		@ Reevaluate the MPUIR
 	cmp	r4, #0
 	bgt	1b
 
-	/* Enable the MPU */
-	mrc	p15, 0, r0, c1, c0, 0		@ Read SCTLR
-	bic	r0, r0, #CR_BR			@ Disable the 'default mem-map'
-	orr	r0, r0, #CR_M			@ Set SCTRL.M (MPU on)
-	mcr	p15, 0, r0, c1, c0, 0		@ Enable MPU
+	ret	lr
+ENDPROC(__secondary_setup_pmsa_v7)
+
+ENTRY(__secondary_setup_pmsa_v8)
+	ldr	r4, [r6, #MPU_RNG_INFO_USED]
+#ifndef CONFIG_XIP_KERNEL
+	add	r4, r4, #1
+#endif
+	mov	r5, #MPU_RNG_SIZE
+	add	r3, r6, #MPU_RNG_INFO_RNGS
+	mla	r3, r4, r5, r3
+
+1:
+	sub	r3, r3, #MPU_RNG_SIZE
+	sub	r4, r4, #1
+
+	mcr	p15, 0, r4, c6, c2, 1		@ PRSEL
 	isb
 
-	ret	lr
-ENDPROC(__secondary_setup_mpu)
+	ldr	r5, [r3, #MPU_RGN_PRBAR]
+	ldr	r6, [r3, #MPU_RGN_PRLAR]
 
+	mcr	p15, 0, r5, c6, c3, 0		@ PRBAR
+	mcr	p15, 0, r6, c6, c3, 1           @ PRLAR
+
+	cmp	r4, #0
+	bgt	1b
+
+	ret	lr
+ENDPROC(__secondary_setup_pmsa_v8)
 #endif /* CONFIG_SMP */
 #endif /* CONFIG_ARM_MPU */
 #include "head-common.S"
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 2da087926ebe..0978282d5fc2 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -31,6 +31,7 @@
 #include <linux/irq_work.h>
 
 #include <linux/atomic.h>
+#include <asm/bugs.h>
 #include <asm/smp.h>
 #include <asm/cacheflush.h>
 #include <asm/cpu.h>
@@ -236,8 +237,6 @@ int __cpu_disable(void)
 	flush_cache_louis();
 	local_flush_tlb_all();
 
-	clear_tasks_mm_cpumask(cpu);
-
 	return 0;
 }
 
@@ -255,6 +254,7 @@ void __cpu_die(unsigned int cpu)
 	}
 	pr_debug("CPU%u: shutdown\n", cpu);
 
+	clear_tasks_mm_cpumask(cpu);
 	/*
 	 * platform_cpu_kill() is generally expected to do the powering off
 	 * and/or cutting of clocks to the dying CPU.  Optionally, this may
@@ -405,6 +405,9 @@ asmlinkage void secondary_start_kernel(void)
 	 * before we continue - which happens after __cpu_up returns.
 	 */
 	set_cpu_online(cpu, true);
+
+	check_other_bugs();
+
 	complete(&cpu_running);
 
 	local_irq_enable();
diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
index a40ebb7c0896..d08099269e35 100644
--- a/arch/arm/kernel/suspend.c
+++ b/arch/arm/kernel/suspend.c
@@ -3,6 +3,7 @@
 #include <linux/slab.h>
 #include <linux/mm_types.h>
 
+#include <asm/bugs.h>
 #include <asm/cacheflush.h>
 #include <asm/idmap.h>
 #include <asm/pgalloc.h>
@@ -36,6 +37,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 		cpu_switch_mm(mm->pgd, mm);
 		local_flush_bp_all();
 		local_flush_tlb_all();
+		check_other_bugs();
 	}
 
 	return ret;
diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S
index d32f5d35f602..3593d5c1acd2 100644
--- a/arch/arm/kernel/vmlinux-xip.lds.S
+++ b/arch/arm/kernel/vmlinux-xip.lds.S
@@ -13,6 +13,7 @@
 #include <asm/cache.h>
 #include <asm/thread_info.h>
 #include <asm/memory.h>
+#include <asm/mpu.h>
 #include <asm/page.h>
 
 #include "vmlinux.lds.h"
@@ -148,6 +149,9 @@ SECTIONS
 	__init_end = .;
 
 	BSS_SECTION(0, 0, 8)
+#ifdef CONFIG_ARM_MPU
+	. = ALIGN(PMSAv8_MINALIGN);
+#endif
 	_end = .;
 
 	STABS_DEBUG
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index b77dc675ae55..23150c0f0f4d 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -12,6 +12,7 @@
 #include <asm/cache.h>
 #include <asm/thread_info.h>
 #include <asm/memory.h>
+#include <asm/mpu.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 
@@ -54,6 +55,9 @@ SECTIONS
 	. = ALIGN(1<<SECTION_SHIFT);
 #endif
 
+#ifdef CONFIG_ARM_MPU
+	. = ALIGN(PMSAv8_MINALIGN);
+#endif
 	.text : {			/* Real text segment		*/
 		_stext = .;		/* Text and read-only data	*/
 		ARM_TEXT
@@ -143,6 +147,9 @@ SECTIONS
 	_edata = .;
 
 	BSS_SECTION(0, 0, 0)
+#ifdef CONFIG_ARM_MPU
+	. = ALIGN(PMSAv8_MINALIGN);
+#endif
 	_end = .;
 
 	STABS_DEBUG
diff --git a/arch/arm/kernel/vmlinux.lds.h b/arch/arm/kernel/vmlinux.lds.h
index 71281e08e1d4..ae5fdff18406 100644
--- a/arch/arm/kernel/vmlinux.lds.h
+++ b/arch/arm/kernel/vmlinux.lds.h
@@ -27,24 +27,24 @@
 
 #define PROC_INFO							\
 		. = ALIGN(4);						\
-		VMLINUX_SYMBOL(__proc_info_begin) = .;			\
+		__proc_info_begin = .;					\
 		*(.proc.info.init)					\
-		VMLINUX_SYMBOL(__proc_info_end) = .;
+		__proc_info_end = .;
 
 #define HYPERVISOR_TEXT							\
-		VMLINUX_SYMBOL(__hyp_text_start) = .;			\
+		__hyp_text_start = .;					\
 		*(.hyp.text)						\
-		VMLINUX_SYMBOL(__hyp_text_end) = .;
+		__hyp_text_end = .;
 
 #define IDMAP_TEXT							\
 		ALIGN_FUNCTION();					\
-		VMLINUX_SYMBOL(__idmap_text_start) = .;			\
+		__idmap_text_start = .;					\
 		*(.idmap.text)						\
-		VMLINUX_SYMBOL(__idmap_text_end) = .;			\
+		__idmap_text_end = .;					\
 		. = ALIGN(PAGE_SIZE);					\
-		VMLINUX_SYMBOL(__hyp_idmap_text_start) = .;		\
+		__hyp_idmap_text_start = .;				\
 		*(.hyp.idmap.text)					\
-		VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;
+		__hyp_idmap_text_end = .;
 
 #define ARM_DISCARD							\
 		*(.ARM.exidx.exit.text)					\
diff --git a/arch/arm/kvm/hyp/hyp-entry.S b/arch/arm/kvm/hyp/hyp-entry.S
index 95a2faefc070..aa3f9a9837ac 100644
--- a/arch/arm/kvm/hyp/hyp-entry.S
+++ b/arch/arm/kvm/hyp/hyp-entry.S
@@ -16,6 +16,7 @@
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  */
 
+#include <linux/arm-smccc.h>
 #include <linux/linkage.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
@@ -71,6 +72,90 @@ __kvm_hyp_vector:
 	W(b)	hyp_irq
 	W(b)	hyp_fiq
 
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+	.align 5
+__kvm_hyp_vector_ic_inv:
+	.global __kvm_hyp_vector_ic_inv
+
+	/*
+	 * We encode the exception entry in the bottom 3 bits of
+	 * SP, and we have to guarantee to be 8 bytes aligned.
+	 */
+	W(add)	sp, sp, #1	/* Reset 	  7 */
+	W(add)	sp, sp, #1	/* Undef	  6 */
+	W(add)	sp, sp, #1	/* Syscall	  5 */
+	W(add)	sp, sp, #1	/* Prefetch abort 4 */
+	W(add)	sp, sp, #1	/* Data abort	  3 */
+	W(add)	sp, sp, #1	/* HVC		  2 */
+	W(add)	sp, sp, #1	/* IRQ		  1 */
+	W(nop)			/* FIQ		  0 */
+
+	mcr	p15, 0, r0, c7, c5, 0	/* ICIALLU */
+	isb
+
+	b	decode_vectors
+
+	.align 5
+__kvm_hyp_vector_bp_inv:
+	.global __kvm_hyp_vector_bp_inv
+
+	/*
+	 * We encode the exception entry in the bottom 3 bits of
+	 * SP, and we have to guarantee to be 8 bytes aligned.
+	 */
+	W(add)	sp, sp, #1	/* Reset 	  7 */
+	W(add)	sp, sp, #1	/* Undef	  6 */
+	W(add)	sp, sp, #1	/* Syscall	  5 */
+	W(add)	sp, sp, #1	/* Prefetch abort 4 */
+	W(add)	sp, sp, #1	/* Data abort	  3 */
+	W(add)	sp, sp, #1	/* HVC		  2 */
+	W(add)	sp, sp, #1	/* IRQ		  1 */
+	W(nop)			/* FIQ		  0 */
+
+	mcr	p15, 0, r0, c7, c5, 6	/* BPIALL */
+	isb
+
+decode_vectors:
+
+#ifdef CONFIG_THUMB2_KERNEL
+	/*
+	 * Yet another silly hack: Use VPIDR as a temp register.
+	 * Thumb2 is really a pain, as SP cannot be used with most
+	 * of the bitwise instructions. The vect_br macro ensures
+	 * things gets cleaned-up.
+	 */
+	mcr	p15, 4, r0, c0, c0, 0	/* VPIDR */
+	mov	r0, sp
+	and	r0, r0, #7
+	sub	sp, sp, r0
+	push	{r1, r2}
+	mov	r1, r0
+	mrc	p15, 4, r0, c0, c0, 0	/* VPIDR */
+	mrc	p15, 0, r2, c0, c0, 0	/* MIDR  */
+	mcr	p15, 4, r2, c0, c0, 0	/* VPIDR */
+#endif
+
+.macro vect_br val, targ
+ARM(	eor	sp, sp, #\val	)
+ARM(	tst	sp, #7		)
+ARM(	eorne	sp, sp, #\val	)
+
+THUMB(	cmp	r1, #\val	)
+THUMB(	popeq	{r1, r2}	)
+
+	beq	\targ
+.endm
+
+	vect_br	0, hyp_fiq
+	vect_br	1, hyp_irq
+	vect_br	2, hyp_hvc
+	vect_br	3, hyp_dabt
+	vect_br	4, hyp_pabt
+	vect_br	5, hyp_svc
+	vect_br	6, hyp_undef
+	vect_br	7, hyp_reset
+#endif
+
 .macro invalid_vector label, cause
 	.align
 \label:	mov	r0, #\cause
@@ -118,7 +203,7 @@ hyp_hvc:
 	lsr     r2, r2, #16
 	and     r2, r2, #0xff
 	cmp     r2, #0
-	bne	guest_trap		@ Guest called HVC
+	bne	guest_hvc_trap		@ Guest called HVC
 
 	/*
 	 * Getting here means host called HVC, we shift parameters and branch
@@ -149,7 +234,14 @@ hyp_hvc:
 	bx	ip
 
 1:
-	push	{lr}
+	/*
+	 * Pushing r2 here is just a way of keeping the stack aligned to
+	 * 8 bytes on any path that can trigger a HYP exception. Here,
+	 * we may well be about to jump into the guest, and the guest
+	 * exit would otherwise be badly decoded by our fancy
+	 * "decode-exception-without-a-branch" code...
+	 */
+	push	{r2, lr}
 
 	mov	lr, r0
 	mov	r0, r1
@@ -159,7 +251,21 @@ hyp_hvc:
 THUMB(	orr	lr, #1)
 	blx	lr			@ Call the HYP function
 
-	pop	{lr}
+	pop	{r2, lr}
+	eret
+
+guest_hvc_trap:
+	movw	r2, #:lower16:ARM_SMCCC_ARCH_WORKAROUND_1
+	movt	r2, #:upper16:ARM_SMCCC_ARCH_WORKAROUND_1
+	ldr	r0, [sp]		@ Guest's r0
+	teq	r0, r2
+	bne	guest_trap
+	add	sp, sp, #12
+	@ Returns:
+	@ r0 = 0
+	@ r1 = HSR value (perfectly predictable)
+	@ r2 = ARM_SMCCC_ARCH_WORKAROUND_1
+	mov	r0, #0
 	eret
 
 guest_trap:
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 5a016bc80e26..96a7b6cf459b 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -415,6 +415,7 @@ config CPU_V7
 	select CPU_CP15_MPU if !MMU
 	select CPU_HAS_ASID if MMU
 	select CPU_PABRT_V7
+	select CPU_SPECTRE if MMU
 	select CPU_THUMB_CAPABLE
 	select CPU_TLB_V7 if MMU
 
@@ -821,6 +822,28 @@ config CPU_BPREDICT_DISABLE
 	help
 	  Say Y here to disable branch prediction.  If unsure, say N.
 
+config CPU_SPECTRE
+	bool
+
+config HARDEN_BRANCH_PREDICTOR
+	bool "Harden the branch predictor against aliasing attacks" if EXPERT
+	depends on CPU_SPECTRE
+	default y
+	help
+	   Speculation attacks against some high-performance processors rely
+	   on being able to manipulate the branch predictor for a victim
+	   context by executing aliasing branches in the attacker context.
+	   Such attacks can be partially mitigated against by clearing
+	   internal branch predictor state and limiting the prediction
+	   logic in some situations.
+
+	   This config option will take CPU-specific actions to harden
+	   the branch predictor against aliasing attacks and may rely on
+	   specific instruction sequences or control bits being set by
+	   the system firmware.
+
+	   If unsure, say Y.
+
 config TLS_REG_EMUL
 	bool
 	select NEED_KUSER_HELPERS
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 9dbb84923e12..7cb1699fbfc4 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -10,7 +10,7 @@ obj-$(CONFIG_MMU)		+= fault-armv.o flush.o idmap.o ioremap.o \
 
 ifneq ($(CONFIG_MMU),y)
 obj-y				+= nommu.o
-obj-$(CONFIG_ARM_MPU)		+= pmsa-v7.o
+obj-$(CONFIG_ARM_MPU)		+= pmsa-v7.o pmsa-v8.o
 endif
 
 obj-$(CONFIG_ARM_PTDUMP_CORE)	+= dump.o
@@ -97,7 +97,7 @@ obj-$(CONFIG_CPU_MOHAWK)	+= proc-mohawk.o
 obj-$(CONFIG_CPU_FEROCEON)	+= proc-feroceon.o
 obj-$(CONFIG_CPU_V6)		+= proc-v6.o
 obj-$(CONFIG_CPU_V6K)		+= proc-v6.o
-obj-$(CONFIG_CPU_V7)		+= proc-v7.o
+obj-$(CONFIG_CPU_V7)		+= proc-v7.o proc-v7-bugs.o
 obj-$(CONFIG_CPU_V7M)		+= proc-v7m.o
 
 AFLAGS_proc-v6.o	:=-Wa,-march=armv6
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 4b6613b5e042..af27f1c22d93 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -831,7 +831,7 @@ static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 		 unsigned long attrs)
 {
 	int ret;
-	unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	unsigned long nr_vma_pages = vma_pages(vma);
 	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	unsigned long pfn = dma_to_pfn(dev, dma_addr);
 	unsigned long off = vma->vm_pgoff;
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 32034543f49c..84becc911ee3 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -163,6 +163,9 @@ __do_user_fault(struct task_struct *tsk, unsigned long addr,
 {
 	struct siginfo si;
 
+	if (addr > TASK_SIZE)
+		harden_branch_predictor();
+
 	clear_siginfo(&si);
 
 #ifdef CONFIG_DEBUG_USER
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index 7c087961b7ce..5dd6c58d653b 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -99,6 +99,38 @@ void __init arm_mm_memblock_reserve(void)
 	memblock_reserve(0, 1);
 }
 
+static void __init adjust_lowmem_bounds_mpu(void)
+{
+	unsigned long pmsa = read_cpuid_ext(CPUID_EXT_MMFR0) & MMFR0_PMSA;
+
+	switch (pmsa) {
+	case MMFR0_PMSAv7:
+		pmsav7_adjust_lowmem_bounds();
+		break;
+	case MMFR0_PMSAv8:
+		pmsav8_adjust_lowmem_bounds();
+		break;
+	default:
+		break;
+	}
+}
+
+static void __init mpu_setup(void)
+{
+	unsigned long pmsa = read_cpuid_ext(CPUID_EXT_MMFR0) & MMFR0_PMSA;
+
+	switch (pmsa) {
+	case MMFR0_PMSAv7:
+		pmsav7_setup();
+		break;
+	case MMFR0_PMSAv8:
+		pmsav8_setup();
+		break;
+	default:
+		break;
+	}
+}
+
 void __init adjust_lowmem_bounds(void)
 {
 	phys_addr_t end;
diff --git a/arch/arm/mm/pmsa-v7.c b/arch/arm/mm/pmsa-v7.c
index e2853bfff74e..699fa2e88725 100644
--- a/arch/arm/mm/pmsa-v7.c
+++ b/arch/arm/mm/pmsa-v7.c
@@ -102,7 +102,7 @@ static inline u32 irbar_read(void)
 
 static inline void rgnr_write(u32 v)
 {
-	writel_relaxed(v, BASEADDR_V7M_SCB + MPU_RNR);
+	writel_relaxed(v, BASEADDR_V7M_SCB + PMSAv7_RNR);
 }
 
 /* Data-side / unified region attributes */
@@ -110,28 +110,28 @@ static inline void rgnr_write(u32 v)
 /* Region access control register */
 static inline void dracr_write(u32 v)
 {
-	u32 rsr = readl_relaxed(BASEADDR_V7M_SCB + MPU_RASR) & GENMASK(15, 0);
+	u32 rsr = readl_relaxed(BASEADDR_V7M_SCB + PMSAv7_RASR) & GENMASK(15, 0);
 
-	writel_relaxed((v << 16) | rsr, BASEADDR_V7M_SCB + MPU_RASR);
+	writel_relaxed((v << 16) | rsr, BASEADDR_V7M_SCB + PMSAv7_RASR);
 }
 
 /* Region size register */
 static inline void drsr_write(u32 v)
 {
-	u32 racr = readl_relaxed(BASEADDR_V7M_SCB + MPU_RASR) & GENMASK(31, 16);
+	u32 racr = readl_relaxed(BASEADDR_V7M_SCB + PMSAv7_RASR) & GENMASK(31, 16);
 
-	writel_relaxed(v | racr, BASEADDR_V7M_SCB + MPU_RASR);
+	writel_relaxed(v | racr, BASEADDR_V7M_SCB + PMSAv7_RASR);
 }
 
 /* Region base address register */
 static inline void drbar_write(u32 v)
 {
-	writel_relaxed(v, BASEADDR_V7M_SCB + MPU_RBAR);
+	writel_relaxed(v, BASEADDR_V7M_SCB + PMSAv7_RBAR);
 }
 
 static inline u32 drbar_read(void)
 {
-	return readl_relaxed(BASEADDR_V7M_SCB + MPU_RBAR);
+	return readl_relaxed(BASEADDR_V7M_SCB + PMSAv7_RBAR);
 }
 
 /* ARMv7-M only supports a unified MPU, so I-side operations are nop */
@@ -143,11 +143,6 @@ static inline unsigned long irbar_read(void) {return 0;}
 
 #endif
 
-static int __init mpu_present(void)
-{
-	return ((read_cpuid_ext(CPUID_EXT_MMFR0) & MMFR0_PMSA) == MMFR0_PMSAv7);
-}
-
 static bool __init try_split_region(phys_addr_t base, phys_addr_t size, struct region *region)
 {
 	unsigned long  subreg, bslots, sslots;
@@ -161,7 +156,7 @@ static bool __init try_split_region(phys_addr_t base, phys_addr_t size, struct r
 
 	bdiff = base - abase;
 	sdiff = p2size - asize;
-	subreg = p2size / MPU_NR_SUBREGS;
+	subreg = p2size / PMSAv7_NR_SUBREGS;
 
 	if ((bdiff % subreg) || (sdiff % subreg))
 		return false;
@@ -172,17 +167,17 @@ static bool __init try_split_region(phys_addr_t base, phys_addr_t size, struct r
 	if (bslots || sslots) {
 		int i;
 
-		if (subreg < MPU_MIN_SUBREG_SIZE)
+		if (subreg < PMSAv7_MIN_SUBREG_SIZE)
 			return false;
 
-		if (bslots + sslots > MPU_NR_SUBREGS)
+		if (bslots + sslots > PMSAv7_NR_SUBREGS)
 			return false;
 
 		for (i = 0; i < bslots; i++)
 			_set_bit(i, &region->subreg);
 
 		for (i = 1; i <= sslots; i++)
-			_set_bit(MPU_NR_SUBREGS - i, &region->subreg);
+			_set_bit(PMSAv7_NR_SUBREGS - i, &region->subreg);
 	}
 
 	region->base = abase;
@@ -233,7 +228,7 @@ static int __init allocate_region(phys_addr_t base, phys_addr_t size,
 }
 
 /* MPU initialisation functions */
-void __init adjust_lowmem_bounds_mpu(void)
+void __init pmsav7_adjust_lowmem_bounds(void)
 {
 	phys_addr_t  specified_mem_size = 0, total_mem_size = 0;
 	struct memblock_region *reg;
@@ -243,10 +238,7 @@ void __init adjust_lowmem_bounds_mpu(void)
 	unsigned int mem_max_regions;
 	int num, i;
 
-	if (!mpu_present())
-		return;
-
-	/* Free-up MPU_PROBE_REGION */
+	/* Free-up PMSAv7_PROBE_REGION */
 	mpu_min_region_order = __mpu_min_region_order();
 
 	/* How many regions are supported */
@@ -301,12 +293,12 @@ void __init adjust_lowmem_bounds_mpu(void)
 	num = allocate_region(mem_start, specified_mem_size, mem_max_regions, mem);
 
 	for (i = 0; i < num; i++) {
-		unsigned long  subreg = mem[i].size / MPU_NR_SUBREGS;
+		unsigned long  subreg = mem[i].size / PMSAv7_NR_SUBREGS;
 
 		total_mem_size += mem[i].size - subreg * hweight_long(mem[i].subreg);
 
 		pr_debug("MPU: base %pa size %pa disable subregions: %*pbl\n",
-			 &mem[i].base, &mem[i].size, MPU_NR_SUBREGS, &mem[i].subreg);
+			 &mem[i].base, &mem[i].size, PMSAv7_NR_SUBREGS, &mem[i].subreg);
 	}
 
 	if (total_mem_size != specified_mem_size) {
@@ -349,7 +341,7 @@ static int __init __mpu_min_region_order(void)
 	u32 drbar_result, irbar_result;
 
 	/* We've kept a region free for this probing */
-	rgnr_write(MPU_PROBE_REGION);
+	rgnr_write(PMSAv7_PROBE_REGION);
 	isb();
 	/*
 	 * As per ARM ARM, write 0xFFFFFFFC to DRBAR to find the minimum
@@ -388,8 +380,8 @@ static int __init mpu_setup_region(unsigned int number, phys_addr_t start,
 		return -ENOMEM;
 
 	/* Writing N to bits 5:1 (RSR_SZ)  specifies region size 2^N+1 */
-	size_data = ((size_order - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN;
-	size_data |= subregions << MPU_RSR_SD;
+	size_data = ((size_order - 1) << PMSAv7_RSR_SZ) | 1 << PMSAv7_RSR_EN;
+	size_data |= subregions << PMSAv7_RSR_SD;
 
 	if (need_flush)
 		flush_cache_all();
@@ -424,18 +416,15 @@ static int __init mpu_setup_region(unsigned int number, phys_addr_t start,
 /*
 * Set up default MPU regions, doing nothing if there is no MPU
 */
-void __init mpu_setup(void)
+void __init pmsav7_setup(void)
 {
 	int i, region = 0, err = 0;
 
-	if (!mpu_present())
-		return;
-
 	/* Setup MPU (order is important) */
 
 	/* Background */
 	err |= mpu_setup_region(region++, 0, 32,
-				MPU_ACR_XN | MPU_RGN_STRONGLY_ORDERED | MPU_AP_PL1RW_PL0RW,
+				PMSAv7_ACR_XN | PMSAv7_RGN_STRONGLY_ORDERED | PMSAv7_AP_PL1RW_PL0RW,
 				0, false);
 
 #ifdef CONFIG_XIP_KERNEL
@@ -448,13 +437,13 @@ void __init mpu_setup(void)
                  * with BG region (which is uncachable), thus we need
                  * to clean and invalidate cache.
 		 */
-		bool need_flush = region == MPU_RAM_REGION;
+		bool need_flush = region == PMSAv7_RAM_REGION;
 
 		if (!xip[i].size)
 			continue;
 
 		err |= mpu_setup_region(region++, xip[i].base, ilog2(xip[i].size),
-					MPU_AP_PL1RO_PL0NA | MPU_RGN_NORMAL,
+					PMSAv7_AP_PL1RO_PL0NA | PMSAv7_RGN_NORMAL,
 					xip[i].subreg, need_flush);
 	}
 #endif
@@ -465,14 +454,14 @@ void __init mpu_setup(void)
 			continue;
 
 		err |= mpu_setup_region(region++, mem[i].base, ilog2(mem[i].size),
-					MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL,
+					PMSAv7_AP_PL1RW_PL0RW | PMSAv7_RGN_NORMAL,
 					mem[i].subreg, false);
 	}
 
 	/* Vectors */
 #ifndef CONFIG_CPU_V7M
 	err |= mpu_setup_region(region++, vectors_base, ilog2(2 * PAGE_SIZE),
-				MPU_AP_PL1RW_PL0NA | MPU_RGN_NORMAL,
+				PMSAv7_AP_PL1RW_PL0NA | PMSAv7_RGN_NORMAL,
 				0, false);
 #endif
 	if (err) {
diff --git a/arch/arm/mm/pmsa-v8.c b/arch/arm/mm/pmsa-v8.c
new file mode 100644
index 000000000000..617a83def88a
--- /dev/null
+++ b/arch/arm/mm/pmsa-v8.c
@@ -0,0 +1,307 @@
+/*
+ * Based on linux/arch/arm/pmsa-v7.c
+ *
+ * ARM PMSAv8 supporting functions.
+ */
+
+#include <linux/memblock.h>
+#include <linux/range.h>
+
+#include <asm/cp15.h>
+#include <asm/cputype.h>
+#include <asm/mpu.h>
+
+#include <asm/memory.h>
+#include <asm/sections.h>
+
+#include "mm.h"
+
+#ifndef CONFIG_CPU_V7M
+
+#define PRSEL	__ACCESS_CP15(c6, 0, c2, 1)
+#define PRBAR	__ACCESS_CP15(c6, 0, c3, 0)
+#define PRLAR	__ACCESS_CP15(c6, 0, c3, 1)
+
+static inline u32 prlar_read(void)
+{
+	return read_sysreg(PRLAR);
+}
+
+static inline u32 prbar_read(void)
+{
+	return read_sysreg(PRBAR);
+}
+
+static inline void prsel_write(u32 v)
+{
+	write_sysreg(v, PRSEL);
+}
+
+static inline void prbar_write(u32 v)
+{
+	write_sysreg(v, PRBAR);
+}
+
+static inline void prlar_write(u32 v)
+{
+	write_sysreg(v, PRLAR);
+}
+#else
+
+static inline u32 prlar_read(void)
+{
+	return readl_relaxed(BASEADDR_V7M_SCB + PMSAv8_RLAR);
+}
+
+static inline u32 prbar_read(void)
+{
+	return readl_relaxed(BASEADDR_V7M_SCB + PMSAv8_RBAR);
+}
+
+static inline void prsel_write(u32 v)
+{
+	writel_relaxed(v, BASEADDR_V7M_SCB + PMSAv8_RNR);
+}
+
+static inline void prbar_write(u32 v)
+{
+	writel_relaxed(v, BASEADDR_V7M_SCB + PMSAv8_RBAR);
+}
+
+static inline void prlar_write(u32 v)
+{
+	writel_relaxed(v, BASEADDR_V7M_SCB + PMSAv8_RLAR);
+}
+
+#endif
+
+static struct range __initdata io[MPU_MAX_REGIONS];
+static struct range __initdata mem[MPU_MAX_REGIONS];
+
+static unsigned int __initdata mpu_max_regions;
+
+static __init bool is_region_fixed(int number)
+{
+	switch (number) {
+	case PMSAv8_XIP_REGION:
+	case PMSAv8_KERNEL_REGION:
+		return true;
+	default:
+		return false;
+	}
+}
+
+void __init pmsav8_adjust_lowmem_bounds(void)
+{
+	phys_addr_t mem_end;
+	struct memblock_region *reg;
+	bool first = true;
+
+	for_each_memblock(memory, reg) {
+		if (first) {
+			phys_addr_t phys_offset = PHYS_OFFSET;
+
+			/*
+			 * Initially only use memory continuous from
+			 * PHYS_OFFSET */
+			if (reg->base != phys_offset)
+				panic("First memory bank must be contiguous from PHYS_OFFSET");
+			mem_end = reg->base + reg->size;
+			first = false;
+		} else {
+			/*
+			 * memblock auto merges contiguous blocks, remove
+			 * all blocks afterwards in one go (we can't remove
+			 * blocks separately while iterating)
+			 */
+			pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n",
+				  &mem_end, &reg->base);
+			memblock_remove(reg->base, 0 - reg->base);
+			break;
+		}
+	}
+}
+
+static int __init __mpu_max_regions(void)
+{
+	static int max_regions;
+	u32 mpuir;
+
+	if (max_regions)
+		return max_regions;
+
+	mpuir = read_cpuid_mputype();
+
+	max_regions  = (mpuir & MPUIR_DREGION_SZMASK) >> MPUIR_DREGION;
+
+	return max_regions;
+}
+
+static int __init __pmsav8_setup_region(unsigned int number, u32 bar, u32 lar)
+{
+	if (number > mpu_max_regions
+	    || number >= MPU_MAX_REGIONS)
+		return -ENOENT;
+
+	dsb();
+	prsel_write(number);
+	isb();
+	prbar_write(bar);
+	prlar_write(lar);
+
+	mpu_rgn_info.rgns[number].prbar = bar;
+	mpu_rgn_info.rgns[number].prlar = lar;
+
+	mpu_rgn_info.used++;
+
+	return 0;
+}
+
+static int __init pmsav8_setup_ram(unsigned int number, phys_addr_t start,phys_addr_t end)
+{
+	u32 bar, lar;
+
+	if (is_region_fixed(number))
+		return -EINVAL;
+
+	bar = start;
+	lar = (end - 1) & ~(PMSAv8_MINALIGN - 1);;
+
+	bar |= PMSAv8_AP_PL1RW_PL0RW | PMSAv8_RGN_SHARED;
+	lar |= PMSAv8_LAR_IDX(PMSAv8_RGN_NORMAL) | PMSAv8_LAR_EN;
+
+	return __pmsav8_setup_region(number, bar, lar);
+}
+
+static int __init pmsav8_setup_io(unsigned int number, phys_addr_t start,phys_addr_t end)
+{
+	u32 bar, lar;
+
+	if (is_region_fixed(number))
+		return -EINVAL;
+
+	bar = start;
+	lar = (end - 1) & ~(PMSAv8_MINALIGN - 1);;
+
+	bar |= PMSAv8_AP_PL1RW_PL0RW | PMSAv8_RGN_SHARED | PMSAv8_BAR_XN;
+	lar |= PMSAv8_LAR_IDX(PMSAv8_RGN_DEVICE_nGnRnE) | PMSAv8_LAR_EN;
+
+	return __pmsav8_setup_region(number, bar, lar);
+}
+
+static int __init pmsav8_setup_fixed(unsigned int number, phys_addr_t start,phys_addr_t end)
+{
+	u32 bar, lar;
+
+	if (!is_region_fixed(number))
+		return -EINVAL;
+
+	bar = start;
+	lar = (end - 1) & ~(PMSAv8_MINALIGN - 1);
+
+	bar |= PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED;
+	lar |= PMSAv8_LAR_IDX(PMSAv8_RGN_NORMAL) | PMSAv8_LAR_EN;
+
+	prsel_write(number);
+	isb();
+
+	if (prbar_read() != bar || prlar_read() != lar)
+		return -EINVAL;
+
+	/* Reserved region was set up early, we just need a record for secondaries */
+	mpu_rgn_info.rgns[number].prbar = bar;
+	mpu_rgn_info.rgns[number].prlar = lar;
+
+	mpu_rgn_info.used++;
+
+	return 0;
+}
+
+#ifndef CONFIG_CPU_V7M
+static int __init pmsav8_setup_vector(unsigned int number, phys_addr_t start,phys_addr_t end)
+{
+	u32 bar, lar;
+
+	if (number == PMSAv8_KERNEL_REGION)
+		return -EINVAL;
+
+	bar = start;
+	lar = (end - 1) & ~(PMSAv8_MINALIGN - 1);
+
+	bar |= PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED;
+	lar |= PMSAv8_LAR_IDX(PMSAv8_RGN_NORMAL) | PMSAv8_LAR_EN;
+
+	return __pmsav8_setup_region(number, bar, lar);
+}
+#endif
+
+void __init pmsav8_setup(void)
+{
+	int i, err = 0;
+	int region = PMSAv8_KERNEL_REGION;
+
+	/* How many regions are supported ? */
+	mpu_max_regions = __mpu_max_regions();
+
+	/* RAM: single chunk of memory */
+	add_range(mem,  ARRAY_SIZE(mem), 0,  memblock.memory.regions[0].base,
+		  memblock.memory.regions[0].base + memblock.memory.regions[0].size);
+
+	/* IO: cover full 4G range */
+	add_range(io, ARRAY_SIZE(io), 0, 0, 0xffffffff);
+
+	/* RAM and IO: exclude kernel */
+	subtract_range(mem, ARRAY_SIZE(mem), __pa(KERNEL_START), __pa(KERNEL_END));
+	subtract_range(io, ARRAY_SIZE(io),  __pa(KERNEL_START), __pa(KERNEL_END));
+
+#ifdef CONFIG_XIP_KERNEL
+	/* RAM and IO: exclude xip */
+	subtract_range(mem, ARRAY_SIZE(mem), CONFIG_XIP_PHYS_ADDR, __pa(_exiprom));
+	subtract_range(io, ARRAY_SIZE(io), CONFIG_XIP_PHYS_ADDR, __pa(_exiprom));
+#endif
+
+#ifndef CONFIG_CPU_V7M
+	/* RAM and IO: exclude vectors */
+	subtract_range(mem, ARRAY_SIZE(mem),  vectors_base, vectors_base + 2 * PAGE_SIZE);
+	subtract_range(io, ARRAY_SIZE(io),  vectors_base, vectors_base + 2 * PAGE_SIZE);
+#endif
+	/* IO: exclude RAM */
+	for (i = 0; i < ARRAY_SIZE(mem); i++)
+		subtract_range(io, ARRAY_SIZE(io), mem[i].start, mem[i].end);
+
+	/* Now program MPU */
+
+#ifdef CONFIG_XIP_KERNEL
+	/* ROM */
+	err |= pmsav8_setup_fixed(PMSAv8_XIP_REGION, CONFIG_XIP_PHYS_ADDR, __pa(_exiprom));
+#endif
+	/* Kernel */
+	err |= pmsav8_setup_fixed(region++, __pa(KERNEL_START), __pa(KERNEL_END));
+
+
+	/* IO */
+	for (i = 0; i < ARRAY_SIZE(io); i++) {
+		if (!io[i].end)
+			continue;
+
+		err |= pmsav8_setup_io(region++, io[i].start, io[i].end);
+	}
+
+	/* RAM */
+	for (i = 0; i < ARRAY_SIZE(mem); i++) {
+		if (!mem[i].end)
+			continue;
+
+		err |= pmsav8_setup_ram(region++, mem[i].start, mem[i].end);
+	}
+
+	/* Vectors */
+#ifndef CONFIG_CPU_V7M
+	err |= pmsav8_setup_vector(region++, vectors_base, vectors_base + 2 * PAGE_SIZE);
+#endif
+	if (err)
+		pr_warn("MPU region initialization failure! %d", err);
+	else
+		pr_info("Using ARM PMSAv8 Compliant MPU. Used %d of %d regions\n",
+			mpu_rgn_info.used, mpu_max_regions);
+}
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index f10e31d0730a..81d0efb055c6 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -273,13 +273,14 @@
 	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
 	.endm
 
-.macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0
+.macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0, bugs=0
 	.type	\name\()_processor_functions, #object
 	.align 2
 ENTRY(\name\()_processor_functions)
 	.word	\dabort
 	.word	\pabort
 	.word	cpu_\name\()_proc_init
+	.word	\bugs
 	.word	cpu_\name\()_proc_fin
 	.word	cpu_\name\()_reset
 	.word	cpu_\name\()_do_idle
diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
index c6141a5435c3..f8d45ad2a515 100644
--- a/arch/arm/mm/proc-v7-2level.S
+++ b/arch/arm/mm/proc-v7-2level.S
@@ -41,11 +41,6 @@
  *	even on Cortex-A8 revisions not affected by 430973.
  *	If IBE is not set, the flush BTAC/BTB won't do anything.
  */
-ENTRY(cpu_ca8_switch_mm)
-#ifdef CONFIG_MMU
-	mov	r2, #0
-	mcr	p15, 0, r2, c7, c5, 6		@ flush BTAC/BTB
-#endif
 ENTRY(cpu_v7_switch_mm)
 #ifdef CONFIG_MMU
 	mmid	r1, r1				@ get mm->context.id
@@ -66,7 +61,6 @@ ENTRY(cpu_v7_switch_mm)
 #endif
 	bx	lr
 ENDPROC(cpu_v7_switch_mm)
-ENDPROC(cpu_ca8_switch_mm)
 
 /*
  *	cpu_v7_set_pte_ext(ptep, pte)
diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c
new file mode 100644
index 000000000000..5544b82a2e7a
--- /dev/null
+++ b/arch/arm/mm/proc-v7-bugs.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/arm-smccc.h>
+#include <linux/kernel.h>
+#include <linux/psci.h>
+#include <linux/smp.h>
+
+#include <asm/cp15.h>
+#include <asm/cputype.h>
+#include <asm/proc-fns.h>
+#include <asm/system_misc.h>
+
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+DEFINE_PER_CPU(harden_branch_predictor_fn_t, harden_branch_predictor_fn);
+
+extern void cpu_v7_iciallu_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+extern void cpu_v7_bpiall_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+extern void cpu_v7_smc_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+extern void cpu_v7_hvc_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+
+static void harden_branch_predictor_bpiall(void)
+{
+	write_sysreg(0, BPIALL);
+}
+
+static void harden_branch_predictor_iciallu(void)
+{
+	write_sysreg(0, ICIALLU);
+}
+
+static void __maybe_unused call_smc_arch_workaround_1(void)
+{
+	arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
+}
+
+static void __maybe_unused call_hvc_arch_workaround_1(void)
+{
+	arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
+}
+
+static void cpu_v7_spectre_init(void)
+{
+	const char *spectre_v2_method = NULL;
+	int cpu = smp_processor_id();
+
+	if (per_cpu(harden_branch_predictor_fn, cpu))
+		return;
+
+	switch (read_cpuid_part()) {
+	case ARM_CPU_PART_CORTEX_A8:
+	case ARM_CPU_PART_CORTEX_A9:
+	case ARM_CPU_PART_CORTEX_A12:
+	case ARM_CPU_PART_CORTEX_A17:
+	case ARM_CPU_PART_CORTEX_A73:
+	case ARM_CPU_PART_CORTEX_A75:
+		if (processor.switch_mm != cpu_v7_bpiall_switch_mm)
+			goto bl_error;
+		per_cpu(harden_branch_predictor_fn, cpu) =
+			harden_branch_predictor_bpiall;
+		spectre_v2_method = "BPIALL";
+		break;
+
+	case ARM_CPU_PART_CORTEX_A15:
+	case ARM_CPU_PART_BRAHMA_B15:
+		if (processor.switch_mm != cpu_v7_iciallu_switch_mm)
+			goto bl_error;
+		per_cpu(harden_branch_predictor_fn, cpu) =
+			harden_branch_predictor_iciallu;
+		spectre_v2_method = "ICIALLU";
+		break;
+
+#ifdef CONFIG_ARM_PSCI
+	default:
+		/* Other ARM CPUs require no workaround */
+		if (read_cpuid_implementor() == ARM_CPU_IMP_ARM)
+			break;
+		/* fallthrough */
+		/* Cortex A57/A72 require firmware workaround */
+	case ARM_CPU_PART_CORTEX_A57:
+	case ARM_CPU_PART_CORTEX_A72: {
+		struct arm_smccc_res res;
+
+		if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
+			break;
+
+		switch (psci_ops.conduit) {
+		case PSCI_CONDUIT_HVC:
+			arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+					  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
+			if ((int)res.a0 != 0)
+				break;
+			if (processor.switch_mm != cpu_v7_hvc_switch_mm && cpu)
+				goto bl_error;
+			per_cpu(harden_branch_predictor_fn, cpu) =
+				call_hvc_arch_workaround_1;
+			processor.switch_mm = cpu_v7_hvc_switch_mm;
+			spectre_v2_method = "hypervisor";
+			break;
+
+		case PSCI_CONDUIT_SMC:
+			arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+					  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
+			if ((int)res.a0 != 0)
+				break;
+			if (processor.switch_mm != cpu_v7_smc_switch_mm && cpu)
+				goto bl_error;
+			per_cpu(harden_branch_predictor_fn, cpu) =
+				call_smc_arch_workaround_1;
+			processor.switch_mm = cpu_v7_smc_switch_mm;
+			spectre_v2_method = "firmware";
+			break;
+
+		default:
+			break;
+		}
+	}
+#endif
+	}
+
+	if (spectre_v2_method)
+		pr_info("CPU%u: Spectre v2: using %s workaround\n",
+			smp_processor_id(), spectre_v2_method);
+	return;
+
+bl_error:
+	pr_err("CPU%u: Spectre v2: incorrect context switching function, system vulnerable\n",
+		cpu);
+}
+#else
+static void cpu_v7_spectre_init(void)
+{
+}
+#endif
+
+static __maybe_unused bool cpu_v7_check_auxcr_set(bool *warned,
+						  u32 mask, const char *msg)
+{
+	u32 aux_cr;
+
+	asm("mrc p15, 0, %0, c1, c0, 1" : "=r" (aux_cr));
+
+	if ((aux_cr & mask) != mask) {
+		if (!*warned)
+			pr_err("CPU%u: %s", smp_processor_id(), msg);
+		*warned = true;
+		return false;
+	}
+	return true;
+}
+
+static DEFINE_PER_CPU(bool, spectre_warned);
+
+static bool check_spectre_auxcr(bool *warned, u32 bit)
+{
+	return IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR) &&
+		cpu_v7_check_auxcr_set(warned, bit,
+				       "Spectre v2: firmware did not set auxiliary control register IBE bit, system vulnerable\n");
+}
+
+void cpu_v7_ca8_ibe(void)
+{
+	if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(6)))
+		cpu_v7_spectre_init();
+}
+
+void cpu_v7_ca15_ibe(void)
+{
+	if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(0)))
+		cpu_v7_spectre_init();
+}
+
+void cpu_v7_bugs_init(void)
+{
+	cpu_v7_spectre_init();
+}
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index b528a15f460d..6fe52819e014 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -9,6 +9,7 @@
  *
  *  This is the "shell" of the ARMv7 processor support.
  */
+#include <linux/arm-smccc.h>
 #include <linux/init.h>
 #include <linux/linkage.h>
 #include <asm/assembler.h>
@@ -93,6 +94,37 @@ ENTRY(cpu_v7_dcache_clean_area)
 	ret	lr
 ENDPROC(cpu_v7_dcache_clean_area)
 
+#ifdef CONFIG_ARM_PSCI
+	.arch_extension sec
+ENTRY(cpu_v7_smc_switch_mm)
+	stmfd	sp!, {r0 - r3}
+	movw	r0, #:lower16:ARM_SMCCC_ARCH_WORKAROUND_1
+	movt	r0, #:upper16:ARM_SMCCC_ARCH_WORKAROUND_1
+	smc	#0
+	ldmfd	sp!, {r0 - r3}
+	b	cpu_v7_switch_mm
+ENDPROC(cpu_v7_smc_switch_mm)
+	.arch_extension virt
+ENTRY(cpu_v7_hvc_switch_mm)
+	stmfd	sp!, {r0 - r3}
+	movw	r0, #:lower16:ARM_SMCCC_ARCH_WORKAROUND_1
+	movt	r0, #:upper16:ARM_SMCCC_ARCH_WORKAROUND_1
+	hvc	#0
+	ldmfd	sp!, {r0 - r3}
+	b	cpu_v7_switch_mm
+ENDPROC(cpu_v7_smc_switch_mm)
+#endif
+ENTRY(cpu_v7_iciallu_switch_mm)
+	mov	r3, #0
+	mcr	p15, 0, r3, c7, c5, 0		@ ICIALLU
+	b	cpu_v7_switch_mm
+ENDPROC(cpu_v7_iciallu_switch_mm)
+ENTRY(cpu_v7_bpiall_switch_mm)
+	mov	r3, #0
+	mcr	p15, 0, r3, c7, c5, 6		@ flush BTAC/BTB
+	b	cpu_v7_switch_mm
+ENDPROC(cpu_v7_bpiall_switch_mm)
+
 	string	cpu_v7_name, "ARMv7 Processor"
 	.align
 
@@ -158,31 +190,6 @@ ENTRY(cpu_v7_do_resume)
 ENDPROC(cpu_v7_do_resume)
 #endif
 
-/*
- * Cortex-A8
- */
-	globl_equ	cpu_ca8_proc_init,	cpu_v7_proc_init
-	globl_equ	cpu_ca8_proc_fin,	cpu_v7_proc_fin
-	globl_equ	cpu_ca8_reset,		cpu_v7_reset
-	globl_equ	cpu_ca8_do_idle,	cpu_v7_do_idle
-	globl_equ	cpu_ca8_dcache_clean_area, cpu_v7_dcache_clean_area
-	globl_equ	cpu_ca8_set_pte_ext,	cpu_v7_set_pte_ext
-	globl_equ	cpu_ca8_suspend_size,	cpu_v7_suspend_size
-#ifdef CONFIG_ARM_CPU_SUSPEND
-	globl_equ	cpu_ca8_do_suspend,	cpu_v7_do_suspend
-	globl_equ	cpu_ca8_do_resume,	cpu_v7_do_resume
-#endif
-
-/*
- * Cortex-A9 processor functions
- */
-	globl_equ	cpu_ca9mp_proc_init,	cpu_v7_proc_init
-	globl_equ	cpu_ca9mp_proc_fin,	cpu_v7_proc_fin
-	globl_equ	cpu_ca9mp_reset,	cpu_v7_reset
-	globl_equ	cpu_ca9mp_do_idle,	cpu_v7_do_idle
-	globl_equ	cpu_ca9mp_dcache_clean_area, cpu_v7_dcache_clean_area
-	globl_equ	cpu_ca9mp_switch_mm,	cpu_v7_switch_mm
-	globl_equ	cpu_ca9mp_set_pte_ext,	cpu_v7_set_pte_ext
 .globl	cpu_ca9mp_suspend_size
 .equ	cpu_ca9mp_suspend_size, cpu_v7_suspend_size + 4 * 2
 #ifdef CONFIG_ARM_CPU_SUSPEND
@@ -547,12 +554,79 @@ __v7_setup_stack:
 
 	__INITDATA
 
+	.weak cpu_v7_bugs_init
+
 	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
-	define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
-#ifndef CONFIG_ARM_LPAE
-	define_processor_functions ca8, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
-	define_processor_functions ca9mp, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
+	define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_bugs_init
+
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+	@ generic v7 bpiall on context switch
+	globl_equ	cpu_v7_bpiall_proc_init,	cpu_v7_proc_init
+	globl_equ	cpu_v7_bpiall_proc_fin,		cpu_v7_proc_fin
+	globl_equ	cpu_v7_bpiall_reset,		cpu_v7_reset
+	globl_equ	cpu_v7_bpiall_do_idle,		cpu_v7_do_idle
+	globl_equ	cpu_v7_bpiall_dcache_clean_area, cpu_v7_dcache_clean_area
+	globl_equ	cpu_v7_bpiall_set_pte_ext,	cpu_v7_set_pte_ext
+	globl_equ	cpu_v7_bpiall_suspend_size,	cpu_v7_suspend_size
+#ifdef CONFIG_ARM_CPU_SUSPEND
+	globl_equ	cpu_v7_bpiall_do_suspend,	cpu_v7_do_suspend
+	globl_equ	cpu_v7_bpiall_do_resume,	cpu_v7_do_resume
 #endif
+	define_processor_functions v7_bpiall, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_bugs_init
+
+#define HARDENED_BPIALL_PROCESSOR_FUNCTIONS v7_bpiall_processor_functions
+#else
+#define HARDENED_BPIALL_PROCESSOR_FUNCTIONS v7_processor_functions
+#endif
+
+#ifndef CONFIG_ARM_LPAE
+	@ Cortex-A8 - always needs bpiall switch_mm implementation
+	globl_equ	cpu_ca8_proc_init,	cpu_v7_proc_init
+	globl_equ	cpu_ca8_proc_fin,	cpu_v7_proc_fin
+	globl_equ	cpu_ca8_reset,		cpu_v7_reset
+	globl_equ	cpu_ca8_do_idle,	cpu_v7_do_idle
+	globl_equ	cpu_ca8_dcache_clean_area, cpu_v7_dcache_clean_area
+	globl_equ	cpu_ca8_set_pte_ext,	cpu_v7_set_pte_ext
+	globl_equ	cpu_ca8_switch_mm,	cpu_v7_bpiall_switch_mm
+	globl_equ	cpu_ca8_suspend_size,	cpu_v7_suspend_size
+#ifdef CONFIG_ARM_CPU_SUSPEND
+	globl_equ	cpu_ca8_do_suspend,	cpu_v7_do_suspend
+	globl_equ	cpu_ca8_do_resume,	cpu_v7_do_resume
+#endif
+	define_processor_functions ca8, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_ca8_ibe
+
+	@ Cortex-A9 - needs more registers preserved across suspend/resume
+	@ and bpiall switch_mm for hardening
+	globl_equ	cpu_ca9mp_proc_init,	cpu_v7_proc_init
+	globl_equ	cpu_ca9mp_proc_fin,	cpu_v7_proc_fin
+	globl_equ	cpu_ca9mp_reset,	cpu_v7_reset
+	globl_equ	cpu_ca9mp_do_idle,	cpu_v7_do_idle
+	globl_equ	cpu_ca9mp_dcache_clean_area, cpu_v7_dcache_clean_area
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+	globl_equ	cpu_ca9mp_switch_mm,	cpu_v7_bpiall_switch_mm
+#else
+	globl_equ	cpu_ca9mp_switch_mm,	cpu_v7_switch_mm
+#endif
+	globl_equ	cpu_ca9mp_set_pte_ext,	cpu_v7_set_pte_ext
+	define_processor_functions ca9mp, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_bugs_init
+#endif
+
+	@ Cortex-A15 - needs iciallu switch_mm for hardening
+	globl_equ	cpu_ca15_proc_init,	cpu_v7_proc_init
+	globl_equ	cpu_ca15_proc_fin,	cpu_v7_proc_fin
+	globl_equ	cpu_ca15_reset,		cpu_v7_reset
+	globl_equ	cpu_ca15_do_idle,	cpu_v7_do_idle
+	globl_equ	cpu_ca15_dcache_clean_area, cpu_v7_dcache_clean_area
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+	globl_equ	cpu_ca15_switch_mm,	cpu_v7_iciallu_switch_mm
+#else
+	globl_equ	cpu_ca15_switch_mm,	cpu_v7_switch_mm
+#endif
+	globl_equ	cpu_ca15_set_pte_ext,	cpu_v7_set_pte_ext
+	globl_equ	cpu_ca15_suspend_size,	cpu_v7_suspend_size
+	globl_equ	cpu_ca15_do_suspend,	cpu_v7_do_suspend
+	globl_equ	cpu_ca15_do_resume,	cpu_v7_do_resume
+	define_processor_functions ca15, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_ca15_ibe
 #ifdef CONFIG_CPU_PJ4B
 	define_processor_functions pj4b, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
 #endif
@@ -669,7 +743,7 @@ __v7_ca7mp_proc_info:
 __v7_ca12mp_proc_info:
 	.long	0x410fc0d0
 	.long	0xff0ffff0
-	__v7_proc __v7_ca12mp_proc_info, __v7_ca12mp_setup
+	__v7_proc __v7_ca12mp_proc_info, __v7_ca12mp_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS
 	.size	__v7_ca12mp_proc_info, . - __v7_ca12mp_proc_info
 
 	/*
@@ -679,7 +753,7 @@ __v7_ca12mp_proc_info:
 __v7_ca15mp_proc_info:
 	.long	0x410fc0f0
 	.long	0xff0ffff0
-	__v7_proc __v7_ca15mp_proc_info, __v7_ca15mp_setup
+	__v7_proc __v7_ca15mp_proc_info, __v7_ca15mp_setup, proc_fns = ca15_processor_functions
 	.size	__v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info
 
 	/*
@@ -689,7 +763,7 @@ __v7_ca15mp_proc_info:
 __v7_b15mp_proc_info:
 	.long	0x420f00f0
 	.long	0xff0ffff0
-	__v7_proc __v7_b15mp_proc_info, __v7_b15mp_setup, cache_fns = b15_cache_fns
+	__v7_proc __v7_b15mp_proc_info, __v7_b15mp_setup, proc_fns = ca15_processor_functions, cache_fns = b15_cache_fns
 	.size	__v7_b15mp_proc_info, . - __v7_b15mp_proc_info
 
 	/*
@@ -699,9 +773,25 @@ __v7_b15mp_proc_info:
 __v7_ca17mp_proc_info:
 	.long	0x410fc0e0
 	.long	0xff0ffff0
-	__v7_proc __v7_ca17mp_proc_info, __v7_ca17mp_setup
+	__v7_proc __v7_ca17mp_proc_info, __v7_ca17mp_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS
 	.size	__v7_ca17mp_proc_info, . - __v7_ca17mp_proc_info
 
+	/* ARM Ltd. Cortex A73 processor */
+	.type	__v7_ca73_proc_info, #object
+__v7_ca73_proc_info:
+	.long	0x410fd090
+	.long	0xff0ffff0
+	__v7_proc __v7_ca73_proc_info, __v7_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS
+	.size	__v7_ca73_proc_info, . - __v7_ca73_proc_info
+
+	/* ARM Ltd. Cortex A75 processor */
+	.type	__v7_ca75_proc_info, #object
+__v7_ca75_proc_info:
+	.long	0x410fd0a0
+	.long	0xff0ffff0
+	__v7_proc __v7_ca75_proc_info, __v7_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS
+	.size	__v7_ca75_proc_info, . - __v7_ca75_proc_info
+
 	/*
 	 * Qualcomm Inc. Krait processors.
 	 */
diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index 3ece711a6a17..41b706403ef7 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -205,6 +205,7 @@ struct bus_type amba_bustype = {
 	.dma_configure	= platform_dma_configure,
 	.pm		= &amba_pm,
 };
+EXPORT_SYMBOL_GPL(amba_bustype);
 
 static int __init amba_init(void)
 {