Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 and cross-arch updates from Catalin Marinas:
"Here's a slightly wider-spread set of updates for 5.9.
Going outside the usual arch/arm64/ area is the removal of
read_barrier_depends() series from Will and the MSI/IOMMU ID
translation series from Lorenzo.
The notable arm64 updates include the ARMv8.4 TLBI range operations and
translation table level hints, time namespace support, and perf updates.
Summary:
- Removal of the tremendously unpopular read_barrier_depends()
barrier, which is a NOP on all architectures apart from Alpha, in
favour of allowing architectures to override READ_ONCE() and do
whatever dance they need to do to ensure address dependencies
provide LOAD -> LOAD/STORE ordering.
This work also offers a potential solution if compilers are shown
to convert LOAD -> LOAD address dependencies into control
dependencies (e.g. under LTO), as weakly ordered architectures will
effectively be able to upgrade READ_ONCE() to smp_load_acquire() (a
sketch of this override pattern follows the links below).
The latter case is not used yet, but will be discussed further at
LPC.
- Make the MSI/IOMMU input/output ID translation PCI agnostic,
augment the MSI/IOMMU ACPI/OF ID mapping APIs to accept an input ID
bus-specific parameter and apply the resulting changes to the
device ID space provided by the Freescale FSL bus (a worked msi-map
example follows the commit list below).
- arm64 support for TLBI range operations and translation table level
hints (part of the ARMv8.4 architecture version).
- Time namespace support for arm64.
- Export the virtual and physical address sizes in vmcoreinfo for
makedumpfile and crash utilities.
- CPU feature handling cleanups and checks for programmer errors
(overlapping bit-fields).
- ACPI updates for arm64: disallow AML accesses to EFI code regions
and kernel memory.
- perf updates for arm64.
- Miscellaneous fixes and cleanups, most notably PLT counting
optimisation for module loading, recordmcount fix to ignore
relocations other than R_AARCH64_CALL26, CMA areas reserved for
gigantic pages on 16K and 64K configurations.
- Trivial typos, duplicate words"
Link: http://lkml.kernel.org/r/20200710165203.31284-1-will@kernel.org
Link: http://lkml.kernel.org/r/20200619082013.13661-1-lorenzo.pieralisi@arm.com
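For reference, a minimal sketch of the READ_ONCE() override mechanism
described above, modelled on the asm-generic/rwonce.h pattern this series
introduces; the upstream macros carry extra type-checking, so treat this
as illustrative rather than verbatim:

	/* Generic default: READ_ONCE() is a plain volatile load. */
	#ifndef __READ_ONCE
	#define __READ_ONCE(x)	(*(const volatile typeof(x) *)&(x))
	#endif

	#define READ_ONCE(x)				\
	({						\
		typeof(x) __x = __READ_ONCE(x);		\
		__x;					\
	})

	/*
	 * A weakly ordered architecture defending against LTO converting
	 * address dependencies into control dependencies could override
	 * the hook in its asm/rwonce.h, upgrading the load to an acquire:
	 *
	 *	#define __READ_ONCE(x)	smp_load_acquire(&(x))
	 */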
* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (82 commits)
arm64: use IRQ_STACK_SIZE instead of THREAD_SIZE for irq stack
arm64/mm: save memory access in check_and_switch_context() fast switch path
arm64: sigcontext.h: delete duplicated word
arm64: ptrace.h: delete duplicated word
arm64: pgtable-hwdef.h: delete duplicated words
bus: fsl-mc: Add ACPI support for fsl-mc
bus/fsl-mc: Refactor the MSI domain creation in the DPRC driver
of/irq: Make of_msi_map_rid() PCI bus agnostic
of/irq: make of_msi_map_get_device_domain() bus agnostic
dt-bindings: arm: fsl: Add msi-map device-tree binding for fsl-mc bus
of/device: Add input id to of_dma_configure()
of/iommu: Make of_map_rid() PCI agnostic
ACPI/IORT: Add an input ID to acpi_dma_configure()
ACPI/IORT: Remove useless PCI bus walk
ACPI/IORT: Make iort_msi_map_rid() PCI agnostic
ACPI/IORT: Make iort_get_device_domain IRQ domain agnostic
ACPI/IORT: Make iort_match_node_callback walk the ACPI namespace for NC
arm64: enable time namespace support
arm64/vdso: Restrict splitting VVAR VMA
arm64/vdso: Handle faults on timens page
...
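For reference, the msi-map translation that the OF/IORT/fsl-mc patches
above generalise beyond PCI boils down to a windowed offset. A hedged
sketch with kernel-style types (the struct and function names here are
illustrative, not the kernel's):

	struct msi_map_entry {
		u32 id_base;	/* first input ID covered by the window */
		u32 out_base;	/* first output (translated) ID */
		u32 length;	/* number of IDs in the window */
	};

	/* Return the translated ID, or -1 if 'id' misses the window. */
	static int msi_map_translate(const struct msi_map_entry *e, u32 id)
	{
		if (id < e->id_base || id - e->id_base >= e->length)
			return -1;
		return (id - e->id_base) + e->out_base;
	}

With a firmware entry msi-map = <0x0 &its 0x10000 0x100>, input ID 0x42
translates to 0x10042; the series lets non-PCI buses such as fsl-mc feed
their own input IDs through the same lookup.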
@@ -47,20 +47,7 @@
 pgprot_t __acpi_get_mem_attribute(phys_addr_t addr);
 
 /* ACPI table mapping after acpi_permanent_mmap is set */
-static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys,
-					    acpi_size size)
-{
-	/* For normal memory we already have a cacheable mapping. */
-	if (memblock_is_map_memory(phys))
-		return (void __iomem *)__phys_to_virt(phys);
-
-	/*
-	 * We should still honor the memory's attribute here because
-	 * crash dump kernel possibly excludes some ACPI (reclaim)
-	 * regions from memblock list.
-	 */
-	return __ioremap(phys, size, __acpi_get_mem_attribute(phys));
-}
+void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size);
+#define acpi_os_ioremap acpi_os_ioremap
 
 typedef u64 phys_cpuid_t;
@@ -62,7 +62,9 @@
 #define ARM64_HAS_GENERIC_AUTH			52
 #define ARM64_HAS_32BIT_EL1			53
 #define ARM64_BTI				54
+#define ARM64_HAS_ARMv8_4_TTL			55
+#define ARM64_HAS_TLB_RANGE			56
 
-#define ARM64_NCAPS				55
+#define ARM64_NCAPS				57
 
 #endif /* __ASM_CPUCAPS_H */
@@ -692,6 +692,12 @@ static inline bool system_supports_bti(void)
 	return IS_ENABLED(CONFIG_ARM64_BTI) && cpus_have_const_cap(ARM64_BTI);
 }
 
+static inline bool system_supports_tlb_range(void)
+{
+	return IS_ENABLED(CONFIG_ARM64_TLB_RANGE) &&
+		cpus_have_const_cap(ARM64_HAS_TLB_RANGE);
+}
+
 #define ARM64_BP_HARDEN_UNKNOWN		-1
 #define ARM64_BP_HARDEN_WA_NEEDED	0
 #define ARM64_BP_HARDEN_NOT_REQUIRED	1

@@ -774,6 +780,7 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
 }
 
 u32 get_kvm_ipa_limit(void);
+void dump_cpu_features(void);
 
 #endif /* __ASSEMBLY__ */
@@ -49,6 +49,8 @@ extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
 				 pte_t *ptep, pte_t pte, unsigned long sz);
 #define set_huge_swap_pte_at set_huge_swap_pte_at
 
+void __init arm64_hugetlb_cma_reserve(void);
+
 #include <asm-generic/hugetlb.h>
 
 #endif /* __ASM_HUGETLB_H */
@@ -95,6 +95,7 @@
 #define KERNEL_HWCAP_DGH		__khwcap2_feature(DGH)
 #define KERNEL_HWCAP_RNG		__khwcap2_feature(RNG)
 #define KERNEL_HWCAP_BTI		__khwcap2_feature(BTI)
+/* reserved for KERNEL_HWCAP_MTE	__khwcap2_feature(MTE) */
 
 /*
  * This yields a mask that user programs can use to figure out what
@@ -8,7 +8,7 @@
 #ifndef __ASM_KERNEL_PGTABLE_H
 #define __ASM_KERNEL_PGTABLE_H
 
-#include <linux/pgtable.h>
+#include <asm/pgtable-hwdef.h>
 #include <asm/sparsemem.h>
 
 /*
@@ -10,11 +10,8 @@
 #ifndef __ASM_MEMORY_H
 #define __ASM_MEMORY_H
 
-#include <linux/compiler.h>
 #include <linux/const.h>
 #include <linux/sizes.h>
-#include <linux/types.h>
-#include <asm/bug.h>
 #include <asm/page-def.h>
 
 /*

@@ -157,11 +154,15 @@
 #endif
 
 #ifndef __ASSEMBLY__
-extern u64			vabits_actual;
-#define PAGE_END		(_PAGE_END(vabits_actual))
 
 #include <linux/bitops.h>
+#include <linux/compiler.h>
 #include <linux/mmdebug.h>
+#include <linux/types.h>
+#include <asm/bug.h>
+
+extern u64			vabits_actual;
+#define PAGE_END		(_PAGE_END(vabits_actual))
 
 extern s64			physvirt_offset;
 extern s64			memstart_addr;

@@ -322,6 +323,7 @@ static inline void *phys_to_virt(phys_addr_t x)
 	__is_lm_address(__addr) && pfn_valid(virt_to_pfn(__addr));	\
 })
 
+void dump_mem_limit(void);
 #endif /* !ASSEMBLY */
 
 /*
@@ -175,7 +175,7 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp)
  * take CPU migration into account.
  */
 #define destroy_context(mm)		do { } while(0)
-void check_and_switch_context(struct mm_struct *mm, unsigned int cpu);
+void check_and_switch_context(struct mm_struct *mm);
 
 #define init_new_context(tsk,mm)	({ atomic64_set(&(mm)->context.id, 0); 0; })

@@ -214,8 +214,6 @@ enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 
 static inline void __switch_mm(struct mm_struct *next)
 {
-	unsigned int cpu = smp_processor_id();
-
 	/*
 	 * init_mm.pgd does not contain any user mappings and it is always
 	 * active for kernel addresses in TTBR1. Just set the reserved TTBR0.

@@ -225,7 +223,7 @@ static inline void __switch_mm(struct mm_struct *next)
 		return;
 	}
 
-	check_and_switch_context(next, cpu);
+	check_and_switch_context(next);
 }
 
 static inline void
@@ -72,6 +72,13 @@
 #define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD				0x36
 #define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD			0x37
 #define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD			0x38
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD			0x39
+#define ARMV8_PMUV3_PERFCTR_OP_RETIRED				0x3A
+#define ARMV8_PMUV3_PERFCTR_OP_SPEC				0x3B
+#define ARMV8_PMUV3_PERFCTR_STALL				0x3C
+#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND			0x3D
+#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND			0x3E
+#define ARMV8_PMUV3_PERFCTR_STALL_SLOT				0x3F
 
 /* Statistical profiling extension microarchitectural events */
 #define ARMV8_SPE_PERFCTR_SAMPLE_POP				0x4000

@@ -79,6 +86,26 @@
 #define ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE			0x4002
 #define ARMV8_SPE_PERFCTR_SAMPLE_COLLISION			0x4003
 
+/* AMUv1 architecture events */
+#define ARMV8_AMU_PERFCTR_CNT_CYCLES				0x4004
+#define ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM			0x4005
+
+/* long-latency read miss events */
+#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS			0x4006
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD			0x4009
+#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS			0x400A
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD			0x400B
+
+/* additional latency from alignment events */
+#define ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT			0x4020
+#define ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT			0x4021
+#define ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT			0x4022
+
+/* Armv8.5 Memory Tagging Extension events */
+#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED			0x4024
+#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD			0x4025
+#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR			0x4026
+
 /* ARMv8 recommended implementation defined event types */
 #define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD			0x40
 #define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR			0x41
@@ -29,7 +29,7 @@
  * Size mapped by an entry at level n ( 0 <= n <= 3)
  * We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits
  * in the final page. The maximum number of translation levels supported by
- * the architecture is 4. Hence, starting at at level n, we have further
+ * the architecture is 4. Hence, starting at level n, we have further
  * ((4 - n) - 1) levels of translation excluding the offset within the page.
  * So, the total number of bits mapped by an entry at level n is :
 *

@@ -82,23 +82,23 @@
  * Contiguous page definitions.
  */
 #ifdef CONFIG_ARM64_64K_PAGES
-#define CONT_PTE_SHIFT		5
-#define CONT_PMD_SHIFT		5
+#define CONT_PTE_SHIFT		(5 + PAGE_SHIFT)
+#define CONT_PMD_SHIFT		(5 + PMD_SHIFT)
 #elif defined(CONFIG_ARM64_16K_PAGES)
-#define CONT_PTE_SHIFT		7
-#define CONT_PMD_SHIFT		5
+#define CONT_PTE_SHIFT		(7 + PAGE_SHIFT)
+#define CONT_PMD_SHIFT		(5 + PMD_SHIFT)
 #else
-#define CONT_PTE_SHIFT		4
-#define CONT_PMD_SHIFT		4
+#define CONT_PTE_SHIFT		(4 + PAGE_SHIFT)
+#define CONT_PMD_SHIFT		(4 + PMD_SHIFT)
 #endif
 
-#define CONT_PTES		(1 << CONT_PTE_SHIFT)
+#define CONT_PTES		(1 << (CONT_PTE_SHIFT - PAGE_SHIFT))
 #define CONT_PTE_SIZE		(CONT_PTES * PAGE_SIZE)
 #define CONT_PTE_MASK		(~(CONT_PTE_SIZE - 1))
-#define CONT_PMDS		(1 << CONT_PMD_SHIFT)
+#define CONT_PMDS		(1 << (CONT_PMD_SHIFT - PMD_SHIFT))
 #define CONT_PMD_SIZE		(CONT_PMDS * PMD_SIZE)
 #define CONT_PMD_MASK		(~(CONT_PMD_SIZE - 1))
-/* the the numerical offset of the PTE within a range of CONT_PTES */
+/* the numerical offset of the PTE within a range of CONT_PTES */
 #define CONT_RANGE_OFFSET(addr)	(((addr) >> PAGE_SHIFT) & (CONT_PTES - 1))
 
 /*
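A quick arithmetic check of the redefinition above, assuming the 4K page
configuration (PAGE_SHIFT = 12, PMD_SHIFT = 21); the asserts are
illustrative, not part of the patch:

	#include <assert.h>

	/* CONT_PTE_SHIFT = 4 + 12 = 16: a contiguous PTE span is 64K. */
	static_assert((1 << (16 - 12)) == 16, "CONT_PTES is unchanged");
	/* CONT_PMD_SHIFT = 4 + 21 = 25: a contiguous PMD span is 32M. */
	static_assert((1 << (25 - 21)) == 16, "CONT_PMDS is unchanged");

The mapped sizes do not change; the shifts now count the total address
bits spanned rather than the number of entries, so they can be handed
directly to the level/stride-aware TLB range helpers.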
@@ -178,10 +178,12 @@
 #define PTE_S2_RDONLY		(_AT(pteval_t, 1) << 6)   /* HAP[2:1] */
 #define PTE_S2_RDWR		(_AT(pteval_t, 3) << 6)   /* HAP[2:1] */
 #define PTE_S2_XN		(_AT(pteval_t, 2) << 53)  /* XN[1:0] */
+#define PTE_S2_SW_RESVD		(_AT(pteval_t, 15) << 55) /* Reserved for SW */
 
 #define PMD_S2_RDONLY		(_AT(pmdval_t, 1) << 6)   /* HAP[2:1] */
 #define PMD_S2_RDWR		(_AT(pmdval_t, 3) << 6)   /* HAP[2:1] */
 #define PMD_S2_XN		(_AT(pmdval_t, 2) << 53)  /* XN[1:0] */
+#define PMD_S2_SW_RESVD		(_AT(pmdval_t, 15) << 55) /* Reserved for SW */
 
 #define PUD_S2_RDONLY		(_AT(pudval_t, 1) << 6)   /* HAP[2:1] */
 #define PUD_S2_RDWR		(_AT(pudval_t, 3) << 6)   /* HAP[2:1] */

@@ -216,6 +218,7 @@
 #define TCR_TxSZ(x)		(TCR_T0SZ(x) | TCR_T1SZ(x))
 #define TCR_TxSZ_WIDTH		6
 #define TCR_T0SZ_MASK		(((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T0SZ_OFFSET)
+#define TCR_T1SZ_MASK		(((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T1SZ_OFFSET)
 
 #define TCR_EPD0_SHIFT		7
 #define TCR_EPD0_MASK		(UL(1) << TCR_EPD0_SHIFT)
@@ -40,6 +40,16 @@ extern void __pmd_error(const char *file, int line, unsigned long val);
 extern void __pud_error(const char *file, int line, unsigned long val);
 extern void __pgd_error(const char *file, int line, unsigned long val);
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
+
+/* Set stride and tlb_level in flush_*_tlb_range */
+#define flush_pmd_tlb_range(vma, addr, end)	\
+	__flush_tlb_range(vma, addr, end, PMD_SIZE, false, 2)
+#define flush_pud_tlb_range(vma, addr, end)	\
+	__flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1)
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
 /*
  * ZERO_PAGE is a global shared page that is always zero: used
  * for zero-mapped memory areas etc..
@@ -27,7 +27,7 @@
  *
  * Some code sections either automatically switch back to PSR.I or explicitly
  * require to not use priority masking. If bit GIC_PRIO_PSR_I_SET is included
- * in the the priority mask, it indicates that PSR.I should be set and
+ * in the priority mask, it indicates that PSR.I should be set and
  * interrupt disabling temporarily does not rely on IRQ priorities.
  */
 #define GIC_PRIO_IRQON			0xe0
@@ -256,4 +256,13 @@ stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
 	return (boundary - 1 < end - 1) ? boundary : end;
 }
 
+/*
+ * Level values for the ARMv8.4-TTL extension, mapping PUD/PMD/PTE and
+ * the architectural page-table level.
+ */
+#define S2_NO_LEVEL_HINT	0
+#define S2_PUD_LEVEL		1
+#define S2_PMD_LEVEL		2
+#define S2_PTE_LEVEL		3
+
 #endif /* __ARM64_S2_PGTABLE_H_ */
@@ -421,9 +421,9 @@
  */
 
 #define SYS_AMEVCNTR0_EL0(n)		SYS_AM_EL0(4 + ((n) >> 3), (n) & 7)
-#define SYS_AMEVTYPE0_EL0(n)		SYS_AM_EL0(6 + ((n) >> 3), (n) & 7)
+#define SYS_AMEVTYPER0_EL0(n)		SYS_AM_EL0(6 + ((n) >> 3), (n) & 7)
 #define SYS_AMEVCNTR1_EL0(n)		SYS_AM_EL0(12 + ((n) >> 3), (n) & 7)
-#define SYS_AMEVTYPE1_EL0(n)		SYS_AM_EL0(14 + ((n) >> 3), (n) & 7)
+#define SYS_AMEVTYPER1_EL0(n)		SYS_AM_EL0(14 + ((n) >> 3), (n) & 7)
 
 /* AMU v1: Fixed (architecturally defined) activity monitors */
 #define SYS_AMEVCNTR0_CORE_EL0		SYS_AMEVCNTR0_EL0(0)

@@ -617,6 +617,9 @@
 #define ID_AA64ISAR0_SHA1_SHIFT		8
 #define ID_AA64ISAR0_AES_SHIFT		4
 
+#define ID_AA64ISAR0_TLB_RANGE_NI	0x0
+#define ID_AA64ISAR0_TLB_RANGE		0x2
+
 /* id_aa64isar1 */
 #define ID_AA64ISAR1_I8MM_SHIFT		52
 #define ID_AA64ISAR1_DGH_SHIFT		48

@@ -706,6 +709,9 @@
 #define ID_AA64ZFR0_SVEVER_SVE2		0x1
 
 /* id_aa64mmfr0 */
+#define ID_AA64MMFR0_ECV_SHIFT		60
+#define ID_AA64MMFR0_FGT_SHIFT		56
+#define ID_AA64MMFR0_EXS_SHIFT		44
 #define ID_AA64MMFR0_TGRAN4_2_SHIFT	40
 #define ID_AA64MMFR0_TGRAN64_2_SHIFT	36
 #define ID_AA64MMFR0_TGRAN16_2_SHIFT	32

@@ -734,6 +740,10 @@
 #endif
 
 /* id_aa64mmfr1 */
+#define ID_AA64MMFR1_ETS_SHIFT		36
+#define ID_AA64MMFR1_TWED_SHIFT		32
+#define ID_AA64MMFR1_XNX_SHIFT		28
+#define ID_AA64MMFR1_SPECSEI_SHIFT	24
 #define ID_AA64MMFR1_PAN_SHIFT		20
 #define ID_AA64MMFR1_LOR_SHIFT		16
 #define ID_AA64MMFR1_HPD_SHIFT		12

@@ -746,8 +756,15 @@
 
 /* id_aa64mmfr2 */
+#define ID_AA64MMFR2_E0PD_SHIFT		60
+#define ID_AA64MMFR2_EVT_SHIFT		56
+#define ID_AA64MMFR2_BBM_SHIFT		52
+#define ID_AA64MMFR2_TTL_SHIFT		48
 #define ID_AA64MMFR2_FWB_SHIFT		40
 #define ID_AA64MMFR2_IDS_SHIFT		36
 #define ID_AA64MMFR2_AT_SHIFT		32
+#define ID_AA64MMFR2_ST_SHIFT		28
+#define ID_AA64MMFR2_NV_SHIFT		24
+#define ID_AA64MMFR2_CCIDX_SHIFT	20
 #define ID_AA64MMFR2_LVA_SHIFT		16
 #define ID_AA64MMFR2_IESB_SHIFT		12
 #define ID_AA64MMFR2_LSM_SHIFT		8

@@ -755,6 +772,7 @@
 #define ID_AA64MMFR2_CNP_SHIFT		0
 
 /* id_aa64dfr0 */
+#define ID_AA64DFR0_DOUBLELOCK_SHIFT	36
 #define ID_AA64DFR0_PMSVER_SHIFT	32
 #define ID_AA64DFR0_CTX_CMPS_SHIFT	28
 #define ID_AA64DFR0_WRPS_SHIFT		20

@@ -807,18 +825,40 @@
 #define ID_ISAR6_DP_SHIFT		4
 #define ID_ISAR6_JSCVT_SHIFT		0
 
+#define ID_MMFR0_INNERSHR_SHIFT		28
+#define ID_MMFR0_FCSE_SHIFT		24
+#define ID_MMFR0_AUXREG_SHIFT		20
+#define ID_MMFR0_TCM_SHIFT		16
+#define ID_MMFR0_SHARELVL_SHIFT		12
+#define ID_MMFR0_OUTERSHR_SHIFT		8
+#define ID_MMFR0_PMSA_SHIFT		4
+#define ID_MMFR0_VMSA_SHIFT		0
+
 #define ID_MMFR4_EVT_SHIFT		28
 #define ID_MMFR4_CCIDX_SHIFT		24
 #define ID_MMFR4_LSM_SHIFT		20
 #define ID_MMFR4_HPDS_SHIFT		16
 #define ID_MMFR4_CNP_SHIFT		12
 #define ID_MMFR4_XNX_SHIFT		8
 #define ID_MMFR4_AC2_SHIFT		4
 #define ID_MMFR4_SPECSEI_SHIFT		0
 
+#define ID_MMFR5_ETS_SHIFT		0
+
+#define ID_PFR0_DIT_SHIFT		24
+#define ID_PFR0_CSV2_SHIFT		16
 #define ID_PFR0_STATE3_SHIFT		12
 #define ID_PFR0_STATE2_SHIFT		8
 #define ID_PFR0_STATE1_SHIFT		4
 #define ID_PFR0_STATE0_SHIFT		0
 
 #define ID_DFR0_PERFMON_SHIFT		24
+#define ID_DFR0_MPROFDBG_SHIFT		20
+#define ID_DFR0_MMAPTRC_SHIFT		16
+#define ID_DFR0_COPTRC_SHIFT		12
+#define ID_DFR0_MMAPDBG_SHIFT		8
+#define ID_DFR0_COPSDBG_SHIFT		4
+#define ID_DFR0_COPDBG_SHIFT		0
+
+#define ID_PFR2_SSBS_SHIFT		4
+#define ID_PFR2_CSV3_SHIFT		0

@@ -861,6 +901,11 @@
 #define ID_AA64MMFR0_TGRAN_SUPPORTED	ID_AA64MMFR0_TGRAN64_SUPPORTED
 #endif
 
+#define MVFR2_FPMISC_SHIFT		4
+#define MVFR2_SIMDMISC_SHIFT		0
+
+#define DCZID_DZP_SHIFT			4
+#define DCZID_BS_SHIFT			0
 
 /*
  * The ZCR_ELx_LEN_* definitions intentionally include bits [8:4] which
@@ -21,11 +21,37 @@ static void tlb_flush(struct mmu_gather *tlb);
 
 #include <asm-generic/tlb.h>
 
+/*
+ * get the tlbi levels in arm64.  Default value is 0 if more than one
+ * of cleared_* is set or neither is set.
+ * Arm64 doesn't support p4ds now.
+ */
+static inline int tlb_get_level(struct mmu_gather *tlb)
+{
+	if (tlb->cleared_ptes && !(tlb->cleared_pmds ||
+				   tlb->cleared_puds ||
+				   tlb->cleared_p4ds))
+		return 3;
+
+	if (tlb->cleared_pmds && !(tlb->cleared_ptes ||
+				   tlb->cleared_puds ||
+				   tlb->cleared_p4ds))
+		return 2;
+
+	if (tlb->cleared_puds && !(tlb->cleared_ptes ||
+				   tlb->cleared_pmds ||
+				   tlb->cleared_p4ds))
+		return 1;
+
+	return 0;
+}
+
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
 	struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0);
 	bool last_level = !tlb->freed_tables;
 	unsigned long stride = tlb_get_unmap_size(tlb);
+	int tlb_level = tlb_get_level(tlb);
 
 	/*
 	 * If we're tearing down the address space then we only care about

@@ -38,7 +64,8 @@ static inline void tlb_flush(struct mmu_gather *tlb)
 		return;
 	}
 
-	__flush_tlb_range(&vma, tlb->start, tlb->end, stride, last_level);
+	__flush_tlb_range(&vma, tlb->start, tlb->end, stride,
+			  last_level, tlb_level);
 }
 
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
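A worked example of the level selection above (illustrative, not part of
the patch):

	/* cleared_ptes only		-> tlb_get_level() == 3 (PTE leaf)
	 * cleared_pmds only		-> 2 (e.g. unmapping a 2M THP)
	 * cleared_ptes + cleared_pmds	-> 0 (ambiguous: no hint, always safe)
	 */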
@@ -10,6 +10,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/bitfield.h>
 #include <linux/mm_types.h>
 #include <linux/sched.h>
 #include <asm/cputype.h>

@@ -59,6 +60,102 @@
 		__ta;						\
 	})
 
+/*
+ * Get translation granule of the system, which is decided by
+ * PAGE_SIZE.  Used by TTL.
+ *  - 4KB	: 1
+ *  - 16KB	: 2
+ *  - 64KB	: 3
+ */
+#define TLBI_TTL_TG_4K		1
+#define TLBI_TTL_TG_16K		2
+#define TLBI_TTL_TG_64K		3
+
+static inline unsigned long get_trans_granule(void)
+{
+	switch (PAGE_SIZE) {
+	case SZ_4K:
+		return TLBI_TTL_TG_4K;
+	case SZ_16K:
+		return TLBI_TTL_TG_16K;
+	case SZ_64K:
+		return TLBI_TTL_TG_64K;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Level-based TLBI operations.
+ *
+ * When ARMv8.4-TTL exists, TLBI operations take an additional hint for
+ * the level at which the invalidation must take place. If the level is
+ * wrong, no invalidation may take place. In the case where the level
+ * cannot be easily determined, a 0 value for the level parameter will
+ * perform a non-hinted invalidation.
+ *
+ * For Stage-2 invalidation, use the level values provided to that effect
+ * in asm/stage2_pgtable.h.
+ */
+#define TLBI_TTL_MASK		GENMASK_ULL(47, 44)
+
+#define __tlbi_level(op, addr, level) do {				\
+	u64 arg = addr;							\
+									\
+	if (cpus_have_const_cap(ARM64_HAS_ARMv8_4_TTL) &&		\
+	    level) {							\
+		u64 ttl = level & 3;					\
+		ttl |= get_trans_granule() << 2;			\
+		arg &= ~TLBI_TTL_MASK;					\
+		arg |= FIELD_PREP(TLBI_TTL_MASK, ttl);			\
+	}								\
+									\
+	__tlbi(op, arg);						\
+} while(0)
+
+#define __tlbi_user_level(op, arg, level) do {				\
+	if (arm64_kernel_unmapped_at_el0())				\
+		__tlbi_level(op, (arg | USER_ASID_FLAG), level);	\
+} while (0)
+
+/*
+ * This macro creates a properly formatted VA operand for the TLB RANGE.
+ * The value bit assignments are:
+ *
+ *	+----------+------+-------+-------+-------+----------------------+
+ *	|   ASID   |  TG  | SCALE |  NUM  |  TTL  |        BADDR         |
+ *	+-----------------+-------+-------+-------+----------------------+
+ *	|63      48|47  46|45   44|43   39|38   37|36                   0|
+ *
+ * The address range is determined by below formula:
+ * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE)
+ *
+ */
+#define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl)		\
+	({							\
+		unsigned long __ta = (addr) >> PAGE_SHIFT;	\
+		__ta &= GENMASK_ULL(36, 0);			\
+		__ta |= (unsigned long)(ttl) << 37;		\
+		__ta |= (unsigned long)(num) << 39;		\
+		__ta |= (unsigned long)(scale) << 44;		\
+		__ta |= get_trans_granule() << 46;		\
+		__ta |= (unsigned long)(asid) << 48;		\
+		__ta;						\
+	})
+
+/* These macros are used by the TLBI RANGE feature. */
+#define __TLBI_RANGE_PAGES(num, scale)	\
+	((unsigned long)((num) + 1) << (5 * (scale) + 1))
+#define MAX_TLBI_RANGE_PAGES		__TLBI_RANGE_PAGES(31, 3)
+
+/*
+ * Generate 'num' values from -1 to 30 with -1 rejected by the
+ * __flush_tlb_range() loop below.
+ */
+#define TLBI_RANGE_MASK			GENMASK_ULL(4, 0)
+#define __TLBI_RANGE_NUM(pages, scale)	\
+	((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1)
+
 /*
  * TLB Invalidation
  * ================
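Worked examples of the encodings defined above (illustrative; the numbers
follow directly from the macros):

	/*
	 * __tlbi_level(vae1is, addr, 3) on a 4K kernel builds the TTL
	 * field as ttl = (TLBI_TTL_TG_4K << 2) | 3 = 0b0111, placed in
	 * bits [47:44]: the leaf being invalidated is a level-3 (PTE)
	 * entry.
	 *
	 * __TLBI_RANGE_PAGES(num, scale) covers (num + 1) << (5*scale + 1)
	 * pages:
	 *   scale = 0: 2, 4, ..., 64 pages          (num = 0..31)
	 *   scale = 3: 65536, ..., 2097152 pages    (num = 0..31)
	 * MAX_TLBI_RANGE_PAGES = __TLBI_RANGE_PAGES(31, 3) = 32 << 16,
	 * i.e. 2M pages, or 8GB of 4K pages, in a single range operation.
	 */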
@@ -179,34 +276,83 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
 
 static inline void __flush_tlb_range(struct vm_area_struct *vma,
 				     unsigned long start, unsigned long end,
-				     unsigned long stride, bool last_level)
+				     unsigned long stride, bool last_level,
+				     int tlb_level)
 {
+	int num = 0;
+	int scale = 0;
 	unsigned long asid = ASID(vma->vm_mm);
 	unsigned long addr;
+	unsigned long pages;
 
 	start = round_down(start, stride);
 	end = round_up(end, stride);
+	pages = (end - start) >> PAGE_SHIFT;
 
-	if ((end - start) >= (MAX_TLBI_OPS * stride)) {
+	/*
+	 * When not uses TLB range ops, we can handle up to
+	 * (MAX_TLBI_OPS - 1) pages;
+	 * When uses TLB range ops, we can handle up to
+	 * (MAX_TLBI_RANGE_PAGES - 1) pages.
+	 */
+	if ((!system_supports_tlb_range() &&
+	     (end - start) >= (MAX_TLBI_OPS * stride)) ||
+	    pages >= MAX_TLBI_RANGE_PAGES) {
 		flush_tlb_mm(vma->vm_mm);
 		return;
 	}
 
-	/* Convert the stride into units of 4k */
-	stride >>= 12;
-
-	start = __TLBI_VADDR(start, asid);
-	end = __TLBI_VADDR(end, asid);
-
 	dsb(ishst);
-	for (addr = start; addr < end; addr += stride) {
-		if (last_level) {
-			__tlbi(vale1is, addr);
-			__tlbi_user(vale1is, addr);
-		} else {
-			__tlbi(vae1is, addr);
-			__tlbi_user(vae1is, addr);
-		}
-	}
+
+	/*
+	 * When the CPU does not support TLB range operations, flush the TLB
+	 * entries one by one at the granularity of 'stride'. If the TLB
+	 * range ops are supported, then:
+	 *
+	 * 1. If 'pages' is odd, flush the first page through non-range
+	 *    operations;
+	 *
+	 * 2. For remaining pages: the minimum range granularity is decided
+	 *    by 'scale', so multiple range TLBI operations may be required.
+	 *    Start from scale = 0, flush the corresponding number of pages
+	 *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
+	 *    until no pages left.
+	 *
+	 * Note that certain ranges can be represented by either num = 31 and
+	 * scale or num = 0 and scale + 1. The loop below favours the latter
+	 * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
+	 */
+	while (pages > 0) {
+		if (!system_supports_tlb_range() ||
+		    pages % 2 == 1) {
+			addr = __TLBI_VADDR(start, asid);
+			if (last_level) {
+				__tlbi_level(vale1is, addr, tlb_level);
+				__tlbi_user_level(vale1is, addr, tlb_level);
+			} else {
+				__tlbi_level(vae1is, addr, tlb_level);
+				__tlbi_user_level(vae1is, addr, tlb_level);
+			}
+			start += stride;
+			pages -= stride >> PAGE_SHIFT;
+			continue;
+		}
+
+		num = __TLBI_RANGE_NUM(pages, scale);
+		if (num >= 0) {
+			addr = __TLBI_VADDR_RANGE(start, asid, scale,
+						  num, tlb_level);
+			if (last_level) {
+				__tlbi(rvale1is, addr);
+				__tlbi_user(rvale1is, addr);
+			} else {
+				__tlbi(rvae1is, addr);
+				__tlbi_user(rvae1is, addr);
+			}
+			start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
+			pages -= __TLBI_RANGE_PAGES(num, scale);
+		}
+		scale++;
+	}
 	dsb(ish);
 }
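A short walk through the loop above (illustrative): flushing 7 pages on a
4K kernel with range operations supported takes two instructions instead
of seven:

	/*
	 * pages = 7 (odd)  -> one VALE1IS for the first page; pages = 6
	 * scale = 0        -> __TLBI_RANGE_NUM(6, 0) = (6 >> 1) - 1 = 2
	 *                  -> one RVALE1IS covering (2 + 1) << 1 = 6 pages
	 * Without the TLB range feature, every page takes its own
	 * level-hinted VALE1IS.
	 */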
@@ -217,8 +363,9 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
 	/*
 	 * We cannot use leaf-only invalidation here, since we may be invalidating
 	 * table entries as part of collapsing hugepages or moving page tables.
+	 * Set the tlb_level to 0 because we can not get enough information here.
 	 */
-	__flush_tlb_range(vma, start, end, PAGE_SIZE, false);
+	__flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
 }
 
 static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
@@ -19,6 +19,7 @@
 #include <linux/string.h>
 
 #include <asm/cpufeature.h>
+#include <asm/mmu.h>
 #include <asm/ptrace.h>
 #include <asm/memory.h>
 #include <asm/extable.h>
@@ -12,6 +12,8 @@
  */
 #define VDSO_LBASE	0x0
 
+#define __VVAR_PAGES    2
+
 #ifndef __ASSEMBLY__
 
 #include <generated/vdso-offsets.h>
@@ -7,6 +7,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/barrier.h>
 #include <asm/unistd.h>
 #include <asm/errno.h>

@@ -152,6 +153,18 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
 	return ret;
 }
 
+#ifdef CONFIG_TIME_NS
+static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void)
+{
+	const struct vdso_data *ret;
+
+	/* See __arch_get_vdso_data(). */
+	asm volatile("mov %0, %1" : "=r"(ret) : "r"(_timens_data));
+
+	return ret;
+}
+#endif
+
 static inline bool vdso_clocksource_ok(const struct vdso_data *vd)
 {
 	return vd->clock_mode == VDSO_CLOCKMODE_ARCHTIMER;
@@ -7,6 +7,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/barrier.h>
 #include <asm/unistd.h>
 
 #define VDSO_HAS_CLOCK_GETRES		1

@@ -96,6 +97,14 @@ const struct vdso_data *__arch_get_vdso_data(void)
 	return _vdso_data;
 }
 
+#ifdef CONFIG_TIME_NS
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(void)
+{
+	return _timens_data;
+}
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_VDSO_GETTIMEOFDAY_H */
@@ -74,5 +74,6 @@
 #define HWCAP2_DGH		(1 << 15)
 #define HWCAP2_RNG		(1 << 16)
 #define HWCAP2_BTI		(1 << 17)
+/* reserved for HWCAP2_MTE	(1 << 18) */
 
 #endif /* _UAPI__ASM_HWCAP_H */
@@ -179,7 +179,7 @@ struct sve_context {
  * The same convention applies when returning from a signal: a caller
  * will need to remove or resize the sve_context block if it wants to
  * make the SVE registers live when they were previously non-live or
- * vice-versa. This may require the the caller to allocate fresh
+ * vice-versa. This may require the caller to allocate fresh
  * memory and/or move other context blocks in the signal frame.
  *
 * Changing the vector length during signal return is not permitted: