65688d2a05
Back in commit 9730348075 ("arm64: Increase the max granular size"), ARCH_DMA_MINALIGN was effectively increased to 128 bytes thanks to an increase in L1_CACHE_BYTES due to an unsubstantiated performance claim on the now obsolete ThunderX-1. Although this was reverted in commit d93277b983, ARCH_DMA_MINALIGN was kept at 128 bytes by commit ebc7e21e0f ("arm64: Increase ARCH_DMA_MINALIGN to 128").
During discussion of the original patch, it was reported that the change also prevented a warning during boot on (again, now obsolete) Qualcomm server hardware where the cache writeback granule was larger than 64 bytes. The reason for this warning was that non-coherent DMA could lead to data corruption due to unexpected writeback from the CPU where a cacheline is shared with other allocations. Since then, systems have appeared with larger cachelines still, and so commit 8f5c9037a5
("arm64/mm: Correct the cache line size warning with non coherent device") reworked the warning so that it only appears on systems where non-coherent DMA is actually required and taints the kernel with TAINT_CPU_OUT_OF_SPEC. We are not aware of any systems, even including the aforementioned obsolete machines, which have a CWG larger than 64 bytes and require non-coherent DMA. More recently, it has been reported that a ARCH_DMA_MINALIGN of 128 bytes wastes considerable memory (~6% immediately after boot on one system). Reduce ARCH_DMA_MINALIGN to 64 bytes and allow the warning/taint to indicate if there are machines that unknowingly rely on this. Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Ard Biesheuvel <ardb@kernel.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Vincent Whitchurch <vincent.whitchurch@axis.com> Link: https://lore.kernel.org/linux-arm-kernel/1442944788-17254-1-git-send-email-rric@kernel.org/ Link: https://lore.kernel.org/linux-arm-kernel/CAOZdJXUiRMAguDV+HEJqPg57MyBNqEcTyaH+ya=U93NHb-pdJA@mail.gmail.com/ Link: https://lore.kernel.org/linux-arm-kernel/20190614131141.4428-1-msys.mizuma@gmail.com/ Link: https://lore.kernel.org/r/20210517074332.28280-1-vincent.whitchurch@axis.com Acked-by: Catalin Marinas <catalin.marinas@arm.com> Acked-by: Mark Rutland <mark.rutland@arm.com> Acked-by: Arnd Bergmann <arnd@arndb.de> Acked-by: Mark Rutland <mark.rutland@arm.com> Acked-by: Ard Biesheuvel <ardb@kernel.org> Link: https://lore.kernel.org/r/20210527124356.22367-1-will@kernel.org Signed-off-by: Will Deacon <will@kernel.org>
130 lines
3.2 KiB
C
130 lines
3.2 KiB
C
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
/*
|
|
* Copyright (C) 2012 ARM Ltd.
|
|
*/
|
|
#ifndef __ASM_CACHE_H
|
|
#define __ASM_CACHE_H
|
|
|
|
#include <asm/cputype.h>
|
|
|
|
/*
 * Bit offsets (*_SHIFT) and right-aligned field masks (*_MASK) for the
 * fields of a cache type register (CTR_EL0) value.
 */
#define CTR_L1IP_SHIFT		14
#define CTR_L1IP_MASK		3
#define CTR_DMINLINE_SHIFT	16
#define CTR_IMINLINE_SHIFT	0
#define CTR_IMINLINE_MASK	0xf
#define CTR_ERG_SHIFT		20
#define CTR_CWG_SHIFT		24
#define CTR_CWG_MASK		15
#define CTR_IDC_SHIFT		28
#define CTR_DIC_SHIFT		29

/*
 * In-place mask covering both minimum-line-size fields: a 4-bit field at
 * CTR_DMINLINE_SHIFT and the CTR_IMINLINE field at CTR_IMINLINE_SHIFT.
 */
#define CTR_CACHE_MINLINE_MASK	\
	(0xf << CTR_DMINLINE_SHIFT | CTR_IMINLINE_MASK << CTR_IMINLINE_SHIFT)

/* Extract the (right-aligned) L1IP field from the cache type value @ctr. */
#define CTR_L1IP(ctr)		(((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)
|
|
|
|
/*
 * Instruction cache policy encodings.
 * NOTE(review): presumably these are the values produced by CTR_L1IP() --
 * confirm against the users of these constants.
 */
#define ICACHE_POLICY_VPIPT	0
#define ICACHE_POLICY_RESERVED	1
#define ICACHE_POLICY_VIPT	2
#define ICACHE_POLICY_PIPT	3
|
|
|
|
/* L1 cache line size used at build time: 1 << 6 = 64 bytes. */
#define L1_CACHE_SHIFT		(6)
#define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
|
|
|
|
|
|
/* Bit offsets of the LoUU, LoC and LoUIS fields within a CLIDR value. */
#define CLIDR_LOUU_SHIFT	27
#define CLIDR_LOC_SHIFT		24
#define CLIDR_LOUIS_SHIFT	21

/* Extract the corresponding 3-bit field (right-aligned) from @clidr. */
#define CLIDR_LOUU(clidr)	(((clidr) >> CLIDR_LOUU_SHIFT) & 0x7)
#define CLIDR_LOC(clidr)	(((clidr) >> CLIDR_LOC_SHIFT) & 0x7)
#define CLIDR_LOUIS(clidr)	(((clidr) >> CLIDR_LOUIS_SHIFT) & 0x7)
|
|
|
|
/*
 * Memory returned by kmalloc() may be used for DMA, so we must make
 * sure that all such allocations are cache aligned. Otherwise,
 * unrelated code may cause parts of the buffer to be read into the
 * cache before the transfer is done, causing old data to be seen by
 * the CPU.
 */
#define ARCH_DMA_MINALIGN	L1_CACHE_BYTES
|
|
|
|
/*
 * Minimum slab alignment, defined only for the two tag-based KASAN
 * configurations below; left undefined otherwise.
 */
#ifdef CONFIG_KASAN_SW_TAGS
#define ARCH_SLAB_MINALIGN	(1ULL << KASAN_SHADOW_SCALE_SHIFT)
#elif defined(CONFIG_KASAN_HW_TAGS)
#define ARCH_SLAB_MINALIGN	MTE_GRANULE_SIZE
#endif
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
#include <linux/bitops.h>
|
|
|
|
/* Bit numbers within __icache_flags, queried with test_bit() below. */
#define ICACHEF_ALIASING	0
#define ICACHEF_VPIPT		1

/* I-cache property flags word; declared here, defined elsewhere. */
extern unsigned long __icache_flags;
|
|
|
|
/*
|
|
* Whilst the D-side always behaves as PIPT on AArch64, aliasing is
|
|
* permitted in the I-cache.
|
|
*/
|
|
static inline int icache_is_aliasing(void)
|
|
{
|
|
return test_bit(ICACHEF_ALIASING, &__icache_flags);
|
|
}
|
|
|
|
static __always_inline int icache_is_vpipt(void)
|
|
{
|
|
return test_bit(ICACHEF_VPIPT, &__icache_flags);
|
|
}
|
|
|
|
static inline u32 cache_type_cwg(void)
|
|
{
|
|
return (read_cpuid_cachetype() >> CTR_CWG_SHIFT) & CTR_CWG_MASK;
|
|
}
|
|
|
|
#define __read_mostly __section(".data..read_mostly")
|
|
|
|
static inline int cache_line_size_of_cpu(void)
|
|
{
|
|
u32 cwg = cache_type_cwg();
|
|
|
|
return cwg ? 4 << cwg : ARCH_DMA_MINALIGN;
|
|
}
|
|
|
|
/*
 * Declared here, defined elsewhere.
 * NOTE(review): presumably the cache line size callers should use, as
 * opposed to the raw per-CPU cache_line_size_of_cpu() -- confirm against
 * the definition.
 */
int cache_line_size(void);
|
|
|
|
/*
|
|
* Read the effective value of CTR_EL0.
|
|
*
|
|
* According to ARM ARM for ARMv8-A (ARM DDI 0487C.a),
|
|
* section D10.2.33 "CTR_EL0, Cache Type Register" :
|
|
*
|
|
* CTR_EL0.IDC reports the data cache clean requirements for
|
|
* instruction to data coherence.
|
|
*
|
|
* 0 - dcache clean to PoU is required unless :
|
|
* (CLIDR_EL1.LoC == 0) || (CLIDR_EL1.LoUIS == 0 && CLIDR_EL1.LoUU == 0)
|
|
* 1 - dcache clean to PoU is not required for i-to-d coherence.
|
|
*
|
|
* This routine provides the CTR_EL0 with the IDC field updated to the
|
|
* effective state.
|
|
*/
|
|
static inline u32 __attribute_const__ read_cpuid_effective_cachetype(void)
|
|
{
|
|
u32 ctr = read_cpuid_cachetype();
|
|
|
|
if (!(ctr & BIT(CTR_IDC_SHIFT))) {
|
|
u64 clidr = read_sysreg(clidr_el1);
|
|
|
|
if (CLIDR_LOC(clidr) == 0 ||
|
|
(CLIDR_LOUIS(clidr) == 0 && CLIDR_LOUU(clidr) == 0))
|
|
ctr |= BIT(CTR_IDC_SHIFT);
|
|
}
|
|
|
|
return ctr;
|
|
}
|
|
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
#endif
|