linux/arch/arm64/include/asm/io.h
Arnd Bergmann 5c40e428ae arm64/io: add constant-argument check
In some configurations __const_iowrite32_copy() does not get inlined
and gcc runs into the BUILD_BUG():

In file included from <command-line>:
In function '__const_memcpy_toio_aligned32',
    inlined from '__const_iowrite32_copy' at arch/arm64/include/asm/io.h:203:3,
    inlined from '__const_iowrite32_copy' at arch/arm64/include/asm/io.h:199:20:
include/linux/compiler_types.h:487:45: error: call to '__compiletime_assert_538' declared with attribute error: BUILD_BUG failed
  487 |         _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
      |                                             ^
include/linux/compiler_types.h:468:25: note: in definition of macro '__compiletime_assert'
  468 |                         prefix ## suffix();                             \
      |                         ^~~~~~
include/linux/compiler_types.h:487:9: note: in expansion of macro '_compiletime_assert'
  487 |         _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
      |         ^~~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:39:37: note: in expansion of macro 'compiletime_assert'
   39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
      |                                     ^~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:59:21: note: in expansion of macro 'BUILD_BUG_ON_MSG'
   59 | #define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
      |                     ^~~~~~~~~~~~~~~~
arch/arm64/include/asm/io.h:193:17: note: in expansion of macro 'BUILD_BUG'
  193 |                 BUILD_BUG();
      |                 ^~~~~~~~~

Move the check for constant arguments into the inline function to ensure
it is still constant if the compiler decides against inlining it, and
mark them as __always_inline to override the logic that sometimes leads
to the compiler not producing the simplified output.

Note that either the __always_inline annotation or the check for a
constant value are sufficient here, but combining the two looks cleaner
as it also avoids the macro. With clang-8 and older, the macro was still
needed, but all versions of gcc and clang can reliably perform constant
folding here.

Fixes: ead79118da ("arm64/io: Provide a WC friendly __iowriteXX_copy()")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20240604210006.668912-1-arnd@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
2024-06-05 13:30:58 +01:00

318 lines
9.0 KiB
C

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Based on arch/arm/include/asm/io.h
*
* Copyright (C) 1996-2000 Russell King
* Copyright (C) 2012 ARM Ltd.
*/
#ifndef __ASM_IO_H
#define __ASM_IO_H
#include <linux/types.h>
#include <linux/pgtable.h>
#include <asm/byteorder.h>
#include <asm/barrier.h>
#include <asm/memory.h>
#include <asm/early_ioremap.h>
#include <asm/alternative.h>
#include <asm/cpufeature.h>
/*
* Generic IO read/write. These perform native-endian accesses.
*/
#define __raw_writeb __raw_writeb
static __always_inline void __raw_writeb(u8 val, volatile void __iomem *addr)
{
volatile u8 __iomem *ptr = addr;
asm volatile("strb %w0, %1" : : "rZ" (val), "Qo" (*ptr));
}
#define __raw_writew __raw_writew
static __always_inline void __raw_writew(u16 val, volatile void __iomem *addr)
{
volatile u16 __iomem *ptr = addr;
asm volatile("strh %w0, %1" : : "rZ" (val), "Qo" (*ptr));
}
#define __raw_writel __raw_writel
static __always_inline void __raw_writel(u32 val, volatile void __iomem *addr)
{
volatile u32 __iomem *ptr = addr;
asm volatile("str %w0, %1" : : "rZ" (val), "Qo" (*ptr));
}
#define __raw_writeq __raw_writeq
static __always_inline void __raw_writeq(u64 val, volatile void __iomem *addr)
{
volatile u64 __iomem *ptr = addr;
asm volatile("str %x0, %1" : : "rZ" (val), "Qo" (*ptr));
}
#define __raw_readb __raw_readb
static __always_inline u8 __raw_readb(const volatile void __iomem *addr)
{
u8 val;
asm volatile(ALTERNATIVE("ldrb %w0, [%1]",
"ldarb %w0, [%1]",
ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
: "=r" (val) : "r" (addr));
return val;
}
#define __raw_readw __raw_readw
static __always_inline u16 __raw_readw(const volatile void __iomem *addr)
{
u16 val;
asm volatile(ALTERNATIVE("ldrh %w0, [%1]",
"ldarh %w0, [%1]",
ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
: "=r" (val) : "r" (addr));
return val;
}
#define __raw_readl __raw_readl
static __always_inline u32 __raw_readl(const volatile void __iomem *addr)
{
u32 val;
asm volatile(ALTERNATIVE("ldr %w0, [%1]",
"ldar %w0, [%1]",
ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
: "=r" (val) : "r" (addr));
return val;
}
#define __raw_readq __raw_readq
static __always_inline u64 __raw_readq(const volatile void __iomem *addr)
{
u64 val;
asm volatile(ALTERNATIVE("ldr %0, [%1]",
"ldar %0, [%1]",
ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
: "=r" (val) : "r" (addr));
return val;
}
/* IO barriers */
#define __io_ar(v) \
({ \
unsigned long tmp; \
\
dma_rmb(); \
\
/* \
* Create a dummy control dependency from the IO read to any \
* later instructions. This ensures that a subsequent call to \
* udelay() will be ordered due to the ISB in get_cycles(). \
*/ \
asm volatile("eor %0, %1, %1\n" \
"cbnz %0, ." \
: "=r" (tmp) : "r" ((unsigned long)(v)) \
: "memory"); \
})
#define __io_bw() dma_wmb()
#define __io_br(v)
#define __io_aw(v)
/* arm64-specific, don't use in portable drivers */
#define __iormb(v) __io_ar(v)
#define __iowmb() __io_bw()
#define __iomb() dma_mb()
/*
* I/O port access primitives.
*/
#define arch_has_dev_port() (1)
#define IO_SPACE_LIMIT (PCI_IO_SIZE - 1)
#define PCI_IOBASE ((void __iomem *)PCI_IO_START)
/*
* String version of I/O memory access operations.
*/
extern void __memcpy_fromio(void *, const volatile void __iomem *, size_t);
extern void __memcpy_toio(volatile void __iomem *, const void *, size_t);
extern void __memset_io(volatile void __iomem *, int, size_t);
#define memset_io(c,v,l) __memset_io((c),(v),(l))
#define memcpy_fromio(a,c,l) __memcpy_fromio((a),(c),(l))
#define memcpy_toio(c,a,l) __memcpy_toio((c),(a),(l))
/*
* The ARM64 iowrite implementation is intended to support drivers that want to
* use write combining. For instance PCI drivers using write combining with a 64
* byte __iowrite64_copy() expect to get a 64 byte MemWr TLP on the PCIe bus.
*
* Newer ARM core have sensitive write combining buffers, it is important that
* the stores be contiguous blocks of store instructions. Normal memcpy
* approaches have a very low chance to generate write combining.
*
* Since this is the only API on ARM64 that should be used with write combining
* it also integrates the DGH hint which is supposed to lower the latency to
* emit the large TLP from the CPU.
*/
static __always_inline void
__const_memcpy_toio_aligned32(volatile u32 __iomem *to, const u32 *from,
size_t count)
{
switch (count) {
case 8:
asm volatile("str %w0, [%8, #4 * 0]\n"
"str %w1, [%8, #4 * 1]\n"
"str %w2, [%8, #4 * 2]\n"
"str %w3, [%8, #4 * 3]\n"
"str %w4, [%8, #4 * 4]\n"
"str %w5, [%8, #4 * 5]\n"
"str %w6, [%8, #4 * 6]\n"
"str %w7, [%8, #4 * 7]\n"
:
: "rZ"(from[0]), "rZ"(from[1]), "rZ"(from[2]),
"rZ"(from[3]), "rZ"(from[4]), "rZ"(from[5]),
"rZ"(from[6]), "rZ"(from[7]), "r"(to));
break;
case 4:
asm volatile("str %w0, [%4, #4 * 0]\n"
"str %w1, [%4, #4 * 1]\n"
"str %w2, [%4, #4 * 2]\n"
"str %w3, [%4, #4 * 3]\n"
:
: "rZ"(from[0]), "rZ"(from[1]), "rZ"(from[2]),
"rZ"(from[3]), "r"(to));
break;
case 2:
asm volatile("str %w0, [%2, #4 * 0]\n"
"str %w1, [%2, #4 * 1]\n"
:
: "rZ"(from[0]), "rZ"(from[1]), "r"(to));
break;
case 1:
__raw_writel(*from, to);
break;
default:
BUILD_BUG();
}
}
void __iowrite32_copy_full(void __iomem *to, const void *from, size_t count);
static __always_inline void
__iowrite32_copy(void __iomem *to, const void *from, size_t count)
{
if (__builtin_constant_p(count) &&
(count == 8 || count == 4 || count == 2 || count == 1)) {
__const_memcpy_toio_aligned32(to, from, count);
dgh();
} else {
__iowrite32_copy_full(to, from, count);
}
}
#define __iowrite32_copy __iowrite32_copy
static __always_inline void
__const_memcpy_toio_aligned64(volatile u64 __iomem *to, const u64 *from,
size_t count)
{
switch (count) {
case 8:
asm volatile("str %x0, [%8, #8 * 0]\n"
"str %x1, [%8, #8 * 1]\n"
"str %x2, [%8, #8 * 2]\n"
"str %x3, [%8, #8 * 3]\n"
"str %x4, [%8, #8 * 4]\n"
"str %x5, [%8, #8 * 5]\n"
"str %x6, [%8, #8 * 6]\n"
"str %x7, [%8, #8 * 7]\n"
:
: "rZ"(from[0]), "rZ"(from[1]), "rZ"(from[2]),
"rZ"(from[3]), "rZ"(from[4]), "rZ"(from[5]),
"rZ"(from[6]), "rZ"(from[7]), "r"(to));
break;
case 4:
asm volatile("str %x0, [%4, #8 * 0]\n"
"str %x1, [%4, #8 * 1]\n"
"str %x2, [%4, #8 * 2]\n"
"str %x3, [%4, #8 * 3]\n"
:
: "rZ"(from[0]), "rZ"(from[1]), "rZ"(from[2]),
"rZ"(from[3]), "r"(to));
break;
case 2:
asm volatile("str %x0, [%2, #8 * 0]\n"
"str %x1, [%2, #8 * 1]\n"
:
: "rZ"(from[0]), "rZ"(from[1]), "r"(to));
break;
case 1:
__raw_writeq(*from, to);
break;
default:
BUILD_BUG();
}
}
void __iowrite64_copy_full(void __iomem *to, const void *from, size_t count);
static __always_inline void
__iowrite64_copy(void __iomem *to, const void *from, size_t count)
{
if (__builtin_constant_p(count) &&
(count == 8 || count == 4 || count == 2 || count == 1)) {
__const_memcpy_toio_aligned64(to, from, count);
dgh();
} else {
__iowrite64_copy_full(to, from, count);
}
}
#define __iowrite64_copy __iowrite64_copy
/*
* I/O memory mapping functions.
*/
#define ioremap_prot ioremap_prot
#define _PAGE_IOREMAP PROT_DEVICE_nGnRE
#define ioremap_wc(addr, size) \
ioremap_prot((addr), (size), PROT_NORMAL_NC)
#define ioremap_np(addr, size) \
ioremap_prot((addr), (size), PROT_DEVICE_nGnRnE)
/*
* io{read,write}{16,32,64}be() macros
*/
#define ioread16be(p) ({ __u16 __v = be16_to_cpu((__force __be16)__raw_readw(p)); __iormb(__v); __v; })
#define ioread32be(p) ({ __u32 __v = be32_to_cpu((__force __be32)__raw_readl(p)); __iormb(__v); __v; })
#define ioread64be(p) ({ __u64 __v = be64_to_cpu((__force __be64)__raw_readq(p)); __iormb(__v); __v; })
#define iowrite16be(v,p) ({ __iowmb(); __raw_writew((__force __u16)cpu_to_be16(v), p); })
#define iowrite32be(v,p) ({ __iowmb(); __raw_writel((__force __u32)cpu_to_be32(v), p); })
#define iowrite64be(v,p) ({ __iowmb(); __raw_writeq((__force __u64)cpu_to_be64(v), p); })
#include <asm-generic/io.h>
#define ioremap_cache ioremap_cache
static inline void __iomem *ioremap_cache(phys_addr_t addr, size_t size)
{
if (pfn_is_map_memory(__phys_to_pfn(addr)))
return (void __iomem *)__phys_to_virt(addr);
return ioremap_prot(addr, size, PROT_NORMAL);
}
/*
* More restrictive address range checking than the default implementation
* (PHYS_OFFSET and PHYS_MASK taken into account).
*/
#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
extern bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
unsigned long flags);
#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap
#endif /* __ASM_IO_H */