From 637be9183e0475c430fc77162c222bcaab887989 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 8 May 2021 00:07:46 +0200 Subject: [PATCH 01/13] asm-generic: use asm-generic/unaligned.h for most architectures There are several architectures that just duplicate the contents of asm-generic/unaligned.h, so change those over to use the file directly, to make future modifications easier. The exceptions are: - arm32 sets HAVE_EFFICIENT_UNALIGNED_ACCESS, but wants the unaligned-struct version - ppc64le disables HAVE_EFFICIENT_UNALIGNED_ACCESS but includes the access-ok version - most m68k also uses the access-ok version without setting HAVE_EFFICIENT_UNALIGNED_ACCESS. - sh4a has a custom inline asm version - openrisc is the only one using the memmove version that generally leads to worse code. Signed-off-by: Arnd Bergmann Reviewed-by: Thomas Gleixner Reviewed-by: Geert Uytterhoeven Acked-by: Geert Uytterhoeven --- arch/alpha/include/asm/unaligned.h | 12 ---------- arch/ia64/include/asm/unaligned.h | 12 ---------- arch/m68k/include/asm/unaligned.h | 9 +------- arch/microblaze/include/asm/unaligned.h | 27 ----------------------- arch/parisc/include/asm/unaligned.h | 6 +---- arch/sparc/include/asm/unaligned.h | 11 ---------- arch/x86/include/asm/unaligned.h | 15 ------------- arch/xtensa/include/asm/unaligned.h | 29 ------------------------- 8 files changed, 2 insertions(+), 119 deletions(-) delete mode 100644 arch/alpha/include/asm/unaligned.h delete mode 100644 arch/ia64/include/asm/unaligned.h delete mode 100644 arch/microblaze/include/asm/unaligned.h delete mode 100644 arch/sparc/include/asm/unaligned.h delete mode 100644 arch/x86/include/asm/unaligned.h delete mode 100644 arch/xtensa/include/asm/unaligned.h diff --git a/arch/alpha/include/asm/unaligned.h b/arch/alpha/include/asm/unaligned.h deleted file mode 100644 index 863c807b66f8..000000000000 --- a/arch/alpha/include/asm/unaligned.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_ALPHA_UNALIGNED_H -#define _ASM_ALPHA_UNALIGNED_H - -#include -#include -#include - -#define get_unaligned __get_unaligned_le -#define put_unaligned __put_unaligned_le - -#endif /* _ASM_ALPHA_UNALIGNED_H */ diff --git a/arch/ia64/include/asm/unaligned.h b/arch/ia64/include/asm/unaligned.h deleted file mode 100644 index 328942e3cbce..000000000000 --- a/arch/ia64/include/asm/unaligned.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_IA64_UNALIGNED_H -#define _ASM_IA64_UNALIGNED_H - -#include -#include -#include - -#define get_unaligned __get_unaligned_le -#define put_unaligned __put_unaligned_le - -#endif /* _ASM_IA64_UNALIGNED_H */ diff --git a/arch/m68k/include/asm/unaligned.h b/arch/m68k/include/asm/unaligned.h index 98c8930d3d35..84e437337344 100644 --- a/arch/m68k/include/asm/unaligned.h +++ b/arch/m68k/include/asm/unaligned.h @@ -2,15 +2,8 @@ #ifndef _ASM_M68K_UNALIGNED_H #define _ASM_M68K_UNALIGNED_H - #ifdef CONFIG_CPU_HAS_NO_UNALIGNED -#include -#include -#include - -#define get_unaligned __get_unaligned_be -#define put_unaligned __put_unaligned_be - +#include #else /* * The m68k can do unaligned accesses itself. diff --git a/arch/microblaze/include/asm/unaligned.h b/arch/microblaze/include/asm/unaligned.h deleted file mode 100644 index 448299beab69..000000000000 --- a/arch/microblaze/include/asm/unaligned.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2008 Michal Simek - * Copyright (C) 2006 Atmark Techno, Inc. 
- */ - -#ifndef _ASM_MICROBLAZE_UNALIGNED_H -#define _ASM_MICROBLAZE_UNALIGNED_H - -# ifdef __KERNEL__ - -# ifdef __MICROBLAZEEL__ -# include -# include -# define get_unaligned __get_unaligned_le -# define put_unaligned __put_unaligned_le -# else -# include -# include -# define get_unaligned __get_unaligned_be -# define put_unaligned __put_unaligned_be -# endif - -# include - -# endif /* __KERNEL__ */ -#endif /* _ASM_MICROBLAZE_UNALIGNED_H */ diff --git a/arch/parisc/include/asm/unaligned.h b/arch/parisc/include/asm/unaligned.h index e9029c7c2a69..3bda16773ba6 100644 --- a/arch/parisc/include/asm/unaligned.h +++ b/arch/parisc/include/asm/unaligned.h @@ -2,11 +2,7 @@ #ifndef _ASM_PARISC_UNALIGNED_H #define _ASM_PARISC_UNALIGNED_H -#include -#include -#include -#define get_unaligned __get_unaligned_be -#define put_unaligned __put_unaligned_be +#include #ifdef __KERNEL__ struct pt_regs; diff --git a/arch/sparc/include/asm/unaligned.h b/arch/sparc/include/asm/unaligned.h deleted file mode 100644 index 7971d89d2f54..000000000000 --- a/arch/sparc/include/asm/unaligned.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SPARC_UNALIGNED_H -#define _ASM_SPARC_UNALIGNED_H - -#include -#include -#include -#define get_unaligned __get_unaligned_be -#define put_unaligned __put_unaligned_be - -#endif /* _ASM_SPARC_UNALIGNED_H */ diff --git a/arch/x86/include/asm/unaligned.h b/arch/x86/include/asm/unaligned.h deleted file mode 100644 index 9c754a7447aa..000000000000 --- a/arch/x86/include/asm/unaligned.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_UNALIGNED_H -#define _ASM_X86_UNALIGNED_H - -/* - * The x86 can do unaligned accesses itself. - */ - -#include -#include - -#define get_unaligned __get_unaligned_le -#define put_unaligned __put_unaligned_le - -#endif /* _ASM_X86_UNALIGNED_H */ diff --git a/arch/xtensa/include/asm/unaligned.h b/arch/xtensa/include/asm/unaligned.h deleted file mode 100644 index 8e7ed046bfed..000000000000 --- a/arch/xtensa/include/asm/unaligned.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Xtensa doesn't handle unaligned accesses efficiently. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2001 - 2005 Tensilica Inc. - */ -#ifndef _ASM_XTENSA_UNALIGNED_H -#define _ASM_XTENSA_UNALIGNED_H - -#include - -#ifdef __LITTLE_ENDIAN -# include -# include -# include -# define get_unaligned __get_unaligned_le -# define put_unaligned __put_unaligned_le -#else -# include -# include -# include -# define get_unaligned __get_unaligned_be -# define put_unaligned __put_unaligned_be -#endif - -#endif /* _ASM_XTENSA_UNALIGNED_H */ From bf067edf5d2f5b2948ee7197974a719aae3e526c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 8 May 2021 00:07:47 +0200 Subject: [PATCH 02/13] openrisc: always use unaligned-struct header openrisc is the only architecture using the linux/unaligned/*memmove infrastructure. There is a comment saying that this version is more efficient, but this was added in 2011 before the openrisc gcc port was merged upstream. I checked a couple of files to see what the actual difference is with the mainline gcc (9.4 and 11.1), and found that the generic header seems to produce better code now, regardless of the gcc version. 
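For reference, the two variants being compared boil down to roughly the following (a condensed sketch of the 32-bit native-order helpers, not the verbatim header contents):

  /* memmove variant: copy the bytes into an aligned temporary */
  static inline u32 __get_unaligned_memmove32(const void *p)
  {
          u32 tmp;
          memmove(&tmp, p, 4);
          return tmp;
  }

  /* struct variant: read through a packed struct and let the compiler
   * pick the byte loads and shifts itself */
  static inline u32 __get_unaligned_cpu32(const void *p)
  {
          const struct { u32 x; } __packed *ptr = p;
          return ptr->x;
  }
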
Specifically, the be_memmove leads to allocating a stack slot and copying the data one byte at a time, then reading the whole word from the stack: 00000000 : 0: 9c 21 ff f4 l.addi r1,r1,-12 4: d4 01 10 04 l.sw 4(r1),r2 8: 8e 63 00 00 l.lbz r19,0(r3) c: 9c 41 00 0c l.addi r2,r1,12 10: 8e 23 00 01 l.lbz r17,1(r3) 14: db e2 9f f4 l.sb -12(r2),r19 18: db e2 8f f5 l.sb -11(r2),r17 1c: 8e 63 00 02 l.lbz r19,2(r3) 20: 8e 23 00 03 l.lbz r17,3(r3) 24: d4 01 48 08 l.sw 8(r1),r9 28: db e2 9f f6 l.sb -10(r2),r19 2c: db e2 8f f7 l.sb -9(r2),r17 30: 85 62 ff f4 l.lwz r11,-12(r2) 34: 85 21 00 08 l.lwz r9,8(r1) 38: 84 41 00 04 l.lwz r2,4(r1) 3c: 44 00 48 00 l.jr r9 40: 9c 21 00 0c l.addi r1,r1,12 while the be_struct version reads each byte into a register and does a shift to the right position: 00000000 : 0: 9c 21 ff f8 l.addi r1,r1,-8 4: 8e 63 00 00 l.lbz r19,0(r3) 8: aa 20 00 18 l.ori r17,r0,0x18 c: e2 73 88 08 l.sll r19,r19,r17 10: 8d 63 00 01 l.lbz r11,1(r3) 14: aa 20 00 10 l.ori r17,r0,0x10 18: e1 6b 88 08 l.sll r11,r11,r17 1c: e1 6b 98 04 l.or r11,r11,r19 20: 8e 23 00 02 l.lbz r17,2(r3) 24: aa 60 00 08 l.ori r19,r0,0x8 28: e2 31 98 08 l.sll r17,r17,r19 2c: d4 01 10 00 l.sw 0(r1),r2 30: d4 01 48 04 l.sw 4(r1),r9 34: 9c 41 00 08 l.addi r2,r1,8 38: e2 31 58 04 l.or r17,r17,r11 3c: 8d 63 00 03 l.lbz r11,3(r3) 40: e1 6b 88 04 l.or r11,r11,r17 44: 84 41 00 00 l.lwz r2,0(r1) 48: 85 21 00 04 l.lwz r9,4(r1) 4c: 44 00 48 00 l.jr r9 50: 9c 21 00 08 l.addi r1,r1,8 According to Stafford Horne, the new version should in fact perform better. In the trivial example, the struct version is a few instructions longer, but building a whole kernel shows an overall reduction in code size, presumably because it now has to manage fewer stack slots: text data bss dec hex filename 4792010 181480 82324 5055814 4d2546 vmlinux-unaligned-memmove 4790642 181480 82324 5054446 4d1fee vmlinux-unaligned-struct Remove the memmove version completely and let openrisc use the same code as everyone else, as a simplification. Signed-off-by: Arnd Bergmann Acked-by: Stafford Horne --- arch/openrisc/include/asm/unaligned.h | 47 --------------------------- include/linux/unaligned/be_memmove.h | 37 --------------------- include/linux/unaligned/le_memmove.h | 37 --------------------- include/linux/unaligned/memmove.h | 46 -------------------------- 4 files changed, 167 deletions(-) delete mode 100644 arch/openrisc/include/asm/unaligned.h delete mode 100644 include/linux/unaligned/be_memmove.h delete mode 100644 include/linux/unaligned/le_memmove.h delete mode 100644 include/linux/unaligned/memmove.h diff --git a/arch/openrisc/include/asm/unaligned.h b/arch/openrisc/include/asm/unaligned.h deleted file mode 100644 index 14353f2101f2..000000000000 --- a/arch/openrisc/include/asm/unaligned.h +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * OpenRISC Linux - * - * Linux architectural port borrowing liberally from similar works of - * others. All original copyrights apply as per the original source - * declaration. - * - * OpenRISC implementation: - * Copyright (C) 2003 Matjaz Breskvar - * Copyright (C) 2010-2011 Jonas Bonn - * et al. - */ - -#ifndef __ASM_OPENRISC_UNALIGNED_H -#define __ASM_OPENRISC_UNALIGNED_H - -/* - * This is copied from the generic implementation and the C-struct - * variant replaced with the memmove variant. The GCC compiler - * for the OR32 arch optimizes too aggressively for the C-struct - * variant to work, so use the memmove variant instead. 
- * - * It may be worth considering implementing the unaligned access - * exception handler and allowing unaligned accesses (access_ok.h)... - * not sure if it would be much of a performance win without further - * investigation. - */ -#include - -#if defined(__LITTLE_ENDIAN) -# include -# include -# include -# define get_unaligned __get_unaligned_le -# define put_unaligned __put_unaligned_le -#elif defined(__BIG_ENDIAN) -# include -# include -# include -# define get_unaligned __get_unaligned_be -# define put_unaligned __put_unaligned_be -#else -# error need to define endianess -#endif - -#endif /* __ASM_OPENRISC_UNALIGNED_H */ diff --git a/include/linux/unaligned/be_memmove.h b/include/linux/unaligned/be_memmove.h deleted file mode 100644 index 7164214a4ba1..000000000000 --- a/include/linux/unaligned/be_memmove.h +++ /dev/null @@ -1,37 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_UNALIGNED_BE_MEMMOVE_H -#define _LINUX_UNALIGNED_BE_MEMMOVE_H - -#include - -static inline u16 get_unaligned_be16(const void *p) -{ - return __get_unaligned_memmove16((const u8 *)p); -} - -static inline u32 get_unaligned_be32(const void *p) -{ - return __get_unaligned_memmove32((const u8 *)p); -} - -static inline u64 get_unaligned_be64(const void *p) -{ - return __get_unaligned_memmove64((const u8 *)p); -} - -static inline void put_unaligned_be16(u16 val, void *p) -{ - __put_unaligned_memmove16(val, p); -} - -static inline void put_unaligned_be32(u32 val, void *p) -{ - __put_unaligned_memmove32(val, p); -} - -static inline void put_unaligned_be64(u64 val, void *p) -{ - __put_unaligned_memmove64(val, p); -} - -#endif /* _LINUX_UNALIGNED_LE_MEMMOVE_H */ diff --git a/include/linux/unaligned/le_memmove.h b/include/linux/unaligned/le_memmove.h deleted file mode 100644 index 9202e864d026..000000000000 --- a/include/linux/unaligned/le_memmove.h +++ /dev/null @@ -1,37 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_UNALIGNED_LE_MEMMOVE_H -#define _LINUX_UNALIGNED_LE_MEMMOVE_H - -#include - -static inline u16 get_unaligned_le16(const void *p) -{ - return __get_unaligned_memmove16((const u8 *)p); -} - -static inline u32 get_unaligned_le32(const void *p) -{ - return __get_unaligned_memmove32((const u8 *)p); -} - -static inline u64 get_unaligned_le64(const void *p) -{ - return __get_unaligned_memmove64((const u8 *)p); -} - -static inline void put_unaligned_le16(u16 val, void *p) -{ - __put_unaligned_memmove16(val, p); -} - -static inline void put_unaligned_le32(u32 val, void *p) -{ - __put_unaligned_memmove32(val, p); -} - -static inline void put_unaligned_le64(u64 val, void *p) -{ - __put_unaligned_memmove64(val, p); -} - -#endif /* _LINUX_UNALIGNED_LE_MEMMOVE_H */ diff --git a/include/linux/unaligned/memmove.h b/include/linux/unaligned/memmove.h deleted file mode 100644 index ac71b53bc6dc..000000000000 --- a/include/linux/unaligned/memmove.h +++ /dev/null @@ -1,46 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_UNALIGNED_MEMMOVE_H -#define _LINUX_UNALIGNED_MEMMOVE_H - -#include -#include - -/* Use memmove here, so gcc does not insert a __builtin_memcpy. 
*/ - -static inline u16 __get_unaligned_memmove16(const void *p) -{ - u16 tmp; - memmove(&tmp, p, 2); - return tmp; -} - -static inline u32 __get_unaligned_memmove32(const void *p) -{ - u32 tmp; - memmove(&tmp, p, 4); - return tmp; -} - -static inline u64 __get_unaligned_memmove64(const void *p) -{ - u64 tmp; - memmove(&tmp, p, 8); - return tmp; -} - -static inline void __put_unaligned_memmove16(u16 val, void *p) -{ - memmove(p, &val, 2); -} - -static inline void __put_unaligned_memmove32(u32 val, void *p) -{ - memmove(p, &val, 4); -} - -static inline void __put_unaligned_memmove64(u64 val, void *p) -{ - memmove(p, &val, 8); -} - -#endif /* _LINUX_UNALIGNED_MEMMOVE_H */ From 94528b70781a4c63db9e1309d4cdbcaef2387904 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 8 May 2021 00:07:48 +0200 Subject: [PATCH 03/13] sh: remove unaligned access for sh4a Unlike every other architecture, sh4a uses an inline asm implementation for get_unaligned(). I have shown that this produces better object code than the asm-generic version. However, there are very few users of arch/sh/ overall, and most of those seem to use sh4 rather than sh4a CPU cores, so it seems not worth keeping the complexity in the architecture independent code. Change over to the generic version to allow simplifying that in a follow-up patch. If there are sh4a users that want the best performance, it would probably be best to add support for the movua instruction in gcc itself, as this would not just help get_unaligned() callers but any code that accesses a __packed variable in user space or kernel. Signed-off-by: Arnd Bergmann --- arch/sh/include/asm/unaligned-sh4a.h | 199 --------------------------- arch/sh/include/asm/unaligned.h | 13 -- 2 files changed, 212 deletions(-) delete mode 100644 arch/sh/include/asm/unaligned-sh4a.h delete mode 100644 arch/sh/include/asm/unaligned.h diff --git a/arch/sh/include/asm/unaligned-sh4a.h b/arch/sh/include/asm/unaligned-sh4a.h deleted file mode 100644 index d311f00ed530..000000000000 --- a/arch/sh/include/asm/unaligned-sh4a.h +++ /dev/null @@ -1,199 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_SH_UNALIGNED_SH4A_H -#define __ASM_SH_UNALIGNED_SH4A_H - -/* - * SH-4A has support for unaligned 32-bit loads, and 32-bit loads only. - * Support for 64-bit accesses are done through shifting and masking - * relative to the endianness. Unaligned stores are not supported by the - * instruction encoding, so these continue to use the packed - * struct. - * - * The same note as with the movli.l/movco.l pair applies here, as long - * as the load is guaranteed to be inlined, nothing else will hook in to - * r0 and we get the return value for free. - * - * NOTE: Due to the fact we require r0 encoding, care should be taken to - * avoid mixing these heavily with other r0 consumers, such as the atomic - * ops. Failure to adhere to this can result in the compiler running out - * of spill registers and blowing up when building at low optimization - * levels. See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34777. 
- */ -#include -#include -#include - -static inline u16 sh4a_get_unaligned_cpu16(const u8 *p) -{ -#ifdef __LITTLE_ENDIAN - return p[0] | p[1] << 8; -#else - return p[0] << 8 | p[1]; -#endif -} - -static __always_inline u32 sh4a_get_unaligned_cpu32(const u8 *p) -{ - unsigned long unaligned; - - __asm__ __volatile__ ( - "movua.l @%1, %0\n\t" - : "=z" (unaligned) - : "r" (p) - ); - - return unaligned; -} - -/* - * Even though movua.l supports auto-increment on the read side, it can - * only store to r0 due to instruction encoding constraints, so just let - * the compiler sort it out on its own. - */ -static inline u64 sh4a_get_unaligned_cpu64(const u8 *p) -{ -#ifdef __LITTLE_ENDIAN - return (u64)sh4a_get_unaligned_cpu32(p + 4) << 32 | - sh4a_get_unaligned_cpu32(p); -#else - return (u64)sh4a_get_unaligned_cpu32(p) << 32 | - sh4a_get_unaligned_cpu32(p + 4); -#endif -} - -static inline u16 get_unaligned_le16(const void *p) -{ - return le16_to_cpu(sh4a_get_unaligned_cpu16(p)); -} - -static inline u32 get_unaligned_le32(const void *p) -{ - return le32_to_cpu(sh4a_get_unaligned_cpu32(p)); -} - -static inline u64 get_unaligned_le64(const void *p) -{ - return le64_to_cpu(sh4a_get_unaligned_cpu64(p)); -} - -static inline u16 get_unaligned_be16(const void *p) -{ - return be16_to_cpu(sh4a_get_unaligned_cpu16(p)); -} - -static inline u32 get_unaligned_be32(const void *p) -{ - return be32_to_cpu(sh4a_get_unaligned_cpu32(p)); -} - -static inline u64 get_unaligned_be64(const void *p) -{ - return be64_to_cpu(sh4a_get_unaligned_cpu64(p)); -} - -static inline void nonnative_put_le16(u16 val, u8 *p) -{ - *p++ = val; - *p++ = val >> 8; -} - -static inline void nonnative_put_le32(u32 val, u8 *p) -{ - nonnative_put_le16(val, p); - nonnative_put_le16(val >> 16, p + 2); -} - -static inline void nonnative_put_le64(u64 val, u8 *p) -{ - nonnative_put_le32(val, p); - nonnative_put_le32(val >> 32, p + 4); -} - -static inline void nonnative_put_be16(u16 val, u8 *p) -{ - *p++ = val >> 8; - *p++ = val; -} - -static inline void nonnative_put_be32(u32 val, u8 *p) -{ - nonnative_put_be16(val >> 16, p); - nonnative_put_be16(val, p + 2); -} - -static inline void nonnative_put_be64(u64 val, u8 *p) -{ - nonnative_put_be32(val >> 32, p); - nonnative_put_be32(val, p + 4); -} - -static inline void put_unaligned_le16(u16 val, void *p) -{ -#ifdef __LITTLE_ENDIAN - __put_unaligned_cpu16(val, p); -#else - nonnative_put_le16(val, p); -#endif -} - -static inline void put_unaligned_le32(u32 val, void *p) -{ -#ifdef __LITTLE_ENDIAN - __put_unaligned_cpu32(val, p); -#else - nonnative_put_le32(val, p); -#endif -} - -static inline void put_unaligned_le64(u64 val, void *p) -{ -#ifdef __LITTLE_ENDIAN - __put_unaligned_cpu64(val, p); -#else - nonnative_put_le64(val, p); -#endif -} - -static inline void put_unaligned_be16(u16 val, void *p) -{ -#ifdef __BIG_ENDIAN - __put_unaligned_cpu16(val, p); -#else - nonnative_put_be16(val, p); -#endif -} - -static inline void put_unaligned_be32(u32 val, void *p) -{ -#ifdef __BIG_ENDIAN - __put_unaligned_cpu32(val, p); -#else - nonnative_put_be32(val, p); -#endif -} - -static inline void put_unaligned_be64(u64 val, void *p) -{ -#ifdef __BIG_ENDIAN - __put_unaligned_cpu64(val, p); -#else - nonnative_put_be64(val, p); -#endif -} - -/* - * While it's a bit non-obvious, even though the generic le/be wrappers - * use the __get/put_xxx prefixing, they actually wrap in to the - * non-prefixed get/put_xxx variants as provided above. 
- */ -#include - -#ifdef __LITTLE_ENDIAN -# define get_unaligned __get_unaligned_le -# define put_unaligned __put_unaligned_le -#else -# define get_unaligned __get_unaligned_be -# define put_unaligned __put_unaligned_be -#endif - -#endif /* __ASM_SH_UNALIGNED_SH4A_H */ diff --git a/arch/sh/include/asm/unaligned.h b/arch/sh/include/asm/unaligned.h deleted file mode 100644 index 0c92e2c73af4..000000000000 --- a/arch/sh/include/asm/unaligned.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_SH_UNALIGNED_H -#define _ASM_SH_UNALIGNED_H - -#ifdef CONFIG_CPU_SH4A -/* SH-4A can handle unaligned loads in a relatively neutered fashion. */ -#include -#else -/* Otherwise, SH can't handle unaligned accesses. */ -#include -#endif - -#endif /* _ASM_SH_UNALIGNED_H */ From 3aec1db05d1ba631311beca4715655591a9e17d5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 8 May 2021 00:07:49 +0200 Subject: [PATCH 04/13] m68k: select CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS All supported CPUs other than the old dragonball and in theory other 68000 derivatives use the include/linux/unaligned/access_ok.h implementation for accessing unaligned variables, so presumably this works everywhere. However, m68k never selects CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, so none of the other conditionals in the kernel get the optimized implementation. Select this based on CPU_HAS_NO_UNALIGNED to make the two settings always match, and then use the generic version of the header. Signed-off-by: Arnd Bergmann Reviewed-by: Geert Uytterhoeven Acked-by: Geert Uytterhoeven --- arch/m68k/Kconfig | 1 + arch/m68k/include/asm/unaligned.h | 19 ------------------- 2 files changed, 1 insertion(+), 19 deletions(-) delete mode 100644 arch/m68k/include/asm/unaligned.h diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 372e4e69c43a..46089f3b9603 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -21,6 +21,7 @@ config M68K select HAVE_AOUT if MMU select HAVE_ASM_MODVERSIONS select HAVE_DEBUG_BUGVERBOSE + select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_HAS_NO_UNALIGNED select HAVE_FUTEX_CMPXCHG if MMU && FUTEX select HAVE_IDE select HAVE_MOD_ARCH_SPECIFIC diff --git a/arch/m68k/include/asm/unaligned.h b/arch/m68k/include/asm/unaligned.h deleted file mode 100644 index 84e437337344..000000000000 --- a/arch/m68k/include/asm/unaligned.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_M68K_UNALIGNED_H -#define _ASM_M68K_UNALIGNED_H - -#ifdef CONFIG_CPU_HAS_NO_UNALIGNED -#include -#else -/* - * The m68k can do unaligned accesses itself. - */ -#include -#include - -#define get_unaligned __get_unaligned_be -#define put_unaligned __put_unaligned_be - -#endif - -#endif /* _ASM_M68K_UNALIGNED_H */ From f12d3ff3f41cc92f67cfaf29697685e8834fe4a4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 8 May 2021 00:07:50 +0200 Subject: [PATCH 05/13] powerpc: use linux/unaligned/le_struct.h on LE power7 Little-endian POWER7 kernels disable CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS because that is not supported on the hardware, but the kernel still uses direct load/store for explicti get_unaligned()/put_unaligned(). I assume this is a mistake that leads to power7 having to trap and fix up all these unaligned accesses at a noticeable performance cost. The fix is completely trivial, just remove the file and use the generic version that gets it right. 
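Roughly, the 32-bit little-endian accessor changes as follows (simplified excerpts of the two headers, shown side by side only for illustration):

  /* linux/unaligned/access_ok.h, used before: a direct word-sized load,
   * which power7 has to trap and fix up when the address is in fact
   * unaligned */
  static __always_inline u32 get_unaligned_le32(const void *p)
  {
          return le32_to_cpup((__le32 *)p);
  }

  /* linux/unaligned/le_struct.h, the generic version: a load through a
   * packed struct, which the compiler may split into byte accesses */
  static inline u32 get_unaligned_le32(const void *p)
  {
          return __get_unaligned_cpu32((const u8 *)p);
  }
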
Signed-off-by: Arnd Bergmann --- arch/powerpc/include/asm/unaligned.h | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 arch/powerpc/include/asm/unaligned.h diff --git a/arch/powerpc/include/asm/unaligned.h b/arch/powerpc/include/asm/unaligned.h deleted file mode 100644 index ce69c5eff95e..000000000000 --- a/arch/powerpc/include/asm/unaligned.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_UNALIGNED_H -#define _ASM_POWERPC_UNALIGNED_H - -#ifdef __KERNEL__ - -/* - * The PowerPC can do unaligned accesses itself based on its endian mode. - */ -#include -#include - -#ifdef __LITTLE_ENDIAN__ -#define get_unaligned __get_unaligned_le -#define put_unaligned __put_unaligned_le -#else -#define get_unaligned __get_unaligned_be -#define put_unaligned __put_unaligned_be -#endif - -#endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_UNALIGNED_H */ From 0652035a57945e14e611dafae2ec5b46a05bc1d1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 8 May 2021 00:07:51 +0200 Subject: [PATCH 06/13] asm-generic: unaligned: remove byteshift helpers In theory, compilers should be able to work this out themselves so we can use a simpler version based on the swab() helpers. I have verified that this works on all supported compiler versions (gcc-4.9 and up, clang-10 and up). Looking at the object code produced by gcc-11, I found that the impact is mostly a change in inlining decisions that lead to slightly larger code. In other cases, this version produces explicit byte swaps in place of separate byte access, or comparing against pre-swapped constants. While the source code is clearly simpler, I have not seen an indication of the new version actually producing better code on Arm, so maybe we want to skip this after all. From what I can tell, gcc recognizes the byteswap pattern in the byteshift.h header and can turn it into explicit instructions, but it does not turn a __builtin_bswap32() back into individual bytes when that would result in better output, e.g. when storing a byte-reversed constant. 
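As a concrete example, on a big-endian kernel the little-endian 16-bit accessor changes from the byteshift form to a native packed-struct load followed by swab16() (taken from the helpers modified below):

  /* old, linux/unaligned/le_byteshift.h: assemble the value a byte at a time */
  static inline u16 __get_unaligned_le16(const u8 *p)
  {
          return p[0] | p[1] << 8;
  }

  /* new, linux/unaligned/be_struct.h: native-order load plus byte swap */
  static inline u16 get_unaligned_le16(const void *p)
  {
          return swab16(__get_unaligned_cpu16((const u8 *)p));
  }
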
Suggested-by: Linus Torvalds Signed-off-by: Arnd Bergmann --- arch/arm/include/asm/unaligned.h | 2 - include/asm-generic/unaligned.h | 2 - include/linux/unaligned/be_byteshift.h | 71 -------------------------- include/linux/unaligned/be_struct.h | 30 +++++++++++ include/linux/unaligned/le_byteshift.h | 71 -------------------------- include/linux/unaligned/le_struct.h | 30 +++++++++++ 6 files changed, 60 insertions(+), 146 deletions(-) delete mode 100644 include/linux/unaligned/be_byteshift.h delete mode 100644 include/linux/unaligned/le_byteshift.h diff --git a/arch/arm/include/asm/unaligned.h b/arch/arm/include/asm/unaligned.h index ab905ffcf193..3c5248fb4cdc 100644 --- a/arch/arm/include/asm/unaligned.h +++ b/arch/arm/include/asm/unaligned.h @@ -10,13 +10,11 @@ #if defined(__LITTLE_ENDIAN) # include -# include # include # define get_unaligned __get_unaligned_le # define put_unaligned __put_unaligned_le #elif defined(__BIG_ENDIAN) # include -# include # include # define get_unaligned __get_unaligned_be # define put_unaligned __put_unaligned_be diff --git a/include/asm-generic/unaligned.h b/include/asm-generic/unaligned.h index 374c940e9be1..d79df721ae60 100644 --- a/include/asm-generic/unaligned.h +++ b/include/asm-generic/unaligned.h @@ -16,7 +16,6 @@ #if defined(__LITTLE_ENDIAN) # ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS # include -# include # endif # include # define get_unaligned __get_unaligned_le @@ -24,7 +23,6 @@ #elif defined(__BIG_ENDIAN) # ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS # include -# include # endif # include # define get_unaligned __get_unaligned_be diff --git a/include/linux/unaligned/be_byteshift.h b/include/linux/unaligned/be_byteshift.h deleted file mode 100644 index c43ff5918c8a..000000000000 --- a/include/linux/unaligned/be_byteshift.h +++ /dev/null @@ -1,71 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_UNALIGNED_BE_BYTESHIFT_H -#define _LINUX_UNALIGNED_BE_BYTESHIFT_H - -#include - -static inline u16 __get_unaligned_be16(const u8 *p) -{ - return p[0] << 8 | p[1]; -} - -static inline u32 __get_unaligned_be32(const u8 *p) -{ - return p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3]; -} - -static inline u64 __get_unaligned_be64(const u8 *p) -{ - return (u64)__get_unaligned_be32(p) << 32 | - __get_unaligned_be32(p + 4); -} - -static inline void __put_unaligned_be16(u16 val, u8 *p) -{ - *p++ = val >> 8; - *p++ = val; -} - -static inline void __put_unaligned_be32(u32 val, u8 *p) -{ - __put_unaligned_be16(val >> 16, p); - __put_unaligned_be16(val, p + 2); -} - -static inline void __put_unaligned_be64(u64 val, u8 *p) -{ - __put_unaligned_be32(val >> 32, p); - __put_unaligned_be32(val, p + 4); -} - -static inline u16 get_unaligned_be16(const void *p) -{ - return __get_unaligned_be16(p); -} - -static inline u32 get_unaligned_be32(const void *p) -{ - return __get_unaligned_be32(p); -} - -static inline u64 get_unaligned_be64(const void *p) -{ - return __get_unaligned_be64(p); -} - -static inline void put_unaligned_be16(u16 val, void *p) -{ - __put_unaligned_be16(val, p); -} - -static inline void put_unaligned_be32(u32 val, void *p) -{ - __put_unaligned_be32(val, p); -} - -static inline void put_unaligned_be64(u64 val, void *p) -{ - __put_unaligned_be64(val, p); -} - -#endif /* _LINUX_UNALIGNED_BE_BYTESHIFT_H */ diff --git a/include/linux/unaligned/be_struct.h b/include/linux/unaligned/be_struct.h index 15ea503a13fc..76d9fe297c33 100644 --- a/include/linux/unaligned/be_struct.h +++ b/include/linux/unaligned/be_struct.h @@ -34,4 +34,34 @@ static inline 
void put_unaligned_be64(u64 val, void *p) __put_unaligned_cpu64(val, p); } +static inline u16 get_unaligned_le16(const void *p) +{ + return swab16(__get_unaligned_cpu16((const u8 *)p)); +} + +static inline u32 get_unaligned_le32(const void *p) +{ + return swab32(__get_unaligned_cpu32((const u8 *)p)); +} + +static inline u64 get_unaligned_le64(const void *p) +{ + return swab64(__get_unaligned_cpu64((const u8 *)p)); +} + +static inline void put_unaligned_le16(u16 val, void *p) +{ + __put_unaligned_cpu16(swab16(val), p); +} + +static inline void put_unaligned_le32(u32 val, void *p) +{ + __put_unaligned_cpu32(swab32(val), p); +} + +static inline void put_unaligned_le64(u64 val, void *p) +{ + __put_unaligned_cpu64(swab64(val), p); +} + #endif /* _LINUX_UNALIGNED_BE_STRUCT_H */ diff --git a/include/linux/unaligned/le_byteshift.h b/include/linux/unaligned/le_byteshift.h deleted file mode 100644 index 2248dcb0df76..000000000000 --- a/include/linux/unaligned/le_byteshift.h +++ /dev/null @@ -1,71 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_UNALIGNED_LE_BYTESHIFT_H -#define _LINUX_UNALIGNED_LE_BYTESHIFT_H - -#include - -static inline u16 __get_unaligned_le16(const u8 *p) -{ - return p[0] | p[1] << 8; -} - -static inline u32 __get_unaligned_le32(const u8 *p) -{ - return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24; -} - -static inline u64 __get_unaligned_le64(const u8 *p) -{ - return (u64)__get_unaligned_le32(p + 4) << 32 | - __get_unaligned_le32(p); -} - -static inline void __put_unaligned_le16(u16 val, u8 *p) -{ - *p++ = val; - *p++ = val >> 8; -} - -static inline void __put_unaligned_le32(u32 val, u8 *p) -{ - __put_unaligned_le16(val >> 16, p + 2); - __put_unaligned_le16(val, p); -} - -static inline void __put_unaligned_le64(u64 val, u8 *p) -{ - __put_unaligned_le32(val >> 32, p + 4); - __put_unaligned_le32(val, p); -} - -static inline u16 get_unaligned_le16(const void *p) -{ - return __get_unaligned_le16(p); -} - -static inline u32 get_unaligned_le32(const void *p) -{ - return __get_unaligned_le32(p); -} - -static inline u64 get_unaligned_le64(const void *p) -{ - return __get_unaligned_le64(p); -} - -static inline void put_unaligned_le16(u16 val, void *p) -{ - __put_unaligned_le16(val, p); -} - -static inline void put_unaligned_le32(u32 val, void *p) -{ - __put_unaligned_le32(val, p); -} - -static inline void put_unaligned_le64(u64 val, void *p) -{ - __put_unaligned_le64(val, p); -} - -#endif /* _LINUX_UNALIGNED_LE_BYTESHIFT_H */ diff --git a/include/linux/unaligned/le_struct.h b/include/linux/unaligned/le_struct.h index 9977987883a6..22f90a4afaa5 100644 --- a/include/linux/unaligned/le_struct.h +++ b/include/linux/unaligned/le_struct.h @@ -34,4 +34,34 @@ static inline void put_unaligned_le64(u64 val, void *p) __put_unaligned_cpu64(val, p); } +static inline u16 get_unaligned_be16(const void *p) +{ + return swab16(__get_unaligned_cpu16((const u8 *)p)); +} + +static inline u32 get_unaligned_be32(const void *p) +{ + return swab32(__get_unaligned_cpu32((const u8 *)p)); +} + +static inline u64 get_unaligned_be64(const void *p) +{ + return swab64(__get_unaligned_cpu64((const u8 *)p)); +} + +static inline void put_unaligned_be16(u16 val, void *p) +{ + __put_unaligned_cpu16(swab16(val), p); +} + +static inline void put_unaligned_be32(u32 val, void *p) +{ + __put_unaligned_cpu32(swab32(val), p); +} + +static inline void put_unaligned_be64(u64 val, void *p) +{ + __put_unaligned_cpu64(swab64(val), p); +} + #endif /* _LINUX_UNALIGNED_LE_STRUCT_H */ From 
778aaefb8e864fc61f850539ea479554dd4caea1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 8 May 2021 00:07:52 +0200 Subject: [PATCH 07/13] asm-generic: unaligned always use struct helpers As found by Vineet Gupta and Linus Torvalds, gcc has somewhat unexpected behavior when faced with overlapping unaligned pointers. The kernel's unaligned/access-ok.h header technically invokes undefined behavior that happens to usually work on the architectures using it, but if the compiler optimizes code based on the assumption that undefined behavior doesn't happen, it can create output that actually causes data corruption. A related problem was previously found on 32-bit ARMv7, where most instructions can be used on unaligned data, but 64-bit ldrd/strd causes an exception. The workaround was to always use the unaligned/le_struct.h helper instead of unaligned/access-ok.h, in commit 1cce91dfc8f7 ("ARM: 8715/1: add a private asm/unaligned.h"). The same solution should work on all other architectures as well, so remove the access-ok.h variant and use the other one unconditionally on all architectures, picking either the big-endian or little-endian version. With this, the arm specific header can be removed as well, and the only file including linux/unaligned/access_ok.h gets moved to including the normal file. Fortunately, this made almost no difference to the object code produced by gcc-11. On x86, s390, powerpc, and arc, the resulting binary appears to be identical to the previous version, while on arm64 and m68k there are minimal differences that looks like an optimization pass went into a different direction, usually using fewer stack spills on the new version. Signed-off-by: Arnd Bergmann Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100363 --- arch/arm/include/asm/unaligned.h | 25 ----------- arch/mips/crypto/crc32-mips.c | 2 +- include/asm-generic/unaligned.h | 13 +----- include/linux/unaligned/access_ok.h | 68 ----------------------------- 4 files changed, 3 insertions(+), 105 deletions(-) delete mode 100644 arch/arm/include/asm/unaligned.h delete mode 100644 include/linux/unaligned/access_ok.h diff --git a/arch/arm/include/asm/unaligned.h b/arch/arm/include/asm/unaligned.h deleted file mode 100644 index 3c5248fb4cdc..000000000000 --- a/arch/arm/include/asm/unaligned.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef __ASM_ARM_UNALIGNED_H -#define __ASM_ARM_UNALIGNED_H - -/* - * We generally want to set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS on ARMv6+, - * but we don't want to use linux/unaligned/access_ok.h since that can lead - * to traps on unaligned stm/ldm or strd/ldrd. 
- */ -#include - -#if defined(__LITTLE_ENDIAN) -# include -# include -# define get_unaligned __get_unaligned_le -# define put_unaligned __put_unaligned_le -#elif defined(__BIG_ENDIAN) -# include -# include -# define get_unaligned __get_unaligned_be -# define put_unaligned __put_unaligned_be -#else -# error need to define endianess -#endif - -#endif /* __ASM_ARM_UNALIGNED_H */ diff --git a/arch/mips/crypto/crc32-mips.c b/arch/mips/crypto/crc32-mips.c index faa88a6a74c0..0a03529cf317 100644 --- a/arch/mips/crypto/crc32-mips.c +++ b/arch/mips/crypto/crc32-mips.c @@ -8,13 +8,13 @@ * Copyright (C) 2018 MIPS Tech, LLC */ -#include #include #include #include #include #include #include +#include #include diff --git a/include/asm-generic/unaligned.h b/include/asm-generic/unaligned.h index d79df721ae60..36bf03aaa674 100644 --- a/include/asm-generic/unaligned.h +++ b/include/asm-generic/unaligned.h @@ -8,22 +8,13 @@ */ #include -/* Set by the arch if it can handle unaligned accesses in hardware. */ -#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS -# include -#endif - #if defined(__LITTLE_ENDIAN) -# ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS -# include -# endif +# include # include # define get_unaligned __get_unaligned_le # define put_unaligned __put_unaligned_le #elif defined(__BIG_ENDIAN) -# ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS -# include -# endif +# include # include # define get_unaligned __get_unaligned_be # define put_unaligned __put_unaligned_be diff --git a/include/linux/unaligned/access_ok.h b/include/linux/unaligned/access_ok.h deleted file mode 100644 index 167aa849c0ce..000000000000 --- a/include/linux/unaligned/access_ok.h +++ /dev/null @@ -1,68 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_UNALIGNED_ACCESS_OK_H -#define _LINUX_UNALIGNED_ACCESS_OK_H - -#include -#include - -static __always_inline u16 get_unaligned_le16(const void *p) -{ - return le16_to_cpup((__le16 *)p); -} - -static __always_inline u32 get_unaligned_le32(const void *p) -{ - return le32_to_cpup((__le32 *)p); -} - -static __always_inline u64 get_unaligned_le64(const void *p) -{ - return le64_to_cpup((__le64 *)p); -} - -static __always_inline u16 get_unaligned_be16(const void *p) -{ - return be16_to_cpup((__be16 *)p); -} - -static __always_inline u32 get_unaligned_be32(const void *p) -{ - return be32_to_cpup((__be32 *)p); -} - -static __always_inline u64 get_unaligned_be64(const void *p) -{ - return be64_to_cpup((__be64 *)p); -} - -static __always_inline void put_unaligned_le16(u16 val, void *p) -{ - *((__le16 *)p) = cpu_to_le16(val); -} - -static __always_inline void put_unaligned_le32(u32 val, void *p) -{ - *((__le32 *)p) = cpu_to_le32(val); -} - -static __always_inline void put_unaligned_le64(u64 val, void *p) -{ - *((__le64 *)p) = cpu_to_le64(val); -} - -static __always_inline void put_unaligned_be16(u16 val, void *p) -{ - *((__be16 *)p) = cpu_to_be16(val); -} - -static __always_inline void put_unaligned_be32(u32 val, void *p) -{ - *((__be32 *)p) = cpu_to_be32(val); -} - -static __always_inline void put_unaligned_be64(u64 val, void *p) -{ - *((__be64 *)p) = cpu_to_be64(val); -} - -#endif /* _LINUX_UNALIGNED_ACCESS_OK_H */ From 1b1774998b2dec837a57d729d1a22e5eb2d6d206 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 8 May 2021 00:07:53 +0200 Subject: [PATCH 08/13] partitions: msdos: fix one-byte get_unaligned() A simplification of get_unaligned() clashes with callers that pass in a character pointer, causing a harmless warning like: block/partitions/msdos.c: In function 
'msdos_partition': include/asm-generic/unaligned.h:13:22: warning: 'packed' attribute ignored for field of type 'u8' {aka 'unsigned char'} [-Wattributes] Remove the SYS_IND() macro with the get_unaligned() call and just use the ->ind field directly. Signed-off-by: Arnd Bergmann --- block/partitions/ldm.c | 2 +- block/partitions/ldm.h | 3 --- block/partitions/msdos.c | 24 +++++++++++------------- 3 files changed, 12 insertions(+), 17 deletions(-) diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c index d333786b5c7e..cc86534c80ad 100644 --- a/block/partitions/ldm.c +++ b/block/partitions/ldm.c @@ -510,7 +510,7 @@ static bool ldm_validate_partition_table(struct parsed_partitions *state) p = (struct msdos_partition *)(data + 0x01BE); for (i = 0; i < 4; i++, p++) - if (SYS_IND (p) == LDM_PARTITION) { + if (p->sys_ind == LDM_PARTITION) { result = true; break; } diff --git a/block/partitions/ldm.h b/block/partitions/ldm.h index d8d6beaa72c4..8693704dcf5e 100644 --- a/block/partitions/ldm.h +++ b/block/partitions/ldm.h @@ -84,9 +84,6 @@ struct parsed_partitions; #define TOC_BITMAP1 "config" /* Names of the two defined */ #define TOC_BITMAP2 "log" /* bitmaps in the TOCBLOCK. */ -/* Borrowed from msdos.c */ -#define SYS_IND(p) (get_unaligned(&(p)->sys_ind)) - struct frag { /* VBLK Fragment handling */ struct list_head list; u32 group; diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c index 8f2fcc080264..c94de377c502 100644 --- a/block/partitions/msdos.c +++ b/block/partitions/msdos.c @@ -38,8 +38,6 @@ */ #include -#define SYS_IND(p) get_unaligned(&p->sys_ind) - static inline sector_t nr_sects(struct msdos_partition *p) { return (sector_t)get_unaligned_le32(&p->nr_sects); @@ -52,9 +50,9 @@ static inline sector_t start_sect(struct msdos_partition *p) static inline int is_extended_partition(struct msdos_partition *p) { - return (SYS_IND(p) == DOS_EXTENDED_PARTITION || - SYS_IND(p) == WIN98_EXTENDED_PARTITION || - SYS_IND(p) == LINUX_EXTENDED_PARTITION); + return (p->sys_ind == DOS_EXTENDED_PARTITION || + p->sys_ind == WIN98_EXTENDED_PARTITION || + p->sys_ind == LINUX_EXTENDED_PARTITION); } #define MSDOS_LABEL_MAGIC1 0x55 @@ -193,7 +191,7 @@ static void parse_extended(struct parsed_partitions *state, put_partition(state, state->next, next, size); set_info(state, state->next, disksig); - if (SYS_IND(p) == LINUX_RAID_PARTITION) + if (p->sys_ind == LINUX_RAID_PARTITION) state->parts[state->next].flags = ADDPART_FLAG_RAID; loopct = 0; if (++state->next == state->limit) @@ -546,7 +544,7 @@ static void parse_minix(struct parsed_partitions *state, * a secondary MBR describing its subpartitions, or * the normal boot sector. */ if (msdos_magic_present(data + 510) && - SYS_IND(p) == MINIX_PARTITION) { /* subpartition table present */ + p->sys_ind == MINIX_PARTITION) { /* subpartition table present */ char tmp[1 + BDEVNAME_SIZE + 10 + 9 + 1]; snprintf(tmp, sizeof(tmp), " %s%d: name, origin); @@ -555,7 +553,7 @@ static void parse_minix(struct parsed_partitions *state, if (state->next == state->limit) break; /* add each partition in use */ - if (SYS_IND(p) == MINIX_PARTITION) + if (p->sys_ind == MINIX_PARTITION) put_partition(state, state->next++, start_sect(p), nr_sects(p)); } @@ -643,7 +641,7 @@ int msdos_partition(struct parsed_partitions *state) p = (struct msdos_partition *) (data + 0x1be); for (slot = 1 ; slot <= 4 ; slot++, p++) { /* If this is an EFI GPT disk, msdos should ignore it. 
*/ - if (SYS_IND(p) == EFI_PMBR_OSTYPE_EFI_GPT) { + if (p->sys_ind == EFI_PMBR_OSTYPE_EFI_GPT) { put_dev_sector(sect); return 0; } @@ -685,11 +683,11 @@ int msdos_partition(struct parsed_partitions *state) } put_partition(state, slot, start, size); set_info(state, slot, disksig); - if (SYS_IND(p) == LINUX_RAID_PARTITION) + if (p->sys_ind == LINUX_RAID_PARTITION) state->parts[slot].flags = ADDPART_FLAG_RAID; - if (SYS_IND(p) == DM6_PARTITION) + if (p->sys_ind == DM6_PARTITION) strlcat(state->pp_buf, "[DM]", PAGE_SIZE); - if (SYS_IND(p) == EZD_PARTITION) + if (p->sys_ind == EZD_PARTITION) strlcat(state->pp_buf, "[EZD]", PAGE_SIZE); } @@ -698,7 +696,7 @@ int msdos_partition(struct parsed_partitions *state) /* second pass - output for each on a separate line */ p = (struct msdos_partition *) (0x1be + data); for (slot = 1 ; slot <= 4 ; slot++, p++) { - unsigned char id = SYS_IND(p); + unsigned char id = p->sys_ind; int n; if (!nr_sects(p)) From dd979d7a08adb473744f2e9d4bbaddb039dc39dc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 8 May 2021 00:07:54 +0200 Subject: [PATCH 09/13] apparmor: use get_unaligned() only for multi-byte words Using get_unaligned() on a u8 pointer is pointless, and will result in a compiler warning after a planned cleanup: In file included from arch/x86/include/generated/asm/unaligned.h:1, from security/apparmor/policy_unpack.c:16: security/apparmor/policy_unpack.c: In function 'unpack_u8': include/asm-generic/unaligned.h:13:15: error: 'packed' attribute ignored for field of type 'u8' {aka 'unsigned char'} [-Werror=attributes] 13 | const struct { type x __packed; } *__pptr = (typeof(__pptr))(ptr); \ | ^ Simply dereference this pointer directly. Signed-off-by: Arnd Bergmann Acked-by: John Johansen --- security/apparmor/policy_unpack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index dc345ac93205..4e1f96b216a8 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -304,7 +304,7 @@ static bool unpack_u8(struct aa_ext *e, u8 *data, const char *name) if (!inbounds(e, sizeof(u8))) goto fail; if (data) - *data = get_unaligned((u8 *)e->pos); + *data = *((u8 *)e->pos); e->pos += sizeof(u8); return true; } From 8f4e3d48bb50765ab27ae5bebed2595b20de80a1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 8 May 2021 00:07:55 +0200 Subject: [PATCH 10/13] mwifiex: re-fix for unaligned accesses A patch from 2017 changed some accesses to DMA memory to use get_unaligned_le32() and similar interfaces, to avoid problems with doing unaligned accesson uncached memory. However, the change in the mwifiex_pcie_alloc_sleep_cookie_buf() function ended up changing the size of the access instead, as it operates on a pointer to u8. Change this function back to actually access the entire 32 bits. Note that the pointer is aligned by definition because it came from dma_alloc_coherent(). 
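The underlying problem is that get_unaligned()/put_unaligned() derive the access size from the type their pointer argument points to, and sleep_cookie_vbase is declared as u8 *. A simplified before/after of the affected store:

  /* before: the pointer is a u8 *, so only the low byte gets written */
  put_unaligned(tmp, card->sleep_cookie_vbase);

  /* after: access the buffer through a u32 * so the whole 32-bit cookie
   * is written; the dma_alloc_coherent() buffer is aligned, so a plain
   * store is sufficient */
  cookie = (u32 *)card->sleep_cookie_vbase;
  *cookie = FW_AWAKE_COOKIE;
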
Fixes: 92c70a958b0b ("mwifiex: fix for unaligned reads") Acked-by: Kalle Valo Signed-off-by: Arnd Bergmann --- drivers/net/wireless/marvell/mwifiex/pcie.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index 94228b316df1..46517515ba72 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -1231,7 +1231,7 @@ static int mwifiex_pcie_delete_cmdrsp_buf(struct mwifiex_adapter *adapter) static int mwifiex_pcie_alloc_sleep_cookie_buf(struct mwifiex_adapter *adapter) { struct pcie_service_card *card = adapter->card; - u32 tmp; + u32 *cookie; card->sleep_cookie_vbase = dma_alloc_coherent(&card->dev->dev, sizeof(u32), @@ -1242,13 +1242,11 @@ static int mwifiex_pcie_alloc_sleep_cookie_buf(struct mwifiex_adapter *adapter) "dma_alloc_coherent failed!\n"); return -ENOMEM; } + cookie = (u32 *)card->sleep_cookie_vbase; /* Init val of Sleep Cookie */ - tmp = FW_AWAKE_COOKIE; - put_unaligned(tmp, card->sleep_cookie_vbase); + *cookie = FW_AWAKE_COOKIE; - mwifiex_dbg(adapter, INFO, - "alloc_scook: sleep cookie=0x%x\n", - get_unaligned(card->sleep_cookie_vbase)); + mwifiex_dbg(adapter, INFO, "alloc_scook: sleep cookie=0x%x\n", *cookie); return 0; } From e3e22076710632250cfaee853499f3de6e9be35d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 8 May 2021 00:07:56 +0200 Subject: [PATCH 11/13] netpoll: avoid put_unaligned() on single character With a planned cleanup, using put_unaligned() on a single character results in a harmless warning: In file included from ./arch/x86/include/generated/asm/unaligned.h:1, from include/linux/etherdevice.h:24, from net/core/netpoll.c:18: net/core/netpoll.c: In function 'netpoll_send_udp': include/asm-generic/unaligned.h:23:9: error: 'packed' attribute ignored for field of type 'unsigned char' [-Werror=attributes] net/core/netpoll.c:431:3: note: in expansion of macro 'put_unaligned' 431 | put_unaligned(0x60, (unsigned char *)ip6h); | ^~~~~~~~~~~~~ include/asm-generic/unaligned.h:23:9: error: 'packed' attribute ignored for field of type 'unsigned char' [-Werror=attributes] net/core/netpoll.c:459:3: note: in expansion of macro 'put_unaligned' 459 | put_unaligned(0x45, (unsigned char *)iph); | ^~~~~~~~~~~~~ Replace this with an open-coded pointer dereference. 
Signed-off-by: Arnd Bergmann --- net/core/netpoll.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index c310c7c1cef7..9c49a38fa315 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -428,7 +428,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) ip6h = ipv6_hdr(skb); /* ip6h->version = 6; ip6h->priority = 0; */ - put_unaligned(0x60, (unsigned char *)ip6h); + *(unsigned char *)ip6h = 0x60; ip6h->flow_lbl[0] = 0; ip6h->flow_lbl[1] = 0; ip6h->flow_lbl[2] = 0; @@ -456,7 +456,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) iph = ip_hdr(skb); /* iph->version = 4; iph->ihl = 5; */ - put_unaligned(0x45, (unsigned char *)iph); + *(unsigned char *)iph = 0x45; iph->tos = 0; put_unaligned(htons(ip_len), &(iph->tot_len)); iph->id = htons(atomic_inc_return(&ip_ident)); From d40d8179482c330df5b9049797fe94c2e8eb4f6e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 8 May 2021 14:58:46 +0200 Subject: [PATCH 12/13] asm-generic: uaccess: 1-byte access is always aligned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the cleaned up version of asm-generic/unaligned.h, there is a warning about the get_user/put_user helpers using unaligned access for single-byte variables: include/asm-generic/uaccess.h: In function ‘__get_user_fn’: include/asm-generic/unaligned.h:13:15: warning: ‘packed’ attribute ignored for field of type ‘u8’ {aka ‘unsigned char’} [-Wattributes] const struct { type x __packed; } *__pptr = (typeof(__pptr))(ptr); \ Change these to use a direct pointer dereference to avoid the warnings. Signed-off-by: Arnd Bergmann --- include/asm-generic/uaccess.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h index 4973328f3c6e..a0b2f270dddc 100644 --- a/include/asm-generic/uaccess.h +++ b/include/asm-generic/uaccess.h @@ -19,7 +19,7 @@ __get_user_fn(size_t size, const void __user *from, void *to) switch (size) { case 1: - *(u8 *)to = get_unaligned((u8 __force *)from); + *(u8 *)to = *((u8 __force *)from); return 0; case 2: *(u16 *)to = get_unaligned((u16 __force *)from); @@ -45,7 +45,7 @@ __put_user_fn(size_t size, void __user *to, void *from) switch (size) { case 1: - put_unaligned(*(u8 *)from, (u8 __force *)to); + *(u8 __force *)to = *(u8 *)from; return 0; case 2: put_unaligned(*(u16 *)from, (u16 __force *)to); From 803f4e1eab7a8938ba3a3c30dd4eb5e9eeef5e63 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 8 May 2021 00:07:57 +0200 Subject: [PATCH 13/13] asm-generic: simplify asm/unaligned.h The get_unaligned()/put_unaligned() implementations are much more complex than necessary, now that all architectures use the same code. Move everything into one file and use a much more compact way to express the same logic. I've compared the binary output using gcc-11 across defconfig builds for all architectures and found this patch to make no difference, except for a single function on powerpc that needs two additional register moves because of random differences in register allocation. There are a handful of callers of the low-level __get_unaligned_cpu32, so leave that in place for the time being even though the common code no longer uses it. This adds a warning for any caller of get_unaligned()/put_unaligned() that passes in a single-byte pointer, but I've sent patches for all instances that show up in x86 and randconfig builds. 
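For illustration (the caller shown here is made up, only the expansion and the warning text are real), the new macro turns a multi-byte access into a packed-struct load, while passing a plain byte pointer now produces the attribute warning quoted in the earlier patches:

  u32 len = get_unaligned(&hdr->len);   /* hdr->len: a u32 field, hypothetical */

  /* ...which expands to roughly: */
  ({
          const struct { u32 x; } __packed *__pptr = (typeof(__pptr))(&hdr->len);
          __pptr->x;
  })

  u8 tag = get_unaligned(bytep);        /* bytep: u8 *, hypothetical */
  /* warns: 'packed' attribute ignored for field of type 'u8' */
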
It would be nice to change the arguments of the endian-specific accessors to take the matching __be16/__be32/__be64/__le16/__le32/__le64 arguments instead of a void pointer, but that requires more changes to the rest of the kernel. This new version does allow aggregate types into get_unaligned(), which was not the original goal but might come in handy. Signed-off-by: Arnd Bergmann --- include/asm-generic/unaligned.h | 130 +++++++++++++++++++++++++--- include/linux/unaligned/be_struct.h | 67 -------------- include/linux/unaligned/generic.h | 115 ------------------------ include/linux/unaligned/le_struct.h | 67 -------------- 4 files changed, 117 insertions(+), 262 deletions(-) delete mode 100644 include/linux/unaligned/be_struct.h delete mode 100644 include/linux/unaligned/generic.h delete mode 100644 include/linux/unaligned/le_struct.h diff --git a/include/asm-generic/unaligned.h b/include/asm-generic/unaligned.h index 36bf03aaa674..1c4242416c9f 100644 --- a/include/asm-generic/unaligned.h +++ b/include/asm-generic/unaligned.h @@ -6,20 +6,124 @@ * This is the most generic implementation of unaligned accesses * and should work almost anywhere. */ +#include #include -#if defined(__LITTLE_ENDIAN) -# include -# include -# define get_unaligned __get_unaligned_le -# define put_unaligned __put_unaligned_le -#elif defined(__BIG_ENDIAN) -# include -# include -# define get_unaligned __get_unaligned_be -# define put_unaligned __put_unaligned_be -#else -# error need to define endianess -#endif +#define __get_unaligned_t(type, ptr) ({ \ + const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x; \ +}) + +#define __put_unaligned_t(type, val, ptr) do { \ + struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x = (val); \ +} while (0) + +#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr)) +#define put_unaligned(val, ptr) __put_unaligned_t(typeof(*(ptr)), (val), (ptr)) + +static inline u16 get_unaligned_le16(const void *p) +{ + return le16_to_cpu(__get_unaligned_t(__le16, p)); +} + +static inline u32 get_unaligned_le32(const void *p) +{ + return le32_to_cpu(__get_unaligned_t(__le32, p)); +} + +static inline u64 get_unaligned_le64(const void *p) +{ + return le64_to_cpu(__get_unaligned_t(__le64, p)); +} + +static inline void put_unaligned_le16(u16 val, void *p) +{ + __put_unaligned_t(__le16, cpu_to_le16(val), p); +} + +static inline void put_unaligned_le32(u32 val, void *p) +{ + __put_unaligned_t(__le32, cpu_to_le32(val), p); +} + +static inline void put_unaligned_le64(u64 val, void *p) +{ + __put_unaligned_t(__le64, cpu_to_le64(val), p); +} + +static inline u16 get_unaligned_be16(const void *p) +{ + return be16_to_cpu(__get_unaligned_t(__be16, p)); +} + +static inline u32 get_unaligned_be32(const void *p) +{ + return be32_to_cpu(__get_unaligned_t(__be32, p)); +} + +static inline u64 get_unaligned_be64(const void *p) +{ + return be64_to_cpu(__get_unaligned_t(__be64, p)); +} + +static inline void put_unaligned_be16(u16 val, void *p) +{ + __put_unaligned_t(__be16, cpu_to_be16(val), p); +} + +static inline void put_unaligned_be32(u32 val, void *p) +{ + __put_unaligned_t(__be32, cpu_to_be32(val), p); +} + +static inline void put_unaligned_be64(u64 val, void *p) +{ + __put_unaligned_t(__be64, cpu_to_be64(val), p); +} + +static inline u32 __get_unaligned_be24(const u8 *p) +{ + return p[0] << 16 | p[1] << 8 | p[2]; +} + +static inline u32 get_unaligned_be24(const void *p) +{ + return __get_unaligned_be24(p); +} + +static inline u32 
__get_unaligned_le24(const u8 *p) +{ + return p[0] | p[1] << 8 | p[2] << 16; +} + +static inline u32 get_unaligned_le24(const void *p) +{ + return __get_unaligned_le24(p); +} + +static inline void __put_unaligned_be24(const u32 val, u8 *p) +{ + *p++ = val >> 16; + *p++ = val >> 8; + *p++ = val; +} + +static inline void put_unaligned_be24(const u32 val, void *p) +{ + __put_unaligned_be24(val, p); +} + +static inline void __put_unaligned_le24(const u32 val, u8 *p) +{ + *p++ = val; + *p++ = val >> 8; + *p++ = val >> 16; +} + +static inline void put_unaligned_le24(const u32 val, void *p) +{ + __put_unaligned_le24(val, p); +} #endif /* __ASM_GENERIC_UNALIGNED_H */ diff --git a/include/linux/unaligned/be_struct.h b/include/linux/unaligned/be_struct.h deleted file mode 100644 index 76d9fe297c33..000000000000 --- a/include/linux/unaligned/be_struct.h +++ /dev/null @@ -1,67 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_UNALIGNED_BE_STRUCT_H -#define _LINUX_UNALIGNED_BE_STRUCT_H - -#include - -static inline u16 get_unaligned_be16(const void *p) -{ - return __get_unaligned_cpu16((const u8 *)p); -} - -static inline u32 get_unaligned_be32(const void *p) -{ - return __get_unaligned_cpu32((const u8 *)p); -} - -static inline u64 get_unaligned_be64(const void *p) -{ - return __get_unaligned_cpu64((const u8 *)p); -} - -static inline void put_unaligned_be16(u16 val, void *p) -{ - __put_unaligned_cpu16(val, p); -} - -static inline void put_unaligned_be32(u32 val, void *p) -{ - __put_unaligned_cpu32(val, p); -} - -static inline void put_unaligned_be64(u64 val, void *p) -{ - __put_unaligned_cpu64(val, p); -} - -static inline u16 get_unaligned_le16(const void *p) -{ - return swab16(__get_unaligned_cpu16((const u8 *)p)); -} - -static inline u32 get_unaligned_le32(const void *p) -{ - return swab32(__get_unaligned_cpu32((const u8 *)p)); -} - -static inline u64 get_unaligned_le64(const void *p) -{ - return swab64(__get_unaligned_cpu64((const u8 *)p)); -} - -static inline void put_unaligned_le16(u16 val, void *p) -{ - __put_unaligned_cpu16(swab16(val), p); -} - -static inline void put_unaligned_le32(u32 val, void *p) -{ - __put_unaligned_cpu32(swab32(val), p); -} - -static inline void put_unaligned_le64(u64 val, void *p) -{ - __put_unaligned_cpu64(swab64(val), p); -} - -#endif /* _LINUX_UNALIGNED_BE_STRUCT_H */ diff --git a/include/linux/unaligned/generic.h b/include/linux/unaligned/generic.h deleted file mode 100644 index 303289492859..000000000000 --- a/include/linux/unaligned/generic.h +++ /dev/null @@ -1,115 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_UNALIGNED_GENERIC_H -#define _LINUX_UNALIGNED_GENERIC_H - -#include - -/* - * Cause a link-time error if we try an unaligned access other than - * 1,2,4 or 8 bytes long - */ -extern void __bad_unaligned_access_size(void); - -#define __get_unaligned_le(ptr) ((__force typeof(*(ptr)))({ \ - __builtin_choose_expr(sizeof(*(ptr)) == 1, *(ptr), \ - __builtin_choose_expr(sizeof(*(ptr)) == 2, get_unaligned_le16((ptr)), \ - __builtin_choose_expr(sizeof(*(ptr)) == 4, get_unaligned_le32((ptr)), \ - __builtin_choose_expr(sizeof(*(ptr)) == 8, get_unaligned_le64((ptr)), \ - __bad_unaligned_access_size())))); \ - })) - -#define __get_unaligned_be(ptr) ((__force typeof(*(ptr)))({ \ - __builtin_choose_expr(sizeof(*(ptr)) == 1, *(ptr), \ - __builtin_choose_expr(sizeof(*(ptr)) == 2, get_unaligned_be16((ptr)), \ - __builtin_choose_expr(sizeof(*(ptr)) == 4, get_unaligned_be32((ptr)), \ - __builtin_choose_expr(sizeof(*(ptr)) == 8, 
get_unaligned_be64((ptr)), \ - __bad_unaligned_access_size())))); \ - })) - -#define __put_unaligned_le(val, ptr) ({ \ - void *__gu_p = (ptr); \ - switch (sizeof(*(ptr))) { \ - case 1: \ - *(u8 *)__gu_p = (__force u8)(val); \ - break; \ - case 2: \ - put_unaligned_le16((__force u16)(val), __gu_p); \ - break; \ - case 4: \ - put_unaligned_le32((__force u32)(val), __gu_p); \ - break; \ - case 8: \ - put_unaligned_le64((__force u64)(val), __gu_p); \ - break; \ - default: \ - __bad_unaligned_access_size(); \ - break; \ - } \ - (void)0; }) - -#define __put_unaligned_be(val, ptr) ({ \ - void *__gu_p = (ptr); \ - switch (sizeof(*(ptr))) { \ - case 1: \ - *(u8 *)__gu_p = (__force u8)(val); \ - break; \ - case 2: \ - put_unaligned_be16((__force u16)(val), __gu_p); \ - break; \ - case 4: \ - put_unaligned_be32((__force u32)(val), __gu_p); \ - break; \ - case 8: \ - put_unaligned_be64((__force u64)(val), __gu_p); \ - break; \ - default: \ - __bad_unaligned_access_size(); \ - break; \ - } \ - (void)0; }) - -static inline u32 __get_unaligned_be24(const u8 *p) -{ - return p[0] << 16 | p[1] << 8 | p[2]; -} - -static inline u32 get_unaligned_be24(const void *p) -{ - return __get_unaligned_be24(p); -} - -static inline u32 __get_unaligned_le24(const u8 *p) -{ - return p[0] | p[1] << 8 | p[2] << 16; -} - -static inline u32 get_unaligned_le24(const void *p) -{ - return __get_unaligned_le24(p); -} - -static inline void __put_unaligned_be24(const u32 val, u8 *p) -{ - *p++ = val >> 16; - *p++ = val >> 8; - *p++ = val; -} - -static inline void put_unaligned_be24(const u32 val, void *p) -{ - __put_unaligned_be24(val, p); -} - -static inline void __put_unaligned_le24(const u32 val, u8 *p) -{ - *p++ = val; - *p++ = val >> 8; - *p++ = val >> 16; -} - -static inline void put_unaligned_le24(const u32 val, void *p) -{ - __put_unaligned_le24(val, p); -} - -#endif /* _LINUX_UNALIGNED_GENERIC_H */ diff --git a/include/linux/unaligned/le_struct.h b/include/linux/unaligned/le_struct.h deleted file mode 100644 index 22f90a4afaa5..000000000000 --- a/include/linux/unaligned/le_struct.h +++ /dev/null @@ -1,67 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_UNALIGNED_LE_STRUCT_H -#define _LINUX_UNALIGNED_LE_STRUCT_H - -#include - -static inline u16 get_unaligned_le16(const void *p) -{ - return __get_unaligned_cpu16((const u8 *)p); -} - -static inline u32 get_unaligned_le32(const void *p) -{ - return __get_unaligned_cpu32((const u8 *)p); -} - -static inline u64 get_unaligned_le64(const void *p) -{ - return __get_unaligned_cpu64((const u8 *)p); -} - -static inline void put_unaligned_le16(u16 val, void *p) -{ - __put_unaligned_cpu16(val, p); -} - -static inline void put_unaligned_le32(u32 val, void *p) -{ - __put_unaligned_cpu32(val, p); -} - -static inline void put_unaligned_le64(u64 val, void *p) -{ - __put_unaligned_cpu64(val, p); -} - -static inline u16 get_unaligned_be16(const void *p) -{ - return swab16(__get_unaligned_cpu16((const u8 *)p)); -} - -static inline u32 get_unaligned_be32(const void *p) -{ - return swab32(__get_unaligned_cpu32((const u8 *)p)); -} - -static inline u64 get_unaligned_be64(const void *p) -{ - return swab64(__get_unaligned_cpu64((const u8 *)p)); -} - -static inline void put_unaligned_be16(u16 val, void *p) -{ - __put_unaligned_cpu16(swab16(val), p); -} - -static inline void put_unaligned_be32(u32 val, void *p) -{ - __put_unaligned_cpu32(swab32(val), p); -} - -static inline void put_unaligned_be64(u64 val, void *p) -{ - __put_unaligned_cpu64(swab64(val), p); -} - -#endif /* 
_LINUX_UNALIGNED_LE_STRUCT_H */