Merge branch 'runtime-constants'

Merge runtime constants infrastructure with implementations for x86 and
arm64.

This is one of four branches that came out of me looking at profiles of
my kernel build filesystem load on my 128-core Altra arm64 system, where
pathname walking and the user copies (particularly strncpy_from_user()
for fetching the pathname from user space) are very hot.

This is a very specialized "instruction alternatives" model where the
dentry hash pointer and hash count will be constants for the lifetime of
the kernel, but the allocations are not static but done early during the
kernel boot.  In order to avoid the pointer load and dynamic shift, we
just rewrite the constants in the instructions in place.

We can't use the "generic" alternative instructions infrastructure,
because different architectures do it very differently, and it's
actually simpler to just have very specific helpers, with a fallback to
the generic ("old") model of just using variables for architectures that
do not implement the runtime constant patching infrastructure.

Link: https://lore.kernel.org/all/CAHk-=widPe38fUNjUOmX11ByDckaeEo9tN4Eiyke9u1SAtu9sA@mail.gmail.com/

* runtime-constants:
  arm64: add 'runtime constant' support
  runtime constants: add x86 architecture support
  runtime constants: add default dummy infrastructure
  vfs: dcache: move hashlen_hash() from callers into d_hash()
This commit is contained in:
Linus Torvalds 2024-07-15 08:36:13 -07:00
commit a5819099f6
8 changed files with 192 additions and 4 deletions

View File

@ -0,0 +1,88 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_RUNTIME_CONST_H
#define _ASM_RUNTIME_CONST_H
#include <asm/cacheflush.h>
/* Sigh. You can still run arm64 in BE mode */
#include <asm/byteorder.h>
/*
 * runtime_const_ptr(sym) - produce a pointer-sized constant from patchable code.
 *
 * Emits one movz followed by three movk instructions that assemble a 64-bit
 * value 16 bits at a time. As compiled, the four immediates spell out the
 * placeholder 0x0123456789abcdef.
 *
 * The address of the first instruction (local label 1) is recorded as a
 * 32-bit self-relative offset ("1b - .") in the section runtime_ptr_<sym>.
 * runtime_const_init(ptr, sym) walks that section and rewrites the four
 * immediates with the real value.
 *
 * sym is used only for its type (the result is typeof(sym)) and to name the
 * section; the variable itself is not read by this macro.
 */
#define runtime_const_ptr(sym) ({ \
typeof(sym) __ret; \
asm_inline("1:\t" \
"movz %0, #0xcdef\n\t" \
"movk %0, #0x89ab, lsl #16\n\t" \
"movk %0, #0x4567, lsl #32\n\t" \
"movk %0, #0x0123, lsl #48\n\t" \
".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \
".long 1b - .\n\t" \
".popsection" \
:"=r" (__ret)); \
__ret; })
/*
 * runtime_const_shift_right_32(val, sym) - shift a 32-bit value right by a
 * patchable amount.
 *
 * "0u+(val)" converts val to an unsigned type that is at least as wide as
 * unsigned int; the lsr uses the %w (32-bit) form of both registers, so only
 * the low 32 bits of the input take part in the shift.
 *
 * The shift count of 12 is a placeholder. The instruction's address (local
 * label 1) is recorded as a 32-bit self-relative offset in the section
 * runtime_shift_<sym>, which runtime_const_init(shift, sym) uses to patch in
 * the real count.
 *
 * The result is returned as an unsigned long.
 */
#define runtime_const_shift_right_32(val, sym) ({ \
unsigned long __ret; \
asm_inline("1:\t" \
"lsr %w0,%w1,#12\n\t" \
".pushsection runtime_shift_" #sym ",\"a\"\n\t" \
".long 1b - .\n\t" \
".popsection" \
:"=r" (__ret) \
:"r" (0u+(val))); \
__ret; })
/*
 * runtime_const_init(type, sym) - patch every recorded use of a runtime
 * constant with the current value of sym.
 *
 * type selects both the fixup routine (__runtime_fixup_<type>) and the
 * table of recorded locations, which is delimited by the symbols
 * __start_runtime_<type>_<sym> and __stop_runtime_<type>_<sym>.
 * sym is evaluated once and passed on as an unsigned long.
 */
#define runtime_const_init(type, sym) do { \
	extern s32 __start_runtime_##type##_##sym[]; \
	extern s32 __stop_runtime_##type##_##sym[]; \
	s32 *__rc_first = __start_runtime_##type##_##sym; \
	s32 *__rc_last = __stop_runtime_##type##_##sym; \
	\
	runtime_const_fixup(__runtime_fixup_##type, \
			    (unsigned long)(sym), \
			    __rc_first, __rc_last); \
} while (0)
/*
 * Replace the 16-bit immediate of a wide move (movz/movk) instruction.
 *
 * The immediate lives in bits 5..20 of the instruction word. Only the low
 * 16 bits of val are used; every other bit of the instruction is preserved.
 * The instruction is read and written as a little-endian 32-bit word.
 */
static inline void __runtime_fixup_16(__le32 *p, unsigned int val)
{
	const u32 imm16 = (val & 0xffff) << 5;
	u32 insn = le32_to_cpu(*p);

	*p = cpu_to_le32((insn & 0xffe0001f) | imm16);
}
/*
 * Run caches_clean_inval_pou() over the patched instructions: the range
 * starts at where and covers insns instructions of 4 bytes each.
 */
static inline void __runtime_fixup_caches(void *where, unsigned int insns)
{
	unsigned long first = (unsigned long)where;
	unsigned long limit = first + 4*insns;

	caches_clean_inval_pou(first, limit);
}
/*
 * Patch the four consecutive wide-move instructions emitted by
 * runtime_const_ptr() so that they load val.
 *
 * Instruction i receives bits [16*i, 16*i + 15] of val. The writes go
 * through the lm_alias() of where, while the cache maintenance afterwards
 * is done on where itself.
 */
static inline void __runtime_fixup_ptr(void *where, unsigned long val)
{
	__le32 *insn = lm_alias(where);
	unsigned int i;

	for (i = 0; i < 4; i++)
		__runtime_fixup_16(insn + i, val >> (16 * i));

	__runtime_fixup_caches(where, 4);
}
/*
 * Patch the shift count of the instruction emitted by
 * runtime_const_shift_right_32().
 *
 * The count is a 6-bit field starting at bit 16 of the instruction word;
 * only the low 6 bits of val are used and all other bits are preserved.
 * The write goes through the lm_alias() of where, while the cache
 * maintenance afterwards is done on where itself.
 */
static inline void __runtime_fixup_shift(void *where, unsigned long val)
{
	__le32 *slot = lm_alias(where);
	u32 shift_field = (val & 63) << 16;
	u32 insn = le32_to_cpu(*slot);

	*slot = cpu_to_le32((insn & 0xffc0ffff) | shift_field);
	__runtime_fixup_caches(where, 1);
}
/*
 * Apply fn to every location recorded in the table [start, end).
 *
 * Each table entry is a signed 32-bit offset relative to the entry's own
 * address; adding the two yields the location that is handed to fn together
 * with val.
 */
static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
	unsigned long val, s32 *start, s32 *end)
{
	s32 *entry;

	for (entry = start; entry < end; entry++)
		fn((void *)entry + *entry, val);
}
#endif

View File

@ -264,6 +264,9 @@ SECTIONS
EXIT_DATA
}
RUNTIME_CONST(shift, d_hash_shift)
RUNTIME_CONST(ptr, dentry_hashtable)
PERCPU_SECTION(L1_CACHE_BYTES)
HYPERVISOR_PERCPU_SECTION

View File

@ -0,0 +1,61 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_RUNTIME_CONST_H
#define _ASM_RUNTIME_CONST_H
/*
 * runtime_const_ptr(sym) - produce a pointer-sized constant from patchable code.
 *
 * Emits a mov of an immediate into a register; as compiled the immediate is
 * the placeholder 0x0123456789abcdef. Local label 1 is placed directly after
 * the instruction, and the entry stored in section runtime_ptr_<sym> is the
 * 32-bit self-relative offset of "1b - sizeof(long)" (%c2 prints operand 2
 * as a bare constant). The entry therefore refers to the last sizeof(long)
 * bytes of the instruction, which is where __runtime_fixup_ptr() stores the
 * real value.
 *
 * sym is used only for its type (the result is typeof(sym)) and to name the
 * section; the variable itself is not read by this macro.
 */
#define runtime_const_ptr(sym) ({ \
typeof(sym) __ret; \
asm_inline("mov %1,%0\n1:\n" \
".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \
".long 1b - %c2 - .\n\t" \
".popsection" \
:"=r" (__ret) \
:"i" ((unsigned long)0x0123456789abcdefull), \
"i" (sizeof(long))); \
__ret; })
/*
 * runtime_const_shift_right_32(val, sym) - shift a 32-bit value right by a
 * patchable amount.
 *
 * The 'typeof' will create at _least_ a 32-bit type, but will happily also
 * take a bigger type, and the 'shrl' (applied to the 32-bit %k view of the
 * register) will clear the upper bits.
 *
 * The shift count of 12 is a placeholder. Local label 1 is placed directly
 * after the instruction, and the entry stored in section runtime_shift_<sym>
 * is the 32-bit self-relative offset of "1b - 1", i.e. the final byte of the
 * instruction, which __runtime_fixup_shift() overwrites with the real count.
 */
#define runtime_const_shift_right_32(val, sym) ({ \
typeof(0u+(val)) __ret = (val); \
asm_inline("shrl $12,%k0\n1:\n" \
".pushsection runtime_shift_" #sym ",\"a\"\n\t" \
".long 1b - 1 - .\n\t" \
".popsection" \
:"+r" (__ret)); \
__ret; })
/*
 * runtime_const_init(type, sym) - patch every recorded use of a runtime
 * constant with the current value of sym.
 *
 * type selects both the fixup routine (__runtime_fixup_<type>) and the
 * table of recorded locations, which is delimited by the symbols
 * __start_runtime_<type>_<sym> and __stop_runtime_<type>_<sym>.
 * sym is evaluated once and passed on as an unsigned long.
 */
#define runtime_const_init(type, sym) do { \
	extern s32 __start_runtime_##type##_##sym[]; \
	extern s32 __stop_runtime_##type##_##sym[]; \
	s32 *__rc_first = __start_runtime_##type##_##sym; \
	s32 *__rc_last = __stop_runtime_##type##_##sym; \
	\
	runtime_const_fixup(__runtime_fixup_##type, \
			    (unsigned long)(sym), \
			    __rc_first, __rc_last); \
} while (0)
/*
 * Patching the text is a plain store here. That is only valid at init
 * time: the text section must not have been marked read-only yet, and the
 * patched code must never have been executed.
 *
 * Stores val as an unsigned long at where.
 */
static inline void __runtime_fixup_ptr(void *where, unsigned long val)
{
	unsigned long *imm = where;

	*imm = val;
}
/*
 * Store the low byte of val at where, replacing the one-byte shift count
 * recorded by runtime_const_shift_right_32().
 */
static inline void __runtime_fixup_shift(void *where, unsigned long val)
{
	unsigned char *imm8 = where;

	*imm8 = val;
}
/*
 * Apply fn to every location recorded in the table [start, end).
 *
 * Each table entry is a signed 32-bit offset relative to the entry's own
 * address; adding the two yields the location that is handed to fn together
 * with val.
 */
static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
	unsigned long val, s32 *start, s32 *end)
{
	s32 *entry;

	for (entry = start; entry < end; entry++)
		fn((void *)entry + *entry, val);
}
#endif

View File

@ -357,6 +357,9 @@ SECTIONS
PERCPU_SECTION(INTERNODE_CACHE_BYTES)
#endif
RUNTIME_CONST(shift, d_hash_shift)
RUNTIME_CONST(ptr, dentry_hashtable)
. = ALIGN(PAGE_SIZE);
/* freed after init ends here */

View File

@ -35,6 +35,8 @@
#include "internal.h"
#include "mount.h"
#include <asm/runtime-const.h>
/*
* Usage:
* dcache->d_inode->i_lock protects:
@ -100,9 +102,10 @@ static unsigned int d_hash_shift __ro_after_init;
static struct hlist_bl_head *dentry_hashtable __ro_after_init;
static inline struct hlist_bl_head *d_hash(unsigned int hash)
static inline struct hlist_bl_head *d_hash(unsigned long hashlen)
{
return dentry_hashtable + (hash >> d_hash_shift);
return runtime_const_ptr(dentry_hashtable) +
runtime_const_shift_right_32(hashlen, d_hash_shift);
}
#define IN_LOOKUP_SHIFT 10
@ -2110,7 +2113,7 @@ static noinline struct dentry *__d_lookup_rcu_op_compare(
unsigned *seqp)
{
u64 hashlen = name->hash_len;
struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen));
struct hlist_bl_head *b = d_hash(hashlen);
struct hlist_bl_node *node;
struct dentry *dentry;
@ -2177,7 +2180,7 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent,
{
u64 hashlen = name->hash_len;
const unsigned char *str = name->name;
struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen));
struct hlist_bl_head *b = d_hash(hashlen);
struct hlist_bl_node *node;
struct dentry *dentry;
@ -3132,6 +3135,9 @@ static void __init dcache_init_early(void)
0,
0);
d_hash_shift = 32 - d_hash_shift;
runtime_const_init(shift, d_hash_shift);
runtime_const_init(ptr, dentry_hashtable);
}
static void __init dcache_init(void)
@ -3160,6 +3166,9 @@ static void __init dcache_init(void)
0,
0);
d_hash_shift = 32 - d_hash_shift;
runtime_const_init(shift, d_hash_shift);
runtime_const_init(ptr, dentry_hashtable);
}
/* SLAB cache for __getname() consumers */

View File

@ -46,6 +46,7 @@ mandatory-y += pci.h
mandatory-y += percpu.h
mandatory-y += pgalloc.h
mandatory-y += preempt.h
mandatory-y += runtime-const.h
mandatory-y += rwonce.h
mandatory-y += sections.h
mandatory-y += serial.h

View File

@ -0,0 +1,15 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_RUNTIME_CONST_H
#define _ASM_RUNTIME_CONST_H
/*
 * Generic fallback for architectures that do not implement the
 * runtime-constant operations: nothing is patched, the underlying
 * symbols are simply used as they are.
 */

/* The "constant" pointer is just the variable itself. */
#define runtime_const_ptr(sym)			(sym)

/* Truncate val to 32 bits, then shift right by the current value of sym. */
#define runtime_const_shift_right_32(val, sym)	(((u32)(val)) >> (sym))

/* There is nothing to patch, so initialisation is a no-op. */
#define runtime_const_init(type, sym)		do { } while (0)
#endif

View File

@ -944,6 +944,14 @@
#define CON_INITCALL \
BOUNDED_SECTION_POST_LABEL(.con_initcall.init, __con_initcall, _start, _end)
/*
 * RUNTIME_NAME(t,x) builds the section name runtime_<t>_<x>, matching the
 * names used by the runtime_const_*() macros' .pushsection directives.
 *
 * RUNTIME_CONST(t,x) aligns the location counter to 8 bytes and emits an
 * output section of that name which collects all input sections with the
 * same name. Its load address is its virtual address minus LOAD_OFFSET.
 *
 * NOTE(review): the __start_runtime_<t>_<x> / __stop_runtime_<t>_<x> symbols
 * that runtime_const_init() references are not defined here; presumably the
 * linker provides them automatically for this section name - confirm.
 */
#define RUNTIME_NAME(t,x) runtime_##t##_##x
#define RUNTIME_CONST(t,x) \
. = ALIGN(8); \
RUNTIME_NAME(t,x) : AT(ADDR(RUNTIME_NAME(t,x)) - LOAD_OFFSET) { \
*(RUNTIME_NAME(t,x)); \
}
/* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */
#define KUNIT_TABLE() \
. = ALIGN(8); \