mirror of
https://github.com/torvalds/linux.git
synced 2024-11-21 19:41:42 +00:00
Merge branch 'runtime-constants'
Merge runtime constants infrastructure with implementations for x86 and arm64. This is one of four branches that came out of me looking at profiles of my kernel build filesystem load on my 128-core Altra arm64 system, where pathname walking and the user copies (particularly strncpy_from_user() for fetching the pathname from user space) are very hot. This is a very specialized "instruction alternatives" model where the dentry hash pointer and hash count will be constants for the lifetime of the kernel, but the allocations are not static — they are done early during kernel boot. In order to avoid the pointer load and dynamic shift, we just rewrite the constants in the instructions in place. We can't use the "generic" alternative instructions infrastructure, because different architectures do it very differently, and it's actually simpler to just have very specific helpers, with a fallback to the generic ("old") model of just using variables for architectures that do not implement the runtime constant patching infrastructure. Link: https://lore.kernel.org/all/CAHk-=widPe38fUNjUOmX11ByDckaeEo9tN4Eiyke9u1SAtu9sA@mail.gmail.com/ * runtime-constants: arm64: add 'runtime constant' support runtime constants: add x86 architecture support runtime constants: add default dummy infrastructure vfs: dcache: move hashlen_hash() from callers into d_hash()
This commit is contained in:
commit
a5819099f6
88
arch/arm64/include/asm/runtime-const.h
Normal file
88
arch/arm64/include/asm/runtime-const.h
Normal file
@ -0,0 +1,88 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _ASM_RUNTIME_CONST_H
|
||||
#define _ASM_RUNTIME_CONST_H
|
||||
|
||||
#include <asm/cacheflush.h>
|
||||
|
||||
/* Sigh. You can still run arm64 in BE mode */
|
||||
#include <asm/byteorder.h>
|
||||
|
||||
/*
 * Load a "runtime constant" pointer value.
 *
 * Emits a movz/movk sequence with recognizable dummy immediates
 * (0x0123456789abcdef, 16 bits at a time) that is rewritten at boot
 * by __runtime_fixup_ptr().  The address of the sequence is recorded
 * in the "runtime_ptr_<sym>" section as a PC-relative offset
 * (".long 1b - .") so the boot-time fixup code can find it.
 */
#define runtime_const_ptr(sym) ({				\
	typeof(sym) __ret;					\
	asm_inline("1:\t"					\
		"movz %0, #0xcdef\n\t"				\
		"movk %0, #0x89ab, lsl #16\n\t"			\
		"movk %0, #0x4567, lsl #32\n\t"			\
		"movk %0, #0x0123, lsl #48\n\t"			\
		".pushsection runtime_ptr_" #sym ",\"a\"\n\t"	\
		".long 1b - .\n\t"				\
		".popsection"					\
		:"=r" (__ret));					\
	__ret; })
|
||||
|
||||
/*
 * Shift the low 32 bits of 'val' right by a "runtime constant" count.
 *
 * The "#12" shift amount is only a placeholder: the instruction is
 * patched at boot by __runtime_fixup_shift() with the real value of
 * 'sym'.  The instruction address is recorded in the
 * "runtime_shift_<sym>" section as a PC-relative offset.
 */
#define runtime_const_shift_right_32(val, sym) ({		\
	unsigned long __ret;					\
	asm_inline("1:\t"					\
		"lsr %w0,%w1,#12\n\t"				\
		".pushsection runtime_shift_" #sym ",\"a\"\n\t"	\
		".long 1b - .\n\t"				\
		".popsection"					\
		:"=r" (__ret)					\
		:"r" (0u+(val)));				\
	__ret; })
|
||||
|
||||
/*
 * Patch every recorded use of runtime constant 'sym' with its actual
 * boot-time value.  The linker script (RUNTIME_CONST()) collects the
 * per-use offsets into the __start/__stop_runtime_<type>_<sym> bounds
 * walked by runtime_const_fixup().
 */
#define runtime_const_init(type, sym) do {		\
	extern s32 __start_runtime_##type##_##sym[];	\
	extern s32 __stop_runtime_##type##_##sym[];	\
	runtime_const_fixup(__runtime_fixup_##type,	\
			    (unsigned long)(sym),	\
			    __start_runtime_##type##_##sym,	\
			    __stop_runtime_##type##_##sym);	\
} while (0)
|
||||
|
||||
/* 16-bit immediate for wide move (movz and movk) in bits 5..20 */
|
||||
static inline void __runtime_fixup_16(__le32 *p, unsigned int val)
|
||||
{
|
||||
u32 insn = le32_to_cpu(*p);
|
||||
insn &= 0xffe0001f;
|
||||
insn |= (val & 0xffff) << 5;
|
||||
*p = cpu_to_le32(insn);
|
||||
}
|
||||
|
||||
/*
 * Clean/invalidate the caches over 'insns' patched 4-byte
 * instructions so the new encoding is picked up for execution.
 */
static inline void __runtime_fixup_caches(void *where, unsigned int insns)
{
	unsigned long start = (unsigned long)where;

	caches_clean_inval_pou(start, start + insns * 4);
}
|
||||
|
||||
static inline void __runtime_fixup_ptr(void *where, unsigned long val)
|
||||
{
|
||||
__le32 *p = lm_alias(where);
|
||||
__runtime_fixup_16(p, val);
|
||||
__runtime_fixup_16(p+1, val >> 16);
|
||||
__runtime_fixup_16(p+2, val >> 32);
|
||||
__runtime_fixup_16(p+3, val >> 48);
|
||||
__runtime_fixup_caches(where, 4);
|
||||
}
|
||||
|
||||
/* Immediate value is 6 bits starting at bit #16 */
|
||||
static inline void __runtime_fixup_shift(void *where, unsigned long val)
|
||||
{
|
||||
__le32 *p = lm_alias(where);
|
||||
u32 insn = le32_to_cpu(*p);
|
||||
insn &= 0xffc0ffff;
|
||||
insn |= (val & 63) << 16;
|
||||
*p = cpu_to_le32(insn);
|
||||
__runtime_fixup_caches(where, 1);
|
||||
}
|
||||
|
||||
/*
 * Apply 'fn' to every recorded instruction location for one runtime
 * constant.  Each s32 entry holds the offset from its own address to
 * the instruction that needs patching.
 */
static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
				       unsigned long val, s32 *start, s32 *end)
{
	s32 *entry;

	for (entry = start; entry < end; entry++)
		fn((void *)entry + *entry, val);
}
|
||||
|
||||
#endif
|
@ -264,6 +264,9 @@ SECTIONS
|
||||
EXIT_DATA
|
||||
}
|
||||
|
||||
RUNTIME_CONST(shift, d_hash_shift)
|
||||
RUNTIME_CONST(ptr, dentry_hashtable)
|
||||
|
||||
PERCPU_SECTION(L1_CACHE_BYTES)
|
||||
HYPERVISOR_PERCPU_SECTION
|
||||
|
||||
|
61
arch/x86/include/asm/runtime-const.h
Normal file
61
arch/x86/include/asm/runtime-const.h
Normal file
@ -0,0 +1,61 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _ASM_RUNTIME_CONST_H
|
||||
#define _ASM_RUNTIME_CONST_H
|
||||
|
||||
/*
 * Load a "runtime constant" pointer value on x86-64.
 *
 * Emits "mov $imm64,%reg" with a recognizable dummy immediate
 * (0x0123456789abcdef).  The address of the immediate itself — the
 * end of the instruction (label 1) minus sizeof(long) — is recorded
 * in the "runtime_ptr_<sym>" section as a PC-relative offset for
 * boot-time patching by __runtime_fixup_ptr().
 */
#define runtime_const_ptr(sym) ({				\
	typeof(sym) __ret;					\
	asm_inline("mov %1,%0\n1:\n"				\
		".pushsection runtime_ptr_" #sym ",\"a\"\n\t"	\
		".long 1b - %c2 - .\n\t"			\
		".popsection"					\
		:"=r" (__ret)					\
		:"i" ((unsigned long)0x0123456789abcdefull),	\
		 "i" (sizeof(long)));				\
	__ret; })
|
||||
|
||||
// The 'typeof' will create at _least_ a 32-bit type, but
// will happily also take a bigger type and the 'shrl' will
// clear the upper bits
//
// The "$12" shift count is a placeholder: its immediate byte sits
// one byte before label 1, so "1b - 1 - ." is recorded in the
// "runtime_shift_<sym>" section for __runtime_fixup_shift() to patch.
#define runtime_const_shift_right_32(val, sym) ({		\
	typeof(0u+(val)) __ret = (val);				\
	asm_inline("shrl $12,%k0\n1:\n"				\
		".pushsection runtime_shift_" #sym ",\"a\"\n\t"	\
		".long 1b - 1 - .\n\t"				\
		".popsection"					\
		:"+r" (__ret));					\
	__ret; })
|
||||
|
||||
/*
 * Patch every recorded use of runtime constant 'sym' with its actual
 * value.  The __start/__stop section bounds are produced by the
 * RUNTIME_CONST() entries in the linker script.
 */
#define runtime_const_init(type, sym) do {		\
	extern s32 __start_runtime_##type##_##sym[];	\
	extern s32 __stop_runtime_##type##_##sym[];	\
	runtime_const_fixup(__runtime_fixup_##type,	\
			    (unsigned long)(sym),	\
			    __start_runtime_##type##_##sym,	\
			    __stop_runtime_##type##_##sym);	\
} while (0)
|
||||
|
||||
/*
 * The text patching is trivial - you can only do this at init time,
 * when the text section hasn't been marked RO, and before the text
 * has ever been executed.
 */
static inline void __runtime_fixup_ptr(void *where, unsigned long val)
{
	unsigned long *imm = where;

	/* Overwrite the dummy 64-bit mov immediate in place. */
	*imm = val;
}
|
||||
|
||||
/* Overwrite the one-byte shift-count immediate with the low byte of val. */
static inline void __runtime_fixup_shift(void *where, unsigned long val)
{
	unsigned char *imm = where;

	*imm = val;
}
|
||||
|
||||
/*
 * Apply 'fn' to every recorded patch location for one runtime
 * constant.  Each s32 entry holds the offset from its own address to
 * the spot in the text that needs patching.
 */
static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
				       unsigned long val, s32 *start, s32 *end)
{
	s32 *entry;

	for (entry = start; entry < end; entry++)
		fn((void *)entry + *entry, val);
}
|
||||
|
||||
#endif
|
@ -357,6 +357,9 @@ SECTIONS
|
||||
PERCPU_SECTION(INTERNODE_CACHE_BYTES)
|
||||
#endif
|
||||
|
||||
RUNTIME_CONST(shift, d_hash_shift)
|
||||
RUNTIME_CONST(ptr, dentry_hashtable)
|
||||
|
||||
. = ALIGN(PAGE_SIZE);
|
||||
|
||||
/* freed after init ends here */
|
||||
|
17
fs/dcache.c
17
fs/dcache.c
@ -35,6 +35,8 @@
|
||||
#include "internal.h"
|
||||
#include "mount.h"
|
||||
|
||||
#include <asm/runtime-const.h>
|
||||
|
||||
/*
|
||||
* Usage:
|
||||
* dcache->d_inode->i_lock protects:
|
||||
@ -100,9 +102,10 @@ static unsigned int d_hash_shift __ro_after_init;
|
||||
|
||||
static struct hlist_bl_head *dentry_hashtable __ro_after_init;
|
||||
|
||||
static inline struct hlist_bl_head *d_hash(unsigned int hash)
|
||||
static inline struct hlist_bl_head *d_hash(unsigned long hashlen)
|
||||
{
|
||||
return dentry_hashtable + (hash >> d_hash_shift);
|
||||
return runtime_const_ptr(dentry_hashtable) +
|
||||
runtime_const_shift_right_32(hashlen, d_hash_shift);
|
||||
}
|
||||
|
||||
#define IN_LOOKUP_SHIFT 10
|
||||
@ -2110,7 +2113,7 @@ static noinline struct dentry *__d_lookup_rcu_op_compare(
|
||||
unsigned *seqp)
|
||||
{
|
||||
u64 hashlen = name->hash_len;
|
||||
struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen));
|
||||
struct hlist_bl_head *b = d_hash(hashlen);
|
||||
struct hlist_bl_node *node;
|
||||
struct dentry *dentry;
|
||||
|
||||
@ -2177,7 +2180,7 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent,
|
||||
{
|
||||
u64 hashlen = name->hash_len;
|
||||
const unsigned char *str = name->name;
|
||||
struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen));
|
||||
struct hlist_bl_head *b = d_hash(hashlen);
|
||||
struct hlist_bl_node *node;
|
||||
struct dentry *dentry;
|
||||
|
||||
@ -3132,6 +3135,9 @@ static void __init dcache_init_early(void)
|
||||
0,
|
||||
0);
|
||||
d_hash_shift = 32 - d_hash_shift;
|
||||
|
||||
runtime_const_init(shift, d_hash_shift);
|
||||
runtime_const_init(ptr, dentry_hashtable);
|
||||
}
|
||||
|
||||
static void __init dcache_init(void)
|
||||
@ -3160,6 +3166,9 @@ static void __init dcache_init(void)
|
||||
0,
|
||||
0);
|
||||
d_hash_shift = 32 - d_hash_shift;
|
||||
|
||||
runtime_const_init(shift, d_hash_shift);
|
||||
runtime_const_init(ptr, dentry_hashtable);
|
||||
}
|
||||
|
||||
/* SLAB cache for __getname() consumers */
|
||||
|
@ -46,6 +46,7 @@ mandatory-y += pci.h
|
||||
mandatory-y += percpu.h
|
||||
mandatory-y += pgalloc.h
|
||||
mandatory-y += preempt.h
|
||||
mandatory-y += runtime-const.h
|
||||
mandatory-y += rwonce.h
|
||||
mandatory-y += sections.h
|
||||
mandatory-y += serial.h
|
||||
|
15
include/asm-generic/runtime-const.h
Normal file
15
include/asm-generic/runtime-const.h
Normal file
@ -0,0 +1,15 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_RUNTIME_CONST_H
#define _ASM_RUNTIME_CONST_H

/*
 * This is the fallback for when the architecture doesn't
 * support the runtime const operations.
 *
 * We just use the actual symbols as-is.
 */
/* Plain symbol access instead of a patched immediate. */
#define runtime_const_ptr(sym) (sym)
/* Plain variable shift on the low 32 bits of val. */
#define runtime_const_shift_right_32(val, sym) ((u32)(val)>>(sym))
/* Nothing to patch, so initialization is a no-op. */
#define runtime_const_init(type,sym) do { } while (0)

#endif
|
@ -944,6 +944,14 @@
|
||||
#define CON_INITCALL \
|
||||
BOUNDED_SECTION_POST_LABEL(.con_initcall.init, __con_initcall, _start, _end)
|
||||
|
||||
/* Section name for one runtime constant: "runtime_<type>_<sym>". */
#define RUNTIME_NAME(t,x) runtime_##t##_##x

/*
 * Emit the offset-table section for one runtime constant (see
 * <asm/runtime-const.h>).  The section name is a valid C identifier,
 * so the linker provides __start_/__stop_ bound symbols that
 * runtime_const_init() walks at boot to patch each recorded use.
 */
#define RUNTIME_CONST(t,x)	\
	. = ALIGN(8);		\
	RUNTIME_NAME(t,x) : AT(ADDR(RUNTIME_NAME(t,x)) - LOAD_OFFSET) {	\
		*(RUNTIME_NAME(t,x));					\
	}
|
||||
|
||||
/* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */
|
||||
#define KUNIT_TABLE() \
|
||||
. = ALIGN(8); \
|
||||
|
Loading…
Reference in New Issue
Block a user