When a static call is updated with __static_call_return0() as target, arch_static_call_transform() set it to use an optimised set of instructions which are meant to lay in the same cacheline. But when initialising a static call with DEFINE_STATIC_CALL_RET0(), we get a branch to the real __static_call_return0() function instead of getting the optimised setup: c00d8120 <__SCT__perf_snapshot_branch_stack>: c00d8120: 4b ff ff f4 b c00d8114 <__static_call_return0> c00d8124: 3d 80 c0 0e lis r12,-16370 c00d8128: 81 8c 81 3c lwz r12,-32452(r12) c00d812c: 7d 89 03 a6 mtctr r12 c00d8130: 4e 80 04 20 bctr c00d8134: 38 60 00 00 li r3,0 c00d8138: 4e 80 00 20 blr c00d813c: 00 00 00 00 .long 0x0 Add ARCH_DEFINE_STATIC_CALL_RET0_TRAMP() defined by each architecture to setup the optimised configuration, and rework DEFINE_STATIC_CALL_RET0() to call it: c00d8120 <__SCT__perf_snapshot_branch_stack>: c00d8120: 48 00 00 14 b c00d8134 <__SCT__perf_snapshot_branch_stack+0x14> c00d8124: 3d 80 c0 0e lis r12,-16370 c00d8128: 81 8c 81 3c lwz r12,-32452(r12) c00d812c: 7d 89 03 a6 mtctr r12 c00d8130: 4e 80 04 20 bctr c00d8134: 38 60 00 00 li r3,0 c00d8138: 4e 80 00 20 blr c00d813c: 00 00 00 00 .long 0x0 Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Josh Poimboeuf <jpoimboe@redhat.com> Link: https://lore.kernel.org/r/1e0a61a88f52a460f62a58ffc2a5f847d1f7d9d8.1647253456.git.christophe.leroy@csgroup.eu
51 lines
2.0 KiB
C
51 lines
2.0 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_STATIC_CALL_H
#define _ASM_STATIC_CALL_H

#include <asm/text-patching.h>

/*
 * For CONFIG_HAVE_STATIC_CALL_INLINE, this is a temporary trampoline which
 * uses the current value of the key->func pointer to do an indirect jump to
 * the function. This trampoline is only used during boot, before the call
 * sites get patched by static_call_update(). The name of this trampoline has
 * a magical aspect: objtool uses it to find static call sites so it can create
 * the .static_call_sites section.
 *
 * For CONFIG_HAVE_STATIC_CALL, this is a permanent trampoline which
 * does a direct jump to the function. The direct jump gets patched by
 * static_call_update().
 *
 * Having the trampoline in a special section forces GCC to emit a JMP.d32 when
 * it does tail-call optimization on the call; since you cannot compute the
 * relative displacement across sections.
 */

#define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \
|
|
asm(".pushsection .static_call.text, \"ax\" \n" \
|
|
".align 4 \n" \
|
|
".globl " STATIC_CALL_TRAMP_STR(name) " \n" \
|
|
STATIC_CALL_TRAMP_STR(name) ": \n" \
|
|
insns " \n" \
|
|
".byte 0x53, 0x43, 0x54 \n" \
|
|
".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
|
|
".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
|
|
".popsection \n")
|
|
|
|
#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \
|
|
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")
|
|
|
|
#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
|
|
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop")
|
|
|
|
#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \
|
|
ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0)
|
|
|
|
#define ARCH_ADD_TRAMP_KEY(name) \
|
|
asm(".pushsection .static_call_tramp_key, \"a\" \n" \
|
|
".long " STATIC_CALL_TRAMP_STR(name) " - . \n" \
|
|
".long " STATIC_CALL_KEY_STR(name) " - . \n" \
|
|
".popsection \n")
|
|
|
|
#endif /* _ASM_STATIC_CALL_H */