bpf, arm64: Inline bpf_get_current_task/_btf() helpers

On ARM64, the pointer to the current task_struct is always available in the
sp_el0 register, and therefore the calls to bpf_get_current_task() and
bpf_get_current_task_btf() can each be inlined into a single MRS instruction.

Here is the difference before and after this change:

Before:

; struct task_struct *task = bpf_get_current_task_btf();
  54:   mov     x10, #0xffffffffffff7978        // #-34440
  58:   movk    x10, #0x802b, lsl #16
  5c:   movk    x10, #0x8000, lsl #32
  60:   blr     x10          -------------->    0xffff8000802b7978 <+0>:     mrs     x0, sp_el0
  64:   add     x7, x0, #0x0 <--------------    0xffff8000802b797c <+4>:     ret

After:

; struct task_struct *task = bpf_get_current_task_btf();
  54:   mrs     x7, sp_el0

This shows around a 1% performance improvement in an artificial microbenchmark.

Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Xu Kuohai <xukuohai@huawei.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20240619131334.4297-1-puranjay@kernel.org
This commit is contained in:
Puranjay Mohan 2024-06-19 13:13:34 +00:00 committed by Andrii Nakryiko
parent 2807db78ab
commit 2bb138cb20

View File

@ -1244,6 +1244,13 @@ emit_cond_jmp:
break;
}
/* Implement helper call to bpf_get_current_task/_btf() inline */
if (insn->src_reg == 0 && (insn->imm == BPF_FUNC_get_current_task ||
insn->imm == BPF_FUNC_get_current_task_btf)) {
emit(A64_MRS_SP_EL0(r0), ctx);
break;
}
ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
&func_addr, &func_addr_fixed);
if (ret < 0)
@ -2580,6 +2587,8 @@ bool bpf_jit_inlines_helper_call(s32 imm)
{
switch (imm) {
case BPF_FUNC_get_smp_processor_id:
case BPF_FUNC_get_current_task:
case BPF_FUNC_get_current_task_btf:
return true;
default:
return false;