2014-08-27 04:15:30 +00:00
|
|
|
/*
|
|
|
|
* BPF JIT compiler for ARM64
|
|
|
|
*
|
2016-01-14 07:33:22 +00:00
|
|
|
* Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com>
|
2014-08-27 04:15:30 +00:00
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
|
|
* published by the Free Software Foundation.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Prefix applied to the format string of every pr_*() message in this file. */
#define pr_fmt(fmt) "bpf_jit: " fmt
|
|
|
|
|
arm64: bpf: implement bpf_tail_call() helper
Add support for JMP_CALL_X (tail call) introduced by commit 04fd61ab36ec
("bpf: allow bpf programs to tail-call other bpf programs").
bpf_tail_call() arguments:
ctx - context pointer passed to next program
array - pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
index - index inside array that selects specific program to run
In this implementation arm64 JIT jumps into callee program after prologue,
so callee program reuses the same stack. For tail_call_cnt, we use the
callee-saved R26 (which was already saved/restored but previously unused
by JIT).
With this patch a tail call generates the following code on arm64:
if (index >= array->map.max_entries)
goto out;
34: mov x10, #0x10 // #16
38: ldr w10, [x1,x10]
3c: cmp w2, w10
40: b.ge 0x0000000000000074
if (tail_call_cnt > MAX_TAIL_CALL_CNT)
goto out;
tail_call_cnt++;
44: mov x10, #0x20 // #32
48: cmp x26, x10
4c: b.gt 0x0000000000000074
50: add x26, x26, #0x1
prog = array->ptrs[index];
if (prog == NULL)
goto out;
54: mov x10, #0x68 // #104
58: ldr x10, [x1,x10]
5c: ldr x11, [x10,x2]
60: cbz x11, 0x0000000000000074
goto *(prog->bpf_func + prologue_size);
64: mov x10, #0x20 // #32
68: ldr x10, [x11,x10]
6c: add x10, x10, #0x20
70: br x10
74:
Signed-off-by: Zi Shen Lim <zlim.lnx@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-06-09 04:18:48 +00:00
|
|
|
#include <linux/bpf.h>
|
2014-08-27 04:15:30 +00:00
|
|
|
#include <linux/filter.h>
|
|
|
|
#include <linux/printk.h>
|
|
|
|
#include <linux/skbuff.h>
|
|
|
|
#include <linux/slab.h>
|
2014-09-16 07:48:50 +00:00
|
|
|
|
2014-08-27 04:15:30 +00:00
|
|
|
#include <asm/byteorder.h>
|
|
|
|
#include <asm/cacheflush.h>
|
2014-09-16 07:48:50 +00:00
|
|
|
#include <asm/debug-monitors.h>
|
2017-05-08 22:58:05 +00:00
|
|
|
#include <asm/set_memory.h>
|
2014-08-27 04:15:30 +00:00
|
|
|
|
|
|
|
#include "bpf_jit.h"
|
|
|
|
|
|
|
|
/*
 * Global BPF JIT enable flag. It is defined here with external linkage; the
 * code that reads or writes it is outside this excerpt.
 */
int bpf_jit_enable __read_mostly;
|
|
|
|
|
2016-05-13 17:08:34 +00:00
|
|
|
/*
 * JIT-internal pseudo registers. They are numbered directly after
 * MAX_BPF_JIT_REG so that they can be used as indices into bpf2a64[]
 * alongside the regular BPF registers.
 *
 * TMP_REG_1..3 are scratch registers for the JIT itself; TCALL_CNT holds the
 * running tail_call_cnt of the current program chain.
 */
#define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
#define TCALL_CNT (MAX_BPF_JIT_REG + 2)
#define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
|
2014-08-27 04:15:30 +00:00
|
|
|
|
|
|
|
/* Map BPF registers to A64 registers */
|
|
|
|
static const int bpf2a64[] = {
|
|
|
|
/* return value from in-kernel function, and exit value from eBPF */
|
|
|
|
[BPF_REG_0] = A64_R(7),
|
|
|
|
/* arguments from eBPF program to in-kernel function */
|
|
|
|
[BPF_REG_1] = A64_R(0),
|
|
|
|
[BPF_REG_2] = A64_R(1),
|
|
|
|
[BPF_REG_3] = A64_R(2),
|
|
|
|
[BPF_REG_4] = A64_R(3),
|
|
|
|
[BPF_REG_5] = A64_R(4),
|
|
|
|
/* callee saved registers that in-kernel function will preserve */
|
|
|
|
[BPF_REG_6] = A64_R(19),
|
|
|
|
[BPF_REG_7] = A64_R(20),
|
|
|
|
[BPF_REG_8] = A64_R(21),
|
|
|
|
[BPF_REG_9] = A64_R(22),
|
|
|
|
/* read-only frame pointer to access stack */
|
2015-11-16 22:35:35 +00:00
|
|
|
[BPF_REG_FP] = A64_R(25),
|
2016-05-16 23:36:26 +00:00
|
|
|
/* temporary registers for internal BPF JIT */
|
|
|
|
[TMP_REG_1] = A64_R(10),
|
|
|
|
[TMP_REG_2] = A64_R(11),
|
bpf, arm64: use separate register for state in stxr
Will reported that in BPF_XADD we must use a different register in stxr
instruction for the status flag due to otherwise CONSTRAINED UNPREDICTABLE
behavior per architecture. Reference manual says [1]:
If s == t, then one of the following behaviors must occur:
* The instruction is UNDEFINED.
* The instruction executes as a NOP.
* The instruction performs the store to the specified address, but
the value stored is UNKNOWN.
Thus, use a different temporary register for the status flag to fix it.
Disassembly extract from test 226/STX_XADD_DW from test_bpf.ko:
[...]
0000003c: c85f7d4b ldxr x11, [x10]
00000040: 8b07016b add x11, x11, x7
00000044: c80c7d4b stxr w12, x11, [x10]
00000048: 35ffffac cbnz w12, 0x0000003c
[...]
[1] https://static.docs.arm.com/ddi0487/b/DDI0487B_a_armv8_arm.pdf, p.6132
Fixes: 85f68fe89832 ("bpf, arm64: implement jiting of BPF_XADD")
Reported-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-06-07 11:45:37 +00:00
|
|
|
[TMP_REG_3] = A64_R(12),
|
arm64: bpf: implement bpf_tail_call() helper
Add support for JMP_CALL_X (tail call) introduced by commit 04fd61ab36ec
("bpf: allow bpf programs to tail-call other bpf programs").
bpf_tail_call() arguments:
ctx - context pointer passed to next program
array - pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
index - index inside array that selects specific program to run
In this implementation arm64 JIT jumps into callee program after prologue,
so callee program reuses the same stack. For tail_call_cnt, we use the
callee-saved R26 (which was already saved/restored but previously unused
by JIT).
With this patch a tail call generates the following code on arm64:
if (index >= array->map.max_entries)
goto out;
34: mov x10, #0x10 // #16
38: ldr w10, [x1,x10]
3c: cmp w2, w10
40: b.ge 0x0000000000000074
if (tail_call_cnt > MAX_TAIL_CALL_CNT)
goto out;
tail_call_cnt++;
44: mov x10, #0x20 // #32
48: cmp x26, x10
4c: b.gt 0x0000000000000074
50: add x26, x26, #0x1
prog = array->ptrs[index];
if (prog == NULL)
goto out;
54: mov x10, #0x68 // #104
58: ldr x10, [x1,x10]
5c: ldr x11, [x10,x2]
60: cbz x11, 0x0000000000000074
goto *(prog->bpf_func + prologue_size);
64: mov x10, #0x20 // #32
68: ldr x10, [x11,x10]
6c: add x10, x10, #0x20
70: br x10
74:
Signed-off-by: Zi Shen Lim <zlim.lnx@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-06-09 04:18:48 +00:00
|
|
|
/* tail_call_cnt */
|
|
|
|
[TCALL_CNT] = A64_R(26),
|
2016-05-13 17:08:34 +00:00
|
|
|
/* temporary register for blinding constants */
|
|
|
|
[BPF_REG_AX] = A64_R(9),
|
2014-08-27 04:15:30 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
struct jit_ctx {
|
|
|
|
const struct bpf_prog *prog;
|
|
|
|
int idx;
|
2014-12-03 08:38:01 +00:00
|
|
|
int epilogue_offset;
|
2014-08-27 04:15:30 +00:00
|
|
|
int *offset;
|
|
|
|
u32 *image;
|
|
|
|
};
|
|
|
|
|
|
|
|
static inline void emit(const u32 insn, struct jit_ctx *ctx)
|
|
|
|
{
|
|
|
|
if (ctx->image != NULL)
|
|
|
|
ctx->image[ctx->idx] = cpu_to_le32(insn);
|
|
|
|
|
|
|
|
ctx->idx++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Emit code that loads the 64-bit immediate @val into A64 register @reg.
 *
 * A MOVZ always sets the low 16 bits (and clears the rest). Each higher
 * 16-bit chunk then gets a MOVK only if it is non-zero, so the number of
 * instructions emitted depends on @val (between one and four).
 */
static inline void emit_a64_mov_i64(const int reg, const u64 val,
				    struct jit_ctx *ctx)
{
	u64 tmp = val;
	int shift = 0;

	emit(A64_MOVZ(1, reg, tmp & 0xffff, shift), ctx);
	tmp >>= 16;
	shift += 16;
	while (tmp) {
		/* all-zero chunks are already correct after the MOVZ */
		if (tmp & 0xffff)
			emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
		tmp >>= 16;
		shift += 16;
	}
}
|
|
|
|
|
|
|
|
static inline void emit_a64_mov_i(const int is64, const int reg,
|
|
|
|
const s32 val, struct jit_ctx *ctx)
|
|
|
|
{
|
|
|
|
u16 hi = val >> 16;
|
|
|
|
u16 lo = val & 0xffff;
|
|
|
|
|
|
|
|
if (hi & 0x8000) {
|
|
|
|
if (hi == 0xffff) {
|
|
|
|
emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
|
|
|
|
} else {
|
|
|
|
emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
|
|
|
|
emit(A64_MOVK(is64, reg, lo, 0), ctx);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
emit(A64_MOVZ(is64, reg, lo, 0), ctx);
|
|
|
|
if (hi)
|
|
|
|
emit(A64_MOVK(is64, reg, hi, 16), ctx);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int bpf2a64_offset(int bpf_to, int bpf_from,
|
|
|
|
const struct jit_ctx *ctx)
|
|
|
|
{
|
2015-06-25 12:47:39 +00:00
|
|
|
int to = ctx->offset[bpf_to];
|
2014-08-27 04:15:30 +00:00
|
|
|
/* -1 to account for the Branch instruction */
|
2015-06-25 12:47:39 +00:00
|
|
|
int from = ctx->offset[bpf_from] - 1;
|
2014-08-27 04:15:30 +00:00
|
|
|
|
|
|
|
return to - from;
|
|
|
|
}
|
|
|
|
|
2014-09-16 07:48:50 +00:00
|
|
|
static void jit_fill_hole(void *area, unsigned int size)
|
|
|
|
{
|
|
|
|
u32 *ptr;
|
|
|
|
/* We are guaranteed to have aligned memory. */
|
|
|
|
for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
|
|
|
|
*ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT);
|
|
|
|
}
|
|
|
|
|
2014-08-27 04:15:30 +00:00
|
|
|
static inline int epilogue_offset(const struct jit_ctx *ctx)
|
|
|
|
{
|
2014-12-03 08:38:01 +00:00
|
|
|
int to = ctx->epilogue_offset;
|
|
|
|
int from = ctx->idx;
|
2014-08-27 04:15:30 +00:00
|
|
|
|
|
|
|
return to - from;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Stack must be multiples of 16B: round sz up to the next multiple of 16 */
#define STACK_ALIGN(sz) (((sz) + 15) & ~15)

/* Unaligned size of the JIT stack area below the BPF frame pointer */
#define _STACK_SIZE \
	(MAX_BPF_STACK \
	 + 4 /* extra for skb_copy_bits buffer */)

/* Size actually subtracted from / added back to A64_SP */
#define STACK_SIZE STACK_ALIGN(_STACK_SIZE)
|
|
|
|
|
arm64: bpf: implement bpf_tail_call() helper
Add support for JMP_CALL_X (tail call) introduced by commit 04fd61ab36ec
("bpf: allow bpf programs to tail-call other bpf programs").
bpf_tail_call() arguments:
ctx - context pointer passed to next program
array - pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
index - index inside array that selects specific program to run
In this implementation arm64 JIT jumps into callee program after prologue,
so callee program reuses the same stack. For tail_call_cnt, we use the
callee-saved R26 (which was already saved/restored but previously unused
by JIT).
With this patch a tail call generates the following code on arm64:
if (index >= array->map.max_entries)
goto out;
34: mov x10, #0x10 // #16
38: ldr w10, [x1,x10]
3c: cmp w2, w10
40: b.ge 0x0000000000000074
if (tail_call_cnt > MAX_TAIL_CALL_CNT)
goto out;
tail_call_cnt++;
44: mov x10, #0x20 // #32
48: cmp x26, x10
4c: b.gt 0x0000000000000074
50: add x26, x26, #0x1
prog = array->ptrs[index];
if (prog == NULL)
goto out;
54: mov x10, #0x68 // #104
58: ldr x10, [x1,x10]
5c: ldr x11, [x10,x2]
60: cbz x11, 0x0000000000000074
goto *(prog->bpf_func + prologue_size);
64: mov x10, #0x20 // #32
68: ldr x10, [x11,x10]
6c: add x10, x10, #0x20
70: br x10
74:
Signed-off-by: Zi Shen Lim <zlim.lnx@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-06-09 04:18:48 +00:00
|
|
|
/*
 * Number of A64 instructions build_prologue() must emit. build_prologue()
 * fails if its actual count differs, and emit_bpf_tail_call() skips this many
 * instructions when jumping into the target program.
 */
#define PROLOGUE_OFFSET 8
|
|
|
|
|
|
|
|
static int build_prologue(struct jit_ctx *ctx)
|
2014-08-27 04:15:30 +00:00
|
|
|
{
|
|
|
|
const u8 r6 = bpf2a64[BPF_REG_6];
|
|
|
|
const u8 r7 = bpf2a64[BPF_REG_7];
|
|
|
|
const u8 r8 = bpf2a64[BPF_REG_8];
|
|
|
|
const u8 r9 = bpf2a64[BPF_REG_9];
|
|
|
|
const u8 fp = bpf2a64[BPF_REG_FP];
|
arm64: bpf: implement bpf_tail_call() helper
Add support for JMP_CALL_X (tail call) introduced by commit 04fd61ab36ec
("bpf: allow bpf programs to tail-call other bpf programs").
bpf_tail_call() arguments:
ctx - context pointer passed to next program
array - pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
index - index inside array that selects specific program to run
In this implementation arm64 JIT jumps into callee program after prologue,
so callee program reuses the same stack. For tail_call_cnt, we use the
callee-saved R26 (which was already saved/restored but previously unused
by JIT).
With this patch a tail call generates the following code on arm64:
if (index >= array->map.max_entries)
goto out;
34: mov x10, #0x10 // #16
38: ldr w10, [x1,x10]
3c: cmp w2, w10
40: b.ge 0x0000000000000074
if (tail_call_cnt > MAX_TAIL_CALL_CNT)
goto out;
tail_call_cnt++;
44: mov x10, #0x20 // #32
48: cmp x26, x10
4c: b.gt 0x0000000000000074
50: add x26, x26, #0x1
prog = array->ptrs[index];
if (prog == NULL)
goto out;
54: mov x10, #0x68 // #104
58: ldr x10, [x1,x10]
5c: ldr x11, [x10,x2]
60: cbz x11, 0x0000000000000074
goto *(prog->bpf_func + prologue_size);
64: mov x10, #0x20 // #32
68: ldr x10, [x11,x10]
6c: add x10, x10, #0x20
70: br x10
74:
Signed-off-by: Zi Shen Lim <zlim.lnx@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-06-09 04:18:48 +00:00
|
|
|
const u8 tcc = bpf2a64[TCALL_CNT];
|
|
|
|
const int idx0 = ctx->idx;
|
|
|
|
int cur_offset;
|
2014-08-27 04:15:30 +00:00
|
|
|
|
2015-11-16 22:35:35 +00:00
|
|
|
/*
|
|
|
|
* BPF prog stack layout
|
|
|
|
*
|
|
|
|
* high
|
|
|
|
* original A64_SP => 0:+-----+ BPF prologue
|
|
|
|
* |FP/LR|
|
|
|
|
* current A64_FP => -16:+-----+
|
|
|
|
* | ... | callee saved registers
|
2016-05-16 23:36:26 +00:00
|
|
|
* BPF fp register => -64:+-----+ <= (BPF_FP)
|
2015-11-16 22:35:35 +00:00
|
|
|
* | |
|
|
|
|
* | ... | BPF prog stack
|
|
|
|
* | |
|
2015-11-18 08:56:02 +00:00
|
|
|
* +-----+ <= (BPF_FP - MAX_BPF_STACK)
|
|
|
|
* |RSVD | JIT scratchpad
|
|
|
|
* current A64_SP => +-----+ <= (BPF_FP - STACK_SIZE)
|
2015-11-16 22:35:35 +00:00
|
|
|
* | |
|
|
|
|
* | ... | Function call stack
|
|
|
|
* | |
|
|
|
|
* +-----+
|
|
|
|
* low
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Save FP and LR registers to stay align with ARM64 AAPCS */
|
|
|
|
emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
|
|
|
|
emit(A64_MOV(1, A64_FP, A64_SP), ctx);
|
|
|
|
|
arm64: bpf: implement bpf_tail_call() helper
Add support for JMP_CALL_X (tail call) introduced by commit 04fd61ab36ec
("bpf: allow bpf programs to tail-call other bpf programs").
bpf_tail_call() arguments:
ctx - context pointer passed to next program
array - pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
index - index inside array that selects specific program to run
In this implementation arm64 JIT jumps into callee program after prologue,
so callee program reuses the same stack. For tail_call_cnt, we use the
callee-saved R26 (which was already saved/restored but previously unused
by JIT).
With this patch a tail call generates the following code on arm64:
if (index >= array->map.max_entries)
goto out;
34: mov x10, #0x10 // #16
38: ldr w10, [x1,x10]
3c: cmp w2, w10
40: b.ge 0x0000000000000074
if (tail_call_cnt > MAX_TAIL_CALL_CNT)
goto out;
tail_call_cnt++;
44: mov x10, #0x20 // #32
48: cmp x26, x10
4c: b.gt 0x0000000000000074
50: add x26, x26, #0x1
prog = array->ptrs[index];
if (prog == NULL)
goto out;
54: mov x10, #0x68 // #104
58: ldr x10, [x1,x10]
5c: ldr x11, [x10,x2]
60: cbz x11, 0x0000000000000074
goto *(prog->bpf_func + prologue_size);
64: mov x10, #0x20 // #32
68: ldr x10, [x11,x10]
6c: add x10, x10, #0x20
70: br x10
74:
Signed-off-by: Zi Shen Lim <zlim.lnx@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-06-09 04:18:48 +00:00
|
|
|
/* Save callee-saved registers */
|
2014-08-27 04:15:30 +00:00
|
|
|
emit(A64_PUSH(r6, r7, A64_SP), ctx);
|
|
|
|
emit(A64_PUSH(r8, r9, A64_SP), ctx);
|
arm64: bpf: implement bpf_tail_call() helper
Add support for JMP_CALL_X (tail call) introduced by commit 04fd61ab36ec
("bpf: allow bpf programs to tail-call other bpf programs").
bpf_tail_call() arguments:
ctx - context pointer passed to next program
array - pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
index - index inside array that selects specific program to run
In this implementation arm64 JIT jumps into callee program after prologue,
so callee program reuses the same stack. For tail_call_cnt, we use the
callee-saved R26 (which was already saved/restored but previously unused
by JIT).
With this patch a tail call generates the following code on arm64:
if (index >= array->map.max_entries)
goto out;
34: mov x10, #0x10 // #16
38: ldr w10, [x1,x10]
3c: cmp w2, w10
40: b.ge 0x0000000000000074
if (tail_call_cnt > MAX_TAIL_CALL_CNT)
goto out;
tail_call_cnt++;
44: mov x10, #0x20 // #32
48: cmp x26, x10
4c: b.gt 0x0000000000000074
50: add x26, x26, #0x1
prog = array->ptrs[index];
if (prog == NULL)
goto out;
54: mov x10, #0x68 // #104
58: ldr x10, [x1,x10]
5c: ldr x11, [x10,x2]
60: cbz x11, 0x0000000000000074
goto *(prog->bpf_func + prologue_size);
64: mov x10, #0x20 // #32
68: ldr x10, [x11,x10]
6c: add x10, x10, #0x20
70: br x10
74:
Signed-off-by: Zi Shen Lim <zlim.lnx@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-06-09 04:18:48 +00:00
|
|
|
emit(A64_PUSH(fp, tcc, A64_SP), ctx);
|
2014-08-27 04:15:30 +00:00
|
|
|
|
arm64: bpf: implement bpf_tail_call() helper
Add support for JMP_CALL_X (tail call) introduced by commit 04fd61ab36ec
("bpf: allow bpf programs to tail-call other bpf programs").
bpf_tail_call() arguments:
ctx - context pointer passed to next program
array - pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
index - index inside array that selects specific program to run
In this implementation arm64 JIT jumps into callee program after prologue,
so callee program reuses the same stack. For tail_call_cnt, we use the
callee-saved R26 (which was already saved/restored but previously unused
by JIT).
With this patch a tail call generates the following code on arm64:
if (index >= array->map.max_entries)
goto out;
34: mov x10, #0x10 // #16
38: ldr w10, [x1,x10]
3c: cmp w2, w10
40: b.ge 0x0000000000000074
if (tail_call_cnt > MAX_TAIL_CALL_CNT)
goto out;
tail_call_cnt++;
44: mov x10, #0x20 // #32
48: cmp x26, x10
4c: b.gt 0x0000000000000074
50: add x26, x26, #0x1
prog = array->ptrs[index];
if (prog == NULL)
goto out;
54: mov x10, #0x68 // #104
58: ldr x10, [x1,x10]
5c: ldr x11, [x10,x2]
60: cbz x11, 0x0000000000000074
goto *(prog->bpf_func + prologue_size);
64: mov x10, #0x20 // #32
68: ldr x10, [x11,x10]
6c: add x10, x10, #0x20
70: br x10
74:
Signed-off-by: Zi Shen Lim <zlim.lnx@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-06-09 04:18:48 +00:00
|
|
|
/* Set up BPF prog stack base register */
|
2014-08-27 04:15:30 +00:00
|
|
|
emit(A64_MOV(1, fp, A64_SP), ctx);
|
|
|
|
|
arm64: bpf: implement bpf_tail_call() helper
Add support for JMP_CALL_X (tail call) introduced by commit 04fd61ab36ec
("bpf: allow bpf programs to tail-call other bpf programs").
bpf_tail_call() arguments:
ctx - context pointer passed to next program
array - pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
index - index inside array that selects specific program to run
In this implementation arm64 JIT jumps into callee program after prologue,
so callee program reuses the same stack. For tail_call_cnt, we use the
callee-saved R26 (which was already saved/restored but previously unused
by JIT).
With this patch a tail call generates the following code on arm64:
if (index >= array->map.max_entries)
goto out;
34: mov x10, #0x10 // #16
38: ldr w10, [x1,x10]
3c: cmp w2, w10
40: b.ge 0x0000000000000074
if (tail_call_cnt > MAX_TAIL_CALL_CNT)
goto out;
tail_call_cnt++;
44: mov x10, #0x20 // #32
48: cmp x26, x10
4c: b.gt 0x0000000000000074
50: add x26, x26, #0x1
prog = array->ptrs[index];
if (prog == NULL)
goto out;
54: mov x10, #0x68 // #104
58: ldr x10, [x1,x10]
5c: ldr x11, [x10,x2]
60: cbz x11, 0x0000000000000074
goto *(prog->bpf_func + prologue_size);
64: mov x10, #0x20 // #32
68: ldr x10, [x11,x10]
6c: add x10, x10, #0x20
70: br x10
74:
Signed-off-by: Zi Shen Lim <zlim.lnx@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-06-09 04:18:48 +00:00
|
|
|
/* Initialize tail_call_cnt */
|
|
|
|
emit(A64_MOVZ(1, tcc, 0, 0), ctx);
|
|
|
|
|
2015-11-16 22:35:35 +00:00
|
|
|
/* Set up function call stack */
|
2015-11-18 08:56:02 +00:00
|
|
|
emit(A64_SUB_I(1, A64_SP, A64_SP, STACK_SIZE), ctx);
|
arm64: bpf: implement bpf_tail_call() helper
Add support for JMP_CALL_X (tail call) introduced by commit 04fd61ab36ec
("bpf: allow bpf programs to tail-call other bpf programs").
bpf_tail_call() arguments:
ctx - context pointer passed to next program
array - pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
index - index inside array that selects specific program to run
In this implementation arm64 JIT jumps into callee program after prologue,
so callee program reuses the same stack. For tail_call_cnt, we use the
callee-saved R26 (which was already saved/restored but previously unused
by JIT).
With this patch a tail call generates the following code on arm64:
if (index >= array->map.max_entries)
goto out;
34: mov x10, #0x10 // #16
38: ldr w10, [x1,x10]
3c: cmp w2, w10
40: b.ge 0x0000000000000074
if (tail_call_cnt > MAX_TAIL_CALL_CNT)
goto out;
tail_call_cnt++;
44: mov x10, #0x20 // #32
48: cmp x26, x10
4c: b.gt 0x0000000000000074
50: add x26, x26, #0x1
prog = array->ptrs[index];
if (prog == NULL)
goto out;
54: mov x10, #0x68 // #104
58: ldr x10, [x1,x10]
5c: ldr x11, [x10,x2]
60: cbz x11, 0x0000000000000074
goto *(prog->bpf_func + prologue_size);
64: mov x10, #0x20 // #32
68: ldr x10, [x11,x10]
6c: add x10, x10, #0x20
70: br x10
74:
Signed-off-by: Zi Shen Lim <zlim.lnx@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-06-09 04:18:48 +00:00
|
|
|
|
|
|
|
cur_offset = ctx->idx - idx0;
|
|
|
|
if (cur_offset != PROLOGUE_OFFSET) {
|
|
|
|
pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
|
|
|
|
cur_offset, PROLOGUE_OFFSET);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Distance, in A64 instructions, from the start of an emitted tail-call
 * sequence to its "out" label. It is latched by emit_bpf_tail_call() the
 * first time it runs (while still -1) and every later sequence is checked
 * against it.
 * NOTE(review): file-scope and never reset in the visible code, so the value
 * is shared by all programs JITed afterwards.
 */
static int out_offset = -1; /* initialized on the first pass of build_body() */
|
|
|
|
static int emit_bpf_tail_call(struct jit_ctx *ctx)
|
|
|
|
{
|
|
|
|
/* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */
|
|
|
|
const u8 r2 = bpf2a64[BPF_REG_2];
|
|
|
|
const u8 r3 = bpf2a64[BPF_REG_3];
|
|
|
|
|
|
|
|
const u8 tmp = bpf2a64[TMP_REG_1];
|
|
|
|
const u8 prg = bpf2a64[TMP_REG_2];
|
|
|
|
const u8 tcc = bpf2a64[TCALL_CNT];
|
|
|
|
const int idx0 = ctx->idx;
|
|
|
|
#define cur_offset (ctx->idx - idx0)
|
|
|
|
#define jmp_offset (out_offset - (cur_offset))
|
|
|
|
size_t off;
|
|
|
|
|
|
|
|
/* if (index >= array->map.max_entries)
|
|
|
|
* goto out;
|
|
|
|
*/
|
|
|
|
off = offsetof(struct bpf_array, map.max_entries);
|
|
|
|
emit_a64_mov_i64(tmp, off, ctx);
|
|
|
|
emit(A64_LDR32(tmp, r2, tmp), ctx);
|
|
|
|
emit(A64_CMP(0, r3, tmp), ctx);
|
|
|
|
emit(A64_B_(A64_COND_GE, jmp_offset), ctx);
|
|
|
|
|
|
|
|
/* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
|
|
|
|
* goto out;
|
|
|
|
* tail_call_cnt++;
|
|
|
|
*/
|
|
|
|
emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx);
|
|
|
|
emit(A64_CMP(1, tcc, tmp), ctx);
|
|
|
|
emit(A64_B_(A64_COND_GT, jmp_offset), ctx);
|
|
|
|
emit(A64_ADD_I(1, tcc, tcc, 1), ctx);
|
|
|
|
|
|
|
|
/* prog = array->ptrs[index];
|
|
|
|
* if (prog == NULL)
|
|
|
|
* goto out;
|
|
|
|
*/
|
|
|
|
off = offsetof(struct bpf_array, ptrs);
|
|
|
|
emit_a64_mov_i64(tmp, off, ctx);
|
bpf, arm64: fix faulty emission of map access in tail calls
Shubham was recently asking on netdev why in arm64 JIT we don't multiply
the index for accessing the tail call map by 8. That led me into testing
out arm64 JIT wrt tail calls and it turned out I got a NULL pointer
dereference on the tail call.
The buggy access is at:
prog = array->ptrs[index];
if (prog == NULL)
goto out;
[...]
00000060: d2800e0a mov x10, #0x70 // #112
00000064: f86a682a ldr x10, [x1,x10]
00000068: f862694b ldr x11, [x10,x2]
0000006c: b40000ab cbz x11, 0x00000080
[...]
The code triggering the crash is f862694b. x1 at the time contains the
address of the bpf array, x10 offsetof(struct bpf_array, ptrs). Meaning,
above we load the pointer to the program at map slot 0 into x10. x10
can then be NULL if the slot is not occupied, which we later on try to
access with a user given offset in x2 that is the map index.
Fix this by emitting the following instead:
[...]
00000060: d2800e0a mov x10, #0x70 // #112
00000064: 8b0a002a add x10, x1, x10
00000068: d37df04b lsl x11, x2, #3
0000006c: f86b694b ldr x11, [x10,x11]
00000070: b40000ab cbz x11, 0x00000084
[...]
This basically adds the offset to ptrs to the base address of the bpf
array we got and we later on access the map with an index * 8 offset
relative to that. The tail call map itself is basically one large area
with meta data at the head followed by the array of prog pointers.
This makes tail calls working again, tested on Cavium ThunderX ARMv8.
Fixes: ddb55992b04d ("arm64: bpf: implement bpf_tail_call() helper")
Reported-by: Shubham Bansal <illusionist.neo@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-05-10 23:53:15 +00:00
|
|
|
emit(A64_ADD(1, tmp, r2, tmp), ctx);
|
|
|
|
emit(A64_LSL(1, prg, r3, 3), ctx);
|
|
|
|
emit(A64_LDR64(prg, tmp, prg), ctx);
|
arm64: bpf: implement bpf_tail_call() helper
Add support for JMP_CALL_X (tail call) introduced by commit 04fd61ab36ec
("bpf: allow bpf programs to tail-call other bpf programs").
bpf_tail_call() arguments:
ctx - context pointer passed to next program
array - pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
index - index inside array that selects specific program to run
In this implementation arm64 JIT jumps into callee program after prologue,
so callee program reuses the same stack. For tail_call_cnt, we use the
callee-saved R26 (which was already saved/restored but previously unused
by JIT).
With this patch a tail call generates the following code on arm64:
if (index >= array->map.max_entries)
goto out;
34: mov x10, #0x10 // #16
38: ldr w10, [x1,x10]
3c: cmp w2, w10
40: b.ge 0x0000000000000074
if (tail_call_cnt > MAX_TAIL_CALL_CNT)
goto out;
tail_call_cnt++;
44: mov x10, #0x20 // #32
48: cmp x26, x10
4c: b.gt 0x0000000000000074
50: add x26, x26, #0x1
prog = array->ptrs[index];
if (prog == NULL)
goto out;
54: mov x10, #0x68 // #104
58: ldr x10, [x1,x10]
5c: ldr x11, [x10,x2]
60: cbz x11, 0x0000000000000074
goto *(prog->bpf_func + prologue_size);
64: mov x10, #0x20 // #32
68: ldr x10, [x11,x10]
6c: add x10, x10, #0x20
70: br x10
74:
Signed-off-by: Zi Shen Lim <zlim.lnx@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-06-09 04:18:48 +00:00
|
|
|
emit(A64_CBZ(1, prg, jmp_offset), ctx);
|
|
|
|
|
|
|
|
/* goto *(prog->bpf_func + prologue_size); */
|
|
|
|
off = offsetof(struct bpf_prog, bpf_func);
|
|
|
|
emit_a64_mov_i64(tmp, off, ctx);
|
|
|
|
emit(A64_LDR64(tmp, prg, tmp), ctx);
|
|
|
|
emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx);
|
|
|
|
emit(A64_BR(tmp), ctx);
|
|
|
|
|
|
|
|
/* out: */
|
|
|
|
if (out_offset == -1)
|
|
|
|
out_offset = cur_offset;
|
|
|
|
if (cur_offset != out_offset) {
|
|
|
|
pr_err_once("tail_call out_offset = %d, expected %d!\n",
|
|
|
|
cur_offset, out_offset);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
#undef cur_offset
|
|
|
|
#undef jmp_offset
|
2014-08-27 04:15:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void build_epilogue(struct jit_ctx *ctx)
|
|
|
|
{
|
|
|
|
const u8 r0 = bpf2a64[BPF_REG_0];
|
|
|
|
const u8 r6 = bpf2a64[BPF_REG_6];
|
|
|
|
const u8 r7 = bpf2a64[BPF_REG_7];
|
|
|
|
const u8 r8 = bpf2a64[BPF_REG_8];
|
|
|
|
const u8 r9 = bpf2a64[BPF_REG_9];
|
|
|
|
const u8 fp = bpf2a64[BPF_REG_FP];
|
|
|
|
|
|
|
|
/* We're done with BPF stack */
|
2015-11-18 08:56:02 +00:00
|
|
|
emit(A64_ADD_I(1, A64_SP, A64_SP, STACK_SIZE), ctx);
|
2014-08-27 04:15:30 +00:00
|
|
|
|
2015-11-16 22:35:35 +00:00
|
|
|
/* Restore fs (x25) and x26 */
|
|
|
|
emit(A64_POP(fp, A64_R(26), A64_SP), ctx);
|
|
|
|
|
2014-08-27 04:15:30 +00:00
|
|
|
/* Restore callee-saved register */
|
|
|
|
emit(A64_POP(r8, r9, A64_SP), ctx);
|
|
|
|
emit(A64_POP(r6, r7, A64_SP), ctx);
|
|
|
|
|
2015-11-16 22:35:35 +00:00
|
|
|
/* Restore FP/LR registers */
|
|
|
|
emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
|
2014-08-27 04:15:30 +00:00
|
|
|
|
|
|
|
/* Set return value */
|
|
|
|
emit(A64_MOV(1, A64_R(0), r0), ctx);
|
|
|
|
|
|
|
|
emit(A64_RET(A64_LR), ctx);
|
|
|
|
}
|
|
|
|
|
2014-09-16 20:29:23 +00:00
|
|
|
/* JITs an eBPF instruction.
|
|
|
|
* Returns:
|
|
|
|
* 0 - successfully JITed an 8-byte eBPF instruction.
|
|
|
|
* >0 - successfully JITed a 16-byte eBPF instruction.
|
|
|
|
* <0 - failed to JIT.
|
|
|
|
*/
|
2014-08-27 04:15:30 +00:00
|
|
|
static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
|
|
|
|
{
|
|
|
|
const u8 code = insn->code;
|
|
|
|
const u8 dst = bpf2a64[insn->dst_reg];
|
|
|
|
const u8 src = bpf2a64[insn->src_reg];
|
|
|
|
const u8 tmp = bpf2a64[TMP_REG_1];
|
|
|
|
const u8 tmp2 = bpf2a64[TMP_REG_2];
|
bpf, arm64: use separate register for state in stxr
Will reported that in BPF_XADD we must use a different register in stxr
instruction for the status flag due to otherwise CONSTRAINED UNPREDICTABLE
behavior per architecture. Reference manual says [1]:
If s == t, then one of the following behaviors must occur:
* The instruction is UNDEFINED.
* The instruction executes as a NOP.
* The instruction performs the store to the specified address, but
the value stored is UNKNOWN.
Thus, use a different temporary register for the status flag to fix it.
Disassembly extract from test 226/STX_XADD_DW from test_bpf.ko:
[...]
0000003c: c85f7d4b ldxr x11, [x10]
00000040: 8b07016b add x11, x11, x7
00000044: c80c7d4b stxr w12, x11, [x10]
00000048: 35ffffac cbnz w12, 0x0000003c
[...]
[1] https://static.docs.arm.com/ddi0487/b/DDI0487B_a_armv8_arm.pdf, p.6132
Fixes: 85f68fe89832 ("bpf, arm64: implement jiting of BPF_XADD")
Reported-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-06-07 11:45:37 +00:00
|
|
|
const u8 tmp3 = bpf2a64[TMP_REG_3];
|
2014-08-27 04:15:30 +00:00
|
|
|
const s16 off = insn->off;
|
|
|
|
const s32 imm = insn->imm;
|
|
|
|
const int i = insn - ctx->prog->insnsi;
|
|
|
|
const bool is64 = BPF_CLASS(code) == BPF_ALU64;
|
bpf, arm64: implement jiting of BPF_XADD
This work adds BPF_XADD for BPF_W/BPF_DW to the arm64 JIT and therefore
completes JITing of all BPF instructions, meaning we can thus also remove
the 'notyet' label and do not need to fall back to the interpreter when
BPF_XADD is used in a program!
This now also brings arm64 JIT in line with x86_64, s390x, ppc64, sparc64,
where all current eBPF features are supported.
BPF_W example from test_bpf:
.u.insns_int = {
BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
BPF_ST_MEM(BPF_W, R10, -40, 0x10),
BPF_STX_XADD(BPF_W, R10, R0, -40),
BPF_LDX_MEM(BPF_W, R0, R10, -40),
BPF_EXIT_INSN(),
},
[...]
00000020: 52800247 mov w7, #0x12 // #18
00000024: 928004eb mov x11, #0xffffffffffffffd8 // #-40
00000028: d280020a mov x10, #0x10 // #16
0000002c: b82b6b2a str w10, [x25,x11]
// start of xadd mapping:
00000030: 928004ea mov x10, #0xffffffffffffffd8 // #-40
00000034: 8b19014a add x10, x10, x25
00000038: f9800151 prfm pstl1strm, [x10]
0000003c: 885f7d4b ldxr w11, [x10]
00000040: 0b07016b add w11, w11, w7
00000044: 880b7d4b stxr w11, w11, [x10]
00000048: 35ffffab cbnz w11, 0x0000003c
// end of xadd mapping:
[...]
BPF_DW example from test_bpf:
.u.insns_int = {
BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
BPF_ST_MEM(BPF_DW, R10, -40, 0x10),
BPF_STX_XADD(BPF_DW, R10, R0, -40),
BPF_LDX_MEM(BPF_DW, R0, R10, -40),
BPF_EXIT_INSN(),
},
[...]
00000020: 52800247 mov w7, #0x12 // #18
00000024: 928004eb mov x11, #0xffffffffffffffd8 // #-40
00000028: d280020a mov x10, #0x10 // #16
0000002c: f82b6b2a str x10, [x25,x11]
// start of xadd mapping:
00000030: 928004ea mov x10, #0xffffffffffffffd8 // #-40
00000034: 8b19014a add x10, x10, x25
00000038: f9800151 prfm pstl1strm, [x10]
0000003c: c85f7d4b ldxr x11, [x10]
00000040: 8b07016b add x11, x11, x7
00000044: c80b7d4b stxr w11, x11, [x10]
00000048: 35ffffab cbnz w11, 0x0000003c
// end of xadd mapping:
[...]
Tested on Cavium ThunderX ARMv8, test suite results after the patch:
No JIT: [ 3751.855362] test_bpf: Summary: 311 PASSED, 0 FAILED, [0/303 JIT'ed]
With JIT: [ 3573.759527] test_bpf: Summary: 311 PASSED, 0 FAILED, [303/303 JIT'ed]
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-05-01 00:57:20 +00:00
|
|
|
const bool isdw = BPF_SIZE(code) == BPF_DW;
|
2014-08-27 04:15:30 +00:00
|
|
|
u8 jmp_cond;
|
|
|
|
s32 jmp_offset;
|
|
|
|
|
2015-11-04 06:56:44 +00:00
|
|
|
#define check_imm(bits, imm) do { \
|
|
|
|
if ((((imm) > 0) && ((imm) >> (bits))) || \
|
|
|
|
(((imm) < 0) && (~(imm) >> (bits)))) { \
|
|
|
|
pr_info("[%2d] imm=%d(0x%x) out of range\n", \
|
|
|
|
i, imm, imm); \
|
|
|
|
return -EINVAL; \
|
|
|
|
} \
|
|
|
|
} while (0)
|
|
|
|
#define check_imm19(imm) check_imm(19, imm)
|
|
|
|
#define check_imm26(imm) check_imm(26, imm)
|
|
|
|
|
2014-08-27 04:15:30 +00:00
|
|
|
switch (code) {
|
|
|
|
/* dst = src */
|
|
|
|
case BPF_ALU | BPF_MOV | BPF_X:
|
|
|
|
case BPF_ALU64 | BPF_MOV | BPF_X:
|
|
|
|
emit(A64_MOV(is64, dst, src), ctx);
|
|
|
|
break;
|
|
|
|
/* dst = dst OP src */
|
|
|
|
case BPF_ALU | BPF_ADD | BPF_X:
|
|
|
|
case BPF_ALU64 | BPF_ADD | BPF_X:
|
|
|
|
emit(A64_ADD(is64, dst, dst, src), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_ALU | BPF_SUB | BPF_X:
|
|
|
|
case BPF_ALU64 | BPF_SUB | BPF_X:
|
|
|
|
emit(A64_SUB(is64, dst, dst, src), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_ALU | BPF_AND | BPF_X:
|
|
|
|
case BPF_ALU64 | BPF_AND | BPF_X:
|
|
|
|
emit(A64_AND(is64, dst, dst, src), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_ALU | BPF_OR | BPF_X:
|
|
|
|
case BPF_ALU64 | BPF_OR | BPF_X:
|
|
|
|
emit(A64_ORR(is64, dst, dst, src), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_ALU | BPF_XOR | BPF_X:
|
|
|
|
case BPF_ALU64 | BPF_XOR | BPF_X:
|
|
|
|
emit(A64_EOR(is64, dst, dst, src), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_ALU | BPF_MUL | BPF_X:
|
|
|
|
case BPF_ALU64 | BPF_MUL | BPF_X:
|
|
|
|
emit(A64_MUL(is64, dst, dst, src), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_ALU | BPF_DIV | BPF_X:
|
|
|
|
case BPF_ALU64 | BPF_DIV | BPF_X:
|
|
|
|
case BPF_ALU | BPF_MOD | BPF_X:
|
|
|
|
case BPF_ALU64 | BPF_MOD | BPF_X:
|
2015-11-04 06:56:44 +00:00
|
|
|
{
|
|
|
|
const u8 r0 = bpf2a64[BPF_REG_0];
|
|
|
|
|
|
|
|
/* if (src == 0) return 0 */
|
|
|
|
jmp_offset = 3; /* skip ahead to else path */
|
|
|
|
check_imm19(jmp_offset);
|
|
|
|
emit(A64_CBNZ(is64, src, jmp_offset), ctx);
|
|
|
|
emit(A64_MOVZ(1, r0, 0, 0), ctx);
|
|
|
|
jmp_offset = epilogue_offset(ctx);
|
|
|
|
check_imm26(jmp_offset);
|
|
|
|
emit(A64_B(jmp_offset), ctx);
|
|
|
|
/* else */
|
2015-11-05 04:43:59 +00:00
|
|
|
switch (BPF_OP(code)) {
|
|
|
|
case BPF_DIV:
|
|
|
|
emit(A64_UDIV(is64, dst, dst, src), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_MOD:
|
|
|
|
emit(A64_UDIV(is64, tmp, dst, src), ctx);
|
|
|
|
emit(A64_MUL(is64, tmp, tmp, src), ctx);
|
|
|
|
emit(A64_SUB(is64, dst, dst, tmp), ctx);
|
|
|
|
break;
|
|
|
|
}
|
2014-08-27 04:15:30 +00:00
|
|
|
break;
|
2015-11-04 06:56:44 +00:00
|
|
|
}
|
2014-09-16 18:37:35 +00:00
|
|
|
case BPF_ALU | BPF_LSH | BPF_X:
|
|
|
|
case BPF_ALU64 | BPF_LSH | BPF_X:
|
|
|
|
emit(A64_LSLV(is64, dst, dst, src), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_ALU | BPF_RSH | BPF_X:
|
|
|
|
case BPF_ALU64 | BPF_RSH | BPF_X:
|
|
|
|
emit(A64_LSRV(is64, dst, dst, src), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_ALU | BPF_ARSH | BPF_X:
|
|
|
|
case BPF_ALU64 | BPF_ARSH | BPF_X:
|
|
|
|
emit(A64_ASRV(is64, dst, dst, src), ctx);
|
|
|
|
break;
|
2014-08-27 04:15:30 +00:00
|
|
|
/* dst = -dst */
|
|
|
|
case BPF_ALU | BPF_NEG:
|
|
|
|
case BPF_ALU64 | BPF_NEG:
|
|
|
|
emit(A64_NEG(is64, dst, dst), ctx);
|
|
|
|
break;
|
|
|
|
/* dst = BSWAP##imm(dst) */
|
|
|
|
case BPF_ALU | BPF_END | BPF_FROM_LE:
|
|
|
|
case BPF_ALU | BPF_END | BPF_FROM_BE:
|
|
|
|
#ifdef CONFIG_CPU_BIG_ENDIAN
|
|
|
|
if (BPF_SRC(code) == BPF_FROM_BE)
|
2015-06-26 01:39:15 +00:00
|
|
|
goto emit_bswap_uxt;
|
2014-08-27 04:15:30 +00:00
|
|
|
#else /* !CONFIG_CPU_BIG_ENDIAN */
|
|
|
|
if (BPF_SRC(code) == BPF_FROM_LE)
|
2015-06-26 01:39:15 +00:00
|
|
|
goto emit_bswap_uxt;
|
2014-08-27 04:15:30 +00:00
|
|
|
#endif
|
|
|
|
switch (imm) {
|
|
|
|
case 16:
|
|
|
|
emit(A64_REV16(is64, dst, dst), ctx);
|
2015-06-26 01:39:15 +00:00
|
|
|
/* zero-extend 16 bits into 64 bits */
|
|
|
|
emit(A64_UXTH(is64, dst, dst), ctx);
|
2014-08-27 04:15:30 +00:00
|
|
|
break;
|
|
|
|
case 32:
|
|
|
|
emit(A64_REV32(is64, dst, dst), ctx);
|
2015-06-26 01:39:15 +00:00
|
|
|
/* upper 32 bits already cleared */
|
2014-08-27 04:15:30 +00:00
|
|
|
break;
|
|
|
|
case 64:
|
|
|
|
emit(A64_REV64(dst, dst), ctx);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
2015-06-26 01:39:15 +00:00
|
|
|
emit_bswap_uxt:
|
|
|
|
switch (imm) {
|
|
|
|
case 16:
|
|
|
|
/* zero-extend 16 bits into 64 bits */
|
|
|
|
emit(A64_UXTH(is64, dst, dst), ctx);
|
|
|
|
break;
|
|
|
|
case 32:
|
|
|
|
/* zero-extend 32 bits into 64 bits */
|
|
|
|
emit(A64_UXTW(is64, dst, dst), ctx);
|
|
|
|
break;
|
|
|
|
case 64:
|
|
|
|
/* nop */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
2014-08-27 04:15:30 +00:00
|
|
|
/* dst = imm */
|
|
|
|
case BPF_ALU | BPF_MOV | BPF_K:
|
|
|
|
case BPF_ALU64 | BPF_MOV | BPF_K:
|
|
|
|
emit_a64_mov_i(is64, dst, imm, ctx);
|
|
|
|
break;
|
|
|
|
/* dst = dst OP imm */
|
|
|
|
case BPF_ALU | BPF_ADD | BPF_K:
|
|
|
|
case BPF_ALU64 | BPF_ADD | BPF_K:
|
|
|
|
emit_a64_mov_i(is64, tmp, imm, ctx);
|
|
|
|
emit(A64_ADD(is64, dst, dst, tmp), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_ALU | BPF_SUB | BPF_K:
|
|
|
|
case BPF_ALU64 | BPF_SUB | BPF_K:
|
|
|
|
emit_a64_mov_i(is64, tmp, imm, ctx);
|
|
|
|
emit(A64_SUB(is64, dst, dst, tmp), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_ALU | BPF_AND | BPF_K:
|
|
|
|
case BPF_ALU64 | BPF_AND | BPF_K:
|
|
|
|
emit_a64_mov_i(is64, tmp, imm, ctx);
|
|
|
|
emit(A64_AND(is64, dst, dst, tmp), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_ALU | BPF_OR | BPF_K:
|
|
|
|
case BPF_ALU64 | BPF_OR | BPF_K:
|
|
|
|
emit_a64_mov_i(is64, tmp, imm, ctx);
|
|
|
|
emit(A64_ORR(is64, dst, dst, tmp), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_ALU | BPF_XOR | BPF_K:
|
|
|
|
case BPF_ALU64 | BPF_XOR | BPF_K:
|
|
|
|
emit_a64_mov_i(is64, tmp, imm, ctx);
|
|
|
|
emit(A64_EOR(is64, dst, dst, tmp), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_ALU | BPF_MUL | BPF_K:
|
|
|
|
case BPF_ALU64 | BPF_MUL | BPF_K:
|
|
|
|
emit_a64_mov_i(is64, tmp, imm, ctx);
|
|
|
|
emit(A64_MUL(is64, dst, dst, tmp), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_ALU | BPF_DIV | BPF_K:
|
|
|
|
case BPF_ALU64 | BPF_DIV | BPF_K:
|
|
|
|
emit_a64_mov_i(is64, tmp, imm, ctx);
|
|
|
|
emit(A64_UDIV(is64, dst, dst, tmp), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_ALU | BPF_MOD | BPF_K:
|
|
|
|
case BPF_ALU64 | BPF_MOD | BPF_K:
|
|
|
|
emit_a64_mov_i(is64, tmp2, imm, ctx);
|
|
|
|
emit(A64_UDIV(is64, tmp, dst, tmp2), ctx);
|
|
|
|
emit(A64_MUL(is64, tmp, tmp, tmp2), ctx);
|
|
|
|
emit(A64_SUB(is64, dst, dst, tmp), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_ALU | BPF_LSH | BPF_K:
|
|
|
|
case BPF_ALU64 | BPF_LSH | BPF_K:
|
|
|
|
emit(A64_LSL(is64, dst, dst, imm), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_ALU | BPF_RSH | BPF_K:
|
|
|
|
case BPF_ALU64 | BPF_RSH | BPF_K:
|
|
|
|
emit(A64_LSR(is64, dst, dst, imm), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_ALU | BPF_ARSH | BPF_K:
|
|
|
|
case BPF_ALU64 | BPF_ARSH | BPF_K:
|
|
|
|
emit(A64_ASR(is64, dst, dst, imm), ctx);
|
|
|
|
break;
|
|
|
|
|
|
|
|
/* JUMP off */
|
|
|
|
case BPF_JMP | BPF_JA:
|
|
|
|
jmp_offset = bpf2a64_offset(i + off, i, ctx);
|
|
|
|
check_imm26(jmp_offset);
|
|
|
|
emit(A64_B(jmp_offset), ctx);
|
|
|
|
break;
|
|
|
|
/* IF (dst COND src) JUMP off */
|
|
|
|
case BPF_JMP | BPF_JEQ | BPF_X:
|
|
|
|
case BPF_JMP | BPF_JGT | BPF_X:
|
|
|
|
case BPF_JMP | BPF_JGE | BPF_X:
|
|
|
|
case BPF_JMP | BPF_JNE | BPF_X:
|
|
|
|
case BPF_JMP | BPF_JSGT | BPF_X:
|
|
|
|
case BPF_JMP | BPF_JSGE | BPF_X:
|
|
|
|
emit(A64_CMP(1, dst, src), ctx);
|
|
|
|
emit_cond_jmp:
|
|
|
|
jmp_offset = bpf2a64_offset(i + off, i, ctx);
|
|
|
|
check_imm19(jmp_offset);
|
|
|
|
switch (BPF_OP(code)) {
|
|
|
|
case BPF_JEQ:
|
|
|
|
jmp_cond = A64_COND_EQ;
|
|
|
|
break;
|
|
|
|
case BPF_JGT:
|
|
|
|
jmp_cond = A64_COND_HI;
|
|
|
|
break;
|
|
|
|
case BPF_JGE:
|
|
|
|
jmp_cond = A64_COND_CS;
|
|
|
|
break;
|
2016-05-13 06:37:58 +00:00
|
|
|
case BPF_JSET:
|
2014-08-27 04:15:30 +00:00
|
|
|
case BPF_JNE:
|
|
|
|
jmp_cond = A64_COND_NE;
|
|
|
|
break;
|
|
|
|
case BPF_JSGT:
|
|
|
|
jmp_cond = A64_COND_GT;
|
|
|
|
break;
|
|
|
|
case BPF_JSGE:
|
|
|
|
jmp_cond = A64_COND_GE;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
emit(A64_B_(jmp_cond, jmp_offset), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_JMP | BPF_JSET | BPF_X:
|
|
|
|
emit(A64_TST(1, dst, src), ctx);
|
|
|
|
goto emit_cond_jmp;
|
|
|
|
/* IF (dst COND imm) JUMP off */
|
|
|
|
case BPF_JMP | BPF_JEQ | BPF_K:
|
|
|
|
case BPF_JMP | BPF_JGT | BPF_K:
|
|
|
|
case BPF_JMP | BPF_JGE | BPF_K:
|
|
|
|
case BPF_JMP | BPF_JNE | BPF_K:
|
|
|
|
case BPF_JMP | BPF_JSGT | BPF_K:
|
|
|
|
case BPF_JMP | BPF_JSGE | BPF_K:
|
|
|
|
emit_a64_mov_i(1, tmp, imm, ctx);
|
|
|
|
emit(A64_CMP(1, dst, tmp), ctx);
|
|
|
|
goto emit_cond_jmp;
|
|
|
|
case BPF_JMP | BPF_JSET | BPF_K:
|
|
|
|
emit_a64_mov_i(1, tmp, imm, ctx);
|
|
|
|
emit(A64_TST(1, dst, tmp), ctx);
|
|
|
|
goto emit_cond_jmp;
|
|
|
|
/* function call */
|
|
|
|
case BPF_JMP | BPF_CALL:
|
|
|
|
{
|
|
|
|
const u8 r0 = bpf2a64[BPF_REG_0];
|
|
|
|
const u64 func = (u64)__bpf_call_base + imm;
|
|
|
|
|
|
|
|
emit_a64_mov_i64(tmp, func, ctx);
|
|
|
|
emit(A64_BLR(tmp), ctx);
|
|
|
|
emit(A64_MOV(1, r0, A64_R(0)), ctx);
|
|
|
|
break;
|
|
|
|
}
|
arm64: bpf: implement bpf_tail_call() helper
Add support for JMP_CALL_X (tail call) introduced by commit 04fd61ab36ec
("bpf: allow bpf programs to tail-call other bpf programs").
bpf_tail_call() arguments:
ctx - context pointer passed to next program
array - pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
index - index inside array that selects specific program to run
In this implementation arm64 JIT jumps into callee program after prologue,
so callee program reuses the same stack. For tail_call_cnt, we use the
callee-saved R26 (which was already saved/restored but previously unused
by JIT).
With this patch a tail call generates the following code on arm64:
if (index >= array->map.max_entries)
goto out;
34: mov x10, #0x10 // #16
38: ldr w10, [x1,x10]
3c: cmp w2, w10
40: b.ge 0x0000000000000074
if (tail_call_cnt > MAX_TAIL_CALL_CNT)
goto out;
tail_call_cnt++;
44: mov x10, #0x20 // #32
48: cmp x26, x10
4c: b.gt 0x0000000000000074
50: add x26, x26, #0x1
prog = array->ptrs[index];
if (prog == NULL)
goto out;
54: mov x10, #0x68 // #104
58: ldr x10, [x1,x10]
5c: ldr x11, [x10,x2]
60: cbz x11, 0x0000000000000074
goto *(prog->bpf_func + prologue_size);
64: mov x10, #0x20 // #32
68: ldr x10, [x11,x10]
6c: add x10, x10, #0x20
70: br x10
74:
Signed-off-by: Zi Shen Lim <zlim.lnx@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-06-09 04:18:48 +00:00
|
|
|
/* tail call */
|
|
|
|
case BPF_JMP | BPF_CALL | BPF_X:
|
|
|
|
if (emit_bpf_tail_call(ctx))
|
|
|
|
return -EFAULT;
|
|
|
|
break;
|
2014-08-27 04:15:30 +00:00
|
|
|
/* function return */
|
|
|
|
case BPF_JMP | BPF_EXIT:
|
2014-12-03 08:38:01 +00:00
|
|
|
/* Optimization: when last instruction is EXIT,
|
|
|
|
simply fallthrough to epilogue. */
|
2014-08-27 04:15:30 +00:00
|
|
|
if (i == ctx->prog->len - 1)
|
|
|
|
break;
|
|
|
|
jmp_offset = epilogue_offset(ctx);
|
|
|
|
check_imm26(jmp_offset);
|
|
|
|
emit(A64_B(jmp_offset), ctx);
|
|
|
|
break;
|
|
|
|
|
2014-09-16 20:29:23 +00:00
|
|
|
/* dst = imm64 */
|
|
|
|
case BPF_LD | BPF_IMM | BPF_DW:
|
|
|
|
{
|
|
|
|
const struct bpf_insn insn1 = insn[1];
|
|
|
|
u64 imm64;
|
|
|
|
|
2015-05-08 05:39:51 +00:00
|
|
|
imm64 = (u64)insn1.imm << 32 | (u32)imm;
|
2014-09-16 20:29:23 +00:00
|
|
|
emit_a64_mov_i64(dst, imm64, ctx);
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2014-08-27 04:15:30 +00:00
|
|
|
/* LDX: dst = *(size *)(src + off) */
|
|
|
|
case BPF_LDX | BPF_MEM | BPF_W:
|
|
|
|
case BPF_LDX | BPF_MEM | BPF_H:
|
|
|
|
case BPF_LDX | BPF_MEM | BPF_B:
|
|
|
|
case BPF_LDX | BPF_MEM | BPF_DW:
|
|
|
|
emit_a64_mov_i(1, tmp, off, ctx);
|
|
|
|
switch (BPF_SIZE(code)) {
|
|
|
|
case BPF_W:
|
|
|
|
emit(A64_LDR32(dst, src, tmp), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_H:
|
|
|
|
emit(A64_LDRH(dst, src, tmp), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_B:
|
|
|
|
emit(A64_LDRB(dst, src, tmp), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_DW:
|
|
|
|
emit(A64_LDR64(dst, src, tmp), ctx);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
/* ST: *(size *)(dst + off) = imm */
|
|
|
|
case BPF_ST | BPF_MEM | BPF_W:
|
|
|
|
case BPF_ST | BPF_MEM | BPF_H:
|
|
|
|
case BPF_ST | BPF_MEM | BPF_B:
|
|
|
|
case BPF_ST | BPF_MEM | BPF_DW:
|
2015-11-30 22:24:07 +00:00
|
|
|
/* Load imm to a register then store it */
|
|
|
|
emit_a64_mov_i(1, tmp2, off, ctx);
|
|
|
|
emit_a64_mov_i(1, tmp, imm, ctx);
|
|
|
|
switch (BPF_SIZE(code)) {
|
|
|
|
case BPF_W:
|
|
|
|
emit(A64_STR32(tmp, dst, tmp2), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_H:
|
|
|
|
emit(A64_STRH(tmp, dst, tmp2), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_B:
|
|
|
|
emit(A64_STRB(tmp, dst, tmp2), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_DW:
|
|
|
|
emit(A64_STR64(tmp, dst, tmp2), ctx);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
2014-08-27 04:15:30 +00:00
|
|
|
|
|
|
|
/* STX: *(size *)(dst + off) = src */
|
|
|
|
case BPF_STX | BPF_MEM | BPF_W:
|
|
|
|
case BPF_STX | BPF_MEM | BPF_H:
|
|
|
|
case BPF_STX | BPF_MEM | BPF_B:
|
|
|
|
case BPF_STX | BPF_MEM | BPF_DW:
|
|
|
|
emit_a64_mov_i(1, tmp, off, ctx);
|
|
|
|
switch (BPF_SIZE(code)) {
|
|
|
|
case BPF_W:
|
|
|
|
emit(A64_STR32(src, dst, tmp), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_H:
|
|
|
|
emit(A64_STRH(src, dst, tmp), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_B:
|
|
|
|
emit(A64_STRB(src, dst, tmp), ctx);
|
|
|
|
break;
|
|
|
|
case BPF_DW:
|
|
|
|
emit(A64_STR64(src, dst, tmp), ctx);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
/* STX XADD: lock *(u32 *)(dst + off) += src */
|
|
|
|
case BPF_STX | BPF_XADD | BPF_W:
|
|
|
|
/* STX XADD: lock *(u64 *)(dst + off) += src */
|
|
|
|
case BPF_STX | BPF_XADD | BPF_DW:
|
bpf, arm64: implement jiting of BPF_XADD
This work adds BPF_XADD for BPF_W/BPF_DW to the arm64 JIT and therefore
completes JITing of all BPF instructions, meaning we can thus also remove
the 'notyet' label and do not need to fall back to the interpreter when
BPF_XADD is used in a program!
This now also brings arm64 JIT in line with x86_64, s390x, ppc64, sparc64,
where all current eBPF features are supported.
BPF_W example from test_bpf:
.u.insns_int = {
BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
BPF_ST_MEM(BPF_W, R10, -40, 0x10),
BPF_STX_XADD(BPF_W, R10, R0, -40),
BPF_LDX_MEM(BPF_W, R0, R10, -40),
BPF_EXIT_INSN(),
},
[...]
00000020: 52800247 mov w7, #0x12 // #18
00000024: 928004eb mov x11, #0xffffffffffffffd8 // #-40
00000028: d280020a mov x10, #0x10 // #16
0000002c: b82b6b2a str w10, [x25,x11]
// start of xadd mapping:
00000030: 928004ea mov x10, #0xffffffffffffffd8 // #-40
00000034: 8b19014a add x10, x10, x25
00000038: f9800151 prfm pstl1strm, [x10]
0000003c: 885f7d4b ldxr w11, [x10]
00000040: 0b07016b add w11, w11, w7
00000044: 880b7d4b stxr w11, w11, [x10]
00000048: 35ffffab cbnz w11, 0x0000003c
// end of xadd mapping:
[...]
BPF_DW example from test_bpf:
.u.insns_int = {
BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
BPF_ST_MEM(BPF_DW, R10, -40, 0x10),
BPF_STX_XADD(BPF_DW, R10, R0, -40),
BPF_LDX_MEM(BPF_DW, R0, R10, -40),
BPF_EXIT_INSN(),
},
[...]
00000020: 52800247 mov w7, #0x12 // #18
00000024: 928004eb mov x11, #0xffffffffffffffd8 // #-40
00000028: d280020a mov x10, #0x10 // #16
0000002c: f82b6b2a str x10, [x25,x11]
// start of xadd mapping:
00000030: 928004ea mov x10, #0xffffffffffffffd8 // #-40
00000034: 8b19014a add x10, x10, x25
00000038: f9800151 prfm pstl1strm, [x10]
0000003c: c85f7d4b ldxr x11, [x10]
00000040: 8b07016b add x11, x11, x7
00000044: c80b7d4b stxr w11, x11, [x10]
00000048: 35ffffab cbnz w11, 0x0000003c
// end of xadd mapping:
[...]
Tested on Cavium ThunderX ARMv8, test suite results after the patch:
No JIT: [ 3751.855362] test_bpf: Summary: 311 PASSED, 0 FAILED, [0/303 JIT'ed]
With JIT: [ 3573.759527] test_bpf: Summary: 311 PASSED, 0 FAILED, [303/303 JIT'ed]
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-05-01 00:57:20 +00:00
|
|
|
emit_a64_mov_i(1, tmp, off, ctx);
|
|
|
|
emit(A64_ADD(1, tmp, tmp, dst), ctx);
|
|
|
|
emit(A64_PRFM(tmp, PST, L1, STRM), ctx);
|
|
|
|
emit(A64_LDXR(isdw, tmp2, tmp), ctx);
|
|
|
|
emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
|
bpf, arm64: use separate register for state in stxr
Will reported that in BPF_XADD we must use a different register in stxr
instruction for the status flag due to otherwise CONSTRAINED UNPREDICTABLE
behavior per architecture. Reference manual says [1]:
If s == t, then one of the following behaviors must occur:
* The instruction is UNDEFINED.
* The instruction executes as a NOP.
* The instruction performs the store to the specified address, but
the value stored is UNKNOWN.
Thus, use a different temporary register for the status flag to fix it.
Disassembly extract from test 226/STX_XADD_DW from test_bpf.ko:
[...]
0000003c: c85f7d4b ldxr x11, [x10]
00000040: 8b07016b add x11, x11, x7
00000044: c80c7d4b stxr w12, x11, [x10]
00000048: 35ffffac cbnz w12, 0x0000003c
[...]
[1] https://static.docs.arm.com/ddi0487/b/DDI0487B_a_armv8_arm.pdf, p.6132
Fixes: 85f68fe89832 ("bpf, arm64: implement jiting of BPF_XADD")
Reported-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-06-07 11:45:37 +00:00
|
|
|
emit(A64_STXR(isdw, tmp2, tmp, tmp3), ctx);
|
bpf, arm64: implement jiting of BPF_XADD
This work adds BPF_XADD for BPF_W/BPF_DW to the arm64 JIT and therefore
completes JITing of all BPF instructions, meaning we can thus also remove
the 'notyet' label and do not need to fall back to the interpreter when
BPF_XADD is used in a program!
This now also brings arm64 JIT in line with x86_64, s390x, ppc64, sparc64,
where all current eBPF features are supported.
BPF_W example from test_bpf:
.u.insns_int = {
BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
BPF_ST_MEM(BPF_W, R10, -40, 0x10),
BPF_STX_XADD(BPF_W, R10, R0, -40),
BPF_LDX_MEM(BPF_W, R0, R10, -40),
BPF_EXIT_INSN(),
},
[...]
00000020: 52800247 mov w7, #0x12 // #18
00000024: 928004eb mov x11, #0xffffffffffffffd8 // #-40
00000028: d280020a mov x10, #0x10 // #16
0000002c: b82b6b2a str w10, [x25,x11]
// start of xadd mapping:
00000030: 928004ea mov x10, #0xffffffffffffffd8 // #-40
00000034: 8b19014a add x10, x10, x25
00000038: f9800151 prfm pstl1strm, [x10]
0000003c: 885f7d4b ldxr w11, [x10]
00000040: 0b07016b add w11, w11, w7
00000044: 880b7d4b stxr w11, w11, [x10]
00000048: 35ffffab cbnz w11, 0x0000003c
// end of xadd mapping:
[...]
BPF_DW example from test_bpf:
.u.insns_int = {
BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
BPF_ST_MEM(BPF_DW, R10, -40, 0x10),
BPF_STX_XADD(BPF_DW, R10, R0, -40),
BPF_LDX_MEM(BPF_DW, R0, R10, -40),
BPF_EXIT_INSN(),
},
[...]
00000020: 52800247 mov w7, #0x12 // #18
00000024: 928004eb mov x11, #0xffffffffffffffd8 // #-40
00000028: d280020a mov x10, #0x10 // #16
0000002c: f82b6b2a str x10, [x25,x11]
// start of xadd mapping:
00000030: 928004ea mov x10, #0xffffffffffffffd8 // #-40
00000034: 8b19014a add x10, x10, x25
00000038: f9800151 prfm pstl1strm, [x10]
0000003c: c85f7d4b ldxr x11, [x10]
00000040: 8b07016b add x11, x11, x7
00000044: c80b7d4b stxr w11, x11, [x10]
00000048: 35ffffab cbnz w11, 0x0000003c
// end of xadd mapping:
[...]
Tested on Cavium ThunderX ARMv8, test suite results after the patch:
No JIT: [ 3751.855362] test_bpf: Summary: 311 PASSED, 0 FAILED, [0/303 JIT'ed]
With JIT: [ 3573.759527] test_bpf: Summary: 311 PASSED, 0 FAILED, [303/303 JIT'ed]
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-05-01 00:57:20 +00:00
|
|
|
jmp_offset = -3;
|
|
|
|
check_imm19(jmp_offset);
|
bpf, arm64: use separate register for state in stxr
Will reported that in BPF_XADD we must use a different register in stxr
instruction for the status flag due to otherwise CONSTRAINED UNPREDICTABLE
behavior per architecture. Reference manual says [1]:
If s == t, then one of the following behaviors must occur:
* The instruction is UNDEFINED.
* The instruction executes as a NOP.
* The instruction performs the store to the specified address, but
the value stored is UNKNOWN.
Thus, use a different temporary register for the status flag to fix it.
Disassembly extract from test 226/STX_XADD_DW from test_bpf.ko:
[...]
0000003c: c85f7d4b ldxr x11, [x10]
00000040: 8b07016b add x11, x11, x7
00000044: c80c7d4b stxr w12, x11, [x10]
00000048: 35ffffac cbnz w12, 0x0000003c
[...]
[1] https://static.docs.arm.com/ddi0487/b/DDI0487B_a_armv8_arm.pdf, p.6132
Fixes: 85f68fe89832 ("bpf, arm64: implement jiting of BPF_XADD")
Reported-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-06-07 11:45:37 +00:00
|
|
|
emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
|
bpf, arm64: implement jiting of BPF_XADD
This work adds BPF_XADD for BPF_W/BPF_DW to the arm64 JIT and therefore
completes JITing of all BPF instructions, meaning we can thus also remove
the 'notyet' label and do not need to fall back to the interpreter when
BPF_XADD is used in a program!
This now also brings arm64 JIT in line with x86_64, s390x, ppc64, sparc64,
where all current eBPF features are supported.
BPF_W example from test_bpf:
.u.insns_int = {
BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
BPF_ST_MEM(BPF_W, R10, -40, 0x10),
BPF_STX_XADD(BPF_W, R10, R0, -40),
BPF_LDX_MEM(BPF_W, R0, R10, -40),
BPF_EXIT_INSN(),
},
[...]
00000020: 52800247 mov w7, #0x12 // #18
00000024: 928004eb mov x11, #0xffffffffffffffd8 // #-40
00000028: d280020a mov x10, #0x10 // #16
0000002c: b82b6b2a str w10, [x25,x11]
// start of xadd mapping:
00000030: 928004ea mov x10, #0xffffffffffffffd8 // #-40
00000034: 8b19014a add x10, x10, x25
00000038: f9800151 prfm pstl1strm, [x10]
0000003c: 885f7d4b ldxr w11, [x10]
00000040: 0b07016b add w11, w11, w7
00000044: 880b7d4b stxr w11, w11, [x10]
00000048: 35ffffab cbnz w11, 0x0000003c
// end of xadd mapping:
[...]
BPF_DW example from test_bpf:
.u.insns_int = {
BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
BPF_ST_MEM(BPF_DW, R10, -40, 0x10),
BPF_STX_XADD(BPF_DW, R10, R0, -40),
BPF_LDX_MEM(BPF_DW, R0, R10, -40),
BPF_EXIT_INSN(),
},
[...]
00000020: 52800247 mov w7, #0x12 // #18
00000024: 928004eb mov x11, #0xffffffffffffffd8 // #-40
00000028: d280020a mov x10, #0x10 // #16
0000002c: f82b6b2a str x10, [x25,x11]
// start of xadd mapping:
00000030: 928004ea mov x10, #0xffffffffffffffd8 // #-40
00000034: 8b19014a add x10, x10, x25
00000038: f9800151 prfm pstl1strm, [x10]
0000003c: c85f7d4b ldxr x11, [x10]
00000040: 8b07016b add x11, x11, x7
00000044: c80b7d4b stxr w11, x11, [x10]
00000048: 35ffffab cbnz w11, 0x0000003c
// end of xadd mapping:
[...]
Tested on Cavium ThunderX ARMv8, test suite results after the patch:
No JIT: [ 3751.855362] test_bpf: Summary: 311 PASSED, 0 FAILED, [0/303 JIT'ed]
With JIT: [ 3573.759527] test_bpf: Summary: 311 PASSED, 0 FAILED, [303/303 JIT'ed]
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-05-01 00:57:20 +00:00
|
|
|
break;
|
2014-08-27 04:15:30 +00:00
|
|
|
|
|
|
|
/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
|
|
|
|
case BPF_LD | BPF_ABS | BPF_W:
|
|
|
|
case BPF_LD | BPF_ABS | BPF_H:
|
|
|
|
case BPF_LD | BPF_ABS | BPF_B:
|
|
|
|
/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
|
|
|
|
case BPF_LD | BPF_IND | BPF_W:
|
|
|
|
case BPF_LD | BPF_IND | BPF_H:
|
|
|
|
case BPF_LD | BPF_IND | BPF_B:
|
|
|
|
{
|
|
|
|
const u8 r0 = bpf2a64[BPF_REG_0]; /* r0 = return value */
|
|
|
|
const u8 r6 = bpf2a64[BPF_REG_6]; /* r6 = pointer to sk_buff */
|
|
|
|
const u8 fp = bpf2a64[BPF_REG_FP];
|
|
|
|
const u8 r1 = bpf2a64[BPF_REG_1]; /* r1: struct sk_buff *skb */
|
|
|
|
const u8 r2 = bpf2a64[BPF_REG_2]; /* r2: int k */
|
|
|
|
const u8 r3 = bpf2a64[BPF_REG_3]; /* r3: unsigned int size */
|
|
|
|
const u8 r4 = bpf2a64[BPF_REG_4]; /* r4: void *buffer */
|
|
|
|
const u8 r5 = bpf2a64[BPF_REG_5]; /* r5: void *(*func)(...) */
|
|
|
|
int size;
|
|
|
|
|
|
|
|
emit(A64_MOV(1, r1, r6), ctx);
|
|
|
|
emit_a64_mov_i(0, r2, imm, ctx);
|
|
|
|
if (BPF_MODE(code) == BPF_IND)
|
|
|
|
emit(A64_ADD(0, r2, r2, src), ctx);
|
|
|
|
switch (BPF_SIZE(code)) {
|
|
|
|
case BPF_W:
|
|
|
|
size = 4;
|
|
|
|
break;
|
|
|
|
case BPF_H:
|
|
|
|
size = 2;
|
|
|
|
break;
|
|
|
|
case BPF_B:
|
|
|
|
size = 1;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
emit_a64_mov_i64(r3, size, ctx);
|
2015-11-18 08:56:02 +00:00
|
|
|
emit(A64_SUB_I(1, r4, fp, STACK_SIZE), ctx);
|
2014-08-27 04:15:30 +00:00
|
|
|
emit_a64_mov_i64(r5, (unsigned long)bpf_load_pointer, ctx);
|
|
|
|
emit(A64_BLR(r5), ctx);
|
|
|
|
emit(A64_MOV(1, r0, A64_R(0)), ctx);
|
|
|
|
|
|
|
|
jmp_offset = epilogue_offset(ctx);
|
|
|
|
check_imm19(jmp_offset);
|
|
|
|
emit(A64_CBZ(1, r0, jmp_offset), ctx);
|
|
|
|
emit(A64_MOV(1, r5, r0), ctx);
|
|
|
|
switch (BPF_SIZE(code)) {
|
|
|
|
case BPF_W:
|
|
|
|
emit(A64_LDR32(r0, r5, A64_ZR), ctx);
|
|
|
|
#ifndef CONFIG_CPU_BIG_ENDIAN
|
|
|
|
emit(A64_REV32(0, r0, r0), ctx);
|
|
|
|
#endif
|
|
|
|
break;
|
|
|
|
case BPF_H:
|
|
|
|
emit(A64_LDRH(r0, r5, A64_ZR), ctx);
|
|
|
|
#ifndef CONFIG_CPU_BIG_ENDIAN
|
|
|
|
emit(A64_REV16(0, r0, r0), ctx);
|
|
|
|
#endif
|
|
|
|
break;
|
|
|
|
case BPF_B:
|
|
|
|
emit(A64_LDRB(r0, r5, A64_ZR), ctx);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
pr_err_once("unknown opcode %02x\n", code);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int build_body(struct jit_ctx *ctx)
|
|
|
|
{
|
|
|
|
const struct bpf_prog *prog = ctx->prog;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < prog->len; i++) {
|
|
|
|
const struct bpf_insn *insn = &prog->insnsi[i];
|
|
|
|
int ret;
|
|
|
|
|
2015-06-25 12:47:39 +00:00
|
|
|
ret = build_insn(insn, ctx);
|
2014-09-16 20:29:23 +00:00
|
|
|
if (ret > 0) {
|
|
|
|
i++;
|
bpf, arm64: fix jit branch offset related to ldimm64
When the instruction right before the branch destination is
a 64 bit load immediate, we currently calculate the wrong
jump offset in the ctx->offset[] array as we only account
one instruction slot for the 64 bit load immediate although
it uses two BPF instructions. Fix it up by setting the offset
into the right slot after we incremented the index.
Before (ldimm64 test 1):
[...]
00000020: 52800007 mov w7, #0x0 // #0
00000024: d2800060 mov x0, #0x3 // #3
00000028: d2800041 mov x1, #0x2 // #2
0000002c: eb01001f cmp x0, x1
00000030: 54ffff82 b.cs 0x00000020
00000034: d29fffe7 mov x7, #0xffff // #65535
00000038: f2bfffe7 movk x7, #0xffff, lsl #16
0000003c: f2dfffe7 movk x7, #0xffff, lsl #32
00000040: f2ffffe7 movk x7, #0xffff, lsl #48
00000044: d29dddc7 mov x7, #0xeeee // #61166
00000048: f2bdddc7 movk x7, #0xeeee, lsl #16
0000004c: f2ddddc7 movk x7, #0xeeee, lsl #32
00000050: f2fdddc7 movk x7, #0xeeee, lsl #48
[...]
After (ldimm64 test 1):
[...]
00000020: 52800007 mov w7, #0x0 // #0
00000024: d2800060 mov x0, #0x3 // #3
00000028: d2800041 mov x1, #0x2 // #2
0000002c: eb01001f cmp x0, x1
00000030: 540000a2 b.cs 0x00000044
00000034: d29fffe7 mov x7, #0xffff // #65535
00000038: f2bfffe7 movk x7, #0xffff, lsl #16
0000003c: f2dfffe7 movk x7, #0xffff, lsl #32
00000040: f2ffffe7 movk x7, #0xffff, lsl #48
00000044: d29dddc7 mov x7, #0xeeee // #61166
00000048: f2bdddc7 movk x7, #0xeeee, lsl #16
0000004c: f2ddddc7 movk x7, #0xeeee, lsl #32
00000050: f2fdddc7 movk x7, #0xeeee, lsl #48
[...]
Also, add a couple of test cases to make sure JITs pass
this test. Tested on Cavium ThunderX ARMv8. The added
test cases all pass after the fix.
Fixes: 8eee539ddea0 ("arm64: bpf: fix out-of-bounds read in bpf2a64_offset()")
Reported-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Cc: Xi Wang <xi.wang@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-05-02 18:34:54 +00:00
|
|
|
if (ctx->image == NULL)
|
|
|
|
ctx->offset[i] = ctx->idx;
|
2014-09-16 20:29:23 +00:00
|
|
|
continue;
|
|
|
|
}
|
bpf, arm64: fix jit branch offset related to ldimm64
When the instruction right before the branch destination is
a 64 bit load immediate, we currently calculate the wrong
jump offset in the ctx->offset[] array as we only account
one instruction slot for the 64 bit load immediate although
it uses two BPF instructions. Fix it up by setting the offset
into the right slot after we incremented the index.
Before (ldimm64 test 1):
[...]
00000020: 52800007 mov w7, #0x0 // #0
00000024: d2800060 mov x0, #0x3 // #3
00000028: d2800041 mov x1, #0x2 // #2
0000002c: eb01001f cmp x0, x1
00000030: 54ffff82 b.cs 0x00000020
00000034: d29fffe7 mov x7, #0xffff // #65535
00000038: f2bfffe7 movk x7, #0xffff, lsl #16
0000003c: f2dfffe7 movk x7, #0xffff, lsl #32
00000040: f2ffffe7 movk x7, #0xffff, lsl #48
00000044: d29dddc7 mov x7, #0xeeee // #61166
00000048: f2bdddc7 movk x7, #0xeeee, lsl #16
0000004c: f2ddddc7 movk x7, #0xeeee, lsl #32
00000050: f2fdddc7 movk x7, #0xeeee, lsl #48
[...]
After (ldimm64 test 1):
[...]
00000020: 52800007 mov w7, #0x0 // #0
00000024: d2800060 mov x0, #0x3 // #3
00000028: d2800041 mov x1, #0x2 // #2
0000002c: eb01001f cmp x0, x1
00000030: 540000a2 b.cs 0x00000044
00000034: d29fffe7 mov x7, #0xffff // #65535
00000038: f2bfffe7 movk x7, #0xffff, lsl #16
0000003c: f2dfffe7 movk x7, #0xffff, lsl #32
00000040: f2ffffe7 movk x7, #0xffff, lsl #48
00000044: d29dddc7 mov x7, #0xeeee // #61166
00000048: f2bdddc7 movk x7, #0xeeee, lsl #16
0000004c: f2ddddc7 movk x7, #0xeeee, lsl #32
00000050: f2fdddc7 movk x7, #0xeeee, lsl #48
[...]
Also, add a couple of test cases to make sure JITs pass
this test. Tested on Cavium ThunderX ARMv8. The added
test cases all pass after the fix.
Fixes: 8eee539ddea0 ("arm64: bpf: fix out-of-bounds read in bpf2a64_offset()")
Reported-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Cc: Xi Wang <xi.wang@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-05-02 18:34:54 +00:00
|
|
|
if (ctx->image == NULL)
|
|
|
|
ctx->offset[i] = ctx->idx;
|
2014-08-27 04:15:30 +00:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-01-14 07:33:22 +00:00
|
|
|
static int validate_code(struct jit_ctx *ctx)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < ctx->idx; i++) {
|
|
|
|
u32 a64_insn = le32_to_cpu(ctx->image[i]);
|
|
|
|
|
|
|
|
if (a64_insn == AARCH64_BREAK_FAULT)
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-08-27 04:15:30 +00:00
|
|
|
/*
 * Pointer-typed convenience wrapper: hands the [start, end) address pair
 * to flush_icache_range() as unsigned long values.
 */
static inline void bpf_flush_icache(void *start, void *end)
{
	unsigned long from = (unsigned long)start;
	unsigned long to = (unsigned long)end;

	flush_icache_range(from, to);
}
|
|
|
|
|
2016-05-13 17:08:31 +00:00
|
|
|
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
|
2014-08-27 04:15:30 +00:00
|
|
|
{
|
2016-05-13 17:08:34 +00:00
|
|
|
struct bpf_prog *tmp, *orig_prog = prog;
|
2014-09-16 07:48:50 +00:00
|
|
|
struct bpf_binary_header *header;
|
2016-05-13 17:08:34 +00:00
|
|
|
bool tmp_blinded = false;
|
2014-08-27 04:15:30 +00:00
|
|
|
struct jit_ctx ctx;
|
|
|
|
int image_size;
|
2014-09-16 07:48:50 +00:00
|
|
|
u8 *image_ptr;
|
2014-08-27 04:15:30 +00:00
|
|
|
|
|
|
|
if (!bpf_jit_enable)
|
2016-05-13 17:08:34 +00:00
|
|
|
return orig_prog;
|
|
|
|
|
|
|
|
tmp = bpf_jit_blind_constants(prog);
|
|
|
|
/* If blinding was requested and we failed during blinding,
|
|
|
|
* we must fall back to the interpreter.
|
|
|
|
*/
|
|
|
|
if (IS_ERR(tmp))
|
|
|
|
return orig_prog;
|
|
|
|
if (tmp != prog) {
|
|
|
|
tmp_blinded = true;
|
|
|
|
prog = tmp;
|
|
|
|
}
|
2014-08-27 04:15:30 +00:00
|
|
|
|
|
|
|
memset(&ctx, 0, sizeof(ctx));
|
|
|
|
ctx.prog = prog;
|
|
|
|
|
|
|
|
ctx.offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
|
2016-05-13 17:08:34 +00:00
|
|
|
if (ctx.offset == NULL) {
|
|
|
|
prog = orig_prog;
|
|
|
|
goto out;
|
|
|
|
}
|
2014-08-27 04:15:30 +00:00
|
|
|
|
|
|
|
/* 1. Initial fake pass to compute ctx->idx. */
|
|
|
|
|
2016-05-16 23:36:26 +00:00
|
|
|
/* Fake pass to fill in ctx->offset. */
|
2016-05-13 17:08:34 +00:00
|
|
|
if (build_body(&ctx)) {
|
|
|
|
prog = orig_prog;
|
|
|
|
goto out_off;
|
|
|
|
}
|
2014-08-27 04:15:30 +00:00
|
|
|
|
arm64: bpf: implement bpf_tail_call() helper
Add support for JMP_CALL_X (tail call) introduced by commit 04fd61ab36ec
("bpf: allow bpf programs to tail-call other bpf programs").
bpf_tail_call() arguments:
ctx - context pointer passed to next program
array - pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
index - index inside array that selects specific program to run
In this implementation arm64 JIT jumps into callee program after prologue,
so callee program reuses the same stack. For tail_call_cnt, we use the
callee-saved R26 (which was already saved/restored but previously unused
by JIT).
With this patch a tail call generates the following code on arm64:
if (index >= array->map.max_entries)
goto out;
34: mov x10, #0x10 // #16
38: ldr w10, [x1,x10]
3c: cmp w2, w10
40: b.ge 0x0000000000000074
if (tail_call_cnt > MAX_TAIL_CALL_CNT)
goto out;
tail_call_cnt++;
44: mov x10, #0x20 // #32
48: cmp x26, x10
4c: b.gt 0x0000000000000074
50: add x26, x26, #0x1
prog = array->ptrs[index];
if (prog == NULL)
goto out;
54: mov x10, #0x68 // #104
58: ldr x10, [x1,x10]
5c: ldr x11, [x10,x2]
60: cbz x11, 0x0000000000000074
goto *(prog->bpf_func + prologue_size);
64: mov x10, #0x20 // #32
68: ldr x10, [x11,x10]
6c: add x10, x10, #0x20
70: br x10
74:
Signed-off-by: Zi Shen Lim <zlim.lnx@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-06-09 04:18:48 +00:00
|
|
|
if (build_prologue(&ctx)) {
|
|
|
|
prog = orig_prog;
|
|
|
|
goto out_off;
|
|
|
|
}
|
2014-12-03 08:38:01 +00:00
|
|
|
|
|
|
|
ctx.epilogue_offset = ctx.idx;
|
2014-08-27 04:15:30 +00:00
|
|
|
build_epilogue(&ctx);
|
|
|
|
|
|
|
|
/* Now we know the actual image size. */
|
|
|
|
image_size = sizeof(u32) * ctx.idx;
|
2014-09-16 07:48:50 +00:00
|
|
|
header = bpf_jit_binary_alloc(image_size, &image_ptr,
|
|
|
|
sizeof(u32), jit_fill_hole);
|
2016-05-13 17:08:34 +00:00
|
|
|
if (header == NULL) {
|
|
|
|
prog = orig_prog;
|
|
|
|
goto out_off;
|
|
|
|
}
|
2014-08-27 04:15:30 +00:00
|
|
|
|
|
|
|
/* 2. Now, the actual pass. */
|
|
|
|
|
2014-09-16 07:48:50 +00:00
|
|
|
ctx.image = (u32 *)image_ptr;
|
2014-08-27 04:15:30 +00:00
|
|
|
ctx.idx = 0;
|
2014-09-16 07:48:50 +00:00
|
|
|
|
2014-08-27 04:15:30 +00:00
|
|
|
build_prologue(&ctx);
|
|
|
|
|
2014-09-11 09:36:48 +00:00
|
|
|
if (build_body(&ctx)) {
|
2014-09-16 07:48:50 +00:00
|
|
|
bpf_jit_binary_free(header);
|
2016-05-13 17:08:34 +00:00
|
|
|
prog = orig_prog;
|
|
|
|
goto out_off;
|
2014-09-11 09:36:48 +00:00
|
|
|
}
|
2014-08-27 04:15:30 +00:00
|
|
|
|
|
|
|
build_epilogue(&ctx);
|
|
|
|
|
2016-01-14 07:33:22 +00:00
|
|
|
/* 3. Extra pass to validate JITed code. */
|
|
|
|
if (validate_code(&ctx)) {
|
|
|
|
bpf_jit_binary_free(header);
|
2016-05-13 17:08:34 +00:00
|
|
|
prog = orig_prog;
|
|
|
|
goto out_off;
|
2016-01-14 07:33:22 +00:00
|
|
|
}
|
|
|
|
|
2014-08-27 04:15:30 +00:00
|
|
|
/* And we're done. */
|
|
|
|
if (bpf_jit_enable > 1)
|
|
|
|
bpf_jit_dump(prog->len, image_size, 2, ctx.image);
|
|
|
|
|
2015-11-14 00:16:18 +00:00
|
|
|
bpf_flush_icache(header, ctx.image + ctx.idx);
|
2014-09-16 07:48:50 +00:00
|
|
|
|
2017-02-21 15:09:34 +00:00
|
|
|
bpf_jit_binary_lock_ro(header);
|
2014-08-27 04:15:30 +00:00
|
|
|
prog->bpf_func = (void *)ctx.image;
|
2015-09-29 23:41:50 +00:00
|
|
|
prog->jited = 1;
|
2016-05-13 17:08:34 +00:00
|
|
|
|
|
|
|
out_off:
|
2014-08-27 04:15:30 +00:00
|
|
|
kfree(ctx.offset);
|
2016-05-13 17:08:34 +00:00
|
|
|
out:
|
|
|
|
if (tmp_blinded)
|
|
|
|
bpf_jit_prog_release_other(prog, prog == orig_prog ?
|
|
|
|
tmp : orig_prog);
|
2016-05-13 17:08:31 +00:00
|
|
|
return prog;
|
2014-08-27 04:15:30 +00:00
|
|
|
}
|