From d449ceb11b3884770c06e71cf15edc9f3b4c9b05 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:31:31 +0100 Subject: ARM: net: bpf: enumerate the JIT scratch stack layout Enumerate the contents of the JIT scratch stack layout used for storing some of the JITs 64-bit registers, tail call counter and AX register. Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 59 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 17 deletions(-) (limited to 'arch/arm/net') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index f6a62ae44a65..f2e6ffe57788 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -72,7 +72,38 @@ #define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR) #define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC) -#define STACK_OFFSET(k) (k) +enum { + /* Stack layout - these are offsets from (top of stack - 4) */ + BPF_R2_HI, + BPF_R2_LO, + BPF_R3_HI, + BPF_R3_LO, + BPF_R4_HI, + BPF_R4_LO, + BPF_R5_HI, + BPF_R5_LO, + BPF_R7_HI, + BPF_R7_LO, + BPF_R8_HI, + BPF_R8_LO, + BPF_R9_HI, + BPF_R9_LO, + BPF_FP_HI, + BPF_FP_LO, + BPF_TC_HI, + BPF_TC_LO, + BPF_AX_HI, + BPF_AX_LO, + /* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4, + * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9, + * BPF_REG_FP and Tail call counts. 
+ */ + BPF_JIT_SCRATCH_REGS, +}; + +#define STACK_OFFSET(k) ((k) * 4) +#define SCRATCH_SIZE (BPF_JIT_SCRATCH_REGS * 4) + #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */ #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */ #define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Tail Call Count */ @@ -100,29 +131,29 @@ static const u8 bpf2a32[][2] = { /* arguments from eBPF program to in-kernel function */ [BPF_REG_1] = {ARM_R3, ARM_R2}, /* Stored on stack scratch space */ - [BPF_REG_2] = {STACK_OFFSET(0), STACK_OFFSET(4)}, - [BPF_REG_3] = {STACK_OFFSET(8), STACK_OFFSET(12)}, - [BPF_REG_4] = {STACK_OFFSET(16), STACK_OFFSET(20)}, - [BPF_REG_5] = {STACK_OFFSET(24), STACK_OFFSET(28)}, + [BPF_REG_2] = {STACK_OFFSET(BPF_R2_HI), STACK_OFFSET(BPF_R2_LO)}, + [BPF_REG_3] = {STACK_OFFSET(BPF_R3_HI), STACK_OFFSET(BPF_R3_LO)}, + [BPF_REG_4] = {STACK_OFFSET(BPF_R4_HI), STACK_OFFSET(BPF_R4_LO)}, + [BPF_REG_5] = {STACK_OFFSET(BPF_R5_HI), STACK_OFFSET(BPF_R5_LO)}, /* callee saved registers that in-kernel function will preserve */ [BPF_REG_6] = {ARM_R5, ARM_R4}, /* Stored on stack scratch space */ - [BPF_REG_7] = {STACK_OFFSET(32), STACK_OFFSET(36)}, - [BPF_REG_8] = {STACK_OFFSET(40), STACK_OFFSET(44)}, - [BPF_REG_9] = {STACK_OFFSET(48), STACK_OFFSET(52)}, + [BPF_REG_7] = {STACK_OFFSET(BPF_R7_HI), STACK_OFFSET(BPF_R7_LO)}, + [BPF_REG_8] = {STACK_OFFSET(BPF_R8_HI), STACK_OFFSET(BPF_R8_LO)}, + [BPF_REG_9] = {STACK_OFFSET(BPF_R9_HI), STACK_OFFSET(BPF_R9_LO)}, /* Read only Frame Pointer to access Stack */ - [BPF_REG_FP] = {STACK_OFFSET(56), STACK_OFFSET(60)}, + [BPF_REG_FP] = {STACK_OFFSET(BPF_FP_HI), STACK_OFFSET(BPF_FP_LO)}, /* Temporary Register for internal BPF JIT, can be used * for constant blindings and others. */ [TMP_REG_1] = {ARM_R7, ARM_R6}, [TMP_REG_2] = {ARM_R10, ARM_R8}, /* Tail call count. Stored on stack scratch space. 
*/ - [TCALL_CNT] = {STACK_OFFSET(64), STACK_OFFSET(68)}, + [TCALL_CNT] = {STACK_OFFSET(BPF_TC_HI), STACK_OFFSET(BPF_TC_LO)}, /* temporary register for blinding constants. * Stored on stack scratch space. */ - [BPF_REG_AX] = {STACK_OFFSET(72), STACK_OFFSET(76)}, + [BPF_REG_AX] = {STACK_OFFSET(BPF_AX_HI), STACK_OFFSET(BPF_AX_LO)}, }; #define dst_lo dst[1] @@ -227,12 +258,6 @@ static void jit_fill_hole(void *area, unsigned int size) #define STACK_ALIGNMENT 4 #endif -/* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4, - * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9, - * BPF_REG_FP and Tail call counts. - */ -#define SCRATCH_SIZE 80 - /* total stack size used in JITed code */ #define _STACK_SIZE (ctx->prog->aux->stack_depth + SCRATCH_SIZE) #define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT) -- cgit v1.2.3 From a8ef95a034233190b1dd73ff03472ff0f7f4fbdf Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:31:36 +0100 Subject: ARM: net: bpf: provide load/store ops with negative immediates Provide a set of load/store opcode generators that work with negative immediates as well as positive ones. 
Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 28 ++++++++++++++++++++++++++++ arch/arm/net/bpf_jit_32.h | 35 +++++++++++++---------------------- 2 files changed, 41 insertions(+), 22 deletions(-) (limited to 'arch/arm/net') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index f2e6ffe57788..c81da1a50834 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -239,6 +239,34 @@ static int16_t imm8m(u32 x) return -1; } +static u32 arm_bpf_ldst_imm12(u32 op, u8 rt, u8 rn, s16 imm12) +{ + op |= rt << 12 | rn << 16; + if (imm12 >= 0) + op |= ARM_INST_LDST__U; + else + imm12 = -imm12; + return op | (imm12 & 0xfff); +} + +static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8) +{ + op |= rt << 12 | rn << 16; + if (imm8 >= 0) + op |= ARM_INST_LDST__U; + else + imm8 = -imm8; + return op | (imm8 & 0xf0) << 4 | (imm8 & 0x0f); +} + +#define ARM_LDR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_LDR_I, rt, rn, off) +#define ARM_LDRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_LDRB_I, rt, rn, off) +#define ARM_LDRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRH_I, rt, rn, off) + +#define ARM_STR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STR_I, rt, rn, off) +#define ARM_STRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STRB_I, rt, rn, off) +#define ARM_STRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_STRH_I, rt, rn, off) + /* * Initializes the JIT space with undefined instructions. 
*/ diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h index d5cf5f6208aa..c55bc39d3e22 100644 --- a/arch/arm/net/bpf_jit_32.h +++ b/arch/arm/net/bpf_jit_32.h @@ -77,11 +77,12 @@ #define ARM_INST_EOR_R 0x00200000 #define ARM_INST_EOR_I 0x02200000 -#define ARM_INST_LDRB_I 0x05d00000 +#define ARM_INST_LDST__U 0x00800000 +#define ARM_INST_LDRB_I 0x05500000 #define ARM_INST_LDRB_R 0x07d00000 -#define ARM_INST_LDRH_I 0x01d000b0 +#define ARM_INST_LDRH_I 0x015000b0 #define ARM_INST_LDRH_R 0x019000b0 -#define ARM_INST_LDR_I 0x05900000 +#define ARM_INST_LDR_I 0x05100000 #define ARM_INST_LDR_R 0x07900000 #define ARM_INST_LDM 0x08900000 @@ -124,9 +125,9 @@ #define ARM_INST_SBC_R 0x00c00000 #define ARM_INST_SBCS_R 0x00d00000 -#define ARM_INST_STR_I 0x05800000 -#define ARM_INST_STRB_I 0x05c00000 -#define ARM_INST_STRH_I 0x01c000b0 +#define ARM_INST_STR_I 0x05000000 +#define ARM_INST_STRB_I 0x05400000 +#define ARM_INST_STRH_I 0x014000b0 #define ARM_INST_TST_R 0x01100000 #define ARM_INST_TST_I 0x03100000 @@ -183,17 +184,14 @@ #define ARM_EOR_R(rd, rn, rm) _AL3_R(ARM_INST_EOR, rd, rn, rm) #define ARM_EOR_I(rd, rn, imm) _AL3_I(ARM_INST_EOR, rd, rn, imm) -#define ARM_LDR_I(rt, rn, off) (ARM_INST_LDR_I | (rt) << 12 | (rn) << 16 \ - | ((off) & 0xfff)) -#define ARM_LDR_R(rt, rn, rm) (ARM_INST_LDR_R | (rt) << 12 | (rn) << 16 \ +#define ARM_LDR_R(rt, rn, rm) (ARM_INST_LDR_R | ARM_INST_LDST__U \ + | (rt) << 12 | (rn) << 16 \ | (rm)) -#define ARM_LDRB_I(rt, rn, off) (ARM_INST_LDRB_I | (rt) << 12 | (rn) << 16 \ - | (off)) -#define ARM_LDRB_R(rt, rn, rm) (ARM_INST_LDRB_R | (rt) << 12 | (rn) << 16 \ +#define ARM_LDRB_R(rt, rn, rm) (ARM_INST_LDRB_R | ARM_INST_LDST__U \ + | (rt) << 12 | (rn) << 16 \ | (rm)) -#define ARM_LDRH_I(rt, rn, off) (ARM_INST_LDRH_I | (rt) << 12 | (rn) << 16 \ - | (((off) & 0xf0) << 4) | ((off) & 0xf)) -#define ARM_LDRH_R(rt, rn, rm) (ARM_INST_LDRH_R | (rt) << 12 | (rn) << 16 \ +#define ARM_LDRH_R(rt, rn, rm) (ARM_INST_LDRH_R | ARM_INST_LDST__U \ + | (rt) 
<< 12 | (rn) << 16 \ | (rm)) #define ARM_LDM(rn, regs) (ARM_INST_LDM | (rn) << 16 | (regs)) @@ -254,13 +252,6 @@ #define ARM_SUBS_I(rd, rn, imm) _AL3_I(ARM_INST_SUBS, rd, rn, imm) #define ARM_SBC_I(rd, rn, imm) _AL3_I(ARM_INST_SBC, rd, rn, imm) -#define ARM_STR_I(rt, rn, off) (ARM_INST_STR_I | (rt) << 12 | (rn) << 16 \ - | ((off) & 0xfff)) -#define ARM_STRH_I(rt, rn, off) (ARM_INST_STRH_I | (rt) << 12 | (rn) << 16 \ - | (((off) & 0xf0) << 4) | ((off) & 0xf)) -#define ARM_STRB_I(rt, rn, off) (ARM_INST_STRB_I | (rt) << 12 | (rn) << 16 \ - | (((off) & 0xf0) << 4) | ((off) & 0xf)) - #define ARM_TST_R(rn, rm) _AL3_R(ARM_INST_TST, 0, rn, rm) #define ARM_TST_I(rn, imm) _AL3_I(ARM_INST_TST, 0, rn, imm) -- cgit v1.2.3 From 1c35ba122d4a4eb32c3f8d63a445c1ebfd66d7bc Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:31:41 +0100 Subject: ARM: net: bpf: use negative numbers for stacked registers Use negative numbers for eBPF registers that live on the stack. Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 200 +++++++++++++++++++++++----------------------- 1 file changed, 102 insertions(+), 98 deletions(-) (limited to 'arch/arm/net') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index c81da1a50834..69bf7ab18bf9 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -101,7 +101,11 @@ enum { BPF_JIT_SCRATCH_REGS, }; -#define STACK_OFFSET(k) ((k) * 4) +/* + * Negative "register" values indicate the register is stored on the stack + * and are the offset from the top of the eBPF JIT scratch space. + */ +#define STACK_OFFSET(k) (-4 - (k) * 4) #define SCRATCH_SIZE (BPF_JIT_SCRATCH_REGS * 4) #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */ @@ -125,7 +129,7 @@ enum { * scratch memory space and we have to build eBPF 64 bit register from those. 
* */ -static const u8 bpf2a32[][2] = { +static const s8 bpf2a32[][2] = { /* return value from in-kernel function, and exit value from eBPF */ [BPF_REG_0] = {ARM_R1, ARM_R0}, /* arguments from eBPF program to in-kernel function */ @@ -291,7 +295,7 @@ static void jit_fill_hole(void *area, unsigned int size) #define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT) /* Get the offset of eBPF REGISTERs stored on scratch space. */ -#define STACK_VAR(off) (STACK_SIZE - off) +#define STACK_VAR(off) (STACK_SIZE + (off)) #if __LINUX_ARM_ARCH__ < 7 @@ -408,7 +412,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) { - const u8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp = bpf2a32[TMP_REG_1]; #if __LINUX_ARM_ARCH__ == 7 if (elf_hwcap & HWCAP_IDIVA) { @@ -470,10 +474,10 @@ static inline bool is_on_stack(u8 bpf_reg) return false; } -static inline void emit_a32_mov_i(const u8 dst, const u32 val, +static inline void emit_a32_mov_i(const s8 dst, const u32 val, bool dstk, struct jit_ctx *ctx) { - const u8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp = bpf2a32[TMP_REG_1]; if (dstk) { emit_mov_i(tmp[1], val, ctx); @@ -484,7 +488,7 @@ static inline void emit_a32_mov_i(const u8 dst, const u32 val, } /* Sign extended move */ -static inline void emit_a32_mov_i64(const bool is64, const u8 dst[], +static inline void emit_a32_mov_i64(const bool is64, const s8 dst[], const u32 val, bool dstk, struct jit_ctx *ctx) { u32 hi = 0; @@ -574,12 +578,12 @@ static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64, /* ALU operation (32 bit) * dst = dst (op) src */ -static inline void emit_a32_alu_r(const u8 dst, const u8 src, +static inline void emit_a32_alu_r(const s8 dst, const s8 src, bool dstk, bool sstk, struct jit_ctx *ctx, const bool is64, const bool hi, const u8 op) { - const u8 *tmp = bpf2a32[TMP_REG_1]; - u8 rn = sstk ? tmp[1] : src; + const s8 *tmp = bpf2a32[TMP_REG_1]; + s8 rn = sstk ? 
tmp[1] : src; if (sstk) emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src)), ctx); @@ -595,8 +599,8 @@ static inline void emit_a32_alu_r(const u8 dst, const u8 src, } /* ALU operation (64 bit) */ -static inline void emit_a32_alu_r64(const bool is64, const u8 dst[], - const u8 src[], bool dstk, +static inline void emit_a32_alu_r64(const bool is64, const s8 dst[], + const s8 src[], bool dstk, bool sstk, struct jit_ctx *ctx, const u8 op) { emit_a32_alu_r(dst_lo, src_lo, dstk, sstk, ctx, is64, false, op); @@ -607,11 +611,11 @@ static inline void emit_a32_alu_r64(const bool is64, const u8 dst[], } /* dst = imm (4 bytes)*/ -static inline void emit_a32_mov_r(const u8 dst, const u8 src, +static inline void emit_a32_mov_r(const s8 dst, const s8 src, bool dstk, bool sstk, struct jit_ctx *ctx) { - const u8 *tmp = bpf2a32[TMP_REG_1]; - u8 rt = sstk ? tmp[0] : src; + const s8 *tmp = bpf2a32[TMP_REG_1]; + s8 rt = sstk ? tmp[0] : src; if (sstk) emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(src)), ctx); @@ -622,8 +626,8 @@ static inline void emit_a32_mov_r(const u8 dst, const u8 src, } /* dst = src */ -static inline void emit_a32_mov_r64(const bool is64, const u8 dst[], - const u8 src[], bool dstk, +static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], + const s8 src[], bool dstk, bool sstk, struct jit_ctx *ctx) { emit_a32_mov_r(dst_lo, src_lo, dstk, sstk, ctx); if (is64) { @@ -636,10 +640,10 @@ static inline void emit_a32_mov_r64(const bool is64, const u8 dst[], } /* Shift operations */ -static inline void emit_a32_alu_i(const u8 dst, const u32 val, bool dstk, +static inline void emit_a32_alu_i(const s8 dst, const u32 val, bool dstk, struct jit_ctx *ctx, const u8 op) { - const u8 *tmp = bpf2a32[TMP_REG_1]; - u8 rd = dstk ? tmp[0] : dst; + const s8 *tmp = bpf2a32[TMP_REG_1]; + s8 rd = dstk ? 
tmp[0] : dst; if (dstk) emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); @@ -662,11 +666,11 @@ static inline void emit_a32_alu_i(const u8 dst, const u32 val, bool dstk, } /* dst = ~dst (64 bit) */ -static inline void emit_a32_neg64(const u8 dst[], bool dstk, +static inline void emit_a32_neg64(const s8 dst[], bool dstk, struct jit_ctx *ctx){ - const u8 *tmp = bpf2a32[TMP_REG_1]; - u8 rd = dstk ? tmp[1] : dst[1]; - u8 rm = dstk ? tmp[0] : dst[0]; + const s8 *tmp = bpf2a32[TMP_REG_1]; + s8 rd = dstk ? tmp[1] : dst[1]; + s8 rm = dstk ? tmp[0] : dst[0]; /* Setup Operand */ if (dstk) { @@ -685,15 +689,15 @@ static inline void emit_a32_neg64(const u8 dst[], bool dstk, } /* dst = dst << src */ -static inline void emit_a32_lsh_r64(const u8 dst[], const u8 src[], bool dstk, +static inline void emit_a32_lsh_r64(const s8 dst[], const s8 src[], bool dstk, bool sstk, struct jit_ctx *ctx) { - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup Operands */ - u8 rt = sstk ? tmp2[1] : src_lo; - u8 rd = dstk ? tmp[1] : dst_lo; - u8 rm = dstk ? tmp[0] : dst_hi; + s8 rt = sstk ? tmp2[1] : src_lo; + s8 rd = dstk ? tmp[1] : dst_lo; + s8 rm = dstk ? tmp[0] : dst_hi; if (sstk) emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); @@ -720,14 +724,14 @@ static inline void emit_a32_lsh_r64(const u8 dst[], const u8 src[], bool dstk, } /* dst = dst >> src (signed)*/ -static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk, +static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[], bool dstk, bool sstk, struct jit_ctx *ctx) { - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup Operands */ - u8 rt = sstk ? tmp2[1] : src_lo; - u8 rd = dstk ? tmp[1] : dst_lo; - u8 rm = dstk ? tmp[0] : dst_hi; + s8 rt = sstk ? tmp2[1] : src_lo; + s8 rd = dstk ? 
tmp[1] : dst_lo; + s8 rm = dstk ? tmp[0] : dst_hi; if (sstk) emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); @@ -754,14 +758,14 @@ static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk, } /* dst = dst >> src */ -static inline void emit_a32_rsh_r64(const u8 dst[], const u8 src[], bool dstk, +static inline void emit_a32_rsh_r64(const s8 dst[], const s8 src[], bool dstk, bool sstk, struct jit_ctx *ctx) { - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup Operands */ - u8 rt = sstk ? tmp2[1] : src_lo; - u8 rd = dstk ? tmp[1] : dst_lo; - u8 rm = dstk ? tmp[0] : dst_hi; + s8 rt = sstk ? tmp2[1] : src_lo; + s8 rd = dstk ? tmp[1] : dst_lo; + s8 rm = dstk ? tmp[0] : dst_hi; if (sstk) emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); @@ -787,13 +791,13 @@ static inline void emit_a32_rsh_r64(const u8 dst[], const u8 src[], bool dstk, } /* dst = dst << val */ -static inline void emit_a32_lsh_i64(const u8 dst[], bool dstk, +static inline void emit_a32_lsh_i64(const s8 dst[], bool dstk, const u32 val, struct jit_ctx *ctx){ - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup operands */ - u8 rd = dstk ? tmp[1] : dst_lo; - u8 rm = dstk ? tmp[0] : dst_hi; + s8 rd = dstk ? tmp[1] : dst_lo; + s8 rm = dstk ? 
tmp[0] : dst_hi; if (dstk) { emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); @@ -820,13 +824,13 @@ static inline void emit_a32_lsh_i64(const u8 dst[], bool dstk, } /* dst = dst >> val */ -static inline void emit_a32_rsh_i64(const u8 dst[], bool dstk, +static inline void emit_a32_rsh_i64(const s8 dst[], bool dstk, const u32 val, struct jit_ctx *ctx) { - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup operands */ - u8 rd = dstk ? tmp[1] : dst_lo; - u8 rm = dstk ? tmp[0] : dst_hi; + s8 rd = dstk ? tmp[1] : dst_lo; + s8 rm = dstk ? tmp[0] : dst_hi; if (dstk) { emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); @@ -853,13 +857,13 @@ static inline void emit_a32_rsh_i64(const u8 dst[], bool dstk, } /* dst = dst >> val (signed) */ -static inline void emit_a32_arsh_i64(const u8 dst[], bool dstk, +static inline void emit_a32_arsh_i64(const s8 dst[], bool dstk, const u32 val, struct jit_ctx *ctx){ - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup operands */ - u8 rd = dstk ? tmp[1] : dst_lo; - u8 rm = dstk ? tmp[0] : dst_hi; + s8 rd = dstk ? tmp[1] : dst_lo; + s8 rm = dstk ? tmp[0] : dst_hi; if (dstk) { emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); @@ -885,15 +889,15 @@ static inline void emit_a32_arsh_i64(const u8 dst[], bool dstk, } } -static inline void emit_a32_mul_r64(const u8 dst[], const u8 src[], bool dstk, +static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], bool dstk, bool sstk, struct jit_ctx *ctx) { - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup operands for multiplication */ - u8 rd = dstk ? tmp[1] : dst_lo; - u8 rm = dstk ? tmp[0] : dst_hi; - u8 rt = sstk ? tmp2[1] : src_lo; - u8 rn = sstk ? 
tmp2[0] : src_hi; + s8 rd = dstk ? tmp[1] : dst_lo; + s8 rm = dstk ? tmp[0] : dst_hi; + s8 rt = sstk ? tmp2[1] : src_lo; + s8 rn = sstk ? tmp2[0] : src_hi; if (dstk) { emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); @@ -920,10 +924,10 @@ static inline void emit_a32_mul_r64(const u8 dst[], const u8 src[], bool dstk, } /* *(size *)(dst + off) = src */ -static inline void emit_str_r(const u8 dst, const u8 src, bool dstk, +static inline void emit_str_r(const s8 dst, const s8 src, bool dstk, const s32 off, struct jit_ctx *ctx, const u8 sz){ - const u8 *tmp = bpf2a32[TMP_REG_1]; - u8 rd = dstk ? tmp[1] : dst; + const s8 *tmp = bpf2a32[TMP_REG_1]; + s8 rd = dstk ? tmp[1] : dst; if (dstk) emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); @@ -949,11 +953,11 @@ static inline void emit_str_r(const u8 dst, const u8 src, bool dstk, } /* dst = *(size*)(src + off) */ -static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk, +static inline void emit_ldx_r(const s8 dst[], const s8 src, bool dstk, s32 off, struct jit_ctx *ctx, const u8 sz){ - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *rd = dstk ? tmp : dst; - u8 rm = src; + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *rd = dstk ? 
tmp : dst; + s8 rm = src; s32 off_max; if (sz == BPF_H) @@ -1034,11 +1038,11 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) { /* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */ - const u8 *r2 = bpf2a32[BPF_REG_2]; - const u8 *r3 = bpf2a32[BPF_REG_3]; - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; - const u8 *tcc = bpf2a32[TCALL_CNT]; + const s8 *r2 = bpf2a32[BPF_REG_2]; + const s8 *r3 = bpf2a32[BPF_REG_3]; + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tcc = bpf2a32[TCALL_CNT]; const int idx0 = ctx->idx; #define cur_offset (ctx->idx - idx0) #define jmp_offset (out_offset - (cur_offset) - 2) @@ -1112,7 +1116,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) static inline void emit_rev16(const u8 rd, const u8 rn, struct jit_ctx *ctx) { #if __LINUX_ARM_ARCH__ < 6 - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx); emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 8), ctx); @@ -1127,7 +1131,7 @@ static inline void emit_rev16(const u8 rd, const u8 rn, struct jit_ctx *ctx) static inline void emit_rev32(const u8 rd, const u8 rn, struct jit_ctx *ctx) { #if __LINUX_ARM_ARCH__ < 6 - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx); emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 24), ctx); @@ -1147,10 +1151,10 @@ static inline void emit_rev32(const u8 rd, const u8 rn, struct jit_ctx *ctx) } // push the scratch stack register on top of the stack -static inline void emit_push_r64(const u8 src[], const u8 shift, +static inline void emit_push_r64(const s8 src[], const u8 shift, struct jit_ctx *ctx) { - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; u16 reg_set = 0; emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(src[1]+shift)), ctx); @@ -1162,13 +1166,13 @@ static inline void emit_push_r64(const u8 src[], const u8 shift, static void 
build_prologue(struct jit_ctx *ctx) { - const u8 r0 = bpf2a32[BPF_REG_0][1]; - const u8 r2 = bpf2a32[BPF_REG_1][1]; - const u8 r3 = bpf2a32[BPF_REG_1][0]; - const u8 r4 = bpf2a32[BPF_REG_6][1]; - const u8 fplo = bpf2a32[BPF_REG_FP][1]; - const u8 fphi = bpf2a32[BPF_REG_FP][0]; - const u8 *tcc = bpf2a32[TCALL_CNT]; + const s8 r0 = bpf2a32[BPF_REG_0][1]; + const s8 r2 = bpf2a32[BPF_REG_1][1]; + const s8 r3 = bpf2a32[BPF_REG_1][0]; + const s8 r4 = bpf2a32[BPF_REG_6][1]; + const s8 fplo = bpf2a32[BPF_REG_FP][1]; + const s8 fphi = bpf2a32[BPF_REG_FP][0]; + const s8 *tcc = bpf2a32[TCALL_CNT]; /* Save callee saved registers. */ #ifdef CONFIG_FRAME_POINTER @@ -1231,17 +1235,17 @@ static void build_epilogue(struct jit_ctx *ctx) static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) { const u8 code = insn->code; - const u8 *dst = bpf2a32[insn->dst_reg]; - const u8 *src = bpf2a32[insn->src_reg]; - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *dst = bpf2a32[insn->dst_reg]; + const s8 *src = bpf2a32[insn->src_reg]; + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; const s16 off = insn->off; const s32 imm = insn->imm; const int i = insn - ctx->prog->insnsi; const bool is64 = BPF_CLASS(code) == BPF_ALU64; const bool dstk = is_on_stack(insn->dst_reg); const bool sstk = is_on_stack(insn->src_reg); - u8 rd, rt, rm, rn; + s8 rd, rt, rm, rn; s32 jmp_offset; #define check_imm(bits, imm) do { \ @@ -1672,12 +1676,12 @@ go_jmp: /* function call */ case BPF_JMP | BPF_CALL: { - const u8 *r0 = bpf2a32[BPF_REG_0]; - const u8 *r1 = bpf2a32[BPF_REG_1]; - const u8 *r2 = bpf2a32[BPF_REG_2]; - const u8 *r3 = bpf2a32[BPF_REG_3]; - const u8 *r4 = bpf2a32[BPF_REG_4]; - const u8 *r5 = bpf2a32[BPF_REG_5]; + const s8 *r0 = bpf2a32[BPF_REG_0]; + const s8 *r1 = bpf2a32[BPF_REG_1]; + const s8 *r2 = bpf2a32[BPF_REG_2]; + const s8 *r3 = bpf2a32[BPF_REG_3]; + const s8 *r4 = bpf2a32[BPF_REG_4]; + const s8 *r5 = 
bpf2a32[BPF_REG_5]; const u32 func = (u32)__bpf_call_base + (u32)imm; emit_a32_mov_r64(true, r0, r1, false, false, ctx); -- cgit v1.2.3 From 47b9c3bf416d80515901469f05aef2870b37c010 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:31:47 +0100 Subject: ARM: net: bpf: remove is_on_stack() and sstk/dstk The decision about whether a BPF register is on the stack or in a CPU register is detected at the top BPF insn processing level, and then percolated throughout the remainder of the code. Since we now use negative register values to represent stacked registers, we can detect where a BPF register is stored without resorting to carrying this additional metadata through all code paths. Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 336 ++++++++++++++++++++++------------------------ 1 file changed, 160 insertions(+), 176 deletions(-) (limited to 'arch/arm/net') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 69bf7ab18bf9..e81401aca2df 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -459,27 +459,18 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx); } -/* Checks whether BPF register is on scratch stack space or not. */ -static inline bool is_on_stack(u8 bpf_reg) +/* Is the translated BPF register on stack? 
*/ +static bool is_stacked(s8 reg) { - static u8 stack_regs[] = {BPF_REG_AX, BPF_REG_3, BPF_REG_4, BPF_REG_5, - BPF_REG_7, BPF_REG_8, BPF_REG_9, TCALL_CNT, - BPF_REG_2, BPF_REG_FP}; - int i, reg_len = sizeof(stack_regs); - - for (i = 0 ; i < reg_len ; i++) { - if (bpf_reg == stack_regs[i]) - return true; - } - return false; + return reg < 0; } static inline void emit_a32_mov_i(const s8 dst, const u32 val, - bool dstk, struct jit_ctx *ctx) + struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; - if (dstk) { + if (is_stacked(dst)) { emit_mov_i(tmp[1], val, ctx); emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(dst)), ctx); } else { @@ -489,14 +480,13 @@ static inline void emit_a32_mov_i(const s8 dst, const u32 val, /* Sign extended move */ static inline void emit_a32_mov_i64(const bool is64, const s8 dst[], - const u32 val, bool dstk, - struct jit_ctx *ctx) { + const u32 val, struct jit_ctx *ctx) { u32 hi = 0; if (is64 && (val & (1<<31))) hi = (u32)~0; - emit_a32_mov_i(dst_lo, val, dstk, ctx); - emit_a32_mov_i(dst_hi, hi, dstk, ctx); + emit_a32_mov_i(dst_lo, val, ctx); + emit_a32_mov_i(dst_hi, hi, ctx); } static inline void emit_a32_add_r(const u8 dst, const u8 src, @@ -579,17 +569,16 @@ static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64, * dst = dst (op) src */ static inline void emit_a32_alu_r(const s8 dst, const s8 src, - bool dstk, bool sstk, struct jit_ctx *ctx, const bool is64, const bool hi, const u8 op) { const s8 *tmp = bpf2a32[TMP_REG_1]; - s8 rn = sstk ? tmp[1] : src; + s8 rn = is_stacked(src) ? 
tmp[1] : src; - if (sstk) + if (is_stacked(src)) emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src)), ctx); /* ALU operation */ - if (dstk) { + if (is_stacked(dst)) { emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx); emit_alu_r(tmp[0], rn, is64, hi, op, ctx); emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx); @@ -600,26 +589,24 @@ static inline void emit_a32_alu_r(const s8 dst, const s8 src, /* ALU operation (64 bit) */ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[], - const s8 src[], bool dstk, - bool sstk, struct jit_ctx *ctx, + const s8 src[], struct jit_ctx *ctx, const u8 op) { - emit_a32_alu_r(dst_lo, src_lo, dstk, sstk, ctx, is64, false, op); + emit_a32_alu_r(dst_lo, src_lo, ctx, is64, false, op); if (is64) - emit_a32_alu_r(dst_hi, src_hi, dstk, sstk, ctx, is64, true, op); + emit_a32_alu_r(dst_hi, src_hi, ctx, is64, true, op); else - emit_a32_mov_i(dst_hi, 0, dstk, ctx); + emit_a32_mov_i(dst_hi, 0, ctx); } /* dst = imm (4 bytes)*/ static inline void emit_a32_mov_r(const s8 dst, const s8 src, - bool dstk, bool sstk, struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; - s8 rt = sstk ? tmp[0] : src; + s8 rt = is_stacked(src) ? 
tmp[0] : src; - if (sstk) + if (is_stacked(src)) emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(src)), ctx); - if (dstk) + if (is_stacked(dst)) emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst)), ctx); else emit(ARM_MOV_R(dst, rt), ctx); @@ -627,25 +614,25 @@ static inline void emit_a32_mov_r(const s8 dst, const s8 src, /* dst = src */ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], - const s8 src[], bool dstk, - bool sstk, struct jit_ctx *ctx) { - emit_a32_mov_r(dst_lo, src_lo, dstk, sstk, ctx); + const s8 src[], + struct jit_ctx *ctx) { + emit_a32_mov_r(dst_lo, src_lo, ctx); if (is64) { /* complete 8 byte move */ - emit_a32_mov_r(dst_hi, src_hi, dstk, sstk, ctx); + emit_a32_mov_r(dst_hi, src_hi, ctx); } else { /* Zero out high 4 bytes */ - emit_a32_mov_i(dst_hi, 0, dstk, ctx); + emit_a32_mov_i(dst_hi, 0, ctx); } } /* Shift operations */ -static inline void emit_a32_alu_i(const s8 dst, const u32 val, bool dstk, +static inline void emit_a32_alu_i(const s8 dst, const u32 val, struct jit_ctx *ctx, const u8 op) { const s8 *tmp = bpf2a32[TMP_REG_1]; - s8 rd = dstk ? tmp[0] : dst; + s8 rd = is_stacked(dst) ? tmp[0] : dst; - if (dstk) + if (is_stacked(dst)) emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); /* Do shift operation */ @@ -661,19 +648,19 @@ static inline void emit_a32_alu_i(const s8 dst, const u32 val, bool dstk, break; } - if (dstk) + if (is_stacked(dst)) emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); } /* dst = ~dst (64 bit) */ -static inline void emit_a32_neg64(const s8 dst[], bool dstk, +static inline void emit_a32_neg64(const s8 dst[], struct jit_ctx *ctx){ const s8 *tmp = bpf2a32[TMP_REG_1]; - s8 rd = dstk ? tmp[1] : dst[1]; - s8 rm = dstk ? tmp[0] : dst[0]; + s8 rd = is_stacked(dst_lo) ? tmp[1] : dst[1]; + s8 rm = is_stacked(dst_lo) ? 
tmp[0] : dst[0]; /* Setup Operand */ - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } @@ -682,26 +669,26 @@ static inline void emit_a32_neg64(const s8 dst[], bool dstk, emit(ARM_RSBS_I(rd, rd, 0), ctx); emit(ARM_RSC_I(rm, rm, 0), ctx); - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } } /* dst = dst << src */ -static inline void emit_a32_lsh_r64(const s8 dst[], const s8 src[], bool dstk, - bool sstk, struct jit_ctx *ctx) { +static inline void emit_a32_lsh_r64(const s8 dst[], const s8 src[], + struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup Operands */ - s8 rt = sstk ? tmp2[1] : src_lo; - s8 rd = dstk ? tmp[1] : dst_lo; - s8 rm = dstk ? tmp[0] : dst_hi; + s8 rt = is_stacked(src_lo) ? tmp2[1] : src_lo; + s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; + s8 rm = is_stacked(dst_lo) ? 
tmp[0] : dst_hi; - if (sstk) + if (is_stacked(src_lo)) emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } @@ -714,7 +701,7 @@ static inline void emit_a32_lsh_r64(const s8 dst[], const s8 src[], bool dstk, emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx); emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_ASL, rt), ctx); - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx); } else { @@ -724,18 +711,18 @@ static inline void emit_a32_lsh_r64(const s8 dst[], const s8 src[], bool dstk, } /* dst = dst >> src (signed)*/ -static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[], bool dstk, - bool sstk, struct jit_ctx *ctx) { +static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[], + struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup Operands */ - s8 rt = sstk ? tmp2[1] : src_lo; - s8 rd = dstk ? tmp[1] : dst_lo; - s8 rm = dstk ? tmp[0] : dst_hi; + s8 rt = is_stacked(src_lo) ? tmp2[1] : src_lo; + s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; + s8 rm = is_stacked(dst_lo) ? 
tmp[0] : dst_hi; - if (sstk) + if (is_stacked(src_lo)) emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } @@ -748,7 +735,7 @@ static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[], bool dstk, _emit(ARM_COND_MI, ARM_B(0), ctx); emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASR, tmp2[0]), ctx); emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_ASR, rt), ctx); - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx); } else { @@ -758,18 +745,18 @@ static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[], bool dstk, } /* dst = dst >> src */ -static inline void emit_a32_rsh_r64(const s8 dst[], const s8 src[], bool dstk, - bool sstk, struct jit_ctx *ctx) { +static inline void emit_a32_rsh_r64(const s8 dst[], const s8 src[], + struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup Operands */ - s8 rt = sstk ? tmp2[1] : src_lo; - s8 rd = dstk ? tmp[1] : dst_lo; - s8 rm = dstk ? tmp[0] : dst_hi; + s8 rt = is_stacked(src_lo) ? tmp2[1] : src_lo; + s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; + s8 rm = is_stacked(dst_lo) ? 
tmp[0] : dst_hi; - if (sstk) + if (is_stacked(src_lo)) emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } @@ -781,7 +768,7 @@ static inline void emit_a32_rsh_r64(const s8 dst[], const s8 src[], bool dstk, emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx); emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx); emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_LSR, rt), ctx); - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx); } else { @@ -791,15 +778,15 @@ static inline void emit_a32_rsh_r64(const s8 dst[], const s8 src[], bool dstk, } /* dst = dst << val */ -static inline void emit_a32_lsh_i64(const s8 dst[], bool dstk, - const u32 val, struct jit_ctx *ctx){ +static inline void emit_a32_lsh_i64(const s8 dst[], + const u32 val, struct jit_ctx *ctx){ const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup operands */ - s8 rd = dstk ? tmp[1] : dst_lo; - s8 rm = dstk ? tmp[0] : dst_hi; + s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; + s8 rm = is_stacked(dst_lo) ? tmp[0] : dst_hi; - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } @@ -817,22 +804,22 @@ static inline void emit_a32_lsh_i64(const s8 dst[], bool dstk, emit(ARM_EOR_R(rd, rd, rd), ctx); } - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } } /* dst = dst >> val */ -static inline void emit_a32_rsh_i64(const s8 dst[], bool dstk, +static inline void emit_a32_rsh_i64(const s8 dst[], const u32 val, struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup operands */ - s8 rd = dstk ? 
tmp[1] : dst_lo; - s8 rm = dstk ? tmp[0] : dst_hi; + s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; + s8 rm = is_stacked(dst_lo) ? tmp[0] : dst_hi; - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } @@ -850,22 +837,22 @@ static inline void emit_a32_rsh_i64(const s8 dst[], bool dstk, emit(ARM_MOV_I(rm, 0), ctx); } - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } } /* dst = dst >> val (signed) */ -static inline void emit_a32_arsh_i64(const s8 dst[], bool dstk, +static inline void emit_a32_arsh_i64(const s8 dst[], const u32 val, struct jit_ctx *ctx){ const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup operands */ - s8 rd = dstk ? tmp[1] : dst_lo; - s8 rm = dstk ? tmp[0] : dst_hi; + s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; + s8 rm = is_stacked(dst_lo) ? tmp[0] : dst_hi; - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } @@ -883,27 +870,27 @@ static inline void emit_a32_arsh_i64(const s8 dst[], bool dstk, emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx); } - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } } -static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], bool dstk, - bool sstk, struct jit_ctx *ctx) { +static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], + struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; /* Setup operands for multiplication */ - s8 rd = dstk ? tmp[1] : dst_lo; - s8 rm = dstk ? tmp[0] : dst_hi; - s8 rt = sstk ? tmp2[1] : src_lo; - s8 rn = sstk ? tmp2[0] : src_hi; + s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; + s8 rm = is_stacked(dst_lo) ? 
tmp[0] : dst_hi; + s8 rt = is_stacked(src_lo) ? tmp2[1] : src_lo; + s8 rn = is_stacked(src_lo) ? tmp2[0] : src_hi; - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } - if (sstk) { + if (is_stacked(src_lo)) { emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_hi)), ctx); } @@ -915,7 +902,7 @@ static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], bool dstk, emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx); emit(ARM_ADD_R(rm, ARM_LR, rm), ctx); - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); } else { @@ -924,15 +911,15 @@ static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], bool dstk, } /* *(size *)(dst + off) = src */ -static inline void emit_str_r(const s8 dst, const s8 src, bool dstk, +static inline void emit_str_r(const s8 dst, const s8 src, const s32 off, struct jit_ctx *ctx, const u8 sz){ const s8 *tmp = bpf2a32[TMP_REG_1]; - s8 rd = dstk ? tmp[1] : dst; + s8 rd = is_stacked(dst) ? tmp[1] : dst; - if (dstk) + if (is_stacked(dst)) emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); if (off) { - emit_a32_mov_i(tmp[0], off, false, ctx); + emit_a32_mov_i(tmp[0], off, ctx); emit(ARM_ADD_R(tmp[0], rd, tmp[0]), ctx); rd = tmp[0]; } @@ -953,10 +940,10 @@ static inline void emit_str_r(const s8 dst, const s8 src, bool dstk, } /* dst = *(size*)(src + off) */ -static inline void emit_ldx_r(const s8 dst[], const s8 src, bool dstk, +static inline void emit_ldx_r(const s8 dst[], const s8 src, s32 off, struct jit_ctx *ctx, const u8 sz){ const s8 *tmp = bpf2a32[TMP_REG_1]; - const s8 *rd = dstk ? tmp : dst; + const s8 *rd = is_stacked(dst_lo) ? 
tmp : dst; s8 rm = src; s32 off_max; @@ -966,7 +953,7 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src, bool dstk, off_max = 0xfff; if (off < 0 || off > off_max) { - emit_a32_mov_i(tmp[0], off, false, ctx); + emit_a32_mov_i(tmp[0], off, ctx); emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); rm = tmp[0]; off = 0; @@ -978,17 +965,17 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src, bool dstk, case BPF_B: /* Load a Byte */ emit(ARM_LDRB_I(rd[1], rm, off), ctx); - emit_a32_mov_i(dst[0], 0, dstk, ctx); + emit_a32_mov_i(dst[0], 0, ctx); break; case BPF_H: /* Load a HalfWord */ emit(ARM_LDRH_I(rd[1], rm, off), ctx); - emit_a32_mov_i(dst[0], 0, dstk, ctx); + emit_a32_mov_i(dst[0], 0, ctx); break; case BPF_W: /* Load a Word */ emit(ARM_LDR_I(rd[1], rm, off), ctx); - emit_a32_mov_i(dst[0], 0, dstk, ctx); + emit_a32_mov_i(dst[0], 0, ctx); break; case BPF_DW: /* Load a Double Word */ @@ -996,10 +983,10 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src, bool dstk, emit(ARM_LDR_I(rd[0], rm, off + 4), ctx); break; } - if (dstk) - emit(ARM_STR_I(rd[1], ARM_SP, STACK_VAR(dst[1])), ctx); - if (dstk && sz == BPF_DW) - emit(ARM_STR_I(rd[0], ARM_SP, STACK_VAR(dst[0])), ctx); + if (is_stacked(dst_lo)) + emit(ARM_STR_I(rd[1], ARM_SP, STACK_VAR(dst_lo)), ctx); + if (is_stacked(dst_lo) && sz == BPF_DW) + emit(ARM_STR_I(rd[0], ARM_SP, STACK_VAR(dst_hi)), ctx); } /* Arithmatic Operation */ @@ -1053,7 +1040,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) */ off = offsetof(struct bpf_array, map.max_entries); /* array->map.max_entries */ - emit_a32_mov_i(tmp[1], off, false, ctx); + emit_a32_mov_i(tmp[1], off, ctx); emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx); emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx); /* index is 32-bit for arrays */ @@ -1083,7 +1070,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) * goto out; */ off = offsetof(struct bpf_array, ptrs); - emit_a32_mov_i(tmp[1], off, false, ctx); + emit_a32_mov_i(tmp[1], off, ctx); 
emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx); emit(ARM_ADD_R(tmp[1], tmp2[1], tmp[1]), ctx); emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx); @@ -1094,7 +1081,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) /* goto *(prog->bpf_func + prologue_size); */ off = offsetof(struct bpf_prog, bpf_func); - emit_a32_mov_i(tmp2[1], off, false, ctx); + emit_a32_mov_i(tmp2[1], off, ctx); emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx); emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx); emit_bx_r(tmp[1], ctx); @@ -1193,8 +1180,8 @@ static void build_prologue(struct jit_ctx *ctx) emit(ARM_SUB_I(ARM_SP, ARM_SP, ctx->stack_size), ctx); /* Set up BPF prog stack base register */ - emit_a32_mov_r(fplo, ARM_IP, true, false, ctx); - emit_a32_mov_i(fphi, 0, true, ctx); + emit_a32_mov_r(fplo, ARM_IP, ctx); + emit_a32_mov_i(fphi, 0, ctx); /* mov r4, 0 */ emit(ARM_MOV_I(r4, 0), ctx); @@ -1243,8 +1230,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) const s32 imm = insn->imm; const int i = insn - ctx->prog->insnsi; const bool is64 = BPF_CLASS(code) == BPF_ALU64; - const bool dstk = is_on_stack(insn->dst_reg); - const bool sstk = is_on_stack(insn->src_reg); s8 rd, rt, rm, rn; s32 jmp_offset; @@ -1268,11 +1253,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU64 | BPF_MOV | BPF_X: switch (BPF_SRC(code)) { case BPF_X: - emit_a32_mov_r64(is64, dst, src, dstk, sstk, ctx); + emit_a32_mov_r64(is64, dst, src, ctx); break; case BPF_K: /* Sign-extend immediate value to destination reg */ - emit_a32_mov_i64(is64, dst, imm, dstk, ctx); + emit_a32_mov_i64(is64, dst, imm, ctx); break; } break; @@ -1312,8 +1297,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU64 | BPF_XOR | BPF_X: switch (BPF_SRC(code)) { case BPF_X: - emit_a32_alu_r64(is64, dst, src, dstk, sstk, - ctx, BPF_OP(code)); + emit_a32_alu_r64(is64, dst, src, ctx, BPF_OP(code)); break; case BPF_K: /* Move 
immediate value to the temporary register @@ -1322,9 +1306,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) * value into temporary reg and then it would be * safe to do the operation on it. */ - emit_a32_mov_i64(is64, tmp2, imm, false, ctx); - emit_a32_alu_r64(is64, dst, tmp2, dstk, false, - ctx, BPF_OP(code)); + emit_a32_mov_i64(is64, tmp2, imm, ctx); + emit_a32_alu_r64(is64, dst, tmp2, ctx, BPF_OP(code)); break; } break; @@ -1334,26 +1317,28 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU | BPF_MOD | BPF_K: case BPF_ALU | BPF_MOD | BPF_X: - rt = src_lo; - rd = dstk ? tmp2[1] : dst_lo; - if (dstk) + rd = is_stacked(dst_lo) ? tmp2[1] : dst_lo; + if (is_stacked(dst_lo)) emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); switch (BPF_SRC(code)) { case BPF_X: - rt = sstk ? tmp2[0] : rt; - if (sstk) + rt = is_stacked(rt) ? tmp2[0] : src_lo; + if (is_stacked(src_lo)) emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); break; case BPF_K: rt = tmp2[0]; - emit_a32_mov_i(rt, imm, false, ctx); + emit_a32_mov_i(rt, imm, ctx); + break; + default: + rt = src_lo; break; } emit_udivmod(rd, rd, rt, ctx, BPF_OP(code)); - if (dstk) + if (is_stacked(dst_lo)) emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit_a32_mov_i(dst_hi, 0, dstk, ctx); + emit_a32_mov_i(dst_hi, 0, ctx); break; case BPF_ALU64 | BPF_DIV | BPF_K: case BPF_ALU64 | BPF_DIV | BPF_X: @@ -1367,54 +1352,54 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) if (unlikely(imm > 31)) return -EINVAL; if (imm) - emit_a32_alu_i(dst_lo, imm, dstk, ctx, BPF_OP(code)); - emit_a32_mov_i(dst_hi, 0, dstk, ctx); + emit_a32_alu_i(dst_lo, imm, ctx, BPF_OP(code)); + emit_a32_mov_i(dst_hi, 0, ctx); break; /* dst = dst << imm */ case BPF_ALU64 | BPF_LSH | BPF_K: if (unlikely(imm > 63)) return -EINVAL; - emit_a32_lsh_i64(dst, dstk, imm, ctx); + emit_a32_lsh_i64(dst, imm, ctx); break; /* dst = dst >> imm */ 
case BPF_ALU64 | BPF_RSH | BPF_K: if (unlikely(imm > 63)) return -EINVAL; - emit_a32_rsh_i64(dst, dstk, imm, ctx); + emit_a32_rsh_i64(dst, imm, ctx); break; /* dst = dst << src */ case BPF_ALU64 | BPF_LSH | BPF_X: - emit_a32_lsh_r64(dst, src, dstk, sstk, ctx); + emit_a32_lsh_r64(dst, src, ctx); break; /* dst = dst >> src */ case BPF_ALU64 | BPF_RSH | BPF_X: - emit_a32_rsh_r64(dst, src, dstk, sstk, ctx); + emit_a32_rsh_r64(dst, src, ctx); break; /* dst = dst >> src (signed) */ case BPF_ALU64 | BPF_ARSH | BPF_X: - emit_a32_arsh_r64(dst, src, dstk, sstk, ctx); + emit_a32_arsh_r64(dst, src, ctx); break; /* dst = dst >> imm (signed) */ case BPF_ALU64 | BPF_ARSH | BPF_K: if (unlikely(imm > 63)) return -EINVAL; - emit_a32_arsh_i64(dst, dstk, imm, ctx); + emit_a32_arsh_i64(dst, imm, ctx); break; /* dst = ~dst */ case BPF_ALU | BPF_NEG: - emit_a32_alu_i(dst_lo, 0, dstk, ctx, BPF_OP(code)); - emit_a32_mov_i(dst_hi, 0, dstk, ctx); + emit_a32_alu_i(dst_lo, 0, ctx, BPF_OP(code)); + emit_a32_mov_i(dst_hi, 0, ctx); break; /* dst = ~dst (64 bit) */ case BPF_ALU64 | BPF_NEG: - emit_a32_neg64(dst, dstk, ctx); + emit_a32_neg64(dst, ctx); break; /* dst = dst * src/imm */ case BPF_ALU64 | BPF_MUL | BPF_X: case BPF_ALU64 | BPF_MUL | BPF_K: switch (BPF_SRC(code)) { case BPF_X: - emit_a32_mul_r64(dst, src, dstk, sstk, ctx); + emit_a32_mul_r64(dst, src, ctx); break; case BPF_K: /* Move immediate value to the temporary register @@ -1423,8 +1408,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) * reg then it would be safe to do the operation * on it. */ - emit_a32_mov_i64(is64, tmp2, imm, false, ctx); - emit_a32_mul_r64(dst, tmp2, dstk, false, ctx); + emit_a32_mov_i64(is64, tmp2, imm, ctx); + emit_a32_mul_r64(dst, tmp2, ctx); break; } break; @@ -1432,9 +1417,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) /* dst = htobe(dst) */ case BPF_ALU | BPF_END | BPF_FROM_LE: case BPF_ALU | BPF_END | BPF_FROM_BE: - rd = dstk ? 
tmp[0] : dst_hi; - rt = dstk ? tmp[1] : dst_lo; - if (dstk) { + rd = is_stacked(dst_lo) ? tmp[0] : dst_hi; + rt = is_stacked(dst_lo) ? tmp[1] : dst_lo; + if (is_stacked(dst_lo)) { emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx); } @@ -1459,7 +1444,7 @@ emit_bswap_uxt: case 16: /* zero-extend 16 bits into 64 bits */ #if __LINUX_ARM_ARCH__ < 6 - emit_a32_mov_i(tmp2[1], 0xffff, false, ctx); + emit_a32_mov_i(tmp2[1], 0xffff, ctx); emit(ARM_AND_R(rt, rt, tmp2[1]), ctx); #else /* ARMv6+ */ emit(ARM_UXTH(rt, rt), ctx); @@ -1475,7 +1460,7 @@ emit_bswap_uxt: break; } exit: - if (dstk) { + if (is_stacked(dst_lo)) { emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx); } @@ -1487,8 +1472,8 @@ exit: u32 hi, lo = imm; hi = insn1.imm; - emit_a32_mov_i(dst_lo, lo, dstk, ctx); - emit_a32_mov_i(dst_hi, hi, dstk, ctx); + emit_a32_mov_i(dst_lo, lo, ctx); + emit_a32_mov_i(dst_hi, hi, ctx); return 1; } @@ -1497,10 +1482,10 @@ exit: case BPF_LDX | BPF_MEM | BPF_H: case BPF_LDX | BPF_MEM | BPF_B: case BPF_LDX | BPF_MEM | BPF_DW: - rn = sstk ? tmp2[1] : src_lo; - if (sstk) + rn = is_stacked(src_lo) ? 
tmp2[1] : src_lo; + if (is_stacked(src_lo)) emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); - emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code)); + emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code)); break; /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_W: @@ -1510,16 +1495,15 @@ exit: switch (BPF_SIZE(code)) { case BPF_DW: /* Sign-extend immediate value into temp reg */ - emit_a32_mov_i64(true, tmp2, imm, false, ctx); - emit_str_r(dst_lo, tmp2[1], dstk, off, ctx, BPF_W); - emit_str_r(dst_lo, tmp2[0], dstk, off+4, ctx, BPF_W); + emit_a32_mov_i64(true, tmp2, imm, ctx); + emit_str_r(dst_lo, tmp2[1], off, ctx, BPF_W); + emit_str_r(dst_lo, tmp2[0], off+4, ctx, BPF_W); break; case BPF_W: case BPF_H: case BPF_B: - emit_a32_mov_i(tmp2[1], imm, false, ctx); - emit_str_r(dst_lo, tmp2[1], dstk, off, ctx, - BPF_SIZE(code)); + emit_a32_mov_i(tmp2[1], imm, ctx); + emit_str_r(dst_lo, tmp2[1], off, ctx, BPF_SIZE(code)); break; } break; @@ -1536,19 +1520,19 @@ exit: { u8 sz = BPF_SIZE(code); - rn = sstk ? tmp2[1] : src_lo; - rm = sstk ? tmp2[0] : src_hi; - if (sstk) { + rn = is_stacked(src_lo) ? tmp2[1] : src_lo; + rm = is_stacked(src_lo) ? tmp2[0] : src_hi; + if (is_stacked(src_lo)) { emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx); } /* Store the value */ if (BPF_SIZE(code) == BPF_DW) { - emit_str_r(dst_lo, rn, dstk, off, ctx, BPF_W); - emit_str_r(dst_lo, rm, dstk, off+4, ctx, BPF_W); + emit_str_r(dst_lo, rn, off, ctx, BPF_W); + emit_str_r(dst_lo, rm, off+4, ctx, BPF_W); } else { - emit_str_r(dst_lo, rn, dstk, off, ctx, sz); + emit_str_r(dst_lo, rn, off, ctx, sz); } break; } @@ -1575,9 +1559,9 @@ exit: case BPF_JMP | BPF_JSLT | BPF_X: case BPF_JMP | BPF_JSLE | BPF_X: /* Setup source registers */ - rm = sstk ? tmp2[0] : src_hi; - rn = sstk ? tmp2[1] : src_lo; - if (sstk) { + rm = is_stacked(src_lo) ? tmp2[0] : src_hi; + rn = is_stacked(src_lo) ? 
tmp2[1] : src_lo; + if (is_stacked(src_lo)) { emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx); } @@ -1609,12 +1593,12 @@ exit: rm = tmp2[0]; rn = tmp2[1]; /* Sign-extend immediate value */ - emit_a32_mov_i64(true, tmp2, imm, false, ctx); + emit_a32_mov_i64(true, tmp2, imm, ctx); go_jmp: /* Setup destination register */ - rd = dstk ? tmp[0] : dst_hi; - rt = dstk ? tmp[1] : dst_lo; - if (dstk) { + rd = is_stacked(dst_lo) ? tmp[0] : dst_hi; + rt = is_stacked(dst_lo) ? tmp[1] : dst_lo; + if (is_stacked(dst_lo)) { emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx); emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx); } @@ -1684,13 +1668,13 @@ go_jmp: const s8 *r5 = bpf2a32[BPF_REG_5]; const u32 func = (u32)__bpf_call_base + (u32)imm; - emit_a32_mov_r64(true, r0, r1, false, false, ctx); - emit_a32_mov_r64(true, r1, r2, false, true, ctx); + emit_a32_mov_r64(true, r0, r1, ctx); + emit_a32_mov_r64(true, r1, r2, ctx); emit_push_r64(r5, 0, ctx); emit_push_r64(r4, 8, ctx); emit_push_r64(r3, 16, ctx); - emit_a32_mov_i(tmp[1], func, false, ctx); + emit_a32_mov_i(tmp[1], func, ctx); emit_blx_r(tmp[1], ctx); emit(ARM_ADD_I(ARM_SP, ARM_SP, imm8m(24)), ctx); // callee clean -- cgit v1.2.3 From 7a9870256361d4a36cb42e0301540256bb4b864e Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:31:52 +0100 Subject: ARM: net: bpf: provide accessor functions for BPF registers Many of the code paths need to have knowledge about whether a register is stacked or in a CPU register. Move this decision making to a pair of helper functions instead of having it scattered throughout the code. 
Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 329 ++++++++++++++++++---------------------------- 1 file changed, 128 insertions(+), 201 deletions(-) (limited to 'arch/arm/net') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index e81401aca2df..08fb4eb285a2 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -465,6 +465,31 @@ static bool is_stacked(s8 reg) return reg < 0; } +/* If a BPF register is on the stack (is_stacked(reg) is true), load it + * to the supplied temporary register and return the temporary register + * for subsequent operations, otherwise just use the CPU register. + */ +static s8 arm_bpf_get_reg32(s8 reg, s8 tmp, struct jit_ctx *ctx) +{ + if (is_stacked(reg)) { + emit(ARM_LDR_I(tmp, ARM_SP, STACK_VAR(reg)), ctx); + reg = tmp; + } + return reg; +} + +/* If a BPF register is on the stack (is_stacked(reg) is true), store src + * to its stack slot. Otherwise, if src is not already the BPF register's + * CPU register, move src into that register. + */ +static void arm_bpf_put_reg32(s8 reg, s8 src, struct jit_ctx *ctx) +{ + if (is_stacked(reg)) + emit(ARM_STR_I(src, ARM_SP, STACK_VAR(reg)), ctx); + else if (reg != src) + emit(ARM_MOV_R(reg, src), ctx); +} + static inline void emit_a32_mov_i(const s8 dst, const u32 val, struct jit_ctx *ctx) { @@ -472,7 +497,7 @@ static inline void emit_a32_mov_i(const s8 dst, const u32 val, if (is_stacked(dst)) { emit_mov_i(tmp[1], val, ctx); - emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(dst)), ctx); + arm_bpf_put_reg32(dst, tmp[1], ctx); } else { emit_mov_i(dst, val, ctx); } @@ -572,19 +597,13 @@ static inline void emit_a32_alu_r(const s8 dst, const s8 src, struct jit_ctx *ctx, const bool is64, const bool hi, const u8 op) { const s8 *tmp = bpf2a32[TMP_REG_1]; - s8 rn = is_stacked(src) ? 
tmp[1] : src; - - if (is_stacked(src)) - emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src)), ctx); + s8 rn, rd; + rn = arm_bpf_get_reg32(src, tmp[1], ctx); + rd = arm_bpf_get_reg32(dst, tmp[0], ctx); /* ALU operation */ - if (is_stacked(dst)) { - emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx); - emit_alu_r(tmp[0], rn, is64, hi, op, ctx); - emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx); - } else { - emit_alu_r(dst, rn, is64, hi, op, ctx); - } + emit_alu_r(rd, rn, is64, hi, op, ctx); + arm_bpf_put_reg32(dst, rd, ctx); } /* ALU operation (64 bit) */ @@ -598,18 +617,14 @@ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[], emit_a32_mov_i(dst_hi, 0, ctx); } -/* dst = imm (4 bytes)*/ +/* dst = src (4 bytes)*/ static inline void emit_a32_mov_r(const s8 dst, const s8 src, struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; - s8 rt = is_stacked(src) ? tmp[0] : src; + s8 rt; - if (is_stacked(src)) - emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(src)), ctx); - if (is_stacked(dst)) - emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst)), ctx); - else - emit(ARM_MOV_R(dst, rt), ctx); + rt = arm_bpf_get_reg32(src, tmp[0], ctx); + arm_bpf_put_reg32(dst, rt, ctx); } /* dst = src */ @@ -630,10 +645,9 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], static inline void emit_a32_alu_i(const s8 dst, const u32 val, struct jit_ctx *ctx, const u8 op) { const s8 *tmp = bpf2a32[TMP_REG_1]; - s8 rd = is_stacked(dst) ? 
tmp[0] : dst; + s8 rd; - if (is_stacked(dst)) - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); + rd = arm_bpf_get_reg32(dst, tmp[0], ctx); /* Do shift operation */ switch (op) { @@ -648,31 +662,25 @@ static inline void emit_a32_alu_i(const s8 dst, const u32 val, break; } - if (is_stacked(dst)) - emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); + arm_bpf_put_reg32(dst, rd, ctx); } /* dst = ~dst (64 bit) */ static inline void emit_a32_neg64(const s8 dst[], struct jit_ctx *ctx){ const s8 *tmp = bpf2a32[TMP_REG_1]; - s8 rd = is_stacked(dst_lo) ? tmp[1] : dst[1]; - s8 rm = is_stacked(dst_lo) ? tmp[0] : dst[0]; + s8 rd, rm; /* Setup Operand */ - if (is_stacked(dst_lo)) { - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); + rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); /* Do Negate Operation */ emit(ARM_RSBS_I(rd, rd, 0), ctx); emit(ARM_RSC_I(rm, rm, 0), ctx); - if (is_stacked(dst_lo)) { - emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + arm_bpf_put_reg32(dst_lo, rd, ctx); + arm_bpf_put_reg32(dst_hi, rm, ctx); } /* dst = dst << src */ @@ -680,18 +688,12 @@ static inline void emit_a32_lsh_r64(const s8 dst[], const s8 src[], struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; + s8 rt, rd, rm; /* Setup Operands */ - s8 rt = is_stacked(src_lo) ? tmp2[1] : src_lo; - s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; - s8 rm = is_stacked(dst_lo) ? 
tmp[0] : dst_hi; - - if (is_stacked(src_lo)) - emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); - if (is_stacked(dst_lo)) { - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); + rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); + rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); /* Do LSH operation */ emit(ARM_SUB_I(ARM_IP, rt, 32), ctx); @@ -701,13 +703,8 @@ static inline void emit_a32_lsh_r64(const s8 dst[], const s8 src[], emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx); emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_ASL, rt), ctx); - if (is_stacked(dst_lo)) { - emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx); - } else { - emit(ARM_MOV_R(rd, ARM_LR), ctx); - emit(ARM_MOV_R(rm, ARM_IP), ctx); - } + arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); + arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); } /* dst = dst >> src (signed)*/ @@ -715,17 +712,12 @@ static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[], struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; + s8 rt, rd, rm; + /* Setup Operands */ - s8 rt = is_stacked(src_lo) ? tmp2[1] : src_lo; - s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; - s8 rm = is_stacked(dst_lo) ? 
tmp[0] : dst_hi; - - if (is_stacked(src_lo)) - emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); - if (is_stacked(dst_lo)) { - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); + rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); + rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); /* Do the ARSH operation */ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); @@ -735,13 +727,9 @@ static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[], _emit(ARM_COND_MI, ARM_B(0), ctx); emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASR, tmp2[0]), ctx); emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_ASR, rt), ctx); - if (is_stacked(dst_lo)) { - emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx); - } else { - emit(ARM_MOV_R(rd, ARM_LR), ctx); - emit(ARM_MOV_R(rm, ARM_IP), ctx); - } + + arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); + arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); } /* dst = dst >> src */ @@ -749,17 +737,12 @@ static inline void emit_a32_rsh_r64(const s8 dst[], const s8 src[], struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; + s8 rt, rd, rm; + /* Setup Operands */ - s8 rt = is_stacked(src_lo) ? tmp2[1] : src_lo; - s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; - s8 rm = is_stacked(dst_lo) ? 
tmp[0] : dst_hi; - - if (is_stacked(src_lo)) - emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); - if (is_stacked(dst_lo)) { - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); + rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); + rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); /* Do RSH operation */ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); @@ -768,13 +751,9 @@ static inline void emit_a32_rsh_r64(const s8 dst[], const s8 src[], emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx); emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx); emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_LSR, rt), ctx); - if (is_stacked(dst_lo)) { - emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx); - } else { - emit(ARM_MOV_R(rd, ARM_LR), ctx); - emit(ARM_MOV_R(rm, ARM_IP), ctx); - } + + arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); + arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); } /* dst = dst << val */ @@ -782,14 +761,11 @@ static inline void emit_a32_lsh_i64(const s8 dst[], const u32 val, struct jit_ctx *ctx){ const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; - /* Setup operands */ - s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; - s8 rm = is_stacked(dst_lo) ? 
tmp[0] : dst_hi; + s8 rd, rm; - if (is_stacked(dst_lo)) { - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + /* Setup operands */ + rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); + rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); /* Do LSH operation */ if (val < 32) { @@ -804,10 +780,8 @@ static inline void emit_a32_lsh_i64(const s8 dst[], emit(ARM_EOR_R(rd, rd, rd), ctx); } - if (is_stacked(dst_lo)) { - emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + arm_bpf_put_reg32(dst_lo, rd, ctx); + arm_bpf_put_reg32(dst_hi, rm, ctx); } /* dst = dst >> val */ @@ -815,14 +789,11 @@ static inline void emit_a32_rsh_i64(const s8 dst[], const u32 val, struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; - /* Setup operands */ - s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; - s8 rm = is_stacked(dst_lo) ? tmp[0] : dst_hi; + s8 rd, rm; - if (is_stacked(dst_lo)) { - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + /* Setup operands */ + rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); + rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); /* Do LSR operation */ if (val < 32) { @@ -837,10 +808,8 @@ static inline void emit_a32_rsh_i64(const s8 dst[], emit(ARM_MOV_I(rm, 0), ctx); } - if (is_stacked(dst_lo)) { - emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + arm_bpf_put_reg32(dst_lo, rd, ctx); + arm_bpf_put_reg32(dst_hi, rm, ctx); } /* dst = dst >> val (signed) */ @@ -848,14 +817,11 @@ static inline void emit_a32_arsh_i64(const s8 dst[], const u32 val, struct jit_ctx *ctx){ const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; - /* Setup operands */ - s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; - s8 rm = is_stacked(dst_lo) ? 
tmp[0] : dst_hi; + s8 rd, rm; - if (is_stacked(dst_lo)) { - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + /* Setup operands */ + rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); + rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); /* Do ARSH operation */ if (val < 32) { @@ -870,30 +836,21 @@ static inline void emit_a32_arsh_i64(const s8 dst[], emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx); } - if (is_stacked(dst_lo)) { - emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + arm_bpf_put_reg32(dst_lo, rd, ctx); + arm_bpf_put_reg32(dst_hi, rm, ctx); } static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; + s8 rd, rm, rt, rn; + /* Setup operands for multiplication */ - s8 rd = is_stacked(dst_lo) ? tmp[1] : dst_lo; - s8 rm = is_stacked(dst_lo) ? tmp[0] : dst_hi; - s8 rt = is_stacked(src_lo) ? tmp2[1] : src_lo; - s8 rn = is_stacked(src_lo) ? 
tmp2[0] : src_hi; - - if (is_stacked(dst_lo)) { - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } - if (is_stacked(src_lo)) { - emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); - emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_hi)), ctx); - } + rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); + rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); + rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); + rn = arm_bpf_get_reg32(src_hi, tmp2[0], ctx); /* Do Multiplication */ emit(ARM_MUL(ARM_IP, rd, rn), ctx); @@ -902,22 +859,18 @@ static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx); emit(ARM_ADD_R(rm, ARM_LR, rm), ctx); - if (is_stacked(dst_lo)) { - emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } else { - emit(ARM_MOV_R(rd, ARM_IP), ctx); - } + + arm_bpf_put_reg32(dst_lo, ARM_IP, ctx); + arm_bpf_put_reg32(dst_hi, rm, ctx); } /* *(size *)(dst + off) = src */ static inline void emit_str_r(const s8 dst, const s8 src, const s32 off, struct jit_ctx *ctx, const u8 sz){ const s8 *tmp = bpf2a32[TMP_REG_1]; - s8 rd = is_stacked(dst) ? 
tmp[1] : dst; + s8 rd; - if (is_stacked(dst)) - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); + rd = arm_bpf_get_reg32(dst, tmp[1], ctx); if (off) { emit_a32_mov_i(tmp[0], off, ctx); emit(ARM_ADD_R(tmp[0], rd, tmp[0]), ctx); @@ -983,10 +936,9 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src, emit(ARM_LDR_I(rd[0], rm, off + 4), ctx); break; } - if (is_stacked(dst_lo)) - emit(ARM_STR_I(rd[1], ARM_SP, STACK_VAR(dst_lo)), ctx); - if (is_stacked(dst_lo) && sz == BPF_DW) - emit(ARM_STR_I(rd[0], ARM_SP, STACK_VAR(dst_hi)), ctx); + arm_bpf_put_reg32(dst[1], rd[1], ctx); + if (sz == BPF_DW) + arm_bpf_put_reg32(dst[0], rd[0], ctx); } /* Arithmatic Operation */ @@ -1034,6 +986,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) #define cur_offset (ctx->idx - idx0) #define jmp_offset (out_offset - (cur_offset) - 2) u32 off, lo, hi; + s8 r_array, r_index, r_tc_lo, r_tc_hi; /* if (index >= array->map.max_entries) * goto out; @@ -1041,12 +994,12 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) off = offsetof(struct bpf_array, map.max_entries); /* array->map.max_entries */ emit_a32_mov_i(tmp[1], off, ctx); - emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx); - emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx); + r_array = arm_bpf_get_reg32(r2[1], tmp2[1], ctx); + emit(ARM_LDR_R(tmp[1], r_array, tmp[1]), ctx); /* index is 32-bit for arrays */ - emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx); + r_index = arm_bpf_get_reg32(r3[1], tmp2[1], ctx); /* index >= array->map.max_entries */ - emit(ARM_CMP_R(tmp2[1], tmp[1]), ctx); + emit(ARM_CMP_R(r_index, tmp[1]), ctx); _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx); /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) @@ -1055,15 +1008,15 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) */ lo = (u32)MAX_TAIL_CALL_CNT; hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32); - emit(ARM_LDR_I(tmp[1], ARM_SP, STACK_VAR(tcc[1])), ctx); - emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(tcc[0])), ctx); - emit(ARM_CMP_I(tmp[0], hi), ctx); - 
_emit(ARM_COND_EQ, ARM_CMP_I(tmp[1], lo), ctx); + r_tc_lo = arm_bpf_get_reg32(tcc[1], tmp[1], ctx); + r_tc_hi = arm_bpf_get_reg32(tcc[0], tmp[0], ctx); + emit(ARM_CMP_I(r_tc_hi, hi), ctx); + _emit(ARM_COND_EQ, ARM_CMP_I(r_tc_lo, lo), ctx); _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx); - emit(ARM_ADDS_I(tmp[1], tmp[1], 1), ctx); - emit(ARM_ADC_I(tmp[0], tmp[0], 0), ctx); - emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(tcc[1])), ctx); - emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(tcc[0])), ctx); + emit(ARM_ADDS_I(r_tc_lo, r_tc_lo, 1), ctx); + emit(ARM_ADC_I(r_tc_hi, r_tc_hi, 0), ctx); + arm_bpf_put_reg32(tcc[1], r_tc_lo, ctx); + arm_bpf_put_reg32(tcc[0], r_tc_hi, ctx); /* prog = array->ptrs[index] * if (prog == NULL) @@ -1071,10 +1024,10 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) */ off = offsetof(struct bpf_array, ptrs); emit_a32_mov_i(tmp[1], off, ctx); - emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx); - emit(ARM_ADD_R(tmp[1], tmp2[1], tmp[1]), ctx); - emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx); - emit(ARM_MOV_SI(tmp[0], tmp2[1], SRTYPE_ASL, 2), ctx); + r_array = arm_bpf_get_reg32(r2[1], tmp2[1], ctx); + emit(ARM_ADD_R(tmp[1], r_array, tmp[1]), ctx); + r_index = arm_bpf_get_reg32(r3[1], tmp2[1], ctx); + emit(ARM_MOV_SI(tmp[0], r_index, SRTYPE_ASL, 2), ctx); emit(ARM_LDR_R(tmp[1], tmp[1], tmp[0]), ctx); emit(ARM_CMP_I(tmp[1], 0), ctx); _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx); @@ -1317,15 +1270,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU | BPF_MOD | BPF_K: case BPF_ALU | BPF_MOD | BPF_X: - rd = is_stacked(dst_lo) ? tmp2[1] : dst_lo; - if (is_stacked(dst_lo)) - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); + rd = arm_bpf_get_reg32(dst_lo, tmp2[1], ctx); switch (BPF_SRC(code)) { case BPF_X: - rt = is_stacked(rt) ? 
tmp2[0] : src_lo; - if (is_stacked(src_lo)) - emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), - ctx); + rt = arm_bpf_get_reg32(src_lo, tmp2[0], ctx); break; case BPF_K: rt = tmp2[0]; @@ -1336,8 +1284,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) break; } emit_udivmod(rd, rd, rt, ctx, BPF_OP(code)); - if (is_stacked(dst_lo)) - emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); + arm_bpf_put_reg32(dst_lo, rd, ctx); emit_a32_mov_i(dst_hi, 0, ctx); break; case BPF_ALU64 | BPF_DIV | BPF_K: @@ -1417,12 +1364,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) /* dst = htobe(dst) */ case BPF_ALU | BPF_END | BPF_FROM_LE: case BPF_ALU | BPF_END | BPF_FROM_BE: - rd = is_stacked(dst_lo) ? tmp[0] : dst_hi; - rt = is_stacked(dst_lo) ? tmp[1] : dst_lo; - if (is_stacked(dst_lo)) { - emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + rt = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); + rd = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); if (BPF_SRC(code) == BPF_FROM_LE) goto emit_bswap_uxt; switch (imm) { @@ -1460,10 +1403,8 @@ emit_bswap_uxt: break; } exit: - if (is_stacked(dst_lo)) { - emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + arm_bpf_put_reg32(dst_lo, rt, ctx); + arm_bpf_put_reg32(dst_hi, rd, ctx); break; /* dst = imm64 */ case BPF_LD | BPF_IMM | BPF_DW: @@ -1482,9 +1423,7 @@ exit: case BPF_LDX | BPF_MEM | BPF_H: case BPF_LDX | BPF_MEM | BPF_B: case BPF_LDX | BPF_MEM | BPF_DW: - rn = is_stacked(src_lo) ? tmp2[1] : src_lo; - if (is_stacked(src_lo)) - emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); + rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code)); break; /* ST: *(size *)(dst + off) = imm */ @@ -1520,12 +1459,8 @@ exit: { u8 sz = BPF_SIZE(code); - rn = is_stacked(src_lo) ? tmp2[1] : src_lo; - rm = is_stacked(src_lo) ? 
tmp2[0] : src_hi; - if (is_stacked(src_lo)) { - emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx); - } + rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); + rm = arm_bpf_get_reg32(src_hi, tmp2[0], ctx); /* Store the value */ if (BPF_SIZE(code) == BPF_DW) { @@ -1559,12 +1494,8 @@ exit: case BPF_JMP | BPF_JSLT | BPF_X: case BPF_JMP | BPF_JSLE | BPF_X: /* Setup source registers */ - rm = is_stacked(src_lo) ? tmp2[0] : src_hi; - rn = is_stacked(src_lo) ? tmp2[1] : src_lo; - if (is_stacked(src_lo)) { - emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx); - } + rm = arm_bpf_get_reg32(src_hi, tmp2[0], ctx); + rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); goto go_jmp; /* PC += off if dst == imm */ /* PC += off if dst > imm */ @@ -1596,12 +1527,8 @@ exit: emit_a32_mov_i64(true, tmp2, imm, ctx); go_jmp: /* Setup destination register */ - rd = is_stacked(dst_lo) ? tmp[0] : dst_hi; - rt = is_stacked(dst_lo) ? 
tmp[1] : dst_lo; - if (is_stacked(dst_lo)) { - emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + rt = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); + rd = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); /* Check for the condition */ emit_ar_r(rd, rt, rm, rn, ctx, BPF_OP(code)); -- cgit v1.2.3 From a6eccac507e5e4aed63fb23320fcadeb253c2af6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:31:57 +0100 Subject: ARM: net: bpf: 64-bit accessor functions for BPF registers Provide a couple of 64-bit register accessors, and use them where appropriate Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 235 ++++++++++++++++++++++++---------------------- 1 file changed, 122 insertions(+), 113 deletions(-) (limited to 'arch/arm/net') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 08fb4eb285a2..45a3599e94a4 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -478,6 +478,17 @@ static s8 arm_bpf_get_reg32(s8 reg, s8 tmp, struct jit_ctx *ctx) return reg; } +static const s8 *arm_bpf_get_reg64(const s8 *reg, const s8 *tmp, + struct jit_ctx *ctx) +{ + if (is_stacked(reg[1])) { + emit(ARM_LDR_I(tmp[1], ARM_SP, STACK_VAR(reg[1])), ctx); + emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(reg[0])), ctx); + reg = tmp; + } + return reg; +} + /* If a BPF register is on the stack (stk is true), save the register * back to the stack. If the source register is not the same, then * move it into the correct register. 
@@ -490,6 +501,20 @@ static void arm_bpf_put_reg32(s8 reg, s8 src, struct jit_ctx *ctx) emit(ARM_MOV_R(reg, src), ctx); } +static void arm_bpf_put_reg64(const s8 *reg, const s8 *src, + struct jit_ctx *ctx) +{ + if (is_stacked(reg[1])) { + emit(ARM_STR_I(src[1], ARM_SP, STACK_VAR(reg[1])), ctx); + emit(ARM_STR_I(src[0], ARM_SP, STACK_VAR(reg[0])), ctx); + } else { + if (reg[1] != src[1]) + emit(ARM_MOV_R(reg[1], src[1]), ctx); + if (reg[0] != src[0]) + emit(ARM_MOV_R(reg[0], src[0]), ctx); + } +} + static inline void emit_a32_mov_i(const s8 dst, const u32 val, struct jit_ctx *ctx) { @@ -669,18 +694,16 @@ static inline void emit_a32_alu_i(const s8 dst, const u32 val, static inline void emit_a32_neg64(const s8 dst[], struct jit_ctx *ctx){ const s8 *tmp = bpf2a32[TMP_REG_1]; - s8 rd, rm; + const s8 *rd; /* Setup Operand */ - rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); - rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); + rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do Negate Operation */ - emit(ARM_RSBS_I(rd, rd, 0), ctx); - emit(ARM_RSC_I(rm, rm, 0), ctx); + emit(ARM_RSBS_I(rd[1], rd[1], 0), ctx); + emit(ARM_RSC_I(rd[0], rd[0], 0), ctx); - arm_bpf_put_reg32(dst_lo, rd, ctx); - arm_bpf_put_reg32(dst_hi, rm, ctx); + arm_bpf_put_reg64(dst, rd, ctx); } /* dst = dst << src */ @@ -688,20 +711,20 @@ static inline void emit_a32_lsh_r64(const s8 dst[], const s8 src[], struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; - s8 rt, rd, rm; + const s8 *rd; + s8 rt; /* Setup Operands */ rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); - rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); - rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); + rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do LSH operation */ emit(ARM_SUB_I(ARM_IP, rt, 32), ctx); emit(ARM_RSB_I(tmp2[0], rt, 32), ctx); - emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx); - emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx); - emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx); - 
emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_ASL, rt), ctx); + emit(ARM_MOV_SR(ARM_LR, rd[0], SRTYPE_ASL, rt), ctx); + emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[1], SRTYPE_ASL, ARM_IP), ctx); + emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd[1], SRTYPE_LSR, tmp2[0]), ctx); + emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_ASL, rt), ctx); arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); @@ -712,21 +735,21 @@ static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[], struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; - s8 rt, rd, rm; + const s8 *rd; + s8 rt; /* Setup Operands */ rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); - rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); - rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); + rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do the ARSH operation */ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); - emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx); - emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx); + emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx); + emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx); _emit(ARM_COND_MI, ARM_B(0), ctx); - emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASR, tmp2[0]), ctx); - emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_ASR, rt), ctx); + emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASR, tmp2[0]), ctx); + emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_ASR, rt), ctx); arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); @@ -737,20 +760,20 @@ static inline void emit_a32_rsh_r64(const s8 dst[], const s8 src[], struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; - s8 rt, rd, rm; + const s8 *rd; + s8 rt; /* Setup Operands */ rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); - rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); - rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); + rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do RSH operation */ emit(ARM_RSB_I(ARM_IP, rt, 
32), ctx); emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); - emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx); - emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx); - emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx); - emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_LSR, rt), ctx); + emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx); + emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx); + emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_LSR, tmp2[0]), ctx); + emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_LSR, rt), ctx); arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); @@ -761,27 +784,25 @@ static inline void emit_a32_lsh_i64(const s8 dst[], const u32 val, struct jit_ctx *ctx){ const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; - s8 rd, rm; + const s8 *rd; /* Setup operands */ - rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); - rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); + rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do LSH operation */ if (val < 32) { - emit(ARM_MOV_SI(tmp2[0], rm, SRTYPE_ASL, val), ctx); - emit(ARM_ORR_SI(rm, tmp2[0], rd, SRTYPE_LSR, 32 - val), ctx); - emit(ARM_MOV_SI(rd, rd, SRTYPE_ASL, val), ctx); + emit(ARM_MOV_SI(tmp2[0], rd[0], SRTYPE_ASL, val), ctx); + emit(ARM_ORR_SI(rd[0], tmp2[0], rd[1], SRTYPE_LSR, 32 - val), ctx); + emit(ARM_MOV_SI(rd[1], rd[1], SRTYPE_ASL, val), ctx); } else { if (val == 32) - emit(ARM_MOV_R(rm, rd), ctx); + emit(ARM_MOV_R(rd[0], rd[1]), ctx); else - emit(ARM_MOV_SI(rm, rd, SRTYPE_ASL, val - 32), ctx); - emit(ARM_EOR_R(rd, rd, rd), ctx); + emit(ARM_MOV_SI(rd[0], rd[1], SRTYPE_ASL, val - 32), ctx); + emit(ARM_EOR_R(rd[1], rd[1], rd[1]), ctx); } - arm_bpf_put_reg32(dst_lo, rd, ctx); - arm_bpf_put_reg32(dst_hi, rm, ctx); + arm_bpf_put_reg64(dst, rd, ctx); } /* dst = dst >> val */ @@ -789,27 +810,25 @@ static inline void emit_a32_rsh_i64(const s8 dst[], const u32 val, struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; - 
s8 rd, rm; + const s8 *rd; /* Setup operands */ - rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); - rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); + rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do LSR operation */ if (val < 32) { - emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx); - emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx); - emit(ARM_MOV_SI(rm, rm, SRTYPE_LSR, val), ctx); + emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx); + emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx); + emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_LSR, val), ctx); } else if (val == 32) { - emit(ARM_MOV_R(rd, rm), ctx); - emit(ARM_MOV_I(rm, 0), ctx); + emit(ARM_MOV_R(rd[1], rd[0]), ctx); + emit(ARM_MOV_I(rd[0], 0), ctx); } else { - emit(ARM_MOV_SI(rd, rm, SRTYPE_LSR, val - 32), ctx); - emit(ARM_MOV_I(rm, 0), ctx); + emit(ARM_MOV_SI(rd[1], rd[0], SRTYPE_LSR, val - 32), ctx); + emit(ARM_MOV_I(rd[0], 0), ctx); } - arm_bpf_put_reg32(dst_lo, rd, ctx); - arm_bpf_put_reg32(dst_hi, rm, ctx); + arm_bpf_put_reg64(dst, rd, ctx); } /* dst = dst >> val (signed) */ @@ -817,51 +836,47 @@ static inline void emit_a32_arsh_i64(const s8 dst[], const u32 val, struct jit_ctx *ctx){ const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; - s8 rd, rm; + const s8 *rd; /* Setup operands */ - rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); - rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); + rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do ARSH operation */ if (val < 32) { - emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx); - emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx); - emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, val), ctx); + emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx); + emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx); + emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, val), ctx); } else if (val == 32) { - emit(ARM_MOV_R(rd, rm), ctx); - emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx); + emit(ARM_MOV_R(rd[1], rd[0]), ctx); + 
emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, 31), ctx); } else { - emit(ARM_MOV_SI(rd, rm, SRTYPE_ASR, val - 32), ctx); - emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx); + emit(ARM_MOV_SI(rd[1], rd[0], SRTYPE_ASR, val - 32), ctx); + emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, 31), ctx); } - arm_bpf_put_reg32(dst_lo, rd, ctx); - arm_bpf_put_reg32(dst_hi, rm, ctx); + arm_bpf_put_reg64(dst, rd, ctx); } static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; - s8 rd, rm, rt, rn; + const s8 *rd, *rt; /* Setup operands for multiplication */ - rd = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); - rm = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); - rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); - rn = arm_bpf_get_reg32(src_hi, tmp2[0], ctx); + rd = arm_bpf_get_reg64(dst, tmp, ctx); + rt = arm_bpf_get_reg64(src, tmp2, ctx); /* Do Multiplication */ - emit(ARM_MUL(ARM_IP, rd, rn), ctx); - emit(ARM_MUL(ARM_LR, rm, rt), ctx); + emit(ARM_MUL(ARM_IP, rd[1], rt[0]), ctx); + emit(ARM_MUL(ARM_LR, rd[0], rt[1]), ctx); emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx); - emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx); - emit(ARM_ADD_R(rm, ARM_LR, rm), ctx); + emit(ARM_UMULL(ARM_IP, rd[0], rd[1], rt[1]), ctx); + emit(ARM_ADD_R(rd[0], ARM_LR, rd[0]), ctx); arm_bpf_put_reg32(dst_lo, ARM_IP, ctx); - arm_bpf_put_reg32(dst_hi, rm, ctx); + arm_bpf_put_reg32(dst_hi, rd[0], ctx); } /* *(size *)(dst + off) = src */ @@ -918,17 +933,17 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src, case BPF_B: /* Load a Byte */ emit(ARM_LDRB_I(rd[1], rm, off), ctx); - emit_a32_mov_i(dst[0], 0, ctx); + emit_a32_mov_i(rd[0], 0, ctx); break; case BPF_H: /* Load a HalfWord */ emit(ARM_LDRH_I(rd[1], rm, off), ctx); - emit_a32_mov_i(dst[0], 0, ctx); + emit_a32_mov_i(rd[0], 0, ctx); break; case BPF_W: /* Load a Word */ emit(ARM_LDR_I(rd[1], rm, off), ctx); - emit_a32_mov_i(dst[0], 0, ctx); + emit_a32_mov_i(rd[0], 0, ctx); 
break; case BPF_DW: /* Load a Double Word */ @@ -936,9 +951,7 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src, emit(ARM_LDR_I(rd[0], rm, off + 4), ctx); break; } - arm_bpf_put_reg32(dst[1], rd[1], ctx); - if (sz == BPF_DW) - arm_bpf_put_reg32(dst[0], rd[0], ctx); + arm_bpf_put_reg64(dst, rd, ctx); } /* Arithmatic Operation */ @@ -982,11 +995,12 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *tmp2 = bpf2a32[TMP_REG_2]; const s8 *tcc = bpf2a32[TCALL_CNT]; + const s8 *tc; const int idx0 = ctx->idx; #define cur_offset (ctx->idx - idx0) #define jmp_offset (out_offset - (cur_offset) - 2) u32 off, lo, hi; - s8 r_array, r_index, r_tc_lo, r_tc_hi; + s8 r_array, r_index; /* if (index >= array->map.max_entries) * goto out; @@ -1008,15 +1022,13 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) */ lo = (u32)MAX_TAIL_CALL_CNT; hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32); - r_tc_lo = arm_bpf_get_reg32(tcc[1], tmp[1], ctx); - r_tc_hi = arm_bpf_get_reg32(tcc[0], tmp[0], ctx); - emit(ARM_CMP_I(r_tc_hi, hi), ctx); - _emit(ARM_COND_EQ, ARM_CMP_I(r_tc_lo, lo), ctx); + tc = arm_bpf_get_reg64(tcc, tmp, ctx); + emit(ARM_CMP_I(tc[0], hi), ctx); + _emit(ARM_COND_EQ, ARM_CMP_I(tc[1], lo), ctx); _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx); - emit(ARM_ADDS_I(r_tc_lo, r_tc_lo, 1), ctx); - emit(ARM_ADC_I(r_tc_hi, r_tc_hi, 0), ctx); - arm_bpf_put_reg32(tcc[1], r_tc_lo, ctx); - arm_bpf_put_reg32(tcc[0], r_tc_hi, ctx); + emit(ARM_ADDS_I(tc[1], tc[1], 1), ctx); + emit(ARM_ADC_I(tc[0], tc[0], 0), ctx); + arm_bpf_put_reg64(tcc, tmp, ctx); /* prog = array->ptrs[index] * if (prog == NULL) @@ -1183,7 +1195,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) const s32 imm = insn->imm; const int i = insn - ctx->prog->insnsi; const bool is64 = BPF_CLASS(code) == BPF_ALU64; - s8 rd, rt, rm, rn; + const s8 *rd, *rs; + s8 rd_lo, rt, rm, rn; s32 jmp_offset; #define check_imm(bits, imm) do { \ @@ -1270,7 +1283,7 @@ static 
int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU | BPF_MOD | BPF_K: case BPF_ALU | BPF_MOD | BPF_X: - rd = arm_bpf_get_reg32(dst_lo, tmp2[1], ctx); + rd_lo = arm_bpf_get_reg32(dst_lo, tmp2[1], ctx); switch (BPF_SRC(code)) { case BPF_X: rt = arm_bpf_get_reg32(src_lo, tmp2[0], ctx); @@ -1283,8 +1296,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) rt = src_lo; break; } - emit_udivmod(rd, rd, rt, ctx, BPF_OP(code)); - arm_bpf_put_reg32(dst_lo, rd, ctx); + emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code)); + arm_bpf_put_reg32(dst_lo, rd_lo, ctx); emit_a32_mov_i(dst_hi, 0, ctx); break; case BPF_ALU64 | BPF_DIV | BPF_K: @@ -1364,21 +1377,20 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) /* dst = htobe(dst) */ case BPF_ALU | BPF_END | BPF_FROM_LE: case BPF_ALU | BPF_END | BPF_FROM_BE: - rt = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); - rd = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); + rd = arm_bpf_get_reg64(dst, tmp, ctx); if (BPF_SRC(code) == BPF_FROM_LE) goto emit_bswap_uxt; switch (imm) { case 16: - emit_rev16(rt, rt, ctx); + emit_rev16(rd[1], rd[1], ctx); goto emit_bswap_uxt; case 32: - emit_rev32(rt, rt, ctx); + emit_rev32(rd[1], rd[1], ctx); goto emit_bswap_uxt; case 64: - emit_rev32(ARM_LR, rt, ctx); - emit_rev32(rt, rd, ctx); - emit(ARM_MOV_R(rd, ARM_LR), ctx); + emit_rev32(ARM_LR, rd[1], ctx); + emit_rev32(rd[1], rd[0], ctx); + emit(ARM_MOV_R(rd[0], ARM_LR), ctx); break; } goto exit; @@ -1388,23 +1400,22 @@ emit_bswap_uxt: /* zero-extend 16 bits into 64 bits */ #if __LINUX_ARM_ARCH__ < 6 emit_a32_mov_i(tmp2[1], 0xffff, ctx); - emit(ARM_AND_R(rt, rt, tmp2[1]), ctx); + emit(ARM_AND_R(rd[1], rd[1], tmp2[1]), ctx); #else /* ARMv6+ */ - emit(ARM_UXTH(rt, rt), ctx); + emit(ARM_UXTH(rd[1], rd[1]), ctx); #endif - emit(ARM_EOR_R(rd, rd, rd), ctx); + emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx); break; case 32: /* zero-extend 32 bits into 64 bits */ - 
emit(ARM_EOR_R(rd, rd, rd), ctx); + emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx); break; case 64: /* nop */ break; } exit: - arm_bpf_put_reg32(dst_lo, rt, ctx); - arm_bpf_put_reg32(dst_hi, rd, ctx); + arm_bpf_put_reg64(dst, rd, ctx); break; /* dst = imm64 */ case BPF_LD | BPF_IMM | BPF_DW: @@ -1459,15 +1470,14 @@ exit: { u8 sz = BPF_SIZE(code); - rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); - rm = arm_bpf_get_reg32(src_hi, tmp2[0], ctx); + rs = arm_bpf_get_reg64(src, tmp2, ctx); /* Store the value */ if (BPF_SIZE(code) == BPF_DW) { - emit_str_r(dst_lo, rn, off, ctx, BPF_W); - emit_str_r(dst_lo, rm, off+4, ctx, BPF_W); + emit_str_r(dst_lo, rs[1], off, ctx, BPF_W); + emit_str_r(dst_lo, rs[0], off+4, ctx, BPF_W); } else { - emit_str_r(dst_lo, rn, off, ctx, sz); + emit_str_r(dst_lo, rs[1], off, ctx, sz); } break; } @@ -1527,11 +1537,10 @@ exit: emit_a32_mov_i64(true, tmp2, imm, ctx); go_jmp: /* Setup destination register */ - rt = arm_bpf_get_reg32(dst_lo, tmp[1], ctx); - rd = arm_bpf_get_reg32(dst_hi, tmp[0], ctx); + rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Check for the condition */ - emit_ar_r(rd, rt, rm, rn, ctx, BPF_OP(code)); + emit_ar_r(rd[0], rd[1], rm, rn, ctx, BPF_OP(code)); /* Setup JUMP instruction */ jmp_offset = bpf2a32_offset(i+off, i, ctx); -- cgit v1.2.3 From 96cced4e774a2728710c8f8f48441fc7b29d6177 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:32:02 +0100 Subject: ARM: net: bpf: access eBPF scratch space using ARM FP register Access the eBPF scratch space using the frame pointer rather than our stack pointer, as the offsets from the ARM frame pointer are constant across all eBPF programs. Since we no longer reference the scratch space registers from the stack pointer, this simplifies emit_push_r64() as it no longer needs to know how many words are pushed onto the stack. 
Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 44 +++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) (limited to 'arch/arm/net') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 45a3599e94a4..753b5b2b2e3d 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -108,6 +108,12 @@ enum { #define STACK_OFFSET(k) (-4 - (k) * 4) #define SCRATCH_SIZE (BPF_JIT_SCRATCH_REGS * 4) +#ifdef CONFIG_FRAME_POINTER +#define EBPF_SCRATCH_TO_ARM_FP(x) ((x) - 4 * hweight16(CALLEE_PUSH_MASK) - 4) +#else +#define EBPF_SCRATCH_TO_ARM_FP(x) (x) +#endif + #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */ #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */ #define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Tail Call Count */ @@ -294,9 +300,6 @@ static void jit_fill_hole(void *area, unsigned int size) #define _STACK_SIZE (ctx->prog->aux->stack_depth + SCRATCH_SIZE) #define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT) -/* Get the offset of eBPF REGISTERs stored on scratch space. 
*/ -#define STACK_VAR(off) (STACK_SIZE + (off)) - #if __LINUX_ARM_ARCH__ < 7 static u16 imm_offset(u32 k, struct jit_ctx *ctx) @@ -472,7 +475,7 @@ static bool is_stacked(s8 reg) static s8 arm_bpf_get_reg32(s8 reg, s8 tmp, struct jit_ctx *ctx) { if (is_stacked(reg)) { - emit(ARM_LDR_I(tmp, ARM_SP, STACK_VAR(reg)), ctx); + emit(ARM_LDR_I(tmp, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg)), ctx); reg = tmp; } return reg; @@ -482,8 +485,10 @@ static const s8 *arm_bpf_get_reg64(const s8 *reg, const s8 *tmp, struct jit_ctx *ctx) { if (is_stacked(reg[1])) { - emit(ARM_LDR_I(tmp[1], ARM_SP, STACK_VAR(reg[1])), ctx); - emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(reg[0])), ctx); + emit(ARM_LDR_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[1])), + ctx); + emit(ARM_LDR_I(tmp[0], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[0])), + ctx); reg = tmp; } return reg; @@ -496,7 +501,7 @@ static const s8 *arm_bpf_get_reg64(const s8 *reg, const s8 *tmp, static void arm_bpf_put_reg32(s8 reg, s8 src, struct jit_ctx *ctx) { if (is_stacked(reg)) - emit(ARM_STR_I(src, ARM_SP, STACK_VAR(reg)), ctx); + emit(ARM_STR_I(src, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg)), ctx); else if (reg != src) emit(ARM_MOV_R(reg, src), ctx); } @@ -505,8 +510,10 @@ static void arm_bpf_put_reg64(const s8 *reg, const s8 *src, struct jit_ctx *ctx) { if (is_stacked(reg[1])) { - emit(ARM_STR_I(src[1], ARM_SP, STACK_VAR(reg[1])), ctx); - emit(ARM_STR_I(src[0], ARM_SP, STACK_VAR(reg[0])), ctx); + emit(ARM_STR_I(src[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[1])), + ctx); + emit(ARM_STR_I(src[0], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[0])), + ctx); } else { if (reg[1] != src[1]) emit(ARM_MOV_R(reg[1], src[1]), ctx); @@ -1103,16 +1110,15 @@ static inline void emit_rev32(const u8 rd, const u8 rn, struct jit_ctx *ctx) } // push the scratch stack register on top of the stack -static inline void emit_push_r64(const s8 src[], const u8 shift, - struct jit_ctx *ctx) +static inline void emit_push_r64(const s8 src[], struct jit_ctx *ctx) { const s8 *tmp2 = 
bpf2a32[TMP_REG_2]; + const s8 *rt; u16 reg_set = 0; - emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(src[1]+shift)), ctx); - emit(ARM_LDR_I(tmp2[0], ARM_SP, STACK_VAR(src[0]+shift)), ctx); + rt = arm_bpf_get_reg64(src, tmp2, ctx); - reg_set = (1 << tmp2[1]) | (1 << tmp2[0]); + reg_set = (1 << rt[1]) | (1 << rt[0]); emit(ARM_PUSH(reg_set), ctx); } @@ -1155,8 +1161,8 @@ static void build_prologue(struct jit_ctx *ctx) emit(ARM_MOV_R(r3, r4), ctx); emit(ARM_MOV_R(r2, r0), ctx); /* Initialize Tail Count */ - emit(ARM_STR_I(r4, ARM_SP, STACK_VAR(tcc[0])), ctx); - emit(ARM_STR_I(r4, ARM_SP, STACK_VAR(tcc[1])), ctx); + emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[0])), ctx); + emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[1])), ctx); /* end of prologue */ } @@ -1606,9 +1612,9 @@ go_jmp: emit_a32_mov_r64(true, r0, r1, ctx); emit_a32_mov_r64(true, r1, r2, ctx); - emit_push_r64(r5, 0, ctx); - emit_push_r64(r4, 8, ctx); - emit_push_r64(r3, 16, ctx); + emit_push_r64(r5, ctx); + emit_push_r64(r4, ctx); + emit_push_r64(r3, ctx); emit_a32_mov_i(tmp[1], func, ctx); emit_blx_r(tmp[1], ctx); -- cgit v1.2.3 From 1ca3b17b777c4136f9dba0195e13502c445f4ade Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:32:07 +0100 Subject: ARM: net: bpf: imm12 constant conversion Provide a version of the imm8m() function that the compiler can optimise when used with a constant expression. 
Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) (limited to 'arch/arm/net') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 753b5b2b2e3d..2cc66aa44dfe 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -236,10 +236,56 @@ static inline void emit(u32 inst, struct jit_ctx *ctx) _emit(ARM_COND_AL, inst, ctx); } +/* + * This is rather horrid, but necessary to convert an integer constant + * to an immediate operand for the opcodes, and be able to detect at + * build time whether the constant can't be converted (iow, usable in + * BUILD_BUG_ON()). + */ +#define imm12val(v, s) (rol32(v, (s)) | (s) << 7) +#define const_imm8m(x) \ + ({ int r; \ + u32 v = (x); \ + if (!(v & ~0x000000ff)) \ + r = imm12val(v, 0); \ + else if (!(v & ~0xc000003f)) \ + r = imm12val(v, 2); \ + else if (!(v & ~0xf000000f)) \ + r = imm12val(v, 4); \ + else if (!(v & ~0xfc000003)) \ + r = imm12val(v, 6); \ + else if (!(v & ~0xff000000)) \ + r = imm12val(v, 8); \ + else if (!(v & ~0x3fc00000)) \ + r = imm12val(v, 10); \ + else if (!(v & ~0x0ff00000)) \ + r = imm12val(v, 12); \ + else if (!(v & ~0x03fc0000)) \ + r = imm12val(v, 14); \ + else if (!(v & ~0x00ff0000)) \ + r = imm12val(v, 16); \ + else if (!(v & ~0x003fc000)) \ + r = imm12val(v, 18); \ + else if (!(v & ~0x000ff000)) \ + r = imm12val(v, 20); \ + else if (!(v & ~0x0003fc00)) \ + r = imm12val(v, 22); \ + else if (!(v & ~0x0000ff00)) \ + r = imm12val(v, 24); \ + else if (!(v & ~0x00003fc0)) \ + r = imm12val(v, 26); \ + else if (!(v & ~0x00000ff0)) \ + r = imm12val(v, 28); \ + else if (!(v & ~0x000003fc)) \ + r = imm12val(v, 30); \ + else \ + r = -1; \ + r; }) + /* * Checks if immediate value can be converted to imm12(12 bits) value. 
*/ -static int16_t imm8m(u32 x) +static int imm8m(u32 x) { u32 rot; @@ -249,6 +295,8 @@ static int16_t imm8m(u32 x) return -1; } +#define imm8m(x) (__builtin_constant_p(x) ? const_imm8m(x) : imm8m(x)) + static u32 arm_bpf_ldst_imm12(u32 op, u8 rt, u8 rn, s16 imm12) { op |= rt << 12 | rn << 16; -- cgit v1.2.3 From 828e2b90e8e9b5bd844a25f22ceeb8df4dd18b07 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:32:12 +0100 Subject: ARM: net: bpf: use immediate forms of instructions where possible Rather than moving constants to a register and then using them in a subsequent instruction, use them directly in the desired instruction cutting out the "middle" register. This removes two instructions from the tail call code path. Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 21 ++++++++++++--------- arch/arm/net/bpf_jit_32.h | 1 + 2 files changed, 13 insertions(+), 9 deletions(-) (limited to 'arch/arm/net') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 2cc66aa44dfe..645653e1931e 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -304,7 +304,7 @@ static u32 arm_bpf_ldst_imm12(u32 op, u8 rt, u8 rn, s16 imm12) op |= ARM_INST_LDST__U; else imm12 = -imm12; - return op | (imm12 & 0xfff); + return op | (imm12 & ARM_INST_LDST__IMM12); } static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8) @@ -1054,17 +1054,19 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) const int idx0 = ctx->idx; #define cur_offset (ctx->idx - idx0) #define jmp_offset (out_offset - (cur_offset) - 2) - u32 off, lo, hi; + u32 lo, hi; s8 r_array, r_index; + int off; /* if (index >= array->map.max_entries) * goto out; */ + BUILD_BUG_ON(offsetof(struct bpf_array, map.max_entries) > + ARM_INST_LDST__IMM12); off = offsetof(struct bpf_array, map.max_entries); /* array->map.max_entries */ - emit_a32_mov_i(tmp[1], off, ctx); r_array = arm_bpf_get_reg32(r2[1], tmp2[1], ctx); - emit(ARM_LDR_R(tmp[1], 
r_array, tmp[1]), ctx); + emit(ARM_LDR_I(tmp[1], r_array, off), ctx); /* index is 32-bit for arrays */ r_index = arm_bpf_get_reg32(r3[1], tmp2[1], ctx); /* index >= array->map.max_entries */ @@ -1089,10 +1091,10 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) * if (prog == NULL) * goto out; */ - off = offsetof(struct bpf_array, ptrs); - emit_a32_mov_i(tmp[1], off, ctx); + BUILD_BUG_ON(imm8m(offsetof(struct bpf_array, ptrs)) < 0); + off = imm8m(offsetof(struct bpf_array, ptrs)); r_array = arm_bpf_get_reg32(r2[1], tmp2[1], ctx); - emit(ARM_ADD_R(tmp[1], r_array, tmp[1]), ctx); + emit(ARM_ADD_I(tmp[1], r_array, off), ctx); r_index = arm_bpf_get_reg32(r3[1], tmp2[1], ctx); emit(ARM_MOV_SI(tmp[0], r_index, SRTYPE_ASL, 2), ctx); emit(ARM_LDR_R(tmp[1], tmp[1], tmp[0]), ctx); @@ -1100,9 +1102,10 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx); /* goto *(prog->bpf_func + prologue_size); */ + BUILD_BUG_ON(offsetof(struct bpf_prog, bpf_func) > + ARM_INST_LDST__IMM12); off = offsetof(struct bpf_prog, bpf_func); - emit_a32_mov_i(tmp2[1], off, ctx); - emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx); + emit(ARM_LDR_I(tmp[1], tmp[1], off), ctx); emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx); emit_bx_r(tmp[1], ctx); diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h index c55bc39d3e22..dee8a76fb0bc 100644 --- a/arch/arm/net/bpf_jit_32.h +++ b/arch/arm/net/bpf_jit_32.h @@ -78,6 +78,7 @@ #define ARM_INST_EOR_I 0x02200000 #define ARM_INST_LDST__U 0x00800000 +#define ARM_INST_LDST__IMM12 0x00000fff #define ARM_INST_LDRB_I 0x05500000 #define ARM_INST_LDRB_R 0x07d00000 #define ARM_INST_LDRH_I 0x015000b0 -- cgit v1.2.3 From 2b6958ef1151452cb2160fde75a5c5382b512c34 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:32:17 +0100 Subject: ARM: net: bpf: use ldr instructions with shifted rm register Rather than pre-shifting the rm register for the ldr in the tail call, shift it in the load 
instruction. This eliminates one unnecessary instruction. Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 3 +-- arch/arm/net/bpf_jit_32.h | 4 ++++ 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/arm/net') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 645653e1931e..e22dc828420c 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1096,8 +1096,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) r_array = arm_bpf_get_reg32(r2[1], tmp2[1], ctx); emit(ARM_ADD_I(tmp[1], r_array, off), ctx); r_index = arm_bpf_get_reg32(r3[1], tmp2[1], ctx); - emit(ARM_MOV_SI(tmp[0], r_index, SRTYPE_ASL, 2), ctx); - emit(ARM_LDR_R(tmp[1], tmp[1], tmp[0]), ctx); + emit(ARM_LDR_R_SI(tmp[1], tmp[1], r_index, SRTYPE_ASL, 2), ctx); emit(ARM_CMP_I(tmp[1], 0), ctx); _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx); diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h index dee8a76fb0bc..e541a7a6139a 100644 --- a/arch/arm/net/bpf_jit_32.h +++ b/arch/arm/net/bpf_jit_32.h @@ -188,6 +188,10 @@ #define ARM_LDR_R(rt, rn, rm) (ARM_INST_LDR_R | ARM_INST_LDST__U \ | (rt) << 12 | (rn) << 16 \ | (rm)) +#define ARM_LDR_R_SI(rt, rn, rm, type, imm) \ + (ARM_INST_LDR_R | ARM_INST_LDST__U \ + | (rt) << 12 | (rn) << 16 \ + | (imm) << 7 | (type) << 5 | (rm)) #define ARM_LDRB_R(rt, rn, rm) (ARM_INST_LDRB_R | ARM_INST_LDST__U \ | (rt) << 12 | (rn) << 16 \ | (rm)) -- cgit v1.2.3 From aaffd2f5c3d58f154ca7b3d104a2ee6b6e40bc6b Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:32:22 +0100 Subject: ARM: net: bpf: avoid reloading 'index' Avoid reloading 'index' after we have validated it - it remains in tmp2[1] up to the point that we begin the code to index the pointer array, so with a little rearrangement of the registers, we can use the already loaded value. 
Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/arm/net') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index e22dc828420c..0a8b3d0903c4 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1073,6 +1073,8 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) emit(ARM_CMP_R(r_index, tmp[1]), ctx); _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx); + /* tmp2[1] = index */ + /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) * goto out; * tail_call_cnt++; @@ -1093,9 +1095,8 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) */ BUILD_BUG_ON(imm8m(offsetof(struct bpf_array, ptrs)) < 0); off = imm8m(offsetof(struct bpf_array, ptrs)); - r_array = arm_bpf_get_reg32(r2[1], tmp2[1], ctx); + r_array = arm_bpf_get_reg32(r2[1], tmp2[0], ctx); emit(ARM_ADD_I(tmp[1], r_array, off), ctx); - r_index = arm_bpf_get_reg32(r3[1], tmp2[1], ctx); emit(ARM_LDR_R_SI(tmp[1], tmp[1], r_index, SRTYPE_ASL, 2), ctx); emit(ARM_CMP_I(tmp[1], 0), ctx); _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx); -- cgit v1.2.3 From b50452299864fbc00a576241e1490541c8754d50 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:32:28 +0100 Subject: ARM: net: bpf: avoid reloading 'array' Rearranging the order of the initial tail call code a little allows us to avoid reloading the 'array' pointer.
Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/arm/net') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 0a8b3d0903c4..f0cad9692952 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1064,16 +1064,16 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) BUILD_BUG_ON(offsetof(struct bpf_array, map.max_entries) > ARM_INST_LDST__IMM12); off = offsetof(struct bpf_array, map.max_entries); - /* array->map.max_entries */ - r_array = arm_bpf_get_reg32(r2[1], tmp2[1], ctx); - emit(ARM_LDR_I(tmp[1], r_array, off), ctx); + r_array = arm_bpf_get_reg32(r2[1], tmp2[0], ctx); /* index is 32-bit for arrays */ r_index = arm_bpf_get_reg32(r3[1], tmp2[1], ctx); + /* array->map.max_entries */ + emit(ARM_LDR_I(tmp[1], r_array, off), ctx); /* index >= array->map.max_entries */ emit(ARM_CMP_R(r_index, tmp[1]), ctx); _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx); - /* tmp2[1] = index */ + /* tmp2[0] = array, tmp2[1] = index */ /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) * goto out; @@ -1095,7 +1095,6 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) */ BUILD_BUG_ON(imm8m(offsetof(struct bpf_array, ptrs)) < 0); off = imm8m(offsetof(struct bpf_array, ptrs)); - r_array = arm_bpf_get_reg32(r2[1], tmp2[0], ctx); emit(ARM_ADD_I(tmp[1], r_array, off), ctx); emit(ARM_LDR_R_SI(tmp[1], tmp[1], r_index, SRTYPE_ASL, 2), ctx); emit(ARM_CMP_I(tmp[1], 0), ctx); -- cgit v1.2.3 From bef8968df8a6e3eb91081d68affc64b8d87d5721 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:32:33 +0100 Subject: ARM: net: bpf: always use odd/even register pair Always use an odd/even register pair for our 64-bit registers, so that we're able to use the double-word load/store instructions in the future. 
Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'arch/arm/net') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index f0cad9692952..006ff9615850 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -47,27 +47,27 @@ * The callee saved registers depends on whether frame pointers are enabled. * With frame pointers (to be compliant with the ABI): * - * high - * original ARM_SP => +------------------+ \ - * | pc | | - * current ARM_FP => +------------------+ } callee saved registers - * |r4-r8,r10,fp,ip,lr| | - * +------------------+ / - * low + * high + * original ARM_SP => +--------------+ \ + * | pc | | + * current ARM_FP => +--------------+ } callee saved registers + * |r4-r9,fp,ip,lr| | + * +--------------+ / + * low * * Without frame pointers: * - * high - * original ARM_SP => +------------------+ - * | r4-r8,r10,fp,lr | callee saved registers - * current ARM_FP => +------------------+ - * low + * high + * original ARM_SP => +--------------+ + * | r4-r9,fp,lr | callee saved registers + * current ARM_FP => +--------------+ + * low * * When popping registers off the stack at the end of a BPF function, we * reference them via the current ARM_FP register. */ #define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \ - 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R10 | \ + 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R9 | \ 1 << ARM_FP) #define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR) #define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC) @@ -157,7 +157,7 @@ static const s8 bpf2a32[][2] = { * for constant blindings and others. */ [TMP_REG_1] = {ARM_R7, ARM_R6}, - [TMP_REG_2] = {ARM_R10, ARM_R8}, + [TMP_REG_2] = {ARM_R9, ARM_R8}, /* Tail call count. Stored on stack scratch space. */ [TCALL_CNT] = {STACK_OFFSET(BPF_TC_HI), STACK_OFFSET(BPF_TC_LO)}, /* temporary register for blinding constants. 
-- cgit v1.2.3 From 8c9602d38c7262664c31332101f540c1e179797d Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 11 Jul 2018 10:32:38 +0100 Subject: ARM: net: bpf: use double-word load/stores where available Use double-word loads and stores where these instructions are supported by the CPU architecture. Signed-off-by: Russell King Signed-off-by: Daniel Borkmann --- arch/arm/net/bpf_jit_32.c | 55 ++++++++++++++++++++++++++++++++++++----------- arch/arm/net/bpf_jit_32.h | 2 ++ 2 files changed, 45 insertions(+), 12 deletions(-) (limited to 'arch/arm/net') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 006ff9615850..a9f68a924800 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "bpf_jit_32.h" @@ -192,6 +193,7 @@ struct jit_ctx { unsigned int idx; unsigned int prologue_bytes; unsigned int epilogue_offset; + unsigned int cpu_architecture; u32 flags; u32 *offsets; u32 *target; @@ -319,10 +321,12 @@ static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8) #define ARM_LDR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_LDR_I, rt, rn, off) #define ARM_LDRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_LDRB_I, rt, rn, off) +#define ARM_LDRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRD_I, rt, rn, off) #define ARM_LDRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRH_I, rt, rn, off) #define ARM_STR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STR_I, rt, rn, off) #define ARM_STRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STRB_I, rt, rn, off) +#define ARM_STRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_STRD_I, rt, rn, off) #define ARM_STRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_STRH_I, rt, rn, off) /* @@ -533,10 +537,16 @@ static const s8 *arm_bpf_get_reg64(const s8 *reg, const s8 *tmp, struct jit_ctx *ctx) { if (is_stacked(reg[1])) { - emit(ARM_LDR_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[1])), - ctx); - emit(ARM_LDR_I(tmp[0], ARM_FP, 
EBPF_SCRATCH_TO_ARM_FP(reg[0])), - ctx); + if (__LINUX_ARM_ARCH__ >= 6 || + ctx->cpu_architecture >= CPU_ARCH_ARMv5TE) { + emit(ARM_LDRD_I(tmp[1], ARM_FP, + EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); + } else { + emit(ARM_LDR_I(tmp[1], ARM_FP, + EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); + emit(ARM_LDR_I(tmp[0], ARM_FP, + EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx); + } reg = tmp; } return reg; @@ -558,10 +568,16 @@ static void arm_bpf_put_reg64(const s8 *reg, const s8 *src, struct jit_ctx *ctx) { if (is_stacked(reg[1])) { - emit(ARM_STR_I(src[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[1])), - ctx); - emit(ARM_STR_I(src[0], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[0])), - ctx); + if (__LINUX_ARM_ARCH__ >= 6 || + ctx->cpu_architecture >= CPU_ARCH_ARMv5TE) { + emit(ARM_STRD_I(src[1], ARM_FP, + EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); + } else { + emit(ARM_STR_I(src[1], ARM_FP, + EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); + emit(ARM_STR_I(src[0], ARM_FP, + EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx); + } } else { if (reg[1] != src[1]) emit(ARM_MOV_R(reg[1], src[1]), ctx); @@ -711,13 +727,27 @@ static inline void emit_a32_mov_r(const s8 dst, const s8 src, static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], const s8 src[], struct jit_ctx *ctx) { - emit_a32_mov_r(dst_lo, src_lo, ctx); - if (is64) { + if (!is64) { + emit_a32_mov_r(dst_lo, src_lo, ctx); + /* Zero out high 4 bytes */ + emit_a32_mov_i(dst_hi, 0, ctx); + } else if (__LINUX_ARM_ARCH__ < 6 && + ctx->cpu_architecture < CPU_ARCH_ARMv5TE) { /* complete 8 byte move */ + emit_a32_mov_r(dst_lo, src_lo, ctx); emit_a32_mov_r(dst_hi, src_hi, ctx); + } else if (is_stacked(src_lo) && is_stacked(dst_lo)) { + const u8 *tmp = bpf2a32[TMP_REG_1]; + + emit(ARM_LDRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(src_lo)), ctx); + emit(ARM_STRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(dst_lo)), ctx); + } else if (is_stacked(src_lo)) { + emit(ARM_LDRD_I(dst[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(src_lo)), ctx); + } else if (is_stacked(dst_lo)) { + 
emit(ARM_STRD_I(src[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(dst_lo)), ctx); } else { - /* Zero out high 4 bytes */ - emit_a32_mov_i(dst_hi, 0, ctx); + emit(ARM_MOV_R(dst[0], src[0]), ctx); + emit(ARM_MOV_R(dst[1], src[1]), ctx); } } @@ -1778,6 +1808,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) memset(&ctx, 0, sizeof(ctx)); ctx.prog = prog; + ctx.cpu_architecture = cpu_architecture(); /* Not able to allocate memory for offsets[] , then * we must fall back to the interpreter diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h index e541a7a6139a..f4e58bcdaa43 100644 --- a/arch/arm/net/bpf_jit_32.h +++ b/arch/arm/net/bpf_jit_32.h @@ -81,6 +81,7 @@ #define ARM_INST_LDST__IMM12 0x00000fff #define ARM_INST_LDRB_I 0x05500000 #define ARM_INST_LDRB_R 0x07d00000 +#define ARM_INST_LDRD_I 0x014000d0 #define ARM_INST_LDRH_I 0x015000b0 #define ARM_INST_LDRH_R 0x019000b0 #define ARM_INST_LDR_I 0x05100000 @@ -128,6 +129,7 @@ #define ARM_INST_STR_I 0x05000000 #define ARM_INST_STRB_I 0x05400000 +#define ARM_INST_STRD_I 0x014000f0 #define ARM_INST_STRH_I 0x014000b0 #define ARM_INST_TST_R 0x01100000 -- cgit v1.2.3