diff options
author | Jakub Kicinski <kuba@kernel.org> | 2023-08-03 15:34:36 -0700 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2023-08-03 15:34:36 -0700 |
commit | d07b7b32da6f678d42d96a8b9824cf0a181ce140 (patch) | |
tree | 606829d4b33a57dbe0f0e825ca8505e0b5fcb759 /arch/x86 | |
parent | 35b1b1fd96388d5e3cf179bf36bd8a4153baf4a3 (diff) | |
parent | 648880e9331c68b2008430fd90f3648d1795399d (diff) |
Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Martin KaFai Lau says:
====================
pull-request: bpf-next 2023-08-03
We've added 54 non-merge commits during the last 10 day(s) which contain
a total of 84 files changed, 4026 insertions(+), 562 deletions(-).
The main changes are:
1) Add SO_REUSEPORT support for TC bpf_sk_assign from Lorenz Bauer,
Daniel Borkmann
2) Support new insns from cpu v4 from Yonghong Song
3) Non-atomically allocate freelist during prefill from YiFei Zhu
4) Support defragmenting IPv(4|6) packets in BPF from Daniel Xu
5) Add tracepoint to xdp attaching failure from Leon Hwang
6) struct netdev_rx_queue and xdp.h reshuffling to reduce
rebuild time from Jakub Kicinski
* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (54 commits)
net: invert the netdevice.h vs xdp.h dependency
net: move struct netdev_rx_queue out of netdevice.h
eth: add missing xdp.h includes in drivers
selftests/bpf: Add testcase for xdp attaching failure tracepoint
bpf, xdp: Add tracepoint to xdp attaching failure
selftests/bpf: fix static assert compilation issue for test_cls_*.c
bpf: fix bpf_probe_read_kernel prototype mismatch
riscv, bpf: Adapt bpf trampoline to optimized riscv ftrace framework
libbpf: fix typos in Makefile
tracing: bpf: use struct trace_entry in struct syscall_tp_t
bpf, devmap: Remove unused dtab field from bpf_dtab_netdev
bpf, cpumap: Remove unused cmap field from bpf_cpu_map_entry
netfilter: bpf: Only define get_proto_defrag_hook() if necessary
bpf: Fix an array-index-out-of-bounds issue in disasm.c
net: remove duplicate INDIRECT_CALLABLE_DECLARE of udp[6]_ehashfn
docs/bpf: Fix malformed documentation
bpf: selftests: Add defrag selftests
bpf: selftests: Support custom type and proto for client sockets
bpf: selftests: Support not connecting client socket
netfilter: bpf: Support BPF_F_NETFILTER_IP_DEFRAG in netfilter link
...
====================
Link: https://lore.kernel.org/r/20230803174845.825419-1-martin.lau@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/net/bpf_jit_comp.c | 141 |
1 files changed, 117 insertions, 24 deletions
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 83c4b45dc65f..a5930042139d 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -701,6 +701,38 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg) *pprog = prog; } +static void emit_movsx_reg(u8 **pprog, int num_bits, bool is64, u32 dst_reg, + u32 src_reg) +{ + u8 *prog = *pprog; + + if (is64) { + /* movs[b,w,l]q dst, src */ + if (num_bits == 8) + EMIT4(add_2mod(0x48, src_reg, dst_reg), 0x0f, 0xbe, + add_2reg(0xC0, src_reg, dst_reg)); + else if (num_bits == 16) + EMIT4(add_2mod(0x48, src_reg, dst_reg), 0x0f, 0xbf, + add_2reg(0xC0, src_reg, dst_reg)); + else if (num_bits == 32) + EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x63, + add_2reg(0xC0, src_reg, dst_reg)); + } else { + /* movs[b,w]l dst, src */ + if (num_bits == 8) { + EMIT4(add_2mod(0x40, src_reg, dst_reg), 0x0f, 0xbe, + add_2reg(0xC0, src_reg, dst_reg)); + } else if (num_bits == 16) { + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT1(add_2mod(0x40, src_reg, dst_reg)); + EMIT3(add_2mod(0x0f, src_reg, dst_reg), 0xbf, + add_2reg(0xC0, src_reg, dst_reg)); + } + } + + *pprog = prog; +} + /* Emit the suffix (ModR/M etc) for addressing *(ptr_reg + off) and val_reg */ static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off) { @@ -779,6 +811,29 @@ static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) *pprog = prog; } +/* LDSX: dst_reg = *(s8*)(src_reg + off) */ +static void emit_ldsx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) +{ + u8 *prog = *pprog; + + switch (size) { + case BPF_B: + /* Emit 'movsx rax, byte ptr [rax + off]' */ + EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xBE); + break; + case BPF_H: + /* Emit 'movsx rax, word ptr [rax + off]' */ + EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xBF); + break; + case BPF_W: + /* Emit 'movsx rax, dword ptr [rax+0x14]' */ + EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x63); + break; + } + emit_insn_suffix(&prog, src_reg, dst_reg, off); + *pprog = prog; +} + /* STX: *(u8*)(dst_reg + off) = src_reg */ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) { @@ -1028,9 +1083,14 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image case BPF_ALU64 | BPF_MOV | BPF_X: case BPF_ALU | BPF_MOV | BPF_X: - emit_mov_reg(&prog, - BPF_CLASS(insn->code) == BPF_ALU64, - dst_reg, src_reg); + if (insn->off == 0) + emit_mov_reg(&prog, + BPF_CLASS(insn->code) == BPF_ALU64, + dst_reg, src_reg); + else + emit_movsx_reg(&prog, insn->off, + BPF_CLASS(insn->code) == BPF_ALU64, + dst_reg, src_reg); break; /* neg dst */ @@ -1134,15 +1194,26 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image /* mov rax, dst_reg */ emit_mov_reg(&prog, is64, BPF_REG_0, dst_reg); - /* - * xor edx, edx - * equivalent to 'xor rdx, rdx', but one byte less - */ - EMIT2(0x31, 0xd2); + if (insn->off == 0) { + /* + * xor edx, edx + * equivalent to 'xor rdx, rdx', but one byte less + */ + EMIT2(0x31, 0xd2); - /* div src_reg */ - maybe_emit_1mod(&prog, src_reg, is64); - EMIT2(0xF7, add_1reg(0xF0, src_reg)); + /* div src_reg */ + maybe_emit_1mod(&prog, src_reg, is64); + EMIT2(0xF7, add_1reg(0xF0, src_reg)); + } else { + if (BPF_CLASS(insn->code) == BPF_ALU) + EMIT1(0x99); /* cdq */ + else + EMIT2(0x48, 0x99); /* cqo */ + + /* idiv src_reg */ + maybe_emit_1mod(&prog, src_reg, is64); + EMIT2(0xF7, add_1reg(0xF8, src_reg)); + } if (BPF_OP(insn->code) == BPF_MOD && dst_reg != BPF_REG_3) @@ -1262,6 +1333,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image break; case BPF_ALU | BPF_END | BPF_FROM_BE: + case BPF_ALU64 | BPF_END | BPF_FROM_LE: switch (imm32) { case 16: /* Emit 'ror %ax, 8' to swap lower 2 bytes */ @@ -1370,9 +1442,17 @@ st: if (is_imm8(insn->off)) case BPF_LDX | BPF_PROBE_MEM | BPF_W: case BPF_LDX | BPF_MEM | BPF_DW: case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + /* LDXS: dst_reg = *(s8*)(src_reg + off) */ + case BPF_LDX | BPF_MEMSX | BPF_B: + case BPF_LDX | BPF_MEMSX | BPF_H: + case BPF_LDX | BPF_MEMSX | BPF_W: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: insn_off = insn->off; - if (BPF_MODE(insn->code) == BPF_PROBE_MEM) { + if (BPF_MODE(insn->code) == BPF_PROBE_MEM || + BPF_MODE(insn->code) == BPF_PROBE_MEMSX) { /* Conservatively check that src_reg + insn->off is a kernel address: * src_reg + insn->off >= TASK_SIZE_MAX + PAGE_SIZE * src_reg is used as scratch for src_reg += insn->off and restored @@ -1415,8 +1495,13 @@ st: if (is_imm8(insn->off)) start_of_ldx = prog; end_of_jmp[-1] = start_of_ldx - end_of_jmp; } - emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn_off); - if (BPF_MODE(insn->code) == BPF_PROBE_MEM) { + if (BPF_MODE(insn->code) == BPF_PROBE_MEMSX || + BPF_MODE(insn->code) == BPF_MEMSX) + emit_ldsx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn_off); + else + emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn_off); + if (BPF_MODE(insn->code) == BPF_PROBE_MEM || + BPF_MODE(insn->code) == BPF_PROBE_MEMSX) { struct exception_table_entry *ex; u8 *_insn = image + proglen + (start_of_ldx - temp); s64 delta; @@ -1730,16 +1815,24 @@ emit_cond_jmp: /* Convert BPF opcode to x86 */ break; case BPF_JMP | BPF_JA: - if (insn->off == -1) - /* -1 jmp instructions will always jump - * backwards two bytes. Explicitly handling - * this case avoids wasting too many passes - * when there are long sequences of replaced - * dead code. - */ - jmp_offset = -2; - else - jmp_offset = addrs[i + insn->off] - addrs[i]; + case BPF_JMP32 | BPF_JA: + if (BPF_CLASS(insn->code) == BPF_JMP) { + if (insn->off == -1) + /* -1 jmp instructions will always jump + * backwards two bytes. Explicitly handling + * this case avoids wasting too many passes + * when there are long sequences of replaced + * dead code. + */ + jmp_offset = -2; + else + jmp_offset = addrs[i + insn->off] - addrs[i]; + } else { + if (insn->imm == -1) + jmp_offset = -2; + else + jmp_offset = addrs[i + insn->imm] - addrs[i]; + } if (!jmp_offset) { /* |