From 19f68ed6dc90c93daf7e54d3350ea67fead7cbbf Mon Sep 17 00:00:00 2001 From: Aijun Sun Date: Thu, 4 Aug 2022 10:54:42 +0800 Subject: bpf, arm64: Allocate program buffer using kvcalloc instead of kcalloc It is not necessary to allocate contiguous physical memory for BPF program buffer using kcalloc. When the BPF program is large more than memory page size, kcalloc allocates multiple memory pages from buddy system. If the device can not provide sufficient memory, for example in low-end android devices [0], memory allocation for BPF program is likely to fail. Test cases in lib/test_bpf.c all pass on ARM64 QEMU. [0] AndroidTestSuit: page allocation failure: order:4, mode:0x40dc0(GFP_KERNEL|__GFP_COMP|__GFP_ZERO), nodemask=(null),cpuset=foreground,mems_allowed=0 Call trace: dump_stack+0xa4/0x114 warn_alloc+0xf8/0x14c __alloc_pages_slowpath+0xac8/0xb14 __alloc_pages_nodemask+0x194/0x3d0 kmalloc_order_trace+0x44/0x1e8 __kmalloc+0x29c/0x66c bpf_int_jit_compile+0x17c/0x568 bpf_prog_select_runtime+0x4c/0x1b0 bpf_prepare_filter+0x5fc/0x6bc bpf_prog_create_from_user+0x118/0x1c0 seccomp_set_mode_filter+0x1c4/0x7cc __do_sys_prctl+0x380/0x1424 __arm64_sys_prctl+0x20/0x2c el0_svc_common+0xc8/0x22c el0_svc_handler+0x1c/0x28 el0_svc+0x8/0x100 Signed-off-by: Aijun Sun Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220804025442.22524-1-aijun.sun@unisoc.com --- arch/arm64/net/bpf_jit_comp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 7ca8779ae34f..40aa3e744beb 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1496,7 +1496,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) memset(&ctx, 0, sizeof(ctx)); ctx.prog = prog; - ctx.offset = kcalloc(prog->len + 1, sizeof(int), GFP_KERNEL); + ctx.offset = kvcalloc(prog->len + 1, sizeof(int), GFP_KERNEL); if (ctx.offset == NULL) { prog = orig_prog; goto out_off; @@ -1601,7 +1601,7 @@ skip_init_ctx: ctx.offset[i] *= AARCH64_INSN_SIZE; bpf_prog_fill_jited_linfo(prog, ctx.offset + 1); out_off: - kfree(ctx.offset); + kvfree(ctx.offset); kfree(jit_data); prog->aux->jit_data = NULL; } -- cgit v1.2.3 From aada476655461a9ab491d8298a415430cdd10278 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Mon, 8 Aug 2022 00:07:35 -0400 Subject: bpf, arm64: Fix bpf trampoline instruction endianness The sparse tool complains as follows: arch/arm64/net/bpf_jit_comp.c:1684:16: warning: incorrect type in assignment (different base types) arch/arm64/net/bpf_jit_comp.c:1684:16: expected unsigned int [usertype] *branch arch/arm64/net/bpf_jit_comp.c:1684:16: got restricted __le32 [usertype] * arch/arm64/net/bpf_jit_comp.c:1700:52: error: subtraction of different types can't work (different base types) arch/arm64/net/bpf_jit_comp.c:1734:29: warning: incorrect type in assignment (different base types) arch/arm64/net/bpf_jit_comp.c:1734:29: expected unsigned int [usertype] * arch/arm64/net/bpf_jit_comp.c:1734:29: got restricted __le32 [usertype] * arch/arm64/net/bpf_jit_comp.c:1918:52: error: subtraction of different types can't work (different base types) This is because the variable branch in function invoke_bpf_prog and the variable branches in function prepare_trampoline are defined as type u32 *, which conflicts with ctx->image's type __le32 *, so sparse complains when assignment or arithmetic operation are performed on these two variables and ctx->image. Since arm64 instructions are always little-endian, change the type of these two variables to __le32 * and call cpu_to_le32() to convert instruction to little-endian before writing it to memory. This is also in line with emit() which internally does cpu_to_le32(), too. Fixes: efc9909fdce0 ("bpf, arm64: Add bpf trampoline for arm64") Reported-by: kernel test robot Signed-off-by: Xu Kuohai Signed-off-by: Daniel Borkmann Reviewed-by: Jean-Philippe Brucker Link: https://lore.kernel.org/bpf/20220808040735.1232002-1-xukuohai@huawei.com --- arch/arm64/net/bpf_jit_comp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 40aa3e744beb..389623ae5a91 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1643,7 +1643,7 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, int args_off, int retval_off, int run_ctx_off, bool save_ret) { - u32 *branch; + __le32 *branch; u64 enter_prog; u64 exit_prog; struct bpf_prog *p = l->link.prog; @@ -1698,7 +1698,7 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, if (ctx->image) { int offset = &ctx->image[ctx->idx] - branch; - *branch = A64_CBZ(1, A64_R(0), offset); + *branch = cpu_to_le32(A64_CBZ(1, A64_R(0), offset)); } /* arg1: prog */ @@ -1713,7 +1713,7 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl, int args_off, int retval_off, int run_ctx_off, - u32 **branches) + __le32 **branches) { int i; @@ -1784,7 +1784,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; bool save_ret; - u32 **branches = NULL; + __le32 **branches = NULL; /* trampoline stack layout: * [ parent ip ] @@ -1892,7 +1892,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, flags & BPF_TRAMP_F_RET_FENTRY_RET); if (fmod_ret->nr_links) { - branches = kcalloc(fmod_ret->nr_links, sizeof(u32 *), + branches = kcalloc(fmod_ret->nr_links, sizeof(__le32 *), GFP_KERNEL); if (!branches) return -ENOMEM; @@ -1916,7 +1916,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, /* update the branches saved in invoke_bpf_mod_ret with cbnz */ for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) { int offset = &ctx->image[ctx->idx] - branches[i]; - *branches[i] = A64_CBNZ(1, A64_R(10), offset); + *branches[i] = cpu_to_le32(A64_CBNZ(1, A64_R(10), offset)); } for (i = 0; i < fexit->nr_links; i++) -- cgit v1.2.3