diff options
author | David S. Miller <davem@davemloft.net> | 2021-04-02 11:03:07 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2021-04-02 11:03:07 -0700 |
commit | c2bcb4cf021121d7c162e44b7773281891e3abc2 (patch) | |
tree | 8f37f13ff2292f0435cd523507d40b7a384cb8c6 /kernel | |
parent | bd78980be1a68d14524c51c4b4170782fada622b (diff) | |
parent | 89d69c5d0fbcabd8656459bc8b1a476d6f1efee4 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:
====================
pull-request: bpf-next 2021-04-01
The following pull-request contains BPF updates for your *net-next* tree.
We've added 68 non-merge commits during the last 7 day(s) which contain
a total of 70 files changed, 2944 insertions(+), 1139 deletions(-).
The main changes are:
1) UDP support for sockmap, from Cong.
2) Verifier merge conflict resolution fix, from Daniel.
3) xsk selftests enhancements, from Maciej.
4) Unstable helpers aka kernel func calling, from Martin.
5) Batches ops for LPM map, from Pedro.
6) Fix race in bpf_get_local_storage, from Yonghong.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/btf.c | 219 | ||||
-rw-r--r-- | kernel/bpf/core.c | 47 | ||||
-rw-r--r-- | kernel/bpf/disasm.c | 13 | ||||
-rw-r--r-- | kernel/bpf/helpers.c | 15 | ||||
-rw-r--r-- | kernel/bpf/local_storage.c | 5 | ||||
-rw-r--r-- | kernel/bpf/lpm_trie.c | 3 | ||||
-rw-r--r-- | kernel/bpf/syscall.c | 5 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 390 |
8 files changed, 546 insertions, 151 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 369faeddf1df..0600ed325fa0 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -283,7 +283,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_FLOAT] = "FLOAT", }; -static const char *btf_type_str(const struct btf_type *t) +const char *btf_type_str(const struct btf_type *t) { return btf_kind_str[BTF_INFO_KIND(t->info)]; } @@ -789,7 +789,6 @@ static const struct btf_type *btf_type_skip_qualifiers(const struct btf *btf, while (btf_type_is_modifier(t) && BTF_INFO_KIND(t->info) != BTF_KIND_TYPEDEF) { - id = t->type; t = btf_type_by_id(btf, t->type); } @@ -4377,7 +4376,7 @@ static u8 bpf_ctx_convert_map[] = { #undef BPF_LINK_TYPE static const struct btf_member * -btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf, +btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, enum bpf_prog_type prog_type, int arg) { @@ -5362,122 +5361,190 @@ int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *pr return btf_check_func_type_match(log, btf1, t1, btf2, t2); } -/* Compare BTF of a function with given bpf_reg_state. - * Returns: - * EFAULT - there is a verifier bug. Abort verification. - * EINVAL - there is a type mismatch or BTF is not available. - * 0 - BTF matches with what bpf_reg_state expects. - * Only PTR_TO_CTX and SCALAR_VALUE states are recognized. - */ -int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *regs) +static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = { +#ifdef CONFIG_NET + [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK], + [PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], + [PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP], +#endif +}; + +static int btf_check_func_arg_match(struct bpf_verifier_env *env, + const struct btf *btf, u32 func_id, + struct bpf_reg_state *regs, + bool ptr_to_mem_ok) { struct bpf_verifier_log *log = &env->log; - struct bpf_prog *prog = env->prog; - struct btf *btf = prog->aux->btf; - const struct btf_param *args; + const char *func_name, *ref_tname; const struct btf_type *t, *ref_t; - u32 i, nargs, btf_id, type_size; - const char *tname; - bool is_global; - - if (!prog->aux->func_info) - return -EINVAL; - - btf_id = prog->aux->func_info[subprog].type_id; - if (!btf_id) - return -EFAULT; - - if (prog->aux->func_info_aux[subprog].unreliable) - return -EINVAL; + const struct btf_param *args; + u32 i, nargs, ref_id; - t = btf_type_by_id(btf, btf_id); + t = btf_type_by_id(btf, func_id); if (!t || !btf_type_is_func(t)) { /* These checks were already done by the verifier while loading - * struct bpf_func_info + * struct bpf_func_info or in add_kfunc_call(). */ - bpf_log(log, "BTF of func#%d doesn't point to KIND_FUNC\n", - subprog); + bpf_log(log, "BTF of func_id %u doesn't point to KIND_FUNC\n", + func_id); return -EFAULT; } - tname = btf_name_by_offset(btf, t->name_off); + func_name = btf_name_by_offset(btf, t->name_off); t = btf_type_by_id(btf, t->type); if (!t || !btf_type_is_func_proto(t)) { - bpf_log(log, "Invalid BTF of func %s\n", tname); + bpf_log(log, "Invalid BTF of func %s\n", func_name); return -EFAULT; } args = (const struct btf_param *)(t + 1); nargs = btf_type_vlen(t); if (nargs > MAX_BPF_FUNC_REG_ARGS) { - bpf_log(log, "Function %s has %d > %d args\n", tname, nargs, + bpf_log(log, "Function %s has %d > %d args\n", func_name, nargs, MAX_BPF_FUNC_REG_ARGS); - goto out; + return -EINVAL; } - is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; /* check that BTF function arguments match actual types that the * verifier sees. */ for (i = 0; i < nargs; i++) { - struct bpf_reg_state *reg = ®s[i + 1]; + u32 regno = i + 1; + struct bpf_reg_state *reg = ®s[regno]; - t = btf_type_by_id(btf, args[i].type); - while (btf_type_is_modifier(t)) - t = btf_type_by_id(btf, t->type); - if (btf_type_is_int(t) || btf_type_is_enum(t)) { + t = btf_type_skip_modifiers(btf, args[i].type, NULL); + if (btf_type_is_scalar(t)) { if (reg->type == SCALAR_VALUE) continue; - bpf_log(log, "R%d is not a scalar\n", i + 1); - goto out; + bpf_log(log, "R%d is not a scalar\n", regno); + return -EINVAL; } - if (btf_type_is_ptr(t)) { + + if (!btf_type_is_ptr(t)) { + bpf_log(log, "Unrecognized arg#%d type %s\n", + i, btf_type_str(t)); + return -EINVAL; + } + + ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); + ref_tname = btf_name_by_offset(btf, ref_t->name_off); + if (btf_is_kernel(btf)) { + const struct btf_type *reg_ref_t; + const struct btf *reg_btf; + const char *reg_ref_tname; + u32 reg_ref_id; + + if (!btf_type_is_struct(ref_t)) { + bpf_log(log, "kernel function %s args#%d pointer type %s %s is not supported\n", + func_name, i, btf_type_str(ref_t), + ref_tname); + return -EINVAL; + } + + if (reg->type == PTR_TO_BTF_ID) { + reg_btf = reg->btf; + reg_ref_id = reg->btf_id; + } else if (reg2btf_ids[reg->type]) { + reg_btf = btf_vmlinux; + reg_ref_id = *reg2btf_ids[reg->type]; + } else { + bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d is not a pointer to btf_id\n", + func_name, i, + btf_type_str(ref_t), ref_tname, regno); + return -EINVAL; + } + + reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, + ®_ref_id); + reg_ref_tname = btf_name_by_offset(reg_btf, + reg_ref_t->name_off); + if (!btf_struct_ids_match(log, reg_btf, reg_ref_id, + reg->off, btf, ref_id)) { + bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n", + func_name, i, + btf_type_str(ref_t), ref_tname, + regno, btf_type_str(reg_ref_t), + reg_ref_tname); + return -EINVAL; + } + } else if (btf_get_prog_ctx_type(log, btf, t, + env->prog->type, i)) { /* If function expects ctx type in BTF check that caller * is passing PTR_TO_CTX. */ - if (btf_get_prog_ctx_type(log, btf, t, prog->type, i)) { - if (reg->type != PTR_TO_CTX) { - bpf_log(log, - "arg#%d expected pointer to ctx, but got %s\n", - i, btf_kind_str[BTF_INFO_KIND(t->info)]); - goto out; - } - if (check_ctx_reg(env, reg, i + 1)) - goto out; - continue; + if (reg->type != PTR_TO_CTX) { + bpf_log(log, + "arg#%d expected pointer to ctx, but got %s\n", + i, btf_type_str(t)); + return -EINVAL; } + if (check_ctx_reg(env, reg, regno)) + return -EINVAL; + } else if (ptr_to_mem_ok) { + const struct btf_type *resolve_ret; + u32 type_size; - if (!is_global) - goto out; - - t = btf_type_skip_modifiers(btf, t->type, NULL); - - ref_t = btf_resolve_size(btf, t, &type_size); - if (IS_ERR(ref_t)) { + resolve_ret = btf_resolve_size(btf, ref_t, &type_size); + if (IS_ERR(resolve_ret)) { bpf_log(log, - "arg#%d reference type('%s %s') size cannot be determined: %ld\n", - i, btf_type_str(t), btf_name_by_offset(btf, t->name_off), - PTR_ERR(ref_t)); - goto out; + "arg#%d reference type('%s %s') size cannot be determined: %ld\n", + i, btf_type_str(ref_t), ref_tname, + PTR_ERR(resolve_ret)); + return -EINVAL; } - if (check_mem_reg(env, reg, i + 1, type_size)) - goto out; - - continue; + if (check_mem_reg(env, reg, regno, type_size)) + return -EINVAL; + } else { + return -EINVAL; } - bpf_log(log, "Unrecognized arg#%d type %s\n", - i, btf_kind_str[BTF_INFO_KIND(t->info)]); - goto out; } + return 0; -out: +} + +/* Compare BTF of a function with given bpf_reg_state. + * Returns: + * EFAULT - there is a verifier bug. Abort verification. + * EINVAL - there is a type mismatch or BTF is not available. + * 0 - BTF matches with what bpf_reg_state expects. + * Only PTR_TO_CTX and SCALAR_VALUE states are recognized. + */ +int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, + struct bpf_reg_state *regs) +{ + struct bpf_prog *prog = env->prog; + struct btf *btf = prog->aux->btf; + bool is_global; + u32 btf_id; + int err; + + if (!prog->aux->func_info) + return -EINVAL; + + btf_id = prog->aux->func_info[subprog].type_id; + if (!btf_id) + return -EFAULT; + + if (prog->aux->func_info_aux[subprog].unreliable) + return -EINVAL; + + is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; + err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global); + /* Compiler optimizations can remove arguments from static functions * or mismatched type can be passed into a global function. * In such cases mark the function as unreliable from BTF point of view. */ - prog->aux->func_info_aux[subprog].unreliable = true; - return -EINVAL; + if (err) + prog->aux->func_info_aux[subprog].unreliable = true; + return err; +} + +int btf_check_kfunc_arg_match(struct bpf_verifier_env *env, + const struct btf *btf, u32 func_id, + struct bpf_reg_state *regs) +{ + return btf_check_func_arg_match(env, btf, func_id, regs, false); } /* Convert BTF of a function into bpf_reg_state if possible diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 75244ecb2389..f5423251c118 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -143,25 +143,25 @@ int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog) if (!prog->aux->nr_linfo || !prog->jit_requested) return 0; - prog->aux->jited_linfo = kcalloc(prog->aux->nr_linfo, - sizeof(*prog->aux->jited_linfo), - GFP_KERNEL_ACCOUNT | __GFP_NOWARN); + prog->aux->jited_linfo = kvcalloc(prog->aux->nr_linfo, + sizeof(*prog->aux->jited_linfo), + GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!prog->aux->jited_linfo) return -ENOMEM; return 0; } -void bpf_prog_free_jited_linfo(struct bpf_prog *prog) +void bpf_prog_jit_attempt_done(struct bpf_prog *prog) { - kfree(prog->aux->jited_linfo); - prog->aux->jited_linfo = NULL; -} + if (prog->aux->jited_linfo && + (!prog->jited || !prog->aux->jited_linfo[0])) { + kvfree(prog->aux->jited_linfo); + prog->aux->jited_linfo = NULL; + } -void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog) -{ - if (prog->aux->jited_linfo && !prog->aux->jited_linfo[0]) - bpf_prog_free_jited_linfo(prog); + kfree(prog->aux->kfunc_tab); + prog->aux->kfunc_tab = NULL; } /* The jit engine is responsible to provide an array @@ -217,12 +217,6 @@ void bpf_prog_fill_jited_linfo(struct bpf_prog *prog, insn_to_jit_off[linfo[i].insn_off - insn_start - 1]; } -void bpf_prog_free_linfo(struct bpf_prog *prog) -{ - bpf_prog_free_jited_linfo(prog); - kvfree(prog->aux->linfo); -} - struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, gfp_t gfp_extra_flags) { @@ -1849,9 +1843,15 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) /* In case of BPF to BPF calls, verifier did all the prep * work with regards to JITing, etc. */ + bool jit_needed = false; + if (fp->bpf_func) goto finalize; + if (IS_ENABLED(CONFIG_BPF_JIT_ALWAYS_ON) || + bpf_prog_has_kfunc_call(fp)) + jit_needed = true; + bpf_prog_select_func(fp); /* eBPF JITs can rewrite the program in case constant @@ -1866,14 +1866,10 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) return fp; fp = bpf_int_jit_compile(fp); - if (!fp->jited) { - bpf_prog_free_jited_linfo(fp); -#ifdef CONFIG_BPF_JIT_ALWAYS_ON + bpf_prog_jit_attempt_done(fp); + if (!fp->jited && jit_needed) { *err = -ENOTSUPP; return fp; -#endif - } else { - bpf_prog_free_unused_jited_linfo(fp); } } else { *err = bpf_prog_offload_compile(fp); @@ -2354,6 +2350,11 @@ bool __weak bpf_jit_needs_zext(void) return false; } +bool __weak bpf_jit_supports_kfunc_call(void) +{ + return false; +} + /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call * skb_copy_bits(), so provide a weak definition of it for NET-less config. */ diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index 3acc7e0b6916..dad821c8ecd0 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -19,16 +19,23 @@ static const char *__func_get_name(const struct bpf_insn_cbs *cbs, { BUILD_BUG_ON(ARRAY_SIZE(func_id_str) != __BPF_FUNC_MAX_ID); - if (insn->src_reg != BPF_PSEUDO_CALL && + if (!insn->src_reg && insn->imm >= 0 && insn->imm < __BPF_FUNC_MAX_ID && func_id_str[insn->imm]) return func_id_str[insn->imm]; - if (cbs && cbs->cb_call) - return cbs->cb_call(cbs->private_data, insn); + if (cbs && cbs->cb_call) { + const char *res; + + res = cbs->cb_call(cbs->private_data, insn); + if (res) + return res; + } if (insn->src_reg == BPF_PSEUDO_CALL) snprintf(buff, len, "%+d", insn->imm); + else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) + snprintf(buff, len, "kernel-function"); return buff; } diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 074800226327..f306611c4ddf 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -382,8 +382,8 @@ const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = { }; #ifdef CONFIG_CGROUP_BPF -DECLARE_PER_CPU(struct bpf_cgroup_storage*, - bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); +DECLARE_PER_CPU(struct bpf_cgroup_storage_info, + bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]); BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags) { @@ -392,10 +392,17 @@ BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags) * verifier checks that its value is correct. */ enum bpf_cgroup_storage_type stype = cgroup_storage_type(map); - struct bpf_cgroup_storage *storage; + struct bpf_cgroup_storage *storage = NULL; void *ptr; + int i; - storage = this_cpu_read(bpf_cgroup_storage[stype]); + for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { + if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) + continue; + + storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]); + break; + } if (stype == BPF_CGROUP_STORAGE_SHARED) ptr = &READ_ONCE(storage->buf)->data[0]; diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 2d4f9ac12377..bd11db9774c3 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -9,10 +9,11 @@ #include <linux/slab.h> #include <uapi/linux/btf.h> -DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); - #ifdef CONFIG_CGROUP_BPF +DEFINE_PER_CPU(struct bpf_cgroup_storage_info, + bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]); + #include "../cgroup/cgroup-internal.h" #define LOCAL_STORAGE_CREATE_FLAG_MASK \ diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index cec792a17e5f..1b7b8a6f34ee 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -726,6 +726,9 @@ const struct bpf_map_ops trie_map_ops = { .map_lookup_elem = trie_lookup_elem, .map_update_elem = trie_update_elem, .map_delete_elem = trie_delete_elem, + .map_lookup_batch = generic_map_lookup_batch, + .map_update_batch = generic_map_update_batch, + .map_delete_batch = generic_map_delete_batch, .map_check_btf = trie_check_btf, .map_btf_name = "lpm_trie", .map_btf_id = &trie_map_btf_id, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 250503482cda..6428634da57e 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1694,7 +1694,9 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred) { bpf_prog_kallsyms_del_all(prog); btf_put(prog->aux->btf); - bpf_prog_free_linfo(prog); + kvfree(prog->aux->jited_linfo); + kvfree(prog->aux->linfo); + kfree(prog->aux->kfunc_tab); if (prog->aux->attach_btf) btf_put(prog->aux->attach_btf); @@ -2946,6 +2948,7 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) return BPF_PROG_TYPE_SK_MSG; case BPF_SK_SKB_STREAM_PARSER: case BPF_SK_SKB_STREAM_VERDICT: + case BPF_SK_SKB_VERDICT: return BPF_PROG_TYPE_SK_SKB; case BPF_LIRC_MODE2: return BPF_PROG_TYPE_LIRC_MODE2; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 210169c25ead..852541a435ef 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -234,6 +234,12 @@ static bool bpf_pseudo_call(const struct bpf_insn *insn) insn->src_reg == BPF_PSEUDO_CALL; } +static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn) +{ + return insn->code == (BPF_JMP | BPF_CALL) && + insn->src_reg == BPF_PSEUDO_KFUNC_CALL; +} + static bool bpf_pseudo_func(const struct bpf_insn *insn) { return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && @@ -1554,47 +1560,205 @@ static int add_subprog(struct bpf_verifier_env *env, int off) verbose(env, "too many subprograms\n"); return -E2BIG; } + /* determine subprog starts. The end is one before the next starts */ env->subprog_info[env->subprog_cnt++].start = off; sort(env->subprog_info, env->subprog_cnt, sizeof(env->subprog_info[0]), cmp_subprogs, NULL); return env->subprog_cnt - 1; } -static int check_subprogs(struct bpf_verifier_env *env) +struct bpf_kfunc_desc { + struct btf_func_model func_model; + u32 func_id; + s32 imm; +}; + +#define MAX_KFUNC_DESCS 256 +struct bpf_kfunc_desc_tab { + struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS]; + u32 nr_descs; +}; + +static int kfunc_desc_cmp_by_id(const void *a, const void *b) +{ + const struct bpf_kfunc_desc *d0 = a; + const struct bpf_kfunc_desc *d1 = b; + + /* func_id is not greater than BTF_MAX_TYPE */ + return d0->func_id - d1->func_id; +} + +static const struct bpf_kfunc_desc * +find_kfunc_desc(const struct bpf_prog *prog, u32 func_id) +{ + struct bpf_kfunc_desc desc = { + .func_id = func_id, + }; + struct bpf_kfunc_desc_tab *tab; + + tab = prog->aux->kfunc_tab; + return bsearch(&desc, tab->descs, tab->nr_descs, + sizeof(tab->descs[0]), kfunc_desc_cmp_by_id); +} + +static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id) +{ + const struct btf_type *func, *func_proto; + struct bpf_kfunc_desc_tab *tab; + struct bpf_prog_aux *prog_aux; + struct bpf_kfunc_desc *desc; + const char *func_name; + unsigned long addr; + int err; + + prog_aux = env->prog->aux; + tab = prog_aux->kfunc_tab; + if (!tab) { + if (!btf_vmlinux) { + verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n"); + return -ENOTSUPP; + } + + if (!env->prog->jit_requested) { + verbose(env, "JIT is required for calling kernel function\n"); + return -ENOTSUPP; + } + + if (!bpf_jit_supports_kfunc_call()) { + verbose(env, "JIT does not support calling kernel function\n"); + return -ENOTSUPP; + } + + if (!env->prog->gpl_compatible) { + verbose(env, "cannot call kernel function from non-GPL compatible program\n"); + return -EINVAL; + } + + tab = kzalloc(sizeof(*tab), GFP_KERNEL); + if (!tab) + return -ENOMEM; + prog_aux->kfunc_tab = tab; + } + + if (find_kfunc_desc(env->prog, func_id)) + return 0; + + if (tab->nr_descs == MAX_KFUNC_DESCS) { + verbose(env, "too many different kernel function calls\n"); + return -E2BIG; + } + + func = btf_type_by_id(btf_vmlinux, func_id); + if (!func || !btf_type_is_func(func)) { + verbose(env, "kernel btf_id %u is not a function\n", + func_id); + return -EINVAL; + } + func_proto = btf_type_by_id(btf_vmlinux, func->type); + if (!func_proto || !btf_type_is_func_proto(func_proto)) { + verbose(env, "kernel function btf_id %u does not have a valid func_proto\n", + func_id); + return -EINVAL; + } + + func_name = btf_name_by_offset(btf_vmlinux, func->name_off); + addr = kallsyms_lookup_name(func_name); + if (!addr) { + verbose(env, "cannot find address for kernel function %s\n", + func_name); + return -EINVAL; + } + + desc = &tab->descs[tab->nr_descs++]; + desc->func_id = func_id; + desc->imm = BPF_CAST_CALL(addr) - __bpf_call_base; + err = btf_distill_func_proto(&env->log, btf_vmlinux, + func_proto, func_name, + &desc->func_model); + if (!err) + sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), + kfunc_desc_cmp_by_id, NULL); + return err; +} + +static int kfunc_desc_cmp_by_imm(const void *a, const void *b) +{ + const struct bpf_kfunc_desc *d0 = a; + const struct bpf_kfunc_desc *d1 = b; + + if (d0->imm > d1->imm) + return 1; + else if (d0->imm < d1->imm) + return -1; + return 0; +} + +static void sort_kfunc_descs_by_imm(struct bpf_prog *prog) +{ + struct bpf_kfunc_desc_tab *tab; + + tab = prog->aux->kfunc_tab; + if (!tab) + return; + + sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), + kfunc_desc_cmp_by_imm, NULL); +} + +bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog) +{ + return !!prog->aux->kfunc_tab; +} + +const struct btf_func_model * +bpf_jit_find_kfunc_model(const struct bpf_prog *prog, + const struct bpf_insn *insn) +{ + const struct bpf_kfunc_desc desc = { + .imm = insn->imm, + }; + const struct bpf_kfunc_desc *res; + struct bpf_kfunc_desc_tab *tab; + + tab = prog->aux->kfunc_tab; + res = bsearch(&desc, tab->descs, tab->nr_descs, + sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm); + + return res ? &res->func_model : NULL; +} + +static int add_subprog_and_kfunc(struct bpf_verifier_env *env) { - int i, ret, subprog_start, subprog_end, off, cur_subprog = 0; struct bpf_subprog_info *subprog = env->subprog_info; struct bpf_insn *insn = env->prog->insnsi; - int insn_cnt = env->prog->len; + int i, ret, insn_cnt = env->prog->len; /* Add entry function. */ ret = add_subprog(env, 0); - if (ret < 0) + if (ret) return ret; - /* determine subprog starts. The end is one before the next starts */ - for (i = 0; i < insn_cnt; i++) { - if (bpf_pseudo_func(insn + i)) { - if (!env->bpf_capable) { - verbose(env, - "function pointers are allowed for CAP_BPF and CAP_SYS_ADMIN\n"); - return -EPERM; - } - ret = add_subprog(env, i + insn[i].imm + 1); - if (ret < 0) - return ret; - /* remember subprog */ - insn[i + 1].imm = ret; - continue; - } - if (!bpf_pseudo_call(insn + i)) + for (i = 0; i < insn_cnt; i++, insn++) { + if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) && + !bpf_pseudo_kfunc_call(insn)) continue; + if (!env->bpf_capable) { - verbose(env, - "function calls to other bpf functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n"); + verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n"); return -EPERM; } - ret = add_subprog(env, i + insn[i].imm + 1); + + if (bpf_pseudo_func(insn)) { + ret = add_subprog(env, i + insn->imm + 1); + if (ret >= 0) + /* remember subprog */ + insn[1].imm = ret; + } else if (bpf_pseudo_call(insn)) { + ret = add_subprog(env, i + insn->imm + 1); + } else { + ret = add_kfunc_call(env, insn->imm); + } + if (ret < 0) return ret; } @@ -1608,6 +1772,16 @@ static int check_subprogs(struct bpf_verifier_env *env) for (i = 0; i < env->subprog_cnt; i++) verbose(env, "func#%d @%d\n", i, subprog[i].start); + return 0; +} + +static int check_subprogs(struct bpf_verifier_env *env) +{ + int i, subprog_start, subprog_end, off, cur_subprog = 0; + struct bpf_subprog_info *subprog = env->subprog_info; + struct bpf_insn *insn = env->prog->insnsi; + int insn_cnt = env->prog->len; + /* now check that all jumps are within the same subprog */ subprog_start = subprog[cur_subprog].start; subprog_end = subprog[cur_subprog + 1].start; @@ -1916,6 +2090,17 @@ static int get_prev_insn_idx(struct bpf_verifier_state *st, int i, return i; } +static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn) +{ + const struct btf_type *func; + + if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL) + return NULL; + + func = btf_type_by_id(btf_vmlinux, insn->imm); + return btf_name_by_offset(btf_vmlinux, func->name_off); +} + /* For given verifier state backtrack_insn() is called from the last insn to * the first insn. Its purpose is to compute a bitmask of registers and * stack slots that needs precision in the parent verifier state. @@ -1924,6 +2109,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, u32 *reg_mask, u64 *stack_mask) { const struct bpf_insn_cbs cbs = { + .cb_call = disasm_kfunc_name, .cb_print = verbose, .private_data = env, }; @@ -5365,7 +5551,7 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn func_info_aux = env->prog->aux->func_info_aux; if (func_info_aux) is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; - err = btf_check_func_arg_match(env, subprog, caller->regs); + err = btf_check_subprog_arg_match(env, subprog, caller->regs); if (err == -EFAULT) return err; if (is_global) { @@ -5960,6 +6146,98 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return 0; } +/* mark_btf_func_reg_size() is used when the reg size is determined by + * the BTF func_proto's return value size and argument. + */ +static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno, + size_t reg_size) +{ + struct bpf_reg_state *reg = &cur_regs(env)[regno]; + + if (regno == BPF_REG_0) { + /* Function return value */ + reg->live |= REG_LIVE_WRITTEN; + reg->subreg_def = reg_size == sizeof(u64) ? + DEF_NOT_SUBREG : env->insn_idx + 1; + } else { + /* Function argument */ + if (reg_size == sizeof(u64)) { + mark_insn_zext(env, reg); + mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); + } else { + mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32); + } + } +} + +static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) +{ + const struct btf_type *t, *func, *func_proto, *ptr_type; + struct bpf_reg_state *regs = cur_regs(env); + const char *func_name, *ptr_type_name; + u32 i, nargs, func_id, ptr_type_id; + const struct btf_param *args; + int err; + + func_id = insn->imm; + func = btf_type_by_id(btf_vmlinux, func_id); + func_name = btf_name_by_offset(btf_vmlinux, func->name_off); + func_proto = btf_type_by_id(btf_vmlinux, func->type); + + if (!env->ops->check_kfunc_call || + !env->ops->check_kfunc_call(func_id)) { + verbose(env, "calling kernel function %s is not allowed\n", + func_name); + return -EACCES; + } + + /* Check the arguments */ + err = btf_check_kfunc_arg_match(env, btf_vmlinux, func_id, regs); + if (err) + return err; + + for (i = 0; i < CALLER_SAVED_REGS; i++) + mark_reg_not_init(env, regs, caller_saved[i]); + + /* Check return type */ + t = btf_type_skip_modifiers(btf_vmlinux, func_proto->type, NULL); + if (btf_type_is_scalar(t)) { + mark_reg_unknown(env, regs, BPF_REG_0); + mark_btf_func_reg_size(env, BPF_REG_0, t->size); + } else if (btf_type_is_ptr(t)) { + ptr_type = btf_type_skip_modifiers(btf_vmlinux, t->type, + &ptr_type_id); + if (!btf_type_is_struct(ptr_type)) { + ptr_type_name = btf_name_by_offset(btf_vmlinux, + ptr_type->name_off); + verbose(env, "kernel function %s returns pointer type %s %s is not supported\n", + func_name, btf_type_str(ptr_type), + ptr_type_name); + return -EINVAL; + } + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].btf = btf_vmlinux; + regs[BPF_REG_0].type = PTR_TO_BTF_ID; + regs[BPF_REG_0].btf_id = ptr_type_id; + mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *)); + } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */ + + nargs = btf_type_vlen(func_proto); + args = (const struct btf_param *)(func_proto + 1); + for (i = 0; i < nargs; i++) { + u32 regno = i + 1; + + t = btf_type_skip_modifiers(btf_vmlinux, args[i].type, NULL); + if (btf_type_is_ptr(t)) + mark_btf_func_reg_size(env, regno, sizeof(void *)); + else + /* scalar. ensured by btf_check_kfunc_arg_match() */ + mark_btf_func_reg_size(env, regno, t->size); + } + + return 0; +} + static bool signed_add_overflows(s64 a, s64 b) { /* Do the add in u64, where overflow is well-defined */ @@ -6062,19 +6340,6 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, else *ptr_limit = -off - 1; return *ptr_limit >= max ? -ERANGE : 0; - case PTR_TO_MAP_KEY: - /* Currently, this code is not exercised as the only use - * is bpf_for_each_map_elem() helper which requires - * bpf_capble. The code has been tested manually for - * future use. - */ - if (mask_to_left) { - *ptr_limit = ptr_reg->umax_value + ptr_reg->off; - } else { - off = ptr_reg->smin_value + ptr_reg->off; - *ptr_limit = ptr_reg->map_ptr->key_size - off; - } - return 0; case PTR_TO_MAP_VALUE: max = ptr_reg->map_ptr->value_size; if (mask_to_left) { @@ -6281,7 +6546,6 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, verbose(env, "R%d pointer arithmetic on %s prohibited\n", dst, reg_type_str[ptr_reg->type]); return -EACCES; - case PTR_TO_MAP_KEY: case PTR_TO_MAP_VALUE: if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) { verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n", @@ -10176,6 +10440,7 @@ static int do_check(struct bpf_verifier_env *env) if (env->log.level & BPF_LOG_LEVEL) { const struct bpf_insn_cbs cbs = { + .cb_call = disasm_kfunc_name, .cb_print = verbose, .private_data = env, }; @@ -10323,7 +10588,8 @@ static int do_check(struct bpf_verifier_env *env) if (BPF_SRC(insn->code) != BPF_K || insn->off != 0 || (insn->src_reg != BPF_REG_0 && - insn->src_reg != BPF_PSEUDO_CALL) || + insn->src_reg != BPF_PSEUDO_CALL && + insn->src_reg != BPF_PSEUDO_KFUNC_CALL) || insn->dst_reg != BPF_REG_0 || class == BPF_JMP32) { verbose(env, "BPF_CALL uses reserved fields\n"); @@ -10338,6 +10604,8 @@ static int do_check(struct bpf_verifier_env *env) } if (insn->src_reg == BPF_PSEUDO_CALL) err = check_func_call(env, insn, &env->insn_idx); + else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) + err = check_kfunc_call(env, insn); else err = check_helper_call(env, insn, &env->insn_idx); if (err) @@ -11648,6 +11916,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) func[i]->aux->name[0] = 'F'; func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; func[i]->jit_requested = 1; + func[i]->aux->kfunc_tab = prog->aux->kfunc_tab; func[i]->aux->linfo = prog->aux->linfo; func[i]->aux->nr_linfo = prog->aux->nr_linfo; func[i]->aux->jited_linfo = prog->aux->jited_linfo; @@ -11755,7 +12024,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) prog->bpf_func = func[0]->bpf_func; prog->aux->func = func; prog->aux->func_cnt = env->subprog_cnt; - bpf_prog_free_unused_jited_linfo(prog); + bpf_prog_jit_attempt_done(prog); return 0; out_free: for (i = 0; i < env->subprog_cnt; i++) { @@ -11778,7 +12047,7 @@ out_undo_insn: insn->off = 0; insn->imm = env->insn_aux_data[i].call_imm; } - bpf_prog_free_jited_linfo(prog); + bpf_prog_jit_attempt_done(prog); return err; } @@ -11787,6 +12056,7 @@ static int fixup_call_args(struct bpf_verifier_env *env) #ifndef CONFIG_BPF_JIT_ALWAYS_ON struct bpf_prog *prog = env->prog; struct bpf_insn *insn = prog->insnsi; + bool has_kfunc_call = bpf_prog_has_kfunc_call(prog); int i, depth; #endif int err = 0; @@ -11800,6 +12070,10 @@ static int fixup_call_args(struct bpf_verifier_env *env) return err; } #ifndef CONFIG_BPF_JIT_ALWAYS_ON + if (has_kfunc_call) { + verbose(env, "calling kernel functions are not allowed in non-JITed programs\n"); + return -EINVAL; + } if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) { /* When JIT fails the progs with bpf2bpf calls and tail_calls * have to be rejected, since interpreter doesn't support them yet. @@ -11828,6 +12102,26 @@ static int fixup_call_args(struct bpf_verifier_env *env) return err; } +static int fixup_kfunc_call(struct bpf_verifier_env *env, + struct bpf_insn *insn) +{ + const struct bpf_kfunc_desc *desc; + + /* insn->imm has the btf func_id. Replace it with + * an address (relative to __bpf_base_call). + */ + desc = find_kfunc_desc(env->prog, insn->imm); + if (!desc) { + verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n", + insn->imm); + return -EFAULT; + } + + insn->imm = desc->imm; + + return 0; +} + /* Do various post-verification rewrites in a single program pass. * These rewrites simplify JIT and interpreter implementations. */ @@ -11963,6 +12257,12 @@ static int do_misc_fixups(struct bpf_verifier_env *env) continue; if (insn->src_reg == BPF_PSEUDO_CALL) continue; + if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { + ret = fixup_kfunc_call(env, insn); + if (ret) + return ret; + continue; + } if (insn->imm == BPF_FUNC_get_route_realm) prog->dst_needed = 1; @@ -12192,6 +12492,8 @@ patch_call_imm: } } + sort_kfunc_descs_by_imm(env->prog); + return 0; } @@ -12302,7 +12604,7 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) /* 1st arg to a function */ regs[BPF_REG_1].type = PTR_TO_CTX; mark_reg_known_zero(env, regs, BPF_REG_1); - ret = btf_check_func_arg_match(env, subprog, regs); + ret = btf_check_subprog_arg_match(env, subprog, regs); if (ret == -EFAULT) /* unlikely verifier bug. abort. * ret == 0 and ret < 0 are sadly acceptable for @@ -12897,6 +13199,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, if (!env->explored_states) goto skip_full_check; + ret = add_subprog_and_kfunc(env); + if (ret < 0) + goto skip_full_check; + ret = check_subprogs(env); if (ret < 0) goto skip_full_check; |