summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/bpf/bpf_design_QA.rst15
-rw-r--r--arch/x86/net/bpf_jit_comp.c5
-rw-r--r--arch/x86/net/bpf_jit_comp32.c198
-rw-r--r--drivers/net/veth.c12
-rw-r--r--include/linux/bpf-cgroup.h57
-rw-r--r--include/linux/bpf.h58
-rw-r--r--include/linux/btf.h6
-rw-r--r--include/linux/filter.h13
-rw-r--r--include/linux/skbuff.h1
-rw-r--r--include/linux/skmsg.h77
-rw-r--r--include/net/bpf_sk_storage.h1
-rw-r--r--include/net/sock.h3
-rw-r--r--include/net/tcp.h3
-rw-r--r--include/net/udp.h3
-rw-r--r--include/uapi/linux/bpf.h5
-rw-r--r--kernel/bpf/btf.c219
-rw-r--r--kernel/bpf/core.c47
-rw-r--r--kernel/bpf/disasm.c13
-rw-r--r--kernel/bpf/helpers.c15
-rw-r--r--kernel/bpf/local_storage.c5
-rw-r--r--kernel/bpf/lpm_trie.c3
-rw-r--r--kernel/bpf/syscall.c5
-rw-r--r--kernel/bpf/verifier.c390
-rw-r--r--net/bpf/test_run.c34
-rw-r--r--net/core/filter.c1
-rw-r--r--net/core/skbuff.c55
-rw-r--r--net/core/skmsg.c177
-rw-r--r--net/core/sock_map.c118
-rw-r--r--net/ipv4/af_inet.c1
-rw-r--r--net/ipv4/bpf_tcp_ca.c43
-rw-r--r--net/ipv4/tcp_bpf.c130
-rw-r--r--net/ipv4/tcp_cubic.c24
-rw-r--r--net/ipv4/tcp_ipv4.c3
-rw-r--r--net/ipv4/udp.c32
-rw-r--r--net/ipv4/udp_bpf.c79
-rw-r--r--net/ipv6/af_inet6.c1
-rw-r--r--net/ipv6/tcp_ipv6.c3
-rw-r--r--net/ipv6/udp.c3
-rw-r--r--net/tls/tls_sw.c4
-rw-r--r--samples/bpf/sampleip_kern.c1
-rw-r--r--samples/bpf/trace_event_kern.c1
-rw-r--r--samples/bpf/xdpsock_user.c55
-rw-r--r--tools/bpf/bpftool/common.c1
-rw-r--r--tools/bpf/bpftool/prog.c1
-rw-r--r--tools/bpf/resolve_btfids/main.c11
-rw-r--r--tools/include/uapi/linux/bpf.h5
-rw-r--r--tools/lib/bpf/libbpf.c403
-rw-r--r--tools/lib/bpf/libbpf.h5
-rw-r--r--tools/lib/bpf/libbpf.map1
-rw-r--r--tools/lib/bpf/linker.c37
-rw-r--r--tools/lib/bpf/xsk.c258
-rw-r--r--tools/testing/selftests/bpf/README.rst14
-rw-r--r--tools/testing/selftests/bpf/bpf_tcp_helpers.h29
-rw-r--r--tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c158
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_call.c59
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c40
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c136
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_ima.c6
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_cubic.c36
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp.c22
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_test.c47
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c42
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_listen.c22
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c18
-rwxr-xr-xtools/testing/selftests/bpf/test_xsk.sh3
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c12
-rw-r--r--tools/testing/selftests/bpf/verifier/dead_code.c10
-rwxr-xr-xtools/testing/selftests/bpf/vmtest.sh39
-rw-r--r--tools/testing/selftests/bpf/xdpxceiver.c700
-rw-r--r--tools/testing/selftests/bpf/xdpxceiver.h49
70 files changed, 2944 insertions, 1139 deletions
diff --git a/Documentation/bpf/bpf_design_QA.rst b/Documentation/bpf/bpf_design_QA.rst
index 0e15f9b05c9d..437de2a7a5de 100644
--- a/Documentation/bpf/bpf_design_QA.rst
+++ b/Documentation/bpf/bpf_design_QA.rst
@@ -258,3 +258,18 @@ Q: Can BPF functionality such as new program or map types, new
helpers, etc be added out of kernel module code?
A: NO.
+
+Q: Directly calling kernel function is an ABI?
+----------------------------------------------
+Q: Some kernel functions (e.g. tcp_slow_start) can be called
+by BPF programs. Do these kernel functions become an ABI?
+
+A: NO.
+
+The kernel function protos will change and the bpf programs will be
+rejected by the verifier. Also, for example, some of the bpf-callable
+kernel functions have already been used by other kernel tcp
+cc (congestion-control) implementations. If any of these kernel
+functions has changed, both the in-tree and out-of-tree kernel tcp cc
+implementations have to be changed. The same goes for the bpf
+programs and they have to be adjusted accordingly.
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b35fc8023884..9eead60f0301 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -2346,3 +2346,8 @@ out:
tmp : orig_prog);
return prog;
}
+
+bool bpf_jit_supports_kfunc_call(void)
+{
+ return true;
+}
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index d17b67c69f89..0a7a2870f111 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -1390,6 +1390,19 @@ static inline void emit_push_r64(const u8 src[], u8 **pprog)
*pprog = prog;
}
+static void emit_push_r32(const u8 src[], u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ /* mov ecx,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo));
+ /* push ecx */
+ EMIT1(0x51);
+
+ *pprog = prog;
+}
+
static u8 get_cond_jmp_opcode(const u8 op, bool is_cmp_lo)
{
u8 jmp_cond;
@@ -1459,6 +1472,174 @@ static u8 get_cond_jmp_opcode(const u8 op, bool is_cmp_lo)
return jmp_cond;
}
+/* i386 kernel compiles with "-mregparm=3". From gcc document:
+ *
+ * ==== snippet ====
+ * regparm (number)
+ * On x86-32 targets, the regparm attribute causes the compiler
+ * to pass arguments number one to (number) if they are of integral
+ * type in registers EAX, EDX, and ECX instead of on the stack.
+ * Functions that take a variable number of arguments continue
+ * to be passed all of their arguments on the stack.
+ * ==== snippet ====
+ *
+ * The first three args of a function will be considered for
+ * putting into the 32bit register EAX, EDX, and ECX.
+ *
+ * Two 32bit registers are used to pass a 64bit arg.
+ *
+ * For example,
+ * void foo(u32 a, u32 b, u32 c, u32 d):
+ * u32 a: EAX
+ * u32 b: EDX
+ * u32 c: ECX
+ * u32 d: stack
+ *
+ * void foo(u64 a, u32 b, u32 c):
+ * u64 a: EAX (lo32) EDX (hi32)
+ * u32 b: ECX
+ * u32 c: stack
+ *
+ * void foo(u32 a, u64 b, u32 c):
+ * u32 a: EAX
+ * u64 b: EDX (lo32) ECX (hi32)
+ * u32 c: stack
+ *
+ * void foo(u32 a, u32 b, u64 c):
+ * u32 a: EAX
+ * u32 b: EDX
+ * u64 c: stack
+ *
+ * The return value will be stored in the EAX (and EDX for 64bit value).
+ *
+ * For example,
+ * u32 foo(u32 a, u32 b, u32 c):
+ * return value: EAX
+ *
+ * u64 foo(u32 a, u32 b, u32 c):
+ * return value: EAX (lo32) EDX (hi32)
+ *
+ * Notes:
+ * The verifier only accepts function having integer and pointers
+ * as its args and return value, so it does not have
+ * struct-by-value.
+ *
+ * emit_kfunc_call() finds out the btf_func_model by calling
+ * bpf_jit_find_kfunc_model(). A btf_func_model
+ * has the details about the number of args, size of each arg,
+ * and the size of the return value.
+ *
+ * It first decides how many args can be passed by EAX, EDX, and ECX.
+ * That will decide what args should be pushed to the stack:
+ * [first_stack_regno, last_stack_regno] are the bpf regnos
+ * that should be pushed to the stack.
+ *
+ * It will first push all args to the stack because the push
+ * will need to use ECX. Then, it moves
+ * [BPF_REG_1, first_stack_regno) to EAX, EDX, and ECX.
+ *
+ * When emitting a call (0xE8), it needs to figure out
+ * the jmp_offset relative to the jit-insn address immediately
+ * following the call (0xE8) instruction. At this point, it knows
+ * the end of the jit-insn address after completely translated the
+ * current (BPF_JMP | BPF_CALL) bpf-insn. It is passed as "end_addr"
+ * to the emit_kfunc_call(). Thus, it can learn the "immediate-follow-call"
+ * address by figuring out how many jit-insn is generated between
+ * the call (0xE8) and the end_addr:
+ * - 0-1 jit-insn (3 bytes each) to restore the esp pointer if there
+ * is arg pushed to the stack.
+ * - 0-2 jit-insns (3 bytes each) to handle the return value.
+ */
+static int emit_kfunc_call(const struct bpf_prog *bpf_prog, u8 *end_addr,
+ const struct bpf_insn *insn, u8 **pprog)
+{
+ const u8 arg_regs[] = { IA32_EAX, IA32_EDX, IA32_ECX };
+ int i, cnt = 0, first_stack_regno, last_stack_regno;
+ int free_arg_regs = ARRAY_SIZE(arg_regs);
+ const struct btf_func_model *fm;
+ int bytes_in_stack = 0;
+ const u8 *cur_arg_reg;
+ u8 *prog = *pprog;
+ s64 jmp_offset;
+
+ fm = bpf_jit_find_kfunc_model(bpf_prog, insn);
+ if (!fm)
+ return -EINVAL;
+
+ first_stack_regno = BPF_REG_1;
+ for (i = 0; i < fm->nr_args; i++) {
+ int regs_needed = fm->arg_size[i] > sizeof(u32) ? 2 : 1;
+
+ if (regs_needed > free_arg_regs)
+ break;
+
+ free_arg_regs -= regs_needed;
+ first_stack_regno++;
+ }
+
+ /* Push the args to the stack */
+ last_stack_regno = BPF_REG_0 + fm->nr_args;
+ for (i = last_stack_regno; i >= first_stack_regno; i--) {
+ if (fm->arg_size[i - 1] > sizeof(u32)) {
+ emit_push_r64(bpf2ia32[i], &prog);
+ bytes_in_stack += 8;
+ } else {
+ emit_push_r32(bpf2ia32[i], &prog);
+ bytes_in_stack += 4;
+ }
+ }
+
+ cur_arg_reg = &arg_regs[0];
+ for (i = BPF_REG_1; i < first_stack_regno; i++) {
+ /* mov e[adc]x,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, *cur_arg_reg++),
+ STACK_VAR(bpf2ia32[i][0]));
+ if (fm->arg_size[i - 1] > sizeof(u32))
+ /* mov e[adc]x,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, *cur_arg_reg++),
+ STACK_VAR(bpf2ia32[i][1]));
+ }
+
+ if (bytes_in_stack)
+ /* add esp,"bytes_in_stack" */
+ end_addr -= 3;
+
+ /* mov dword ptr [ebp+off],edx */
+ if (fm->ret_size > sizeof(u32))
+ end_addr -= 3;
+
+ /* mov dword ptr [ebp+off],eax */
+ if (fm->ret_size)
+ end_addr -= 3;
+
+ jmp_offset = (u8 *)__bpf_call_base + insn->imm - end_addr;
+ if (!is_simm32(jmp_offset)) {
+ pr_err("unsupported BPF kernel function jmp_offset:%lld\n",
+ jmp_offset);
+ return -EINVAL;
+ }
+
+ EMIT1_off32(0xE8, jmp_offset);
+
+ if (fm->ret_size)
+ /* mov dword ptr [ebp+off],eax */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(bpf2ia32[BPF_REG_0][0]));
+
+ if (fm->ret_size > sizeof(u32))
+ /* mov dword ptr [ebp+off],edx */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(bpf2ia32[BPF_REG_0][1]));
+
+ if (bytes_in_stack)
+ /* add esp,"bytes_in_stack" */
+ EMIT3(0x83, add_1reg(0xC0, IA32_ESP), bytes_in_stack);
+
+ *pprog = prog;
+
+ return 0;
+}
+
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int oldproglen, struct jit_context *ctx)
{
@@ -1888,6 +2069,18 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
if (insn->src_reg == BPF_PSEUDO_CALL)
goto notyet;
+ if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+ int err;
+
+ err = emit_kfunc_call(bpf_prog,
+ image + addrs[i],
+ insn, &prog);
+
+ if (err)
+ return err;
+ break;
+ }
+
func = (u8 *) __bpf_call_base + imm32;
jmp_offset = func - (image + addrs[i]);
@@ -2393,3 +2586,8 @@ out:
tmp : orig_prog);
return prog;
}
+
+bool bpf_jit_supports_kfunc_call(void)
+{
+ return true;
+}
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 91b73db37555..9e525646df1d 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -218,6 +218,17 @@ static void veth_get_ethtool_stats(struct net_device *dev,
}
}
+static void veth_get_channels(struct net_device *dev,
+ struct ethtool_channels *channels)
+{
+ channels->tx_count = dev->real_num_tx_queues;
+ channels->rx_count = dev->real_num_rx_queues;
+ channels->max_tx = dev->real_num_tx_queues;
+ channels->max_rx = dev->real_num_rx_queues;
+ channels->combined_count = min(dev->real_num_rx_queues, dev->real_num_tx_queues);
+ channels->max_combined = min(dev->real_num_rx_queues, dev->real_num_tx_queues);
+}
+
static const struct ethtool_ops veth_ethtool_ops = {
.get_drvinfo = veth_get_drvinfo,
.get_link = ethtool_op_get_link,
@@ -226,6 +237,7 @@ static const struct ethtool_ops veth_ethtool_ops = {
.get_ethtool_stats = veth_get_ethtool_stats,
.get_link_ksettings = veth_get_link_ksettings,
.get_ts_info = ethtool_op_get_ts_info,
+ .get_channels = veth_get_channels,
};
/* general routines */
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index c42e02b4d84b..6a29fe11485d 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -20,14 +20,25 @@ struct bpf_sock_ops_kern;
struct bpf_cgroup_storage;
struct ctl_table;
struct ctl_table_header;
+struct task_struct;
#ifdef CONFIG_CGROUP_BPF
extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE];
#define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type])
-DECLARE_PER_CPU(struct bpf_cgroup_storage*,
- bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
+#define BPF_CGROUP_STORAGE_NEST_MAX 8
+
+struct bpf_cgroup_storage_info {
+ struct task_struct *task;
+ struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
+};
+
+/* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks
+ * to use bpf cgroup storage simultaneously.
+ */
+DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
+ bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
#define for_each_cgroup_storage_type(stype) \
for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
@@ -161,13 +172,42 @@ static inline enum bpf_cgroup_storage_type cgroup_storage_type(
return BPF_CGROUP_STORAGE_SHARED;
}
-static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage
- *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
+static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage
+ *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
{
enum bpf_cgroup_storage_type stype;
+ int i, err = 0;
+
+ preempt_disable();
+ for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
+ if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL))
+ continue;
+
+ this_cpu_write(bpf_cgroup_storage_info[i].task, current);
+ for_each_cgroup_storage_type(stype)
+ this_cpu_write(bpf_cgroup_storage_info[i].storage[stype],
+ storage[stype]);
+ goto out;
+ }
+ err = -EBUSY;
+ WARN_ON_ONCE(1);
+
+out:
+ preempt_enable();
+ return err;
+}
+
+static inline void bpf_cgroup_storage_unset(void)
+{
+ int i;
+
+ for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
+ if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
+ continue;
- for_each_cgroup_storage_type(stype)
- this_cpu_write(bpf_cgroup_storage[stype], storage[stype]);
+ this_cpu_write(bpf_cgroup_storage_info[i].task, NULL);
+ return;
+ }
}
struct bpf_cgroup_storage *
@@ -448,8 +488,9 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
return -EINVAL;
}
-static inline void bpf_cgroup_storage_set(
- struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {}
+static inline int bpf_cgroup_storage_set(
+ struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; }
+static inline void bpf_cgroup_storage_unset(void) {}
static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
struct bpf_map *map) { return 0; }
static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 39dce9d3c3a5..9fdd839b418c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -56,7 +56,7 @@ struct bpf_iter_seq_info {
u32 seq_priv_size;
};
-/* map is generic key/value storage optionally accesible by eBPF programs */
+/* map is generic key/value storage optionally accessible by eBPF programs */
struct bpf_map_ops {
/* funcs callable from userspace (via syscall) */
int (*map_alloc_check)(union bpf_attr *attr);
@@ -427,6 +427,7 @@ enum bpf_reg_type {
PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */
PTR_TO_FUNC, /* reg points to a bpf program function */
PTR_TO_MAP_KEY, /* reg points to a map element key */
+ __BPF_REG_TYPE_MAX,
};
/* The information passed from prog-specific *_is_valid_access
@@ -480,6 +481,7 @@ struct bpf_verifier_ops {
const struct btf_type *t, int off, int size,
enum bpf_access_type atype,
u32 *next_btf_id);
+ bool (*check_kfunc_call)(u32 kfunc_btf_id);
};
struct bpf_prog_offload_ops {
@@ -796,6 +798,8 @@ struct btf_mod_pair {
struct module *module;
};
+struct bpf_kfunc_desc_tab;
+
struct bpf_prog_aux {
atomic64_t refcnt;
u32 used_map_cnt;
@@ -832,6 +836,7 @@ struct bpf_prog_aux {
struct bpf_prog **func;
void *jit_data; /* JIT specific data. arch dependent */
struct bpf_jit_poke_descriptor *poke_tab;
+ struct bpf_kfunc_desc_tab *kfunc_tab;
u32 size_poke_tab;
struct bpf_ksym ksym;
const struct bpf_prog_ops *ops;
@@ -1106,6 +1111,13 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
/* BPF program asks to set CN on the packet. */
#define BPF_RET_SET_CN (1 << 0)
+/* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY,
+ * if bpf_cgroup_storage_set() failed, the rest of programs
+ * will not execute. This should be a really rare scenario
+ * as it requires BPF_CGROUP_STORAGE_NEST_MAX number of
+ * preemptions all between bpf_cgroup_storage_set() and
+ * bpf_cgroup_storage_unset() on the same cpu.
+ */
#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags) \
({ \
struct bpf_prog_array_item *_item; \
@@ -1118,10 +1130,12 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
_array = rcu_dereference(array); \
_item = &_array->items[0]; \
while ((_prog = READ_ONCE(_item->prog))) { \
- bpf_cgroup_storage_set(_item->cgroup_storage); \
+ if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \
+ break; \
func_ret = func(_prog, ctx); \
_ret &= (func_ret & 1); \
*(ret_flags) |= (func_ret >> 1); \
+ bpf_cgroup_storage_unset(); \
_item++; \
} \
rcu_read_unlock(); \
@@ -1142,9 +1156,14 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
goto _out; \
_item = &_array->items[0]; \
while ((_prog = READ_ONCE(_item->prog))) { \
- if (set_cg_storage) \
- bpf_cgroup_storage_set(_item->cgroup_storage); \
- _ret &= func(_prog, ctx); \
+ if (!set_cg_storage) { \
+ _ret &= func(_prog, ctx); \
+ } else { \
+ if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \
+ break; \
+ _ret &= func(_prog, ctx); \
+ bpf_cgroup_storage_unset(); \
+ } \
_item++; \
} \
_out: \
@@ -1513,6 +1532,7 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr);
+bool bpf_prog_test_check_kfunc_call(u32 kfunc_id);
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info);
@@ -1531,8 +1551,11 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
struct btf_func_model *m);
struct bpf_reg_state;
-int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
- struct bpf_reg_state *regs);
+int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
+ struct bpf_reg_state *regs);
+int btf_check_kfunc_arg_match(struct bpf_verifier_env *env,
+ const struct btf *btf, u32 func_id,
+ struct bpf_reg_state *regs);
int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
struct bpf_reg_state *reg);
int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog,
@@ -1543,6 +1566,10 @@ struct bpf_link *bpf_link_by_id(u32 id);
const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
void bpf_task_storage_free(struct task_struct *task);
+bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog);
+const struct btf_func_model *
+bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
+ const struct bpf_insn *insn);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
@@ -1705,6 +1732,11 @@ static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
return -ENOTSUPP;
}
+static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id)
+{
+ return false;
+}
+
static inline void bpf_map_put(struct bpf_map *map)
{
}
@@ -1723,6 +1755,18 @@ bpf_base_func_proto(enum bpf_func_id func_id)
static inline void bpf_task_storage_free(struct task_struct *task)
{
}
+
+static inline bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
+{
+ return false;
+}
+
+static inline const struct btf_func_model *
+bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
+ const struct bpf_insn *insn)
+{
+ return NULL;
+}
#endif /* CONFIG_BPF_SYSCALL */
void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 9c1b52738bbe..3bac66e0183a 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -110,6 +110,7 @@ const struct btf_type *btf_type_resolve_func_ptr(const struct btf *btf,
const struct btf_type *
btf_resolve_size(const struct btf *btf, const struct btf_type *type,
u32 *type_size);
+const char *btf_type_str(const struct btf_type *t);
#define for_each_member(i, struct_type, member) \
for (i = 0, member = btf_type_member(struct_type); \
@@ -141,6 +142,11 @@ static inline bool btf_type_is_enum(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM;
}
+static inline bool btf_type_is_scalar(const struct btf_type *t)
+{
+ return btf_type_is_int(t) || btf_type_is_enum(t);
+}
+
static inline bool btf_type_is_typedef(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index b2b85b2cad8e..9a09547bc7ba 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -877,8 +877,7 @@ void bpf_prog_free_linfo(struct bpf_prog *prog);
void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
const u32 *insn_to_jit_off);
int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog);
-void bpf_prog_free_jited_linfo(struct bpf_prog *prog);
-void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog);
+void bpf_prog_jit_attempt_done(struct bpf_prog *prog);
struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags);
struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags);
@@ -919,6 +918,7 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
void bpf_jit_compile(struct bpf_prog *prog);
bool bpf_jit_needs_zext(void);
+bool bpf_jit_supports_kfunc_call(void);
bool bpf_helper_changes_pkt_data(void *func);
static inline bool bpf_dump_raw_ok(const struct cred *cred)
@@ -1246,15 +1246,6 @@ static inline u16 bpf_anc_helper(const struct sock_filter *ftest)
void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb,
int k, unsigned int size);
-static inline void *bpf_load_pointer(const struct sk_buff *skb, int k,
- unsigned int size, void *buffer)
-{
- if (k >= 0)
- return skb_header_pointer(skb, k, size, buffer);
-
- return bpf_internal_load_pointer_neg_helper(skb, k, size);
-}
-
static inline int bpf_tell_extensions(void)
{
return SKF_AD_MAX;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c8def85fcc22..dbf820a50a39 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3626,6 +3626,7 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
unsigned int flags);
int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
int len);
+int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len);
void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
unsigned int skb_zerocopy_headlen(const struct sk_buff *from);
int skb_zerocopy(struct sk_buff *to, struct sk_buff *from,
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 6c09d94be2e9..f78e90a04a69 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -58,6 +58,7 @@ struct sk_psock_progs {
struct bpf_prog *msg_parser;
struct bpf_prog *stream_parser;
struct bpf_prog *stream_verdict;
+ struct bpf_prog *skb_verdict;
};
enum sk_psock_state_bits {
@@ -89,6 +90,7 @@ struct sk_psock {
#endif
struct sk_buff_head ingress_skb;
struct list_head ingress_msg;
+ spinlock_t ingress_lock;
unsigned long state;
struct list_head link;
spinlock_t link_lock;
@@ -97,13 +99,12 @@ struct sk_psock {
void (*saved_close)(struct sock *sk, long timeout);
void (*saved_write_space)(struct sock *sk);
void (*saved_data_ready)(struct sock *sk);
+ int (*psock_update_sk_prot)(struct sock *sk, bool restore);
struct proto *sk_proto;
+ struct mutex work_mutex;
struct sk_psock_work_state work_state;
struct work_struct work;
- union {
- struct rcu_head rcu;
- struct work_struct gc;
- };
+ struct rcu_work rwork;
};
int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
@@ -124,6 +125,10 @@ int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
struct sk_msg *msg, u32 bytes);
int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
struct sk_msg *msg, u32 bytes);
+int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags,
+ long timeo, int *err);
+int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
+ int len, int flags);
static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes)
{
@@ -284,7 +289,45 @@ static inline struct sk_psock *sk_psock(const struct sock *sk)
static inline void sk_psock_queue_msg(struct sk_psock *psock,
struct sk_msg *msg)
{
+ spin_lock_bh(&psock->ingress_lock);
list_add_tail(&msg->list, &psock->ingress_msg);
+ spin_unlock_bh(&psock->ingress_lock);
+}
+
+static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock)
+{
+ struct sk_msg *msg;
+
+ spin_lock_bh(&psock->ingress_lock);
+ msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list);
+ if (msg)
+ list_del(&msg->list);
+ spin_unlock_bh(&psock->ingress_lock);
+ return msg;
+}
+
+static inline struct sk_msg *sk_psock_peek_msg(struct sk_psock *psock)
+{
+ struct sk_msg *msg;
+
+ spin_lock_bh(&psock->ingress_lock);
+ msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list);
+ spin_unlock_bh(&psock->ingress_lock);
+ return msg;
+}
+
+static inline struct sk_msg *sk_psock_next_msg(struct sk_psock *psock,
+ struct sk_msg *msg)
+{
+ struct sk_msg *ret;
+
+ spin_lock_bh(&psock->ingress_lock);
+ if (list_is_last(&msg->list, &psock->ingress_msg))
+ ret = NULL;
+ else
+ ret = list_next_entry(msg, list);
+ spin_unlock_bh(&psock->ingress_lock);
+ return ret;
}
static inline bool sk_psock_queue_empty(const struct sk_psock *psock)
@@ -292,6 +335,13 @@ static inline bool sk_psock_queue_empty(const struct sk_psock *psock)
return psock ? list_empty(&psock->ingress_msg) : true;
}
+static inline void kfree_sk_msg(struct sk_msg *msg)
+{
+ if (msg->skb)
+ consume_skb(msg->skb);
+ kfree(msg);
+}
+
static inline void sk_psock_report_error(struct sk_psock *psock, int err)
{
struct sock *sk = psock->sk;
@@ -301,6 +351,7 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err)
}
struct sk_psock *sk_psock_init(struct sock *sk, int node);
+void sk_psock_stop(struct sk_psock *psock, bool wait);
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
@@ -349,25 +400,12 @@ static inline void sk_psock_cork_free(struct sk_psock *psock)
}
}
-static inline void sk_psock_update_proto(struct sock *sk,
- struct sk_psock *psock,
- struct proto *ops)
-{
- /* Pairs with lockless read in sk_clone_lock() */
- WRITE_ONCE(sk->sk_prot, ops);
-}
-
static inline void sk_psock_restore_proto(struct sock *sk,
struct sk_psock *psock)
{
sk->sk_prot->unhash = psock->saved_unhash;
- if (inet_csk_has_ulp(sk)) {
- tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
- } else {
- sk->sk_write_space = psock->saved_write_space;
- /* Pairs with lockless read in sk_clone_lock() */
- WRITE_ONCE(sk->sk_prot, psock->sk_proto);
- }
+ if (psock->psock_update_sk_prot)
+ psock->psock_update_sk_prot(sk, true);
}
static inline void sk_psock_set_state(struct sk_psock *psock,
@@ -442,6 +480,7 @@ static inline void psock_progs_drop(struct sk_psock_progs *progs)
psock_set_prog(&progs->msg_parser, NULL);
psock_set_prog(&progs->stream_parser, NULL);
psock_set_prog(&progs->stream_verdict, NULL);
+ psock_set_prog(&progs->skb_verdict, NULL);
}
int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb);
diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
index 0e85713f56df..2926f1f00d65 100644
--- a/include/net/bpf_sk_storage.h
+++ b/include/net/bpf_sk_storage.h
@@ -27,7 +27,6 @@ struct bpf_local_storage_elem;
struct bpf_sk_storage_diag;
struct sk_buff;
struct nlattr;
-struct sock;
#ifdef CONFIG_BPF_SYSCALL
int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk);
diff --git a/include/net/sock.h b/include/net/sock.h
index 0b6266fd6bf6..8b4155e756c2 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1184,6 +1184,9 @@ struct proto {
void (*unhash)(struct sock *sk);
void (*rehash)(struct sock *sk);
int (*get_port)(struct sock *sk, unsigned short snum);
+#ifdef CONFIG_BPF_SYSCALL
+ int (*psock_update_sk_prot)(struct sock *sk, bool restore);
+#endif
/* Keeping track of sockets in use */
#ifdef CONFIG_PROC_FS
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 075de26f449d..31b1696c62ba 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2203,13 +2203,12 @@ struct sk_psock;
#ifdef CONFIG_BPF_SYSCALL
struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
+int tcp_bpf_update_proto(struct sock *sk, bool restore);
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
#endif /* CONFIG_BPF_SYSCALL */
int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
int flags);
-int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
- struct msghdr *msg, int len, int flags);
#endif /* CONFIG_NET_SOCK_MSG */
#if !defined(CONFIG_BPF_SYSCALL) || !defined(CONFIG_NET_SOCK_MSG)
diff --git a/include/net/udp.h b/include/net/udp.h
index adf2ff8ac87c..f55aaeef7e91 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -329,6 +329,8 @@ struct sock *__udp6_lib_lookup(struct net *net,
struct sk_buff *skb);
struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
__be16 sport, __be16 dport);
+int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor);
/* UDP uses skb->dev_scratch to cache as much information as possible and avoid
* possibly multiple cache miss on dequeue()
@@ -541,6 +543,7 @@ static inline void udp_post_segment_fix_csum(struct sk_buff *skb)
#ifdef CONFIG_BPF_SYSCALL
struct sk_psock;
struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
+int udp_bpf_update_proto(struct sock *sk, bool restore);
#endif
#endif /* _UDP_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 008edc1dc8c1..49371eba98ba 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -957,6 +957,7 @@ enum bpf_attach_type {
BPF_XDP_CPUMAP,
BPF_SK_LOOKUP,
BPF_XDP,
+ BPF_SK_SKB_VERDICT,
__MAX_BPF_ATTACH_TYPE
};
@@ -1117,6 +1118,10 @@ enum bpf_link_type {
* offset to another bpf function
*/
#define BPF_PSEUDO_CALL 1
+/* when bpf_call->src_reg == BPF_PSEUDO_KFUNC_CALL,
+ * bpf_call->imm == btf_id of a BTF_KIND_FUNC in the running kernel
+ */
+#define BPF_PSEUDO_KFUNC_CALL 2
/* flags for BPF_MAP_UPDATE_ELEM command */
enum {
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 369faeddf1df..0600ed325fa0 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -283,7 +283,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
[BTF_KIND_FLOAT] = "FLOAT",
};
-static const char *btf_type_str(const struct btf_type *t)
+const char *btf_type_str(const struct btf_type *t)
{
return btf_kind_str[BTF_INFO_KIND(t->info)];
}
@@ -789,7 +789,6 @@ static const struct btf_type *btf_type_skip_qualifiers(const struct btf *btf,
while (btf_type_is_modifier(t) &&
BTF_INFO_KIND(t->info) != BTF_KIND_TYPEDEF) {
- id = t->type;
t = btf_type_by_id(btf, t->type);
}
@@ -4377,7 +4376,7 @@ static u8 bpf_ctx_convert_map[] = {
#undef BPF_LINK_TYPE
static const struct btf_member *
-btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf,
+btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
const struct btf_type *t, enum bpf_prog_type prog_type,
int arg)
{
@@ -5362,122 +5361,190 @@ int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *pr
return btf_check_func_type_match(log, btf1, t1, btf2, t2);
}
-/* Compare BTF of a function with given bpf_reg_state.
- * Returns:
- * EFAULT - there is a verifier bug. Abort verification.
- * EINVAL - there is a type mismatch or BTF is not available.
- * 0 - BTF matches with what bpf_reg_state expects.
- * Only PTR_TO_CTX and SCALAR_VALUE states are recognized.
- */
-int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
- struct bpf_reg_state *regs)
+static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
+#ifdef CONFIG_NET
+ [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
+ [PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
+ [PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
+#endif
+};
+
+static int btf_check_func_arg_match(struct bpf_verifier_env *env,
+ const struct btf *btf, u32 func_id,
+ struct bpf_reg_state *regs,
+ bool ptr_to_mem_ok)
{
struct bpf_verifier_log *log = &env->log;
- struct bpf_prog *prog = env->prog;
- struct btf *btf = prog->aux->btf;
- const struct btf_param *args;
+ const char *func_name, *ref_tname;
const struct btf_type *t, *ref_t;
- u32 i, nargs, btf_id, type_size;
- const char *tname;
- bool is_global;
-
- if (!prog->aux->func_info)
- return -EINVAL;
-
- btf_id = prog->aux->func_info[subprog].type_id;
- if (!btf_id)
- return -EFAULT;
-
- if (prog->aux->func_info_aux[subprog].unreliable)
- return -EINVAL;
+ const struct btf_param *args;
+ u32 i, nargs, ref_id;
- t = btf_type_by_id(btf, btf_id);
+ t = btf_type_by_id(btf, func_id);
if (!t || !btf_type_is_func(t)) {
/* These checks were already done by the verifier while loading
- * struct bpf_func_info
+ * struct bpf_func_info or in add_kfunc_call().
*/
- bpf_log(log, "BTF of func#%d doesn't point to KIND_FUNC\n",
- subprog);
+ bpf_log(log, "BTF of func_id %u doesn't point to KIND_FUNC\n",
+ func_id);
return -EFAULT;
}
- tname = btf_name_by_offset(btf, t->name_off);
+ func_name = btf_name_by_offset(btf, t->name_off);
t = btf_type_by_id(btf, t->type);
if (!t || !btf_type_is_func_proto(t)) {
- bpf_log(log, "Invalid BTF of func %s\n", tname);
+ bpf_log(log, "Invalid BTF of func %s\n", func_name);
return -EFAULT;
}
args = (const struct btf_param *)(t + 1);
nargs = btf_type_vlen(t);
if (nargs > MAX_BPF_FUNC_REG_ARGS) {
- bpf_log(log, "Function %s has %d > %d args\n", tname, nargs,
+ bpf_log(log, "Function %s has %d > %d args\n", func_name, nargs,
MAX_BPF_FUNC_REG_ARGS);
- goto out;
+ return -EINVAL;
}
- is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
/* check that BTF function arguments match actual types that the
* verifier sees.
*/
for (i = 0; i < nargs; i++) {
- struct bpf_reg_state *reg = &regs[i + 1];
+ u32 regno = i + 1;
+ struct bpf_reg_state *reg = &regs[regno];
- t = btf_type_by_id(btf, args[i].type);
- while (btf_type_is_modifier(t))
- t = btf_type_by_id(btf, t->type);
- if (btf_type_is_int(t) || btf_type_is_enum(t)) {
+ t = btf_type_skip_modifiers(btf, args[i].type, NULL);
+ if (btf_type_is_scalar(t)) {
if (reg->type == SCALAR_VALUE)
continue;
- bpf_log(log, "R%d is not a scalar\n", i + 1);
- goto out;
+ bpf_log(log, "R%d is not a scalar\n", regno);
+ return -EINVAL;
}
- if (btf_type_is_ptr(t)) {
+
+ if (!btf_type_is_ptr(t)) {
+ bpf_log(log, "Unrecognized arg#%d type %s\n",
+ i, btf_type_str(t));
+ return -EINVAL;
+ }
+
+ ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
+ ref_tname = btf_name_by_offset(btf, ref_t->name_off);
+ if (btf_is_kernel(btf)) {
+ const struct btf_type *reg_ref_t;
+ const struct btf *reg_btf;
+ const char *reg_ref_tname;
+ u32 reg_ref_id;
+
+ if (!btf_type_is_struct(ref_t)) {
+ bpf_log(log, "kernel function %s args#%d pointer type %s %s is not supported\n",
+ func_name, i, btf_type_str(ref_t),
+ ref_tname);
+ return -EINVAL;
+ }
+
+ if (reg->type == PTR_TO_BTF_ID) {
+ reg_btf = reg->btf;
+ reg_ref_id = reg->btf_id;
+ } else if (reg2btf_ids[reg->type]) {
+ reg_btf = btf_vmlinux;
+ reg_ref_id = *reg2btf_ids[reg->type];
+ } else {
+ bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d is not a pointer to btf_id\n",
+ func_name, i,
+ btf_type_str(ref_t), ref_tname, regno);
+ return -EINVAL;
+ }
+
+ reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id,
+ &reg_ref_id);
+ reg_ref_tname = btf_name_by_offset(reg_btf,
+ reg_ref_t->name_off);
+ if (!btf_struct_ids_match(log, reg_btf, reg_ref_id,
+ reg->off, btf, ref_id)) {
+ bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
+ func_name, i,
+ btf_type_str(ref_t), ref_tname,
+ regno, btf_type_str(reg_ref_t),
+ reg_ref_tname);
+ return -EINVAL;
+ }
+ } else if (btf_get_prog_ctx_type(log, btf, t,
+ env->prog->type, i)) {
/* If function expects ctx type in BTF check that caller
* is passing PTR_TO_CTX.
*/
- if (btf_get_prog_ctx_type(log, btf, t, prog->type, i)) {
- if (reg->type != PTR_TO_CTX) {
- bpf_log(log,
- "arg#%d expected pointer to ctx, but got %s\n",
- i, btf_kind_str[BTF_INFO_KIND(t->info)]);
- goto out;
- }
- if (check_ctx_reg(env, reg, i + 1))
- goto out;
- continue;
+ if (reg->type != PTR_TO_CTX) {
+ bpf_log(log,
+ "arg#%d expected pointer to ctx, but got %s\n",
+ i, btf_type_str(t));
+ return -EINVAL;
}
+ if (check_ctx_reg(env, reg, regno))
+ return -EINVAL;
+ } else if (ptr_to_mem_ok) {
+ const struct btf_type *resolve_ret;
+ u32 type_size;
- if (!is_global)
- goto out;
-
- t = btf_type_skip_modifiers(btf, t->type, NULL);
-
- ref_t = btf_resolve_size(btf, t, &type_size);
- if (IS_ERR(ref_t)) {
+ resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
+ if (IS_ERR(resolve_ret)) {
bpf_log(log,
- "arg#%d reference type('%s %s') size cannot be determined: %ld\n",
- i, btf_type_str(t), btf_name_by_offset(btf, t->name_off),
- PTR_ERR(ref_t));
- goto out;
+ "arg#%d reference type('%s %s') size cannot be determined: %ld\n",
+ i, btf_type_str(ref_t), ref_tname,
+ PTR_ERR(resolve_ret));
+ return -EINVAL;
}
- if (check_mem_reg(env, reg, i + 1, type_size))
- goto out;
-
- continue;
+ if (check_mem_reg(env, reg, regno, type_size))
+ return -EINVAL;
+ } else {
+ return -EINVAL;
}
- bpf_log(log, "Unrecognized arg#%d type %s\n",
- i, btf_kind_str[BTF_INFO_KIND(t->info)]);
- goto out;
}
+
return 0;
-out:
+}
+
+/* Compare BTF of a function with given bpf_reg_state.
+ * Returns:
+ * EFAULT - there is a verifier bug. Abort verification.
+ * EINVAL - there is a type mismatch or BTF is not available.
+ * 0 - BTF matches with what bpf_reg_state expects.
+ * Only PTR_TO_CTX and SCALAR_VALUE states are recognized.
+ */
+int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
+ struct bpf_reg_state *regs)
+{
+ struct bpf_prog *prog = env->prog;
+ struct btf *btf = prog->aux->btf;
+ bool is_global;
+ u32 btf_id;
+ int err;
+
+ if (!prog->aux->func_info)
+ return -EINVAL;
+
+ btf_id = prog->aux->func_info[subprog].type_id;
+ if (!btf_id)
+ return -EFAULT;
+
+ if (prog->aux->func_info_aux[subprog].unreliable)
+ return -EINVAL;
+
+ is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
+ err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global);
+
/* Compiler optimizations can remove arguments from static functions
* or mismatched type can be passed into a global function.
* In such cases mark the function as unreliable from BTF point of view.
*/
- prog->aux->func_info_aux[subprog].unreliable = true;
- return -EINVAL;
+ if (err)
+ prog->aux->func_info_aux[subprog].unreliable = true;
+ return err;
+}
+
+int btf_check_kfunc_arg_match(struct bpf_verifier_env *env,
+ const struct btf *btf, u32 func_id,
+ struct bpf_reg_state *regs)
+{
+ return btf_check_func_arg_match(env, btf, func_id, regs, false);
}
/* Convert BTF of a function into bpf_reg_state if possible
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 75244ecb2389..f5423251c118 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -143,25 +143,25 @@ int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)
if (!prog->aux->nr_linfo || !prog->jit_requested)
return 0;
- prog->aux->jited_linfo = kcalloc(prog->aux->nr_linfo,
- sizeof(*prog->aux->jited_linfo),
- GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
+ prog->aux->jited_linfo = kvcalloc(prog->aux->nr_linfo,
+ sizeof(*prog->aux->jited_linfo),
+ GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
if (!prog->aux->jited_linfo)
return -ENOMEM;
return 0;
}
-void bpf_prog_free_jited_linfo(struct bpf_prog *prog)
+void bpf_prog_jit_attempt_done(struct bpf_prog *prog)
{
- kfree(prog->aux->jited_linfo);
- prog->aux->jited_linfo = NULL;
-}
+ if (prog->aux->jited_linfo &&
+ (!prog->jited || !prog->aux->jited_linfo[0])) {
+ kvfree(prog->aux->jited_linfo);
+ prog->aux->jited_linfo = NULL;
+ }
-void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog)
-{
- if (prog->aux->jited_linfo && !prog->aux->jited_linfo[0])
- bpf_prog_free_jited_linfo(prog);
+ kfree(prog->aux->kfunc_tab);
+ prog->aux->kfunc_tab = NULL;
}
/* The jit engine is responsible to provide an array
@@ -217,12 +217,6 @@ void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
insn_to_jit_off[linfo[i].insn_off - insn_start - 1];
}
-void bpf_prog_free_linfo(struct bpf_prog *prog)
-{
- bpf_prog_free_jited_linfo(prog);
- kvfree(prog->aux->linfo);
-}
-
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
gfp_t gfp_extra_flags)
{
@@ -1849,9 +1843,15 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
/* In case of BPF to BPF calls, verifier did all the prep
* work with regards to JITing, etc.
*/
+ bool jit_needed = false;
+
if (fp->bpf_func)
goto finalize;
+ if (IS_ENABLED(CONFIG_BPF_JIT_ALWAYS_ON) ||
+ bpf_prog_has_kfunc_call(fp))
+ jit_needed = true;
+
bpf_prog_select_func(fp);
/* eBPF JITs can rewrite the program in case constant
@@ -1866,14 +1866,10 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
return fp;
fp = bpf_int_jit_compile(fp);
- if (!fp->jited) {
- bpf_prog_free_jited_linfo(fp);
-#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+ bpf_prog_jit_attempt_done(fp);
+ if (!fp->jited && jit_needed) {
*err = -ENOTSUPP;
return fp;
-#endif
- } else {
- bpf_prog_free_unused_jited_linfo(fp);
}
} else {
*err = bpf_prog_offload_compile(fp);
@@ -2354,6 +2350,11 @@ bool __weak bpf_jit_needs_zext(void)
return false;
}
+bool __weak bpf_jit_supports_kfunc_call(void)
+{
+ return false;
+}
+
/* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
* skb_copy_bits(), so provide a weak definition of it for NET-less config.
*/
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index 3acc7e0b6916..dad821c8ecd0 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -19,16 +19,23 @@ static const char *__func_get_name(const struct bpf_insn_cbs *cbs,
{
BUILD_BUG_ON(ARRAY_SIZE(func_id_str) != __BPF_FUNC_MAX_ID);
- if (insn->src_reg != BPF_PSEUDO_CALL &&
+ if (!insn->src_reg &&
insn->imm >= 0 && insn->imm < __BPF_FUNC_MAX_ID &&
func_id_str[insn->imm])
return func_id_str[insn->imm];
- if (cbs && cbs->cb_call)
- return cbs->cb_call(cbs->private_data, insn);
+ if (cbs && cbs->cb_call) {
+ const char *res;
+
+ res = cbs->cb_call(cbs->private_data, insn);
+ if (res)
+ return res;
+ }
if (insn->src_reg == BPF_PSEUDO_CALL)
snprintf(buff, len, "%+d", insn->imm);
+ else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
+ snprintf(buff, len, "kernel-function");
return buff;
}
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 074800226327..f306611c4ddf 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -382,8 +382,8 @@ const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
};
#ifdef CONFIG_CGROUP_BPF
-DECLARE_PER_CPU(struct bpf_cgroup_storage*,
- bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
+DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
+ bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
{
@@ -392,10 +392,17 @@ BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
* verifier checks that its value is correct.
*/
enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
- struct bpf_cgroup_storage *storage;
+ struct bpf_cgroup_storage *storage = NULL;
void *ptr;
+ int i;
- storage = this_cpu_read(bpf_cgroup_storage[stype]);
+ for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
+ if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
+ continue;
+
+ storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]);
+ break;
+ }
if (stype == BPF_CGROUP_STORAGE_SHARED)
ptr = &READ_ONCE(storage->buf)->data[0];
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 2d4f9ac12377..bd11db9774c3 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -9,10 +9,11 @@
#include <linux/slab.h>
#include <uapi/linux/btf.h>
-DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
-
#ifdef CONFIG_CGROUP_BPF
+DEFINE_PER_CPU(struct bpf_cgroup_storage_info,
+ bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
+
#include "../cgroup/cgroup-internal.h"
#define LOCAL_STORAGE_CREATE_FLAG_MASK \
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index cec792a17e5f..1b7b8a6f34ee 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -726,6 +726,9 @@ const struct bpf_map_ops trie_map_ops = {
.map_lookup_elem = trie_lookup_elem,
.map_update_elem = trie_update_elem,
.map_delete_elem = trie_delete_elem,
+ .map_lookup_batch = generic_map_lookup_batch,
+ .map_update_batch = generic_map_update_batch,
+ .map_delete_batch = generic_map_delete_batch,
.map_check_btf = trie_check_btf,
.map_btf_name = "lpm_trie",
.map_btf_id = &trie_map_btf_id,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 250503482cda..6428634da57e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1694,7 +1694,9 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
{
bpf_prog_kallsyms_del_all(prog);
btf_put(prog->aux->btf);
- bpf_prog_free_linfo(prog);
+ kvfree(prog->aux->jited_linfo);
+ kvfree(prog->aux->linfo);
+ kfree(prog->aux->kfunc_tab);
if (prog->aux->attach_btf)
btf_put(prog->aux->attach_btf);
@@ -2946,6 +2948,7 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
return BPF_PROG_TYPE_SK_MSG;
case BPF_SK_SKB_STREAM_PARSER:
case BPF_SK_SKB_STREAM_VERDICT:
+ case BPF_SK_SKB_VERDICT:
return BPF_PROG_TYPE_SK_SKB;
case BPF_LIRC_MODE2:
return BPF_PROG_TYPE_LIRC_MODE2;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 210169c25ead..852541a435ef 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -234,6 +234,12 @@ static bool bpf_pseudo_call(const struct bpf_insn *insn)
insn->src_reg == BPF_PSEUDO_CALL;
}
+static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
+{
+ return insn->code == (BPF_JMP | BPF_CALL) &&
+ insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
+}
+
static bool bpf_pseudo_func(const struct bpf_insn *insn)
{
return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
@@ -1554,47 +1560,205 @@ static int add_subprog(struct bpf_verifier_env *env, int off)
verbose(env, "too many subprograms\n");
return -E2BIG;
}
+ /* determine subprog starts. The end is one before the next starts */
env->subprog_info[env->subprog_cnt++].start = off;
sort(env->subprog_info, env->subprog_cnt,
sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
return env->subprog_cnt - 1;
}
-static int check_subprogs(struct bpf_verifier_env *env)
+struct bpf_kfunc_desc {
+ struct btf_func_model func_model;
+ u32 func_id;
+ s32 imm;
+};
+
+#define MAX_KFUNC_DESCS 256
+struct bpf_kfunc_desc_tab {
+ struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
+ u32 nr_descs;
+};
+
+static int kfunc_desc_cmp_by_id(const void *a, const void *b)
+{
+ const struct bpf_kfunc_desc *d0 = a;
+ const struct bpf_kfunc_desc *d1 = b;
+
+ /* func_id is not greater than BTF_MAX_TYPE */
+ return d0->func_id - d1->func_id;
+}
+
+static const struct bpf_kfunc_desc *
+find_kfunc_desc(const struct bpf_prog *prog, u32 func_id)
+{
+ struct bpf_kfunc_desc desc = {
+ .func_id = func_id,
+ };
+ struct bpf_kfunc_desc_tab *tab;
+
+ tab = prog->aux->kfunc_tab;
+ return bsearch(&desc, tab->descs, tab->nr_descs,
+ sizeof(tab->descs[0]), kfunc_desc_cmp_by_id);
+}
+
+static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id)
+{
+ const struct btf_type *func, *func_proto;
+ struct bpf_kfunc_desc_tab *tab;
+ struct bpf_prog_aux *prog_aux;
+ struct bpf_kfunc_desc *desc;
+ const char *func_name;
+ unsigned long addr;
+ int err;
+
+ prog_aux = env->prog->aux;
+ tab = prog_aux->kfunc_tab;
+ if (!tab) {
+ if (!btf_vmlinux) {
+ verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
+ return -ENOTSUPP;
+ }
+
+ if (!env->prog->jit_requested) {
+ verbose(env, "JIT is required for calling kernel function\n");
+ return -ENOTSUPP;
+ }
+
+ if (!bpf_jit_supports_kfunc_call()) {
+ verbose(env, "JIT does not support calling kernel function\n");
+ return -ENOTSUPP;
+ }
+
+ if (!env->prog->gpl_compatible) {
+ verbose(env, "cannot call kernel function from non-GPL compatible program\n");
+ return -EINVAL;
+ }
+
+ tab = kzalloc(sizeof(*tab), GFP_KERNEL);
+ if (!tab)
+ return -ENOMEM;
+ prog_aux->kfunc_tab = tab;
+ }
+
+ if (find_kfunc_desc(env->prog, func_id))
+ return 0;
+
+ if (tab->nr_descs == MAX_KFUNC_DESCS) {
+ verbose(env, "too many different kernel function calls\n");
+ return -E2BIG;
+ }
+
+ func = btf_type_by_id(btf_vmlinux, func_id);
+ if (!func || !btf_type_is_func(func)) {
+ verbose(env, "kernel btf_id %u is not a function\n",
+ func_id);
+ return -EINVAL;
+ }
+ func_proto = btf_type_by_id(btf_vmlinux, func->type);
+ if (!func_proto || !btf_type_is_func_proto(func_proto)) {
+ verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
+ func_id);
+ return -EINVAL;
+ }
+
+ func_name = btf_name_by_offset(btf_vmlinux, func->name_off);
+ addr = kallsyms_lookup_name(func_name);
+ if (!addr) {
+ verbose(env, "cannot find address for kernel function %s\n",
+ func_name);
+ return -EINVAL;
+ }
+
+ desc = &tab->descs[tab->nr_descs++];
+ desc->func_id = func_id;
+ desc->imm = BPF_CAST_CALL(addr) - __bpf_call_base;
+ err = btf_distill_func_proto(&env->log, btf_vmlinux,
+ func_proto, func_name,
+ &desc->func_model);
+ if (!err)
+ sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
+ kfunc_desc_cmp_by_id, NULL);
+ return err;
+}
+
+static int kfunc_desc_cmp_by_imm(const void *a, const void *b)
+{
+ const struct bpf_kfunc_desc *d0 = a;
+ const struct bpf_kfunc_desc *d1 = b;
+
+ if (d0->imm > d1->imm)
+ return 1;
+ else if (d0->imm < d1->imm)
+ return -1;
+ return 0;
+}
+
+static void sort_kfunc_descs_by_imm(struct bpf_prog *prog)
+{
+ struct bpf_kfunc_desc_tab *tab;
+
+ tab = prog->aux->kfunc_tab;
+ if (!tab)
+ return;
+
+ sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
+ kfunc_desc_cmp_by_imm, NULL);
+}
+
+bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
+{
+ return !!prog->aux->kfunc_tab;
+}
+
+const struct btf_func_model *
+bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
+ const struct bpf_insn *insn)
+{
+ const struct bpf_kfunc_desc desc = {
+ .imm = insn->imm,
+ };
+ const struct bpf_kfunc_desc *res;
+ struct bpf_kfunc_desc_tab *tab;
+
+ tab = prog->aux->kfunc_tab;
+ res = bsearch(&desc, tab->descs, tab->nr_descs,
+ sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm);
+
+ return res ? &res->func_model : NULL;
+}
+
+static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
{
- int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
struct bpf_subprog_info *subprog = env->subprog_info;
struct bpf_insn *insn = env->prog->insnsi;
- int insn_cnt = env->prog->len;
+ int i, ret, insn_cnt = env->prog->len;
/* Add entry function. */
ret = add_subprog(env, 0);
- if (ret < 0)
+ if (ret)
return ret;
- /* determine subprog starts. The end is one before the next starts */
- for (i = 0; i < insn_cnt; i++) {
- if (bpf_pseudo_func(insn + i)) {
- if (!env->bpf_capable) {
- verbose(env,
- "function pointers are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
- return -EPERM;
- }
- ret = add_subprog(env, i + insn[i].imm + 1);
- if (ret < 0)
- return ret;
- /* remember subprog */
- insn[i + 1].imm = ret;
- continue;
- }
- if (!bpf_pseudo_call(insn + i))
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
+ !bpf_pseudo_kfunc_call(insn))
continue;
+
if (!env->bpf_capable) {
- verbose(env,
- "function calls to other bpf functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
+ verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
return -EPERM;
}
- ret = add_subprog(env, i + insn[i].imm + 1);
+
+ if (bpf_pseudo_func(insn)) {
+ ret = add_subprog(env, i + insn->imm + 1);
+ if (ret >= 0)
+ /* remember subprog */
+ insn[1].imm = ret;
+ } else if (bpf_pseudo_call(insn)) {
+ ret = add_subprog(env, i + insn->imm + 1);
+ } else {
+ ret = add_kfunc_call(env, insn->imm);
+ }
+
if (ret < 0)
return ret;
}
@@ -1608,6 +1772,16 @@ static int check_subprogs(struct bpf_verifier_env *env)
for (i = 0; i < env->subprog_cnt; i++)
verbose(env, "func#%d @%d\n", i, subprog[i].start);
+ return 0;
+}
+
+static int check_subprogs(struct bpf_verifier_env *env)
+{
+ int i, subprog_start, subprog_end, off, cur_subprog = 0;
+ struct bpf_subprog_info *subprog = env->subprog_info;
+ struct bpf_insn *insn = env->prog->insnsi;
+ int insn_cnt = env->prog->len;
+
/* now check that all jumps are within the same subprog */
subprog_start = subprog[cur_subprog].start;
subprog_end = subprog[cur_subprog + 1].start;
@@ -1916,6 +2090,17 @@ static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
return i;
}
+static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
+{
+ const struct btf_type *func;
+
+ if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
+ return NULL;
+
+ func = btf_type_by_id(btf_vmlinux, insn->imm);
+ return btf_name_by_offset(btf_vmlinux, func->name_off);
+}
+
/* For given verifier state backtrack_insn() is called from the last insn to
* the first insn. Its purpose is to compute a bitmask of registers and
* stack slots that needs precision in the parent verifier state.
@@ -1924,6 +2109,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
u32 *reg_mask, u64 *stack_mask)
{
const struct bpf_insn_cbs cbs = {
+ .cb_call = disasm_kfunc_name,
.cb_print = verbose,
.private_data = env,
};
@@ -5365,7 +5551,7 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
func_info_aux = env->prog->aux->func_info_aux;
if (func_info_aux)
is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
- err = btf_check_func_arg_match(env, subprog, caller->regs);
+ err = btf_check_subprog_arg_match(env, subprog, caller->regs);
if (err == -EFAULT)
return err;
if (is_global) {
@@ -5960,6 +6146,98 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
return 0;
}
+/* mark_btf_func_reg_size() is used when the reg size is determined by
+ * the BTF func_proto's return value size and argument.
+ */
+static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
+ size_t reg_size)
+{
+ struct bpf_reg_state *reg = &cur_regs(env)[regno];
+
+ if (regno == BPF_REG_0) {
+ /* Function return value */
+ reg->live |= REG_LIVE_WRITTEN;
+ reg->subreg_def = reg_size == sizeof(u64) ?
+ DEF_NOT_SUBREG : env->insn_idx + 1;
+ } else {
+ /* Function argument */
+ if (reg_size == sizeof(u64)) {
+ mark_insn_zext(env, reg);
+ mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
+ } else {
+ mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
+ }
+ }
+}
+
+static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
+{
+ const struct btf_type *t, *func, *func_proto, *ptr_type;
+ struct bpf_reg_state *regs = cur_regs(env);
+ const char *func_name, *ptr_type_name;
+ u32 i, nargs, func_id, ptr_type_id;
+ const struct btf_param *args;
+ int err;
+
+ func_id = insn->imm;
+ func = btf_type_by_id(btf_vmlinux, func_id);
+ func_name = btf_name_by_offset(btf_vmlinux, func->name_off);
+ func_proto = btf_type_by_id(btf_vmlinux, func->type);
+
+ if (!env->ops->check_kfunc_call ||
+ !env->ops->check_kfunc_call(func_id)) {
+ verbose(env, "calling kernel function %s is not allowed\n",
+ func_name);
+ return -EACCES;
+ }
+
+ /* Check the arguments */
+ err = btf_check_kfunc_arg_match(env, btf_vmlinux, func_id, regs);
+ if (err)
+ return err;
+
+ for (i = 0; i < CALLER_SAVED_REGS; i++)
+ mark_reg_not_init(env, regs, caller_saved[i]);
+
+ /* Check return type */
+ t = btf_type_skip_modifiers(btf_vmlinux, func_proto->type, NULL);
+ if (btf_type_is_scalar(t)) {
+ mark_reg_unknown(env, regs, BPF_REG_0);
+ mark_btf_func_reg_size(env, BPF_REG_0, t->size);
+ } else if (btf_type_is_ptr(t)) {
+ ptr_type = btf_type_skip_modifiers(btf_vmlinux, t->type,
+ &ptr_type_id);
+ if (!btf_type_is_struct(ptr_type)) {
+ ptr_type_name = btf_name_by_offset(btf_vmlinux,
+ ptr_type->name_off);
+ verbose(env, "kernel function %s returns pointer type %s %s is not supported\n",
+ func_name, btf_type_str(ptr_type),
+ ptr_type_name);
+ return -EINVAL;
+ }
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].btf = btf_vmlinux;
+ regs[BPF_REG_0].type = PTR_TO_BTF_ID;
+ regs[BPF_REG_0].btf_id = ptr_type_id;
+ mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
+ } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
+
+ nargs = btf_type_vlen(func_proto);
+ args = (const struct btf_param *)(func_proto + 1);
+ for (i = 0; i < nargs; i++) {
+ u32 regno = i + 1;
+
+ t = btf_type_skip_modifiers(btf_vmlinux, args[i].type, NULL);
+ if (btf_type_is_ptr(t))
+ mark_btf_func_reg_size(env, regno, sizeof(void *));
+ else
+ /* scalar. ensured by btf_check_kfunc_arg_match() */
+ mark_btf_func_reg_size(env, regno, t->size);
+ }
+
+ return 0;
+}
+
static bool signed_add_overflows(s64 a, s64 b)
{
/* Do the add in u64, where overflow is well-defined */
@@ -6062,19 +6340,6 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
else
*ptr_limit = -off - 1;
return *ptr_limit >= max ? -ERANGE : 0;
- case PTR_TO_MAP_KEY:
- /* Currently, this code is not exercised as the only use
- * is bpf_for_each_map_elem() helper which requires
- * bpf_capble. The code has been tested manually for
- * future use.
- */
- if (mask_to_left) {
- *ptr_limit = ptr_reg->umax_value + ptr_reg->off;
- } else {
- off = ptr_reg->smin_value + ptr_reg->off;
- *ptr_limit = ptr_reg->map_ptr->key_size - off;
- }
- return 0;
case PTR_TO_MAP_VALUE:
max = ptr_reg->map_ptr->value_size;
if (mask_to_left) {
@@ -6281,7 +6546,6 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
verbose(env, "R%d pointer arithmetic on %s prohibited\n",
dst, reg_type_str[ptr_reg->type]);
return -EACCES;
- case PTR_TO_MAP_KEY:
case PTR_TO_MAP_VALUE:
if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) {
verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
@@ -10176,6 +10440,7 @@ static int do_check(struct bpf_verifier_env *env)
if (env->log.level & BPF_LOG_LEVEL) {
const struct bpf_insn_cbs cbs = {
+ .cb_call = disasm_kfunc_name,
.cb_print = verbose,
.private_data = env,
};
@@ -10323,7 +10588,8 @@ static int do_check(struct bpf_verifier_env *env)
if (BPF_SRC(insn->code) != BPF_K ||
insn->off != 0 ||
(insn->src_reg != BPF_REG_0 &&
- insn->src_reg != BPF_PSEUDO_CALL) ||
+ insn->src_reg != BPF_PSEUDO_CALL &&
+ insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
insn->dst_reg != BPF_REG_0 ||
class == BPF_JMP32) {
verbose(env, "BPF_CALL uses reserved fields\n");
@@ -10338,6 +10604,8 @@ static int do_check(struct bpf_verifier_env *env)
}
if (insn->src_reg == BPF_PSEUDO_CALL)
err = check_func_call(env, insn, &env->insn_idx);
+ else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
+ err = check_kfunc_call(env, insn);
else
err = check_helper_call(env, insn, &env->insn_idx);
if (err)
@@ -11648,6 +11916,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
func[i]->aux->name[0] = 'F';
func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
func[i]->jit_requested = 1;
+ func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
func[i]->aux->linfo = prog->aux->linfo;
func[i]->aux->nr_linfo = prog->aux->nr_linfo;
func[i]->aux->jited_linfo = prog->aux->jited_linfo;
@@ -11755,7 +12024,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
prog->bpf_func = func[0]->bpf_func;
prog->aux->func = func;
prog->aux->func_cnt = env->subprog_cnt;
- bpf_prog_free_unused_jited_linfo(prog);
+ bpf_prog_jit_attempt_done(prog);
return 0;
out_free:
for (i = 0; i < env->subprog_cnt; i++) {
@@ -11778,7 +12047,7 @@ out_undo_insn:
insn->off = 0;
insn->imm = env->insn_aux_data[i].call_imm;
}
- bpf_prog_free_jited_linfo(prog);
+ bpf_prog_jit_attempt_done(prog);
return err;
}
@@ -11787,6 +12056,7 @@ static int fixup_call_args(struct bpf_verifier_env *env)
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
struct bpf_prog *prog = env->prog;
struct bpf_insn *insn = prog->insnsi;
+ bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
int i, depth;
#endif
int err = 0;
@@ -11800,6 +12070,10 @@ static int fixup_call_args(struct bpf_verifier_env *env)
return err;
}
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
+ if (has_kfunc_call) {
+ verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
+ return -EINVAL;
+ }
if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
/* When JIT fails the progs with bpf2bpf calls and tail_calls
* have to be rejected, since interpreter doesn't support them yet.
@@ -11828,6 +12102,26 @@ static int fixup_call_args(struct bpf_verifier_env *env)
return err;
}
+static int fixup_kfunc_call(struct bpf_verifier_env *env,
+ struct bpf_insn *insn)
+{
+ const struct bpf_kfunc_desc *desc;
+
+ /* insn->imm has the btf func_id. Replace it with
+ * an address (relative to __bpf_base_call).
+ */
+ desc = find_kfunc_desc(env->prog, insn->imm);
+ if (!desc) {
+ verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
+ insn->imm);
+ return -EFAULT;
+ }
+
+ insn->imm = desc->imm;
+
+ return 0;
+}
+
/* Do various post-verification rewrites in a single program pass.
* These rewrites simplify JIT and interpreter implementations.
*/
@@ -11963,6 +12257,12 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
continue;
if (insn->src_reg == BPF_PSEUDO_CALL)
continue;
+ if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+ ret = fixup_kfunc_call(env, insn);
+ if (ret)
+ return ret;
+ continue;
+ }
if (insn->imm == BPF_FUNC_get_route_realm)
prog->dst_needed = 1;
@@ -12192,6 +12492,8 @@ patch_call_imm:
}
}
+ sort_kfunc_descs_by_imm(env->prog);
+
return 0;
}
@@ -12302,7 +12604,7 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
/* 1st arg to a function */
regs[BPF_REG_1].type = PTR_TO_CTX;
mark_reg_known_zero(env, regs, BPF_REG_1);
- ret = btf_check_func_arg_match(env, subprog, regs);
+ ret = btf_check_subprog_arg_match(env, subprog, regs);
if (ret == -EFAULT)
/* unlikely verifier bug. abort.
* ret == 0 and ret < 0 are sadly acceptable for
@@ -12897,6 +13199,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
if (!env->explored_states)
goto skip_full_check;
+ ret = add_subprog_and_kfunc(env);
+ if (ret < 0)
+ goto skip_full_check;
+
ret = check_subprogs(env);
if (ret < 0)
goto skip_full_check;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 0abdd67f44b1..a5d72c48fb66 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2017 Facebook
*/
#include <linux/bpf.h>
+#include <linux/btf_ids.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/etherdevice.h>
@@ -106,12 +107,16 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
bpf_test_timer_enter(&t);
do {
- bpf_cgroup_storage_set(storage);
+ ret = bpf_cgroup_storage_set(storage);
+ if (ret)
+ break;
if (xdp)
*retval = bpf_prog_run_xdp(prog, ctx);
else
*retval = BPF_PROG_RUN(prog, ctx);
+
+ bpf_cgroup_storage_unset();
} while (bpf_test_timer_continue(&t, repeat, &ret, time));
bpf_test_timer_leave(&t);
@@ -209,10 +214,37 @@ int noinline bpf_modify_return_test(int a, int *b)
*b += 1;
return a + *b;
}
+
+u64 noinline bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d)
+{
+ return a + b + c + d;
+}
+
+int noinline bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b)
+{
+ return a + b;
+}
+
+struct sock * noinline bpf_kfunc_call_test3(struct sock *sk)
+{
+ return sk;
+}
+
__diag_pop();
ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
+BTF_SET_START(test_sk_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test1)
+BTF_ID(func, bpf_kfunc_call_test2)
+BTF_ID(func, bpf_kfunc_call_test3)
+BTF_SET_END(test_sk_kfunc_ids)
+
+bool bpf_prog_test_check_kfunc_call(u32 kfunc_id)
+{
+ return btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id);
+}
+
static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
u32 headroom, u32 tailroom)
{
diff --git a/net/core/filter.c b/net/core/filter.c
index 17dc159ec40c..cae56d08a670 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -9813,6 +9813,7 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
.convert_ctx_access = tc_cls_act_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
.gen_ld_abs = bpf_gen_ld_abs,
+ .check_kfunc_call = bpf_prog_test_check_kfunc_call,
};
const struct bpf_prog_ops tc_cls_act_prog_ops = {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e8320b5d651a..3ad9e8425ab2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2500,9 +2500,32 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
}
EXPORT_SYMBOL_GPL(skb_splice_bits);
-/* Send skb data on a socket. Socket must be locked. */
-int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
- int len)
+static int sendmsg_unlocked(struct sock *sk, struct msghdr *msg,
+ struct kvec *vec, size_t num, size_t size)
+{
+ struct socket *sock = sk->sk_socket;
+
+ if (!sock)
+ return -EINVAL;
+ return kernel_sendmsg(sock, msg, vec, num, size);
+}
+
+static int sendpage_unlocked(struct sock *sk, struct page *page, int offset,
+ size_t size, int flags)
+{
+ struct socket *sock = sk->sk_socket;
+
+ if (!sock)
+ return -EINVAL;
+ return kernel_sendpage(sock, page, offset, size, flags);
+}
+
+typedef int (*sendmsg_func)(struct sock *sk, struct msghdr *msg,
+ struct kvec *vec, size_t num, size_t size);
+typedef int (*sendpage_func)(struct sock *sk, struct page *page, int offset,
+ size_t size, int flags);
+static int __skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset,
+ int len, sendmsg_func sendmsg, sendpage_func sendpage)
{
unsigned int orig_len = len;
struct sk_buff *head = skb;
@@ -2522,7 +2545,8 @@ do_frag_list:
memset(&msg, 0, sizeof(msg));
msg.msg_flags = MSG_DONTWAIT;
- ret = kernel_sendmsg_locked(sk, &msg, &kv, 1, slen);
+ ret = INDIRECT_CALL_2(sendmsg, kernel_sendmsg_locked,
+ sendmsg_unlocked, sk, &msg, &kv, 1, slen);
if (ret <= 0)
goto error;
@@ -2553,9 +2577,11 @@ do_frag_list:
slen = min_t(size_t, len, skb_frag_size(frag) - offset);
while (slen) {
- ret = kernel_sendpage_locked(sk, skb_frag_page(frag),
- skb_frag_off(frag) + offset,
- slen, MSG_DONTWAIT);
+ ret = INDIRECT_CALL_2(sendpage, kernel_sendpage_locked,
+ sendpage_unlocked, sk,
+ skb_frag_page(frag),
+ skb_frag_off(frag) + offset,
+ slen, MSG_DONTWAIT);
if (ret <= 0)
goto error;
@@ -2587,8 +2613,23 @@ out:
error:
return orig_len == len ? ret : orig_len - len;
}
+
+/* Send skb data on a socket. Socket must be locked. */
+int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
+ int len)
+{
+ return __skb_send_sock(sk, skb, offset, len, kernel_sendmsg_locked,
+ kernel_sendpage_locked);
+}
EXPORT_SYMBOL_GPL(skb_send_sock_locked);
+/* Send skb data on a socket. Socket must be unlocked. */
+int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len)
+{
+ return __skb_send_sock(sk, skb, offset, len, sendmsg_unlocked,
+ sendpage_unlocked);
+}
+
/**
* skb_store_bits - store bits from kernel buffer to skb
* @skb: destination buffer
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 07f54015238a..92a83c02562a 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -399,6 +399,104 @@ out:
}
EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
+int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags,
+ long timeo, int *err)
+{
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ int ret = 0;
+
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ return 1;
+
+ if (!timeo)
+ return ret;
+
+ add_wait_queue(sk_sleep(sk), &wait);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ ret = sk_wait_event(sk, &timeo,
+ !list_empty(&psock->ingress_msg) ||
+ !skb_queue_empty(&sk->sk_receive_queue), &wait);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ remove_wait_queue(sk_sleep(sk), &wait);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(sk_msg_wait_data);
+
+/* Receive sk_msg from psock->ingress_msg to @msg. */
+int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
+ int len, int flags)
+{
+ struct iov_iter *iter = &msg->msg_iter;
+ int peek = flags & MSG_PEEK;
+ struct sk_msg *msg_rx;
+ int i, copied = 0;
+
+ msg_rx = sk_psock_peek_msg(psock);
+ while (copied != len) {
+ struct scatterlist *sge;
+
+ if (unlikely(!msg_rx))
+ break;
+
+ i = msg_rx->sg.start;
+ do {
+ struct page *page;
+ int copy;
+
+ sge = sk_msg_elem(msg_rx, i);
+ copy = sge->length;
+ page = sg_page(sge);
+ if (copied + copy > len)
+ copy = len - copied;
+ copy = copy_page_to_iter(page, sge->offset, copy, iter);
+ if (!copy)
+ return copied ? copied : -EFAULT;
+
+ copied += copy;
+ if (likely(!peek)) {
+ sge->offset += copy;
+ sge->length -= copy;
+ if (!msg_rx->skb)
+ sk_mem_uncharge(sk, copy);
+ msg_rx->sg.size -= copy;
+
+ if (!sge->length) {
+ sk_msg_iter_var_next(i);
+ if (!msg_rx->skb)
+ put_page(page);
+ }
+ } else {
+ /* Lets not optimize peek case if copy_page_to_iter
+ * didn't copy the entire length lets just break.
+ */
+ if (copy != sge->length)
+ return copied;
+ sk_msg_iter_var_next(i);
+ }
+
+ if (copied == len)
+ break;
+ } while (i != msg_rx->sg.end);
+
+ if (unlikely(peek)) {
+ msg_rx = sk_psock_next_msg(psock, msg_rx);
+ if (!msg_rx)
+ break;
+ continue;
+ }
+
+ msg_rx->sg.start = i;
+ if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {
+ msg_rx = sk_psock_dequeue_msg(psock);
+ kfree_sk_msg(msg_rx);
+ }
+ msg_rx = sk_psock_peek_msg(psock);
+ }
+
+ return copied;
+}
+EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
+
static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
struct sk_buff *skb)
{
@@ -410,7 +508,7 @@ static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
if (!sk_rmem_schedule(sk, skb, skb->truesize))
return NULL;
- msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
+ msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_KERNEL);
if (unlikely(!msg))
return NULL;
@@ -497,7 +595,7 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
if (!ingress) {
if (!sock_writeable(psock->sk))
return -EAGAIN;
- return skb_send_sock_locked(psock->sk, skb, off, len);
+ return skb_send_sock(psock->sk, skb, off, len);
}
return sk_psock_skb_ingress(psock, skb);
}
@@ -511,8 +609,7 @@ static void sk_psock_backlog(struct work_struct *work)
u32 len, off;
int ret;
- /* Lock sock to avoid losing sk_socket during loop. */
- lock_sock(psock->sk);
+ mutex_lock(&psock->work_mutex);
if (state->skb) {
skb = state->skb;
len = state->len;
@@ -529,7 +626,7 @@ start:
skb_bpf_redirect_clear(skb);
do {
ret = -EIO;
- if (likely(psock->sk->sk_socket))
+ if (!sock_flag(psock->sk, SOCK_DEAD))
ret = sk_psock_handle_skb(psock, skb, off,
len, ingress);
if (ret <= 0) {
@@ -553,7 +650,7 @@ start:
kfree_skb(skb);
}
end:
- release_sock(psock->sk);
+ mutex_unlock(&psock->work_mutex);
}
struct sk_psock *sk_psock_init(struct sock *sk, int node)
@@ -563,11 +660,6 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
write_lock_bh(&sk->sk_callback_lock);
- if (inet_csk_has_ulp(sk)) {
- psock = ERR_PTR(-EINVAL);
- goto out;
- }
-
if (sk->sk_user_data) {
psock = ERR_PTR(-EBUSY);
goto out;
@@ -591,7 +683,9 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
spin_lock_init(&psock->link_lock);
INIT_WORK(&psock->work, sk_psock_backlog);
+ mutex_init(&psock->work_mutex);
INIT_LIST_HEAD(&psock->ingress_msg);
+ spin_lock_init(&psock->ingress_lock);
skb_queue_head_init(&psock->ingress_skb);
sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED);
@@ -630,11 +724,11 @@ static void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
}
}
-static void sk_psock_zap_ingress(struct sk_psock *psock)
+static void __sk_psock_zap_ingress(struct sk_psock *psock)
{
struct sk_buff *skb;
- while ((skb = __skb_dequeue(&psock->ingress_skb)) != NULL) {
+ while ((skb = skb_dequeue(&psock->ingress_skb)) != NULL) {
skb_bpf_redirect_clear(skb);
kfree_skb(skb);
}
@@ -651,23 +745,35 @@ static void sk_psock_link_destroy(struct sk_psock *psock)
}
}
+void sk_psock_stop(struct sk_psock *psock, bool wait)
+{
+ spin_lock_bh(&psock->ingress_lock);
+ sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
+ sk_psock_cork_free(psock);
+ __sk_psock_zap_ingress(psock);
+ spin_unlock_bh(&psock->ingress_lock);
+
+ if (wait)
+ cancel_work_sync(&psock->work);
+}
+
static void sk_psock_done_strp(struct sk_psock *psock);
-static void sk_psock_destroy_deferred(struct work_struct *gc)
+static void sk_psock_destroy(struct work_struct *work)
{
- struct sk_psock *psock = container_of(gc, struct sk_psock, gc);
-
+ struct sk_psock *psock = container_of(to_rcu_work(work),
+ struct sk_psock, rwork);
/* No sk_callback_lock since already detached. */
sk_psock_done_strp(psock);
cancel_work_sync(&psock->work);
+ mutex_destroy(&psock->work_mutex);
psock_progs_drop(&psock->progs);
sk_psock_link_destroy(psock);
sk_psock_cork_free(psock);
- sk_psock_zap_ingress(psock);
if (psock->sk_redir)
sock_put(psock->sk_redir);
@@ -675,30 +781,21 @@ static void sk_psock_destroy_deferred(struct work_struct *gc)
kfree(psock);
}
-static void sk_psock_destroy(struct rcu_head *rcu)
-{
- struct sk_psock *psock = container_of(rcu, struct sk_psock, rcu);
-
- INIT_WORK(&psock->gc, sk_psock_destroy_deferred);
- schedule_work(&psock->gc);
-}
-
void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
{
- sk_psock_cork_free(psock);
- sk_psock_zap_ingress(psock);
+ sk_psock_stop(psock, false);
write_lock_bh(&sk->sk_callback_lock);
sk_psock_restore_proto(sk, psock);
rcu_assign_sk_user_data(sk, NULL);
if (psock->progs.stream_parser)
sk_psock_stop_strp(sk, psock);
- else if (psock->progs.stream_verdict)
+ else if (psock->progs.stream_verdict || psock->progs.skb_verdict)
sk_psock_stop_verdict(sk, psock);
write_unlock_bh(&sk->sk_callback_lock);
- sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
- call_rcu(&psock->rcu, sk_psock_destroy);
+ INIT_RCU_WORK(&psock->rwork, sk_psock_destroy);
+ queue_rcu_work(system_wq, &psock->rwork);
}
EXPORT_SYMBOL_GPL(sk_psock_drop);
@@ -767,14 +864,20 @@ static void sk_psock_skb_redirect(struct sk_buff *skb)
* error that caused the pipe to break. We can't send a packet on
* a socket that is in this state so we drop the skb.
*/
- if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
- !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
+ if (!psock_other || sock_flag(sk_other, SOCK_DEAD)) {
+ kfree_skb(skb);
+ return;
+ }
+ spin_lock_bh(&psock_other->ingress_lock);
+ if (!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
+ spin_unlock_bh(&psock_other->ingress_lock);
kfree_skb(skb);
return;
}
skb_queue_tail(&psock_other->ingress_skb, skb);
schedule_work(&psock_other->work);
+ spin_unlock_bh(&psock_other->ingress_lock);
}
static void sk_psock_tls_verdict_apply(struct sk_buff *skb, struct sock *sk, int verdict)
@@ -842,8 +945,12 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
err = sk_psock_skb_ingress_self(psock, skb);
}
if (err < 0) {
- skb_queue_tail(&psock->ingress_skb, skb);
- schedule_work(&psock->work);
+ spin_lock_bh(&psock->ingress_lock);
+ if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
+ skb_queue_tail(&psock->ingress_skb, skb);
+ schedule_work(&psock->work);
+ }
+ spin_unlock_bh(&psock->ingress_lock);
}
break;
case __SK_REDIRECT:
@@ -1010,6 +1117,8 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
}
skb_set_owner_r(skb, sk);
prog = READ_ONCE(psock->progs.stream_verdict);
+ if (!prog)
+ prog = READ_ONCE(psock->progs.skb_verdict);
if (likely(prog)) {
skb_dst_drop(skb);
skb_bpf_redirect_clear(skb);
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index dd53a7771d7e..3d190d22b0d8 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -26,6 +26,7 @@ struct bpf_stab {
static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
struct bpf_prog *old, u32 which);
+static struct sk_psock_progs *sock_map_progs(struct bpf_map *map);
static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
{
@@ -155,6 +156,8 @@ static void sock_map_del_link(struct sock *sk,
strp_stop = true;
if (psock->saved_data_ready && stab->progs.stream_verdict)
verdict_stop = true;
+ if (psock->saved_data_ready && stab->progs.skb_verdict)
+ verdict_stop = true;
list_del(&link->list);
sk_psock_free_link(link);
}
@@ -182,26 +185,10 @@ static void sock_map_unref(struct sock *sk, void *link_raw)
static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock)
{
- struct proto *prot;
-
- switch (sk->sk_type) {
- case SOCK_STREAM:
- prot = tcp_bpf_get_proto(sk, psock);
- break;
-
- case SOCK_DGRAM:
- prot = udp_bpf_get_proto(sk, psock);
- break;
-
- default:
+ if (!sk->sk_prot->psock_update_sk_prot)
return -EINVAL;
- }
-
- if (IS_ERR(prot))
- return PTR_ERR(prot);
-
- sk_psock_update_proto(sk, psock, prot);
- return 0;
+ psock->psock_update_sk_prot = sk->sk_prot->psock_update_sk_prot;
+ return sk->sk_prot->psock_update_sk_prot(sk, false);
}
static struct sk_psock *sock_map_psock_get_checked(struct sock *sk)
@@ -224,13 +211,25 @@ out:
return psock;
}
-static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
- struct sock *sk)
+static bool sock_map_redirect_allowed(const struct sock *sk);
+
+static int sock_map_link(struct bpf_map *map, struct sock *sk)
{
- struct bpf_prog *msg_parser, *stream_parser, *stream_verdict;
+ struct sk_psock_progs *progs = sock_map_progs(map);
+ struct bpf_prog *stream_verdict = NULL;
+ struct bpf_prog *stream_parser = NULL;
+ struct bpf_prog *skb_verdict = NULL;
+ struct bpf_prog *msg_parser = NULL;
struct sk_psock *psock;
int ret;
+ /* Only sockets we can redirect into/from in BPF need to hold
+ * refs to parser/verdict progs and have their sk_data_ready
+ * and sk_write_space callbacks overridden.
+ */
+ if (!sock_map_redirect_allowed(sk))
+ goto no_progs;
+
stream_verdict = READ_ONCE(progs->stream_verdict);
if (stream_verdict) {
stream_verdict = bpf_prog_inc_not_zero(stream_verdict);
@@ -256,6 +255,16 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
}
}
+ skb_verdict = READ_ONCE(progs->skb_verdict);
+ if (skb_verdict) {
+ skb_verdict = bpf_prog_inc_not_zero(skb_verdict);
+ if (IS_ERR(skb_verdict)) {
+ ret = PTR_ERR(skb_verdict);
+ goto out_put_msg_parser;
+ }
+ }
+
+no_progs:
psock = sock_map_psock_get_checked(sk);
if (IS_ERR(psock)) {
ret = PTR_ERR(psock);
@@ -265,6 +274,9 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
if (psock) {
if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) ||
(stream_parser && READ_ONCE(psock->progs.stream_parser)) ||
+ (skb_verdict && READ_ONCE(psock->progs.skb_verdict)) ||
+ (skb_verdict && READ_ONCE(psock->progs.stream_verdict)) ||
+ (stream_verdict && READ_ONCE(psock->progs.skb_verdict)) ||
(stream_verdict && READ_ONCE(psock->progs.stream_verdict))) {
sk_psock_put(sk, psock);
ret = -EBUSY;
@@ -296,6 +308,9 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
} else if (!stream_parser && stream_verdict && !psock->saved_data_ready) {
psock_set_prog(&psock->progs.stream_verdict, stream_verdict);
sk_psock_start_verdict(sk,psock);
+ } else if (!stream_verdict && skb_verdict && !psock->saved_data_ready) {
+ psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
+ sk_psock_start_verdict(sk, psock);
}
write_unlock_bh(&sk->sk_callback_lock);
return 0;
@@ -304,6 +319,9 @@ out_unlock_drop:
out_drop:
sk_psock_put(sk, psock);
out_progs:
+ if (skb_verdict)
+ bpf_prog_put(skb_verdict);
+out_put_msg_parser:
if (msg_parser)
bpf_prog_put(msg_parser);
out_put_stream_parser:
@@ -315,27 +333,6 @@ out_put_stream_verdict:
return ret;
}
-static int sock_map_link_no_progs(struct bpf_map *map, struct sock *sk)
-{
- struct sk_psock *psock;
- int ret;
-
- psock = sock_map_psock_get_checked(sk);
- if (IS_ERR(psock))
- return PTR_ERR(psock);
-
- if (!psock) {
- psock = sk_psock_init(sk, map->numa_node);
- if (IS_ERR(psock))
- return PTR_ERR(psock);
- }
-
- ret = sock_map_init_proto(sk, psock);
- if (ret < 0)
- sk_psock_put(sk, psock);
- return ret;
-}
-
static void sock_map_free(struct bpf_map *map)
{
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
@@ -466,8 +463,6 @@ static int sock_map_get_next_key(struct bpf_map *map, void *key, void *next)
return 0;
}
-static bool sock_map_redirect_allowed(const struct sock *sk);
-
static int sock_map_update_common(struct bpf_map *map, u32 idx,
struct sock *sk, u64 flags)
{
@@ -487,14 +482,7 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
if (!link)
return -ENOMEM;
- /* Only sockets we can redirect into/from in BPF need to hold
- * refs to parser/verdict progs and have their sk_data_ready
- * and sk_write_space callbacks overridden.
- */
- if (sock_map_redirect_allowed(sk))
- ret = sock_map_link(map, &stab->progs, sk);
- else
- ret = sock_map_link_no_progs(map, sk);
+ ret = sock_map_link(map, sk);
if (ret < 0)
goto out_free;
@@ -547,12 +535,15 @@ static bool sk_is_udp(const struct sock *sk)
static bool sock_map_redirect_allowed(const struct sock *sk)
{
- return sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN;
+ if (sk_is_tcp(sk))
+ return sk->sk_state != TCP_LISTEN;
+ else
+ return sk->sk_state == TCP_ESTABLISHED;
}
static bool sock_map_sk_is_suitable(const struct sock *sk)
{
- return sk_is_tcp(sk) || sk_is_udp(sk);
+ return !!sk->sk_prot->psock_update_sk_prot;
}
static bool sock_map_sk_state_allowed(const struct sock *sk)
@@ -999,14 +990,7 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
if (!link)
return -ENOMEM;
- /* Only sockets we can redirect into/from in BPF need to hold
- * refs to parser/verdict progs and have their sk_data_ready
- * and sk_write_space callbacks overridden.
- */
- if (sock_map_redirect_allowed(sk))
- ret = sock_map_link(map, &htab->progs, sk);
- else
- ret = sock_map_link_no_progs(map, sk);
+ ret = sock_map_link(map, sk);
if (ret < 0)
goto out_free;
@@ -1466,8 +1450,15 @@ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
break;
#endif
case BPF_SK_SKB_STREAM_VERDICT:
+ if (progs->skb_verdict)
+ return -EBUSY;
pprog = &progs->stream_verdict;
break;
+ case BPF_SK_SKB_VERDICT:
+ if (progs->stream_verdict)
+ return -EBUSY;
+ pprog = &progs->skb_verdict;
+ break;
default:
return -EOPNOTSUPP;
}
@@ -1540,6 +1531,7 @@ void sock_map_close(struct sock *sk, long timeout)
saved_close = psock->saved_close;
sock_map_remove_links(sk, psock);
rcu_read_unlock();
+ sk_psock_stop(psock, true);
release_sock(sk);
saved_close(sk, timeout);
}
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1355e6c0d567..f17870ee558b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1070,6 +1070,7 @@ const struct proto_ops inet_dgram_ops = {
.setsockopt = sock_common_setsockopt,
.getsockopt = sock_common_getsockopt,
.sendmsg = inet_sendmsg,
+ .read_sock = udp_read_sock,
.recvmsg = inet_recvmsg,
.mmap = sock_no_mmap,
.sendpage = inet_sendpage,
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index d520e61649c8..dff4f0eb96b0 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -5,6 +5,7 @@
#include <linux/bpf_verifier.h>
#include <linux/bpf.h>
#include <linux/btf.h>
+#include <linux/btf_ids.h>
#include <linux/filter.h>
#include <net/tcp.h>
#include <net/bpf_sk_storage.h>
@@ -178,10 +179,52 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
}
}
+BTF_SET_START(bpf_tcp_ca_kfunc_ids)
+BTF_ID(func, tcp_reno_ssthresh)
+BTF_ID(func, tcp_reno_cong_avoid)
+BTF_ID(func, tcp_reno_undo_cwnd)
+BTF_ID(func, tcp_slow_start)
+BTF_ID(func, tcp_cong_avoid_ai)
+#ifdef CONFIG_DYNAMIC_FTRACE
+#if IS_BUILTIN(CONFIG_TCP_CONG_CUBIC)
+BTF_ID(func, cubictcp_init)
+BTF_ID(func, cubictcp_recalc_ssthresh)
+BTF_ID(func, cubictcp_cong_avoid)
+BTF_ID(func, cubictcp_state)
+BTF_ID(func, cubictcp_cwnd_event)
+BTF_ID(func, cubictcp_acked)
+#endif
+#if IS_BUILTIN(CONFIG_TCP_CONG_DCTCP)
+BTF_ID(func, dctcp_init)
+BTF_ID(func, dctcp_update_alpha)
+BTF_ID(func, dctcp_cwnd_event)
+BTF_ID(func, dctcp_ssthresh)
+BTF_ID(func, dctcp_cwnd_undo)
+BTF_ID(func, dctcp_state)
+#endif
+#if IS_BUILTIN(CONFIG_TCP_CONG_BBR)
+BTF_ID(func, bbr_init)
+BTF_ID(func, bbr_main)
+BTF_ID(func, bbr_sndbuf_expand)
+BTF_ID(func, bbr_undo_cwnd)
+BTF_ID(func, bbr_cwnd_event)
+BTF_ID(func, bbr_ssthresh)
+BTF_ID(func, bbr_min_tso_segs)
+BTF_ID(func, bbr_set_state)
+#endif
+#endif /* CONFIG_DYNAMIC_FTRACE */
+BTF_SET_END(bpf_tcp_ca_kfunc_ids)
+
+static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id)
+{
+ return btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id);
+}
+
static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = {
.get_func_proto = bpf_tcp_ca_get_func_proto,
.is_valid_access = bpf_tcp_ca_is_valid_access,
.btf_struct_access = bpf_tcp_ca_btf_struct_access,
+ .check_kfunc_call = bpf_tcp_ca_check_kfunc_call,
};
static int bpf_tcp_ca_init_member(const struct btf_type *t,
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 17c322b875fd..3d622a0d0753 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -10,86 +10,6 @@
#include <net/inet_common.h>
#include <net/tls.h>
-int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
- struct msghdr *msg, int len, int flags)
-{
- struct iov_iter *iter = &msg->msg_iter;
- int peek = flags & MSG_PEEK;
- struct sk_msg *msg_rx;
- int i, copied = 0;
-
- msg_rx = list_first_entry_or_null(&psock->ingress_msg,
- struct sk_msg, list);
-
- while (copied != len) {
- struct scatterlist *sge;
-
- if (unlikely(!msg_rx))
- break;
-
- i = msg_rx->sg.start;
- do {
- struct page *page;
- int copy;
-
- sge = sk_msg_elem(msg_rx, i);
- copy = sge->length;
- page = sg_page(sge);
- if (copied + copy > len)
- copy = len - copied;
- copy = copy_page_to_iter(page, sge->offset, copy, iter);
- if (!copy)
- return copied ? copied : -EFAULT;
-
- copied += copy;
- if (likely(!peek)) {
- sge->offset += copy;
- sge->length -= copy;
- if (!msg_rx->skb)
- sk_mem_uncharge(sk, copy);
- msg_rx->sg.size -= copy;
-
- if (!sge->length) {
- sk_msg_iter_var_next(i);
- if (!msg_rx->skb)
- put_page(page);
- }
- } else {
- /* Lets not optimize peek case if copy_page_to_iter
- * didn't copy the entire length lets just break.
- */
- if (copy != sge->length)
- return copied;
- sk_msg_iter_var_next(i);
- }
-
- if (copied == len)
- break;
- } while (i != msg_rx->sg.end);
-
- if (unlikely(peek)) {
- if (msg_rx == list_last_entry(&psock->ingress_msg,
- struct sk_msg, list))
- break;
- msg_rx = list_next_entry(msg_rx, list);
- continue;
- }
-
- msg_rx->sg.start = i;
- if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {
- list_del(&msg_rx->list);
- if (msg_rx->skb)
- consume_skb(msg_rx->skb);
- kfree(msg_rx);
- }
- msg_rx = list_first_entry_or_null(&psock->ingress_msg,
- struct sk_msg, list);
- }
-
- return copied;
-}
-EXPORT_SYMBOL_GPL(__tcp_bpf_recvmsg);
-
static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
struct sk_msg *msg, u32 apply_bytes, int flags)
{
@@ -243,28 +163,6 @@ static bool tcp_bpf_stream_read(const struct sock *sk)
return !empty;
}
-static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock,
- int flags, long timeo, int *err)
-{
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
- int ret = 0;
-
- if (sk->sk_shutdown & RCV_SHUTDOWN)
- return 1;
-
- if (!timeo)
- return ret;
-
- add_wait_queue(sk_sleep(sk), &wait);
- sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- ret = sk_wait_event(sk, &timeo,
- !list_empty(&psock->ingress_msg) ||
- !skb_queue_empty(&sk->sk_receive_queue), &wait);
- sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- remove_wait_queue(sk_sleep(sk), &wait);
- return ret;
-}
-
static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len)
{
@@ -284,13 +182,13 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
}
lock_sock(sk);
msg_bytes_ready:
- copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags);
+ copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
if (!copied) {
int data, err = 0;
long timeo;
timeo = sock_rcvtimeo(sk, nonblock);
- data = tcp_bpf_wait_data(sk, psock, flags, timeo, &err);
+ data = sk_msg_wait_data(sk, psock, flags, timeo, &err);
if (data) {
if (!sk_psock_queue_empty(psock))
goto msg_bytes_ready;
@@ -601,20 +499,38 @@ static int tcp_bpf_assert_proto_ops(struct proto *ops)
ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP;
}
-struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock)
+int tcp_bpf_update_proto(struct sock *sk, bool restore)
{
+ struct sk_psock *psock = sk_psock(sk);
int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;
+ if (restore) {
+ if (inet_csk_has_ulp(sk)) {
+ tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
+ } else {
+ sk->sk_write_space = psock->saved_write_space;
+ /* Pairs with lockless read in sk_clone_lock() */
+ WRITE_ONCE(sk->sk_prot, psock->sk_proto);
+ }
+ return 0;
+ }
+
+ if (inet_csk_has_ulp(sk))
+ return -EINVAL;
+
if (sk->sk_family == AF_INET6) {
if (tcp_bpf_assert_proto_ops(psock->sk_proto))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
tcp_bpf_check_v6_needs_rebuild(psock->sk_proto);
}
- return &tcp_bpf_prots[family][config];
+ /* Pairs with lockless read in sk_clone_lock() */
+ WRITE_ONCE(sk->sk_prot, &tcp_bpf_prots[family][config]);
+ return 0;
}
+EXPORT_SYMBOL_GPL(tcp_bpf_update_proto);
/* If a child got cloned from a listening socket that had tcp_bpf
* protocol callbacks installed, we need to restore the callbacks to
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index ffcbe46dacdb..4a30deaa9a37 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -124,7 +124,7 @@ static inline void bictcp_hystart_reset(struct sock *sk)
ca->sample_cnt = 0;
}
-static void bictcp_init(struct sock *sk)
+static void cubictcp_init(struct sock *sk)
{
struct bictcp *ca = inet_csk_ca(sk);
@@ -137,7 +137,7 @@ static void bictcp_init(struct sock *sk)
tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
}
-static void bictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+static void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event)
{
if (event == CA_EVENT_TX_START) {
struct bictcp *ca = inet_csk_ca(sk);
@@ -319,7 +319,7 @@ tcp_friendliness:
ca->cnt = max(ca->cnt, 2U);
}
-static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
@@ -338,7 +338,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
tcp_cong_avoid_ai(tp, ca->cnt, acked);
}
-static u32 bictcp_recalc_ssthresh(struct sock *sk)
+static u32 cubictcp_recalc_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
@@ -355,7 +355,7 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk)
return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
}
-static void bictcp_state(struct sock *sk, u8 new_state)
+static void cubictcp_state(struct sock *sk, u8 new_state)
{
if (new_state == TCP_CA_Loss) {
bictcp_reset(inet_csk_ca(sk));
@@ -442,7 +442,7 @@ static void hystart_update(struct sock *sk, u32 delay)
}
}
-static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
+static void cubictcp_acked(struct sock *sk, const struct ack_sample *sample)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
@@ -471,13 +471,13 @@ static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
}
static struct tcp_congestion_ops cubictcp __read_mostly = {
- .init = bictcp_init,
- .ssthresh = bictcp_recalc_ssthresh,
- .cong_avoid = bictcp_cong_avoid,
- .set_state = bictcp_state,
+ .init = cubictcp_init,
+ .ssthresh = cubictcp_recalc_ssthresh,
+ .cong_avoid = cubictcp_cong_avoid,
+ .set_state = cubictcp_state,
.undo_cwnd = tcp_reno_undo_cwnd,
- .cwnd_event = bictcp_cwnd_event,
- .pkts_acked = bictcp_acked,
+ .cwnd_event = cubictcp_cwnd_event,
+ .pkts_acked = cubictcp_acked,
.owner = THIS_MODULE,
.name = "cubic",
};
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index daad4f99db32..dfc6d1c0e710 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2806,6 +2806,9 @@ struct proto tcp_prot = {
.hash = inet_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
+#ifdef CONFIG_BPF_SYSCALL
+ .psock_update_sk_prot = tcp_bpf_update_proto,
+#endif
.enter_memory_pressure = tcp_enter_memory_pressure,
.leave_memory_pressure = tcp_leave_memory_pressure,
.stream_memory_free = tcp_stream_memory_free,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c0695ce42dc5..bfcc7f1a8a7f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1782,6 +1782,35 @@ busy_check:
}
EXPORT_SYMBOL(__skb_recv_udp);
+int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor)
+{
+ int copied = 0;
+
+ while (1) {
+ struct sk_buff *skb;
+ int err, used;
+
+ skb = skb_recv_udp(sk, 0, 1, &err);
+ if (!skb)
+ return err;
+ used = recv_actor(desc, skb, 0, skb->len);
+ if (used <= 0) {
+ if (!copied)
+ copied = used;
+ break;
+ } else if (used <= skb->len) {
+ copied += used;
+ }
+
+ if (!desc->count)
+ break;
+ }
+
+ return copied;
+}
+EXPORT_SYMBOL(udp_read_sock);
+
/*
* This should be easy, if there is something there we
* return it, otherwise we block.
@@ -2854,6 +2883,9 @@ struct proto udp_prot = {
.unhash = udp_lib_unhash,
.rehash = udp_v4_rehash,
.get_port = udp_v4_get_port,
+#ifdef CONFIG_BPF_SYSCALL
+ .psock_update_sk_prot = udp_bpf_update_proto,
+#endif
.memory_allocated = &udp_memory_allocated,
.sysctl_mem = sysctl_udp_mem,
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index 7a94791efc1a..7d5c4ebf42fe 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -4,6 +4,68 @@
#include <linux/skmsg.h>
#include <net/sock.h>
#include <net/udp.h>
+#include <net/inet_common.h>
+
+#include "udp_impl.h"
+
+static struct proto *udpv6_prot_saved __read_mostly;
+
+static int sk_udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ int noblock, int flags, int *addr_len)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ return udpv6_prot_saved->recvmsg(sk, msg, len, noblock, flags,
+ addr_len);
+#endif
+ return udp_prot.recvmsg(sk, msg, len, noblock, flags, addr_len);
+}
+
+static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ int nonblock, int flags, int *addr_len)
+{
+ struct sk_psock *psock;
+ int copied, ret;
+
+ if (unlikely(flags & MSG_ERRQUEUE))
+ return inet_recv_error(sk, msg, len, addr_len);
+
+ psock = sk_psock_get(sk);
+ if (unlikely(!psock))
+ return sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+
+ lock_sock(sk);
+ if (sk_psock_queue_empty(psock)) {
+ ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ goto out;
+ }
+
+msg_bytes_ready:
+ copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
+ if (!copied) {
+ int data, err = 0;
+ long timeo;
+
+ timeo = sock_rcvtimeo(sk, nonblock);
+ data = sk_msg_wait_data(sk, psock, flags, timeo, &err);
+ if (data) {
+ if (!sk_psock_queue_empty(psock))
+ goto msg_bytes_ready;
+ ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ goto out;
+ }
+ if (err) {
+ ret = err;
+ goto out;
+ }
+ copied = -EAGAIN;
+ }
+ ret = copied;
+out:
+ release_sock(sk);
+ sk_psock_put(sk, psock);
+ return ret;
+}
enum {
UDP_BPF_IPV4,
@@ -11,7 +73,6 @@ enum {
UDP_BPF_NUM_PROTS,
};
-static struct proto *udpv6_prot_saved __read_mostly;
static DEFINE_SPINLOCK(udpv6_prot_lock);
static struct proto udp_bpf_prots[UDP_BPF_NUM_PROTS];
@@ -20,6 +81,7 @@ static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
*prot = *base;
prot->unhash = sock_map_unhash;
prot->close = sock_map_close;
+ prot->recvmsg = udp_bpf_recvmsg;
}
static void udp_bpf_check_v6_needs_rebuild(struct proto *ops)
@@ -41,12 +103,23 @@ static int __init udp_bpf_v4_build_proto(void)
}
core_initcall(udp_bpf_v4_build_proto);
-struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock)
+int udp_bpf_update_proto(struct sock *sk, bool restore)
{
int family = sk->sk_family == AF_INET ? UDP_BPF_IPV4 : UDP_BPF_IPV6;
+ struct sk_psock *psock = sk_psock(sk);
+
+ if (restore) {
+ sk->sk_write_space = psock->saved_write_space;
+ /* Pairs with lockless read in sk_clone_lock() */
+ WRITE_ONCE(sk->sk_prot, psock->sk_proto);
+ return 0;
+ }
if (sk->sk_family == AF_INET6)
udp_bpf_check_v6_needs_rebuild(psock->sk_proto);
- return &udp_bpf_prots[family];
+ /* Pairs with lockless read in sk_clone_lock() */
+ WRITE_ONCE(sk->sk_prot, &udp_bpf_prots[family]);
+ return 0;
}
+EXPORT_SYMBOL_GPL(udp_bpf_update_proto);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 4f7ca5807046..2389ff702f51 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -714,6 +714,7 @@ const struct proto_ops inet6_dgram_ops = {
.getsockopt = sock_common_getsockopt, /* ok */
.sendmsg = inet6_sendmsg, /* retpoline's sake */
.recvmsg = inet6_recvmsg, /* retpoline's sake */
+ .read_sock = udp_read_sock,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
.set_peek_off = sk_set_peek_off,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d0f007741e8e..bff22d6ef516 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2139,6 +2139,9 @@ struct proto tcpv6_prot = {
.hash = inet6_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
+#ifdef CONFIG_BPF_SYSCALL
+ .psock_update_sk_prot = tcp_bpf_update_proto,
+#endif
.enter_memory_pressure = tcp_enter_memory_pressure,
.leave_memory_pressure = tcp_leave_memory_pressure,
.stream_memory_free = tcp_stream_memory_free,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index fa2f54738392..199b080d418a 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1714,6 +1714,9 @@ struct proto udpv6_prot = {
.unhash = udp_lib_unhash,
.rehash = udp_v6_rehash,
.get_port = udp_v6_get_port,
+#ifdef CONFIG_BPF_SYSCALL
+ .psock_update_sk_prot = udp_bpf_update_proto,
+#endif
.memory_allocated = &udp_memory_allocated,
.sysctl_mem = sysctl_udp_mem,
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 01d933ae5f16..1dcb34dfd56b 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1789,8 +1789,8 @@ int tls_sw_recvmsg(struct sock *sk,
skb = tls_wait_data(sk, psock, flags, timeo, &err);
if (!skb) {
if (psock) {
- int ret = __tcp_bpf_recvmsg(sk, psock,
- msg, len, flags);
+ int ret = sk_msg_recvmsg(sk, psock, msg, len,
+ flags);
if (ret > 0) {
decrypted += ret;
diff --git a/samples/bpf/sampleip_kern.c b/samples/bpf/sampleip_kern.c
index f24806ac24e7..a3f8a3998e0a 100644
--- a/samples/bpf/sampleip_kern.c
+++ b/samples/bpf/sampleip_kern.c
@@ -4,7 +4,6 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
-#include <linux/version.h>
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include <uapi/linux/bpf_perf_event.h>
diff --git a/samples/bpf/trace_event_kern.c b/samples/bpf/trace_event_kern.c
index 7d3c66fb3f88..0bba5fcd7d24 100644
--- a/samples/bpf/trace_event_kern.c
+++ b/samples/bpf/trace_event_kern.c
@@ -5,7 +5,6 @@
* License as published by the Free Software Foundation.
*/
#include <linux/ptrace.h>
-#include <linux/version.h>
#include <uapi/linux/bpf.h>
#include <uapi/linux/bpf_perf_event.h>
#include <uapi/linux/perf_event.h>
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 1e2a1105d0e6..aa696854be78 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -96,7 +96,6 @@ static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
static int opt_timeout = 1000;
static bool opt_need_wakeup = true;
static u32 opt_num_xsks = 1;
-static u32 prog_id;
static bool opt_busy_poll;
static bool opt_reduced_cap;
@@ -462,59 +461,37 @@ static void *poller(void *arg)
return NULL;
}
-static void remove_xdp_program(void)
+static void int_exit(int sig)
{
- u32 curr_prog_id = 0;
- int cmd = CLOSE_CONN;
-
- if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
- exit(EXIT_FAILURE);
- }
- if (prog_id == curr_prog_id)
- bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
- else if (!curr_prog_id)
- printf("couldn't find a prog id on a given interface\n");
- else
- printf("program on interface changed, not removing\n");
-
- if (opt_reduced_cap) {
- if (write(sock, &cmd, sizeof(int)) < 0) {
- fprintf(stderr, "Error writing into stream socket: %s", strerror(errno));
- exit(EXIT_FAILURE);
- }
- }
+ benchmark_done = true;
}
-static void int_exit(int sig)
+static void __exit_with_error(int error, const char *file, const char *func,
+ int line)
{
- benchmark_done = true;
+ fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func,
+ line, error, strerror(error));
+ exit(EXIT_FAILURE);
}
+#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
+
static void xdpsock_cleanup(void)
{
struct xsk_umem *umem = xsks[0]->umem->umem;
- int i;
+ int i, cmd = CLOSE_CONN;
dump_stats();
for (i = 0; i < num_socks; i++)
xsk_socket__delete(xsks[i]->xsk);
(void)xsk_umem__delete(umem);
- remove_xdp_program();
-}
-static void __exit_with_error(int error, const char *file, const char *func,
- int line)
-{
- fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func,
- line, error, strerror(error));
- dump_stats();
- remove_xdp_program();
- exit(EXIT_FAILURE);
+ if (opt_reduced_cap) {
+ if (write(sock, &cmd, sizeof(int)) < 0)
+ exit_with_error(errno);
+ }
}
-#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, \
- __LINE__)
static void swap_mac_addresses(void *data)
{
struct ether_header *eth = (struct ether_header *)data;
@@ -880,10 +857,6 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem,
if (ret)
exit_with_error(-ret);
- ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags);
- if (ret)
- exit_with_error(-ret);
-
xsk->app_stats.rx_empty_polls = 0;
xsk->app_stats.fill_fail_polls = 0;
xsk->app_stats.copy_tx_sendtos = 0;
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 65303664417e..1828bba19020 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -57,6 +57,7 @@ const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
[BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
[BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
+ [BPF_SK_SKB_VERDICT] = "sk_skb_verdict",
[BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
[BPF_LIRC_MODE2] = "lirc_mode2",
[BPF_FLOW_DISSECTOR] = "flow_dissector",
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index f2b915b20546..3f067d2d7584 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -76,6 +76,7 @@ enum dump_mode {
static const char * const attach_type_strings[] = {
[BPF_SK_SKB_STREAM_PARSER] = "stream_parser",
[BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict",
+ [BPF_SK_SKB_VERDICT] = "skb_verdict",
[BPF_SK_MSG_VERDICT] = "msg_verdict",
[BPF_FLOW_DISSECTOR] = "flow_dissector",
[__MAX_BPF_ATTACH_TYPE] = NULL,
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index 80d966cfcaa1..7550fd9c3188 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -115,10 +115,10 @@ struct object {
static int verbose;
-int eprintf(int level, int var, const char *fmt, ...)
+static int eprintf(int level, int var, const char *fmt, ...)
{
va_list args;
- int ret;
+ int ret = 0;
if (var >= level) {
va_start(args, fmt);
@@ -385,7 +385,7 @@ static int elf_collect(struct object *obj)
static int symbols_collect(struct object *obj)
{
Elf_Scn *scn = NULL;
- int n, i, err = 0;
+ int n, i;
GElf_Shdr sh;
char *name;
@@ -402,11 +402,10 @@ static int symbols_collect(struct object *obj)
* Scan symbols and look for the ones starting with
* __BTF_ID__* over .BTF_ids section.
*/
- for (i = 0; !err && i < n; i++) {
- char *tmp, *prefix;
+ for (i = 0; i < n; i++) {
+ char *prefix;
struct btf_id *id;
GElf_Sym sym;
- int err = -1;
if (!gelf_getsym(obj->efile.symbols, i, &sym))
return -1;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 2d3036e292a9..69902603012c 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -957,6 +957,7 @@ enum bpf_attach_type {
BPF_XDP_CPUMAP,
BPF_SK_LOOKUP,
BPF_XDP,
+ BPF_SK_SKB_VERDICT,
__MAX_BPF_ATTACH_TYPE
};
@@ -1117,6 +1118,10 @@ enum bpf_link_type {
* offset to another bpf function
*/
#define BPF_PSEUDO_CALL 1
+/* when bpf_call->src_reg == BPF_PSEUDO_KFUNC_CALL,
+ * bpf_call->imm == btf_id of a BTF_KIND_FUNC in the running kernel
+ */
+#define BPF_PSEUDO_KFUNC_CALL 2
/* flags for BPF_MAP_UPDATE_ELEM command */
enum {
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index c0fd2c718a7d..7aad78dbb4b4 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -185,7 +185,8 @@ enum reloc_type {
RELO_LD64,
RELO_CALL,
RELO_DATA,
- RELO_EXTERN,
+ RELO_EXTERN_VAR,
+ RELO_EXTERN_FUNC,
RELO_SUBPROG_ADDR,
};
@@ -573,14 +574,19 @@ static bool insn_is_subprog_call(const struct bpf_insn *insn)
insn->off == 0;
}
-static bool is_ldimm64(struct bpf_insn *insn)
+static bool is_ldimm64_insn(struct bpf_insn *insn)
{
return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
}
+static bool is_call_insn(const struct bpf_insn *insn)
+{
+ return insn->code == (BPF_JMP | BPF_CALL);
+}
+
static bool insn_is_pseudo_func(struct bpf_insn *insn)
{
- return is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
+ return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
}
static int
@@ -1921,9 +1927,9 @@ resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
return btf_is_func_proto(t) ? t : NULL;
}
-static const char *btf_kind_str(const struct btf_type *t)
+static const char *__btf_kind_str(__u16 kind)
{
- switch (btf_kind(t)) {
+ switch (kind) {
case BTF_KIND_UNKN: return "void";
case BTF_KIND_INT: return "int";
case BTF_KIND_PTR: return "ptr";
@@ -1945,6 +1951,16 @@ static const char *btf_kind_str(const struct btf_type *t)
}
}
+static const char *btf_kind_str(const struct btf_type *t)
+{
+ return __btf_kind_str(btf_kind(t));
+}
+
+static enum btf_func_linkage btf_func_linkage(const struct btf_type *t)
+{
+ return (enum btf_func_linkage)BTF_INFO_VLEN(t->info);
+}
+
/*
* Fetch integer attribute of BTF map definition. Such attributes are
* represented using a pointer to an array, in which dimensionality of array
@@ -3009,7 +3025,7 @@ static bool sym_is_subprog(const GElf_Sym *sym, int text_shndx)
static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
{
const struct btf_type *t;
- const char *var_name;
+ const char *tname;
int i, n;
if (!btf)
@@ -3019,14 +3035,18 @@ static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
for (i = 1; i <= n; i++) {
t = btf__type_by_id(btf, i);
- if (!btf_is_var(t))
+ if (!btf_is_var(t) && !btf_is_func(t))
continue;
- var_name = btf__name_by_offset(btf, t->name_off);
- if (strcmp(var_name, ext_name))
+ tname = btf__name_by_offset(btf, t->name_off);
+ if (strcmp(tname, ext_name))
continue;
- if (btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
+ if (btf_is_var(t) &&
+ btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
+ return -EINVAL;
+
+ if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN)
return -EINVAL;
return i;
@@ -3139,12 +3159,48 @@ static int find_int_btf_id(const struct btf *btf)
return 0;
}
+static int add_dummy_ksym_var(struct btf *btf)
+{
+ int i, int_btf_id, sec_btf_id, dummy_var_btf_id;
+ const struct btf_var_secinfo *vs;
+ const struct btf_type *sec;
+
+ sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
+ BTF_KIND_DATASEC);
+ if (sec_btf_id < 0)
+ return 0;
+
+ sec = btf__type_by_id(btf, sec_btf_id);
+ vs = btf_var_secinfos(sec);
+ for (i = 0; i < btf_vlen(sec); i++, vs++) {
+ const struct btf_type *vt;
+
+ vt = btf__type_by_id(btf, vs->type);
+ if (btf_is_func(vt))
+ break;
+ }
+
+ /* No func in ksyms sec. No need to add dummy var. */
+ if (i == btf_vlen(sec))
+ return 0;
+
+ int_btf_id = find_int_btf_id(btf);
+ dummy_var_btf_id = btf__add_var(btf,
+ "dummy_ksym",
+ BTF_VAR_GLOBAL_ALLOCATED,
+ int_btf_id);
+ if (dummy_var_btf_id < 0)
+ pr_warn("cannot create a dummy_ksym var\n");
+
+ return dummy_var_btf_id;
+}
+
static int bpf_object__collect_externs(struct bpf_object *obj)
{
struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
const struct btf_type *t;
struct extern_desc *ext;
- int i, n, off;
+ int i, n, off, dummy_var_btf_id;
const char *ext_name, *sec_name;
Elf_Scn *scn;
GElf_Shdr sh;
@@ -3156,6 +3212,10 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
if (elf_sec_hdr(obj, scn, &sh))
return -LIBBPF_ERRNO__FORMAT;
+ dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
+ if (dummy_var_btf_id < 0)
+ return dummy_var_btf_id;
+
n = sh.sh_size / sh.sh_entsize;
pr_debug("looking for externs among %d symbols...\n", n);
@@ -3200,6 +3260,11 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
sec_name = btf__name_by_offset(obj->btf, sec->name_off);
if (strcmp(sec_name, KCONFIG_SEC) == 0) {
+ if (btf_is_func(t)) {
+ pr_warn("extern function %s is unsupported under %s section\n",
+ ext->name, KCONFIG_SEC);
+ return -ENOTSUP;
+ }
kcfg_sec = sec;
ext->type = EXT_KCFG;
ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
@@ -3221,6 +3286,11 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
return -ENOTSUP;
}
} else if (strcmp(sec_name, KSYMS_SEC) == 0) {
+ if (btf_is_func(t) && ext->is_weak) {
+ pr_warn("extern weak function %s is unsupported\n",
+ ext->name);
+ return -ENOTSUP;
+ }
ksym_sec = sec;
ext->type = EXT_KSYM;
skip_mods_and_typedefs(obj->btf, t->type,
@@ -3247,7 +3317,14 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
* extern variables in DATASEC
*/
int int_btf_id = find_int_btf_id(obj->btf);
+ /* For extern function, a dummy_var added earlier
+ * will be used to replace the vs->type and
+ * its name string will be used to refill
+ * the missing param's name.
+ */
+ const struct btf_type *dummy_var;
+ dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id);
for (i = 0; i < obj->nr_extern; i++) {
ext = &obj->externs[i];
if (ext->type != EXT_KSYM)
@@ -3266,12 +3343,32 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
ext_name = btf__name_by_offset(obj->btf, vt->name_off);
ext = find_extern_by_name(obj, ext_name);
if (!ext) {
- pr_warn("failed to find extern definition for BTF var '%s'\n",
- ext_name);
+ pr_warn("failed to find extern definition for BTF %s '%s'\n",
+ btf_kind_str(vt), ext_name);
return -ESRCH;
}
- btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
- vt->type = int_btf_id;
+ if (btf_is_func(vt)) {
+ const struct btf_type *func_proto;
+ struct btf_param *param;
+ int j;
+
+ func_proto = btf__type_by_id(obj->btf,
+ vt->type);
+ param = btf_params(func_proto);
+ /* Reuse the dummy_var string if the
+ * func proto does not have param name.
+ */
+ for (j = 0; j < btf_vlen(func_proto); j++)
+ if (param[j].type && !param[j].name_off)
+ param[j].name_off =
+ dummy_var->name_off;
+ vs->type = dummy_var_btf_id;
+ vt->info &= ~0xffff;
+ vt->info |= BTF_FUNC_GLOBAL;
+ } else {
+ btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+ vt->type = int_btf_id;
+ }
vs->offset = off;
vs->size = sizeof(int);
}
@@ -3403,31 +3500,7 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
reloc_desc->processed = false;
- /* sub-program call relocation */
- if (insn->code == (BPF_JMP | BPF_CALL)) {
- if (insn->src_reg != BPF_PSEUDO_CALL) {
- pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
- return -LIBBPF_ERRNO__RELOC;
- }
- /* text_shndx can be 0, if no default "main" program exists */
- if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
- sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
- pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
- prog->name, sym_name, sym_sec_name);
- return -LIBBPF_ERRNO__RELOC;
- }
- if (sym->st_value % BPF_INSN_SZ) {
- pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
- prog->name, sym_name, (size_t)sym->st_value);
- return -LIBBPF_ERRNO__RELOC;
- }
- reloc_desc->type = RELO_CALL;
- reloc_desc->insn_idx = insn_idx;
- reloc_desc->sym_off = sym->st_value;
- return 0;
- }
-
- if (!is_ldimm64(insn)) {
+ if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
prog->name, sym_name, insn_idx, insn->code);
return -LIBBPF_ERRNO__RELOC;
@@ -3450,12 +3523,39 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
}
pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
prog->name, i, ext->name, ext->sym_idx, insn_idx);
- reloc_desc->type = RELO_EXTERN;
+ if (insn->code == (BPF_JMP | BPF_CALL))
+ reloc_desc->type = RELO_EXTERN_FUNC;
+ else
+ reloc_desc->type = RELO_EXTERN_VAR;
reloc_desc->insn_idx = insn_idx;
reloc_desc->sym_off = i; /* sym_off stores extern index */
return 0;
}
+ /* sub-program call relocation */
+ if (is_call_insn(insn)) {
+ if (insn->src_reg != BPF_PSEUDO_CALL) {
+ pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ /* text_shndx can be 0, if no default "main" program exists */
+ if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
+ sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
+ pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
+ prog->name, sym_name, sym_sec_name);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ if (sym->st_value % BPF_INSN_SZ) {
+ pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
+ prog->name, sym_name, (size_t)sym->st_value);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ reloc_desc->type = RELO_CALL;
+ reloc_desc->insn_idx = insn_idx;
+ reloc_desc->sym_off = sym->st_value;
+ return 0;
+ }
+
if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
prog->name, sym_name, shdr_idx);
@@ -5695,7 +5795,7 @@ poison:
/* poison second part of ldimm64 to avoid confusing error from
* verifier about "unknown opcode 00"
*/
- if (is_ldimm64(insn))
+ if (is_ldimm64_insn(insn))
bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1);
bpf_core_poison_insn(prog, relo_idx, insn_idx, insn);
return 0;
@@ -5771,7 +5871,7 @@ poison:
case BPF_LD: {
__u64 imm;
- if (!is_ldimm64(insn) ||
+ if (!is_ldimm64_insn(insn) ||
insn[0].src_reg != 0 || insn[0].off != 0 ||
insn_idx + 1 >= prog->insns_cnt ||
insn[1].code != 0 || insn[1].dst_reg != 0 ||
@@ -6213,7 +6313,7 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
insn[0].imm = obj->maps[relo->map_idx].fd;
relo->processed = true;
break;
- case RELO_EXTERN:
+ case RELO_EXTERN_VAR:
ext = &obj->externs[relo->sym_off];
if (ext->type == EXT_KCFG) {
insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
@@ -6231,6 +6331,12 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
}
relo->processed = true;
break;
+ case RELO_EXTERN_FUNC:
+ ext = &obj->externs[relo->sym_off];
+ insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
+ insn[0].imm = ext->ksym.kernel_btf_id;
+ relo->processed = true;
+ break;
case RELO_SUBPROG_ADDR:
insn[0].src_reg = BPF_PSEUDO_FUNC;
/* will be handled as a follow up pass */
@@ -7351,6 +7457,7 @@ static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
{
char sym_type, sym_name[500];
unsigned long long sym_addr;
+ const struct btf_type *t;
struct extern_desc *ext;
int ret, err = 0;
FILE *f;
@@ -7377,6 +7484,10 @@ static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
if (!ext || ext->type != EXT_KSYM)
continue;
+ t = btf__type_by_id(obj->btf, ext->btf_id);
+ if (!btf_is_var(t))
+ continue;
+
if (ext->is_set && ext->ksym.addr != sym_addr) {
pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
sym_name, ext->ksym.addr, sym_addr);
@@ -7395,75 +7506,151 @@ out:
return err;
}
-static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
+static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
+ __u16 kind, struct btf **res_btf,
+ int *res_btf_fd)
{
- struct extern_desc *ext;
+ int i, id, btf_fd, err;
struct btf *btf;
- int i, j, id, btf_fd, err;
- for (i = 0; i < obj->nr_extern; i++) {
- const struct btf_type *targ_var, *targ_type;
- __u32 targ_type_id, local_type_id;
- const char *targ_var_name;
- int ret;
+ btf = obj->btf_vmlinux;
+ btf_fd = 0;
+ id = btf__find_by_name_kind(btf, ksym_name, kind);
- ext = &obj->externs[i];
- if (ext->type != EXT_KSYM || !ext->ksym.type_id)
- continue;
-
- btf = obj->btf_vmlinux;
- btf_fd = 0;
- id = btf__find_by_name_kind(btf, ext->name, BTF_KIND_VAR);
- if (id == -ENOENT) {
- err = load_module_btfs(obj);
- if (err)
- return err;
+ if (id == -ENOENT) {
+ err = load_module_btfs(obj);
+ if (err)
+ return err;
- for (j = 0; j < obj->btf_module_cnt; j++) {
- btf = obj->btf_modules[j].btf;
- /* we assume module BTF FD is always >0 */
- btf_fd = obj->btf_modules[j].fd;
- id = btf__find_by_name_kind(btf, ext->name, BTF_KIND_VAR);
- if (id != -ENOENT)
- break;
- }
- }
- if (id <= 0) {
- pr_warn("extern (ksym) '%s': failed to find BTF ID in kernel BTF(s).\n",
- ext->name);
- return -ESRCH;
+ for (i = 0; i < obj->btf_module_cnt; i++) {
+ btf = obj->btf_modules[i].btf;
+ /* we assume module BTF FD is always >0 */
+ btf_fd = obj->btf_modules[i].fd;
+ id = btf__find_by_name_kind(btf, ksym_name, kind);
+ if (id != -ENOENT)
+ break;
}
+ }
+ if (id <= 0) {
+ pr_warn("extern (%s ksym) '%s': failed to find BTF ID in kernel BTF(s).\n",
+ __btf_kind_str(kind), ksym_name);
+ return -ESRCH;
+ }
- /* find local type_id */
- local_type_id = ext->ksym.type_id;
+ *res_btf = btf;
+ *res_btf_fd = btf_fd;
+ return id;
+}
- /* find target type_id */
- targ_var = btf__type_by_id(btf, id);
- targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
- targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
+static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
+ struct extern_desc *ext)
+{
+ const struct btf_type *targ_var, *targ_type;
+ __u32 targ_type_id, local_type_id;
+ const char *targ_var_name;
+ int id, btf_fd = 0, err;
+ struct btf *btf = NULL;
- ret = bpf_core_types_are_compat(obj->btf, local_type_id,
- btf, targ_type_id);
- if (ret <= 0) {
- const struct btf_type *local_type;
- const char *targ_name, *local_name;
+ id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &btf_fd);
+ if (id < 0)
+ return id;
- local_type = btf__type_by_id(obj->btf, local_type_id);
- local_name = btf__name_by_offset(obj->btf, local_type->name_off);
- targ_name = btf__name_by_offset(btf, targ_type->name_off);
+ /* find local type_id */
+ local_type_id = ext->ksym.type_id;
- pr_warn("extern (ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
- ext->name, local_type_id,
- btf_kind_str(local_type), local_name, targ_type_id,
- btf_kind_str(targ_type), targ_name);
- return -EINVAL;
- }
+ /* find target type_id */
+ targ_var = btf__type_by_id(btf, id);
+ targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
+ targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
+
+ err = bpf_core_types_are_compat(obj->btf, local_type_id,
+ btf, targ_type_id);
+ if (err <= 0) {
+ const struct btf_type *local_type;
+ const char *targ_name, *local_name;
- ext->is_set = true;
- ext->ksym.kernel_btf_obj_fd = btf_fd;
- ext->ksym.kernel_btf_id = id;
- pr_debug("extern (ksym) '%s': resolved to [%d] %s %s\n",
- ext->name, id, btf_kind_str(targ_var), targ_var_name);
+ local_type = btf__type_by_id(obj->btf, local_type_id);
+ local_name = btf__name_by_offset(obj->btf, local_type->name_off);
+ targ_name = btf__name_by_offset(btf, targ_type->name_off);
+
+ pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
+ ext->name, local_type_id,
+ btf_kind_str(local_type), local_name, targ_type_id,
+ btf_kind_str(targ_type), targ_name);
+ return -EINVAL;
+ }
+
+ ext->is_set = true;
+ ext->ksym.kernel_btf_obj_fd = btf_fd;
+ ext->ksym.kernel_btf_id = id;
+ pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n",
+ ext->name, id, btf_kind_str(targ_var), targ_var_name);
+
+ return 0;
+}
+
+static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj,
+ struct extern_desc *ext)
+{
+ int local_func_proto_id, kfunc_proto_id, kfunc_id;
+ const struct btf_type *kern_func;
+ struct btf *kern_btf = NULL;
+ int ret, kern_btf_fd = 0;
+
+ local_func_proto_id = ext->ksym.type_id;
+
+ kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC,
+ &kern_btf, &kern_btf_fd);
+ if (kfunc_id < 0) {
+ pr_warn("extern (func ksym) '%s': not found in kernel BTF\n",
+ ext->name);
+ return kfunc_id;
+ }
+
+ if (kern_btf != obj->btf_vmlinux) {
+ pr_warn("extern (func ksym) '%s': function in kernel module is not supported\n",
+ ext->name);
+ return -ENOTSUP;
+ }
+
+ kern_func = btf__type_by_id(kern_btf, kfunc_id);
+ kfunc_proto_id = kern_func->type;
+
+ ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id,
+ kern_btf, kfunc_proto_id);
+ if (ret <= 0) {
+ pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with kernel [%d]\n",
+ ext->name, local_func_proto_id, kfunc_proto_id);
+ return -EINVAL;
+ }
+
+ ext->is_set = true;
+ ext->ksym.kernel_btf_obj_fd = kern_btf_fd;
+ ext->ksym.kernel_btf_id = kfunc_id;
+ pr_debug("extern (func ksym) '%s': resolved to kernel [%d]\n",
+ ext->name, kfunc_id);
+
+ return 0;
+}
+
+static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
+{
+ const struct btf_type *t;
+ struct extern_desc *ext;
+ int i, err;
+
+ for (i = 0; i < obj->nr_extern; i++) {
+ ext = &obj->externs[i];
+ if (ext->type != EXT_KSYM || !ext->ksym.type_id)
+ continue;
+
+ t = btf__type_by_id(obj->btf, ext->btf_id);
+ if (btf_is_var(t))
+ err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
+ else
+ err = bpf_object__resolve_ksym_func_btf_id(obj, ext);
+ if (err)
+ return err;
}
return 0;
}
@@ -8270,6 +8457,16 @@ int bpf_object__btf_fd(const struct bpf_object *obj)
return obj->btf ? btf__fd(obj->btf) : -1;
}
+int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
+{
+ if (obj->loaded)
+ return -EINVAL;
+
+ obj->kern_version = kern_version;
+
+ return 0;
+}
+
int bpf_object__set_priv(struct bpf_object *obj, void *priv,
bpf_object_clear_priv_t clear_priv)
{
@@ -8458,7 +8655,7 @@ int bpf_program__nth_fd(const struct bpf_program *prog, int n)
return fd;
}
-enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog)
+enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog)
{
return prog->type;
}
@@ -8503,7 +8700,7 @@ BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
enum bpf_attach_type
-bpf_program__get_expected_attach_type(struct bpf_program *prog)
+bpf_program__get_expected_attach_type(const struct bpf_program *prog)
{
return prog->expected_attach_type;
}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index a1a424b9b8ff..f500621d28e5 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -143,6 +143,7 @@ LIBBPF_API int bpf_object__unload(struct bpf_object *obj);
LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj);
LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj);
+LIBBPF_API int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version);
struct btf;
LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj);
@@ -361,12 +362,12 @@ LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog);
-LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog);
+LIBBPF_API enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
enum bpf_prog_type type);
LIBBPF_API enum bpf_attach_type
-bpf_program__get_expected_attach_type(struct bpf_program *prog);
+bpf_program__get_expected_attach_type(const struct bpf_program *prog);
LIBBPF_API void
bpf_program__set_expected_attach_type(struct bpf_program *prog,
enum bpf_attach_type type);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 279ae861f568..f5990f7208ce 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -359,4 +359,5 @@ LIBBPF_0.4.0 {
bpf_linker__finalize;
bpf_linker__free;
bpf_linker__new;
+ bpf_object__set_kversion;
} LIBBPF_0.3.0;
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 5e0aa2f2c0ca..46b16cbdcda3 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -94,6 +94,7 @@ struct dst_sec {
int sec_sym_idx;
/* section's DATASEC variable info, emitted on BTF finalization */
+ bool has_btf;
int sec_var_cnt;
struct btf_var_secinfo *sec_vars;
@@ -1436,6 +1437,16 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
continue;
dst_sec = &linker->secs[src_sec->dst_id];
+ /* Mark section as having BTF regardless of the presence of
+ * variables. In some cases compiler might generate empty BTF
+ * with no variables information. E.g., when promoting local
+ * array/structure variable initial values and BPF object
+ * file otherwise has no read-only static variables in
+ * .rodata. We need to preserve such empty BTF and just set
+ * correct section size.
+ */
+ dst_sec->has_btf = true;
+
t = btf__type_by_id(obj->btf, src_sec->sec_type_id);
src_var = btf_var_secinfos(t);
n = btf_vlen(t);
@@ -1717,7 +1728,7 @@ static int finalize_btf(struct bpf_linker *linker)
for (i = 1; i < linker->sec_cnt; i++) {
struct dst_sec *sec = &linker->secs[i];
- if (!sec->sec_var_cnt)
+ if (!sec->has_btf)
continue;
id = btf__add_datasec(btf, sec->sec_name, sec->sec_sz);
@@ -1895,8 +1906,10 @@ static int finalize_btf_ext(struct bpf_linker *linker)
struct dst_sec *sec = &linker->secs[i];
sz = emit_btf_ext_data(linker, cur, sec->sec_name, &sec->func_info);
- if (sz < 0)
- return sz;
+ if (sz < 0) {
+ err = sz;
+ goto out;
+ }
cur += sz;
}
@@ -1910,8 +1923,10 @@ static int finalize_btf_ext(struct bpf_linker *linker)
struct dst_sec *sec = &linker->secs[i];
sz = emit_btf_ext_data(linker, cur, sec->sec_name, &sec->line_info);
- if (sz < 0)
- return sz;
+ if (sz < 0) {
+ err = sz;
+ goto out;
+ }
cur += sz;
}
@@ -1925,8 +1940,10 @@ static int finalize_btf_ext(struct bpf_linker *linker)
struct dst_sec *sec = &linker->secs[i];
sz = emit_btf_ext_data(linker, cur, sec->sec_name, &sec->core_relo_info);
- if (sz < 0)
- return sz;
+ if (sz < 0) {
+ err = sz;
+ goto out;
+ }
cur += sz;
}
@@ -1937,8 +1954,10 @@ static int finalize_btf_ext(struct bpf_linker *linker)
if (err) {
linker->btf_ext = NULL;
pr_warn("failed to parse final .BTF.ext data: %d\n", err);
- return err;
+ goto out;
}
- return 0;
+out:
+ free(data);
+ return err;
}
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 526fc35c0b23..95da0e19f4a5 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -28,6 +28,7 @@
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/types.h>
+#include <linux/if_link.h>
#include "bpf.h"
#include "libbpf.h"
@@ -70,8 +71,10 @@ struct xsk_ctx {
int ifindex;
struct list_head list;
int prog_fd;
+ int link_fd;
int xsks_map_fd;
char ifname[IFNAMSIZ];
+ bool has_bpf_link;
};
struct xsk_socket {
@@ -409,7 +412,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
static const int log_buf_size = 16 * 1024;
struct xsk_ctx *ctx = xsk->ctx;
char log_buf[log_buf_size];
- int err, prog_fd;
+ int prog_fd;
/* This is the fallback C-program:
* SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
@@ -499,14 +502,41 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
return prog_fd;
}
- err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, prog_fd,
- xsk->config.xdp_flags);
+ ctx->prog_fd = prog_fd;
+ return 0;
+}
+
+static int xsk_create_bpf_link(struct xsk_socket *xsk)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+ struct xsk_ctx *ctx = xsk->ctx;
+ __u32 prog_id = 0;
+ int link_fd;
+ int err;
+
+ err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags);
if (err) {
- close(prog_fd);
+ pr_warn("getting XDP prog id failed\n");
return err;
}
- ctx->prog_fd = prog_fd;
+ /* if there's a netlink-based XDP prog loaded on interface, bail out
+ * and ask user to do the removal by himself
+ */
+ if (prog_id) {
+ pr_warn("Netlink-based XDP prog detected, please unload it in order to launch AF_XDP prog\n");
+ return -EINVAL;
+ }
+
+ opts.flags = xsk->config.xdp_flags & ~(XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_REPLACE);
+
+ link_fd = bpf_link_create(ctx->prog_fd, ctx->ifindex, BPF_XDP, &opts);
+ if (link_fd < 0) {
+ pr_warn("bpf_link_create failed: %s\n", strerror(errno));
+ return link_fd;
+ }
+
+ ctx->link_fd = link_fd;
return 0;
}
@@ -625,7 +655,6 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
close(fd);
}
- err = 0;
if (ctx->xsks_map_fd == -1)
err = -ENOENT;
@@ -642,6 +671,98 @@ static int xsk_set_bpf_maps(struct xsk_socket *xsk)
&xsk->fd, 0);
}
+static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd)
+{
+ struct bpf_link_info link_info;
+ __u32 link_len;
+ __u32 id = 0;
+ int err;
+ int fd;
+
+ while (true) {
+ err = bpf_link_get_next_id(id, &id);
+ if (err) {
+ if (errno == ENOENT) {
+ err = 0;
+ break;
+ }
+ pr_warn("can't get next link: %s\n", strerror(errno));
+ break;
+ }
+
+ fd = bpf_link_get_fd_by_id(id);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+ pr_warn("can't get link by id (%u): %s\n", id, strerror(errno));
+ err = -errno;
+ break;
+ }
+
+ link_len = sizeof(struct bpf_link_info);
+ memset(&link_info, 0, link_len);
+ err = bpf_obj_get_info_by_fd(fd, &link_info, &link_len);
+ if (err) {
+ pr_warn("can't get link info: %s\n", strerror(errno));
+ close(fd);
+ break;
+ }
+ if (link_info.type == BPF_LINK_TYPE_XDP) {
+ if (link_info.xdp.ifindex == ifindex) {
+ *link_fd = fd;
+ if (prog_id)
+ *prog_id = link_info.prog_id;
+ break;
+ }
+ }
+ close(fd);
+ }
+
+ return err;
+}
+
+static bool xsk_probe_bpf_link(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
+ .flags = XDP_FLAGS_SKB_MODE);
+ struct bpf_load_program_attr prog_attr;
+ struct bpf_insn insns[2] = {
+ BPF_MOV64_IMM(BPF_REG_0, XDP_PASS),
+ BPF_EXIT_INSN()
+ };
+ int prog_fd, link_fd = -1;
+ int ifindex_lo = 1;
+ bool ret = false;
+ int err;
+
+ err = xsk_link_lookup(ifindex_lo, NULL, &link_fd);
+ if (err)
+ return ret;
+
+ if (link_fd >= 0)
+ return true;
+
+ memset(&prog_attr, 0, sizeof(prog_attr));
+ prog_attr.prog_type = BPF_PROG_TYPE_XDP;
+ prog_attr.insns = insns;
+ prog_attr.insns_cnt = ARRAY_SIZE(insns);
+ prog_attr.license = "GPL";
+
+ prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0);
+ if (prog_fd < 0)
+ return ret;
+
+ link_fd = bpf_link_create(prog_fd, ifindex_lo, BPF_XDP, &opts);
+ close(prog_fd);
+
+ if (link_fd >= 0) {
+ ret = true;
+ close(link_fd);
+ }
+
+ return ret;
+}
+
static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
{
char ifname[IFNAMSIZ];
@@ -663,64 +784,108 @@ static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
ctx->ifname[IFNAMSIZ - 1] = 0;
xsk->ctx = ctx;
+ xsk->ctx->has_bpf_link = xsk_probe_bpf_link();
return 0;
}
-static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp,
- int *xsks_map_fd)
+static int xsk_init_xdp_res(struct xsk_socket *xsk,
+ int *xsks_map_fd)
{
- struct xsk_socket *xsk = _xdp;
struct xsk_ctx *ctx = xsk->ctx;
- __u32 prog_id = 0;
int err;
- err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id,
- xsk->config.xdp_flags);
+ err = xsk_create_bpf_maps(xsk);
if (err)
return err;
- if (!prog_id) {
- err = xsk_create_bpf_maps(xsk);
- if (err)
- return err;
+ err = xsk_load_xdp_prog(xsk);
+ if (err)
+ goto err_load_xdp_prog;
- err = xsk_load_xdp_prog(xsk);
- if (err) {
- goto err_load_xdp_prog;
- }
- } else {
- ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
- if (ctx->prog_fd < 0)
- return -errno;
- err = xsk_lookup_bpf_maps(xsk);
- if (err) {
- close(ctx->prog_fd);
- return err;
- }
- }
+ if (ctx->has_bpf_link)
+ err = xsk_create_bpf_link(xsk);
+ else
+ err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, ctx->prog_fd,
+ xsk->config.xdp_flags);
- if (xsk->rx) {
- err = xsk_set_bpf_maps(xsk);
- if (err) {
- if (!prog_id) {
- goto err_set_bpf_maps;
- } else {
- close(ctx->prog_fd);
- return err;
- }
- }
- }
- if (xsks_map_fd)
- *xsks_map_fd = ctx->xsks_map_fd;
+ if (err)
+ goto err_attach_xdp_prog;
- return 0;
+ if (!xsk->rx)
+ return err;
+
+ err = xsk_set_bpf_maps(xsk);
+ if (err)
+ goto err_set_bpf_maps;
+
+ return err;
err_set_bpf_maps:
+ if (ctx->has_bpf_link)
+ close(ctx->link_fd);
+ else
+ bpf_set_link_xdp_fd(ctx->ifindex, -1, 0);
+err_attach_xdp_prog:
close(ctx->prog_fd);
- bpf_set_link_xdp_fd(ctx->ifindex, -1, 0);
err_load_xdp_prog:
xsk_delete_bpf_maps(xsk);
+ return err;
+}
+
+static int xsk_lookup_xdp_res(struct xsk_socket *xsk, int *xsks_map_fd, int prog_id)
+{
+ struct xsk_ctx *ctx = xsk->ctx;
+ int err;
+
+ ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
+ if (ctx->prog_fd < 0) {
+ err = -errno;
+ goto err_prog_fd;
+ }
+ err = xsk_lookup_bpf_maps(xsk);
+ if (err)
+ goto err_lookup_maps;
+
+ if (!xsk->rx)
+ return err;
+
+ err = xsk_set_bpf_maps(xsk);
+ if (err)
+ goto err_set_maps;
+
+ return err;
+
+err_set_maps:
+ close(ctx->xsks_map_fd);
+err_lookup_maps:
+ close(ctx->prog_fd);
+err_prog_fd:
+ if (ctx->has_bpf_link)
+ close(ctx->link_fd);
+ return err;
+}
+
+static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd)
+{
+ struct xsk_socket *xsk = _xdp;
+ struct xsk_ctx *ctx = xsk->ctx;
+ __u32 prog_id = 0;
+ int err;
+
+ if (ctx->has_bpf_link)
+ err = xsk_link_lookup(ctx->ifindex, &prog_id, &ctx->link_fd);
+ else
+ err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags);
+
+ if (err)
+ return err;
+
+ err = !prog_id ? xsk_init_xdp_res(xsk, xsks_map_fd) :
+ xsk_lookup_xdp_res(xsk, xsks_map_fd, prog_id);
+
+ if (!err && xsks_map_fd)
+ *xsks_map_fd = ctx->xsks_map_fd;
return err;
}
@@ -898,6 +1063,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
}
}
xsk->ctx = ctx;
+ xsk->ctx->has_bpf_link = xsk_probe_bpf_link();
if (rx) {
err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
@@ -1054,6 +1220,8 @@ void xsk_socket__delete(struct xsk_socket *xsk)
if (ctx->prog_fd != -1) {
xsk_delete_bpf_maps(xsk);
close(ctx->prog_fd);
+ if (ctx->has_bpf_link)
+ close(ctx->link_fd);
}
err = xsk_get_mmap_offsets(xsk->fd, &off);
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index 3464161c8eea..65fe318d1e71 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -179,3 +179,17 @@ types, which was introduced in `Clang 13`__. The older Clang versions will
either crash when compiling these tests, or generate an incorrect BTF.
__ https://reviews.llvm.org/D83289
+
+Kernel function call test and Clang version
+===========================================
+
+Some selftests (e.g. kfunc_call and bpf_tcp_ca) require a LLVM support
+to generate extern function in BTF. It was introduced in `Clang 13`__.
+
+Without it, the error from compiling bpf selftests looks like:
+
+.. code-block:: console
+
+ libbpf: failed to find BTF for extern 'tcp_slow_start' [25] section: -2
+
+__ https://reviews.llvm.org/D93563
diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index 91f0fac632f4..029589c008c9 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -187,16 +187,6 @@ struct tcp_congestion_ops {
typeof(y) __y = (y); \
__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
-static __always_inline __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked)
-{
- __u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh);
-
- acked -= cwnd - tp->snd_cwnd;
- tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
-
- return acked;
-}
-
static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp)
{
return tp->snd_cwnd < tp->snd_ssthresh;
@@ -213,22 +203,7 @@ static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk)
return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
}
-static __always_inline void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked)
-{
- /* If credits accumulated at a higher w, apply them gently now. */
- if (tp->snd_cwnd_cnt >= w) {
- tp->snd_cwnd_cnt = 0;
- tp->snd_cwnd++;
- }
-
- tp->snd_cwnd_cnt += acked;
- if (tp->snd_cwnd_cnt >= w) {
- __u32 delta = tp->snd_cwnd_cnt / w;
-
- tp->snd_cwnd_cnt -= delta * w;
- tp->snd_cwnd += delta;
- }
- tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp);
-}
+extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
+extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
#endif
diff --git a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c
new file mode 100644
index 000000000000..2e986e5e4cac
--- /dev/null
+++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <arpa/inet.h>
+#include <linux/bpf.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include <test_maps.h>
+
+struct test_lpm_key {
+ __u32 prefix;
+ struct in_addr ipv4;
+};
+
+static void map_batch_update(int map_fd, __u32 max_entries,
+ struct test_lpm_key *keys, int *values)
+{
+ __u32 i;
+ int err;
+ char buff[16] = { 0 };
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ for (i = 0; i < max_entries; i++) {
+ keys[i].prefix = 32;
+ snprintf(buff, 16, "192.168.1.%d", i + 1);
+ inet_pton(AF_INET, buff, &keys[i].ipv4);
+ values[i] = i + 1;
+ }
+
+ err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts);
+ CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno));
+}
+
+static void map_batch_verify(int *visited, __u32 max_entries,
+ struct test_lpm_key *keys, int *values)
+{
+ char buff[16] = { 0 };
+ int lower_byte = 0;
+ __u32 i;
+
+ memset(visited, 0, max_entries * sizeof(*visited));
+ for (i = 0; i < max_entries; i++) {
+ inet_ntop(AF_INET, &keys[i].ipv4, buff, 32);
+ CHECK(sscanf(buff, "192.168.1.%d", &lower_byte) == EOF,
+ "sscanf()", "error: i %d\n", i);
+ CHECK(lower_byte != values[i], "key/value checking",
+ "error: i %d key %s value %d\n", i, buff, values[i]);
+ visited[i] = 1;
+ }
+ for (i = 0; i < max_entries; i++) {
+ CHECK(visited[i] != 1, "visited checking",
+ "error: keys array at index %d missing\n", i);
+ }
+}
+
+void test_lpm_trie_map_batch_ops(void)
+{
+ struct bpf_create_map_attr xattr = {
+ .name = "lpm_trie_map",
+ .map_type = BPF_MAP_TYPE_LPM_TRIE,
+ .key_size = sizeof(struct test_lpm_key),
+ .value_size = sizeof(int),
+ .map_flags = BPF_F_NO_PREALLOC,
+ };
+ struct test_lpm_key *keys, key;
+ int map_fd, *values, *visited;
+ __u32 step, count, total, total_success;
+ const __u32 max_entries = 10;
+ __u64 batch = 0;
+ int err;
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ xattr.max_entries = max_entries;
+ map_fd = bpf_create_map_xattr(&xattr);
+ CHECK(map_fd == -1, "bpf_create_map_xattr()", "error:%s\n",
+ strerror(errno));
+
+ keys = malloc(max_entries * sizeof(struct test_lpm_key));
+ values = malloc(max_entries * sizeof(int));
+ visited = malloc(max_entries * sizeof(int));
+ CHECK(!keys || !values || !visited, "malloc()", "error:%s\n",
+ strerror(errno));
+
+ total_success = 0;
+ for (step = 1; step < max_entries; step++) {
+ map_batch_update(map_fd, max_entries, keys, values);
+ map_batch_verify(visited, max_entries, keys, values);
+ memset(keys, 0, max_entries * sizeof(*keys));
+ memset(values, 0, max_entries * sizeof(*values));
+ batch = 0;
+ total = 0;
+ /* iteratively lookup/delete elements with 'step'
+ * elements each.
+ */
+ count = step;
+ while (true) {
+ err = bpf_map_lookup_batch(map_fd,
+ total ? &batch : NULL, &batch,
+ keys + total, values + total, &count, &opts);
+
+ CHECK((err && errno != ENOENT), "lookup with steps",
+ "error: %s\n", strerror(errno));
+
+ total += count;
+ if (err)
+ break;
+ }
+
+ CHECK(total != max_entries, "lookup with steps",
+ "total = %u, max_entries = %u\n", total, max_entries);
+
+ map_batch_verify(visited, max_entries, keys, values);
+
+ total = 0;
+ count = step;
+ while (total < max_entries) {
+ if (max_entries - total < step)
+ count = max_entries - total;
+ err = bpf_map_delete_batch(map_fd, keys + total, &count,
+ &opts);
+ CHECK((err && errno != ENOENT), "delete batch",
+ "error: %s\n", strerror(errno));
+ total += count;
+ if (err)
+ break;
+ }
+ CHECK(total != max_entries, "delete with steps",
+ "total = %u, max_entries = %u\n", total, max_entries);
+
+ /* check map is empty, errono == ENOENT */
+ err = bpf_map_get_next_key(map_fd, NULL, &key);
+ CHECK(!err || errno != ENOENT, "bpf_map_get_next_key()",
+ "error: %s\n", strerror(errno));
+
+ total_success++;
+ }
+
+ CHECK(total_success == 0, "check total_success",
+ "unexpected failure\n");
+
+ printf("%s:PASS\n", __func__);
+
+ free(keys);
+ free(values);
+ free(visited);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
new file mode 100644
index 000000000000..7fc0951ee75f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "kfunc_call_test.skel.h"
+#include "kfunc_call_test_subprog.skel.h"
+
+static void test_main(void)
+{
+ struct kfunc_call_test *skel;
+ int prog_fd, retval, err;
+
+ skel = kfunc_call_test__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ return;
+
+ prog_fd = bpf_program__fd(skel->progs.kfunc_call_test1);
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ NULL, NULL, (__u32 *)&retval, NULL);
+ ASSERT_OK(err, "bpf_prog_test_run(test1)");
+ ASSERT_EQ(retval, 12, "test1-retval");
+
+ prog_fd = bpf_program__fd(skel->progs.kfunc_call_test2);
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ NULL, NULL, (__u32 *)&retval, NULL);
+ ASSERT_OK(err, "bpf_prog_test_run(test2)");
+ ASSERT_EQ(retval, 3, "test2-retval");
+
+ kfunc_call_test__destroy(skel);
+}
+
+static void test_subprog(void)
+{
+ struct kfunc_call_test_subprog *skel;
+ int prog_fd, retval, err;
+
+ skel = kfunc_call_test_subprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ return;
+
+ prog_fd = bpf_program__fd(skel->progs.kfunc_call_test1);
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ NULL, NULL, (__u32 *)&retval, NULL);
+ ASSERT_OK(err, "bpf_prog_test_run(test1)");
+ ASSERT_EQ(retval, 10, "test1-retval");
+ ASSERT_NEQ(skel->data->active_res, -1, "active_res");
+ ASSERT_EQ(skel->data->sk_state, BPF_TCP_CLOSE, "sk_state");
+
+ kfunc_call_test_subprog__destroy(skel);
+}
+
+void test_kfunc_call(void)
+{
+ if (test__start_subtest("main"))
+ test_main();
+
+ if (test__start_subtest("subprog"))
+ test_subprog();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index b8b48cac2ac3..ab77596b64e3 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -7,6 +7,7 @@
#include "test_skmsg_load_helpers.skel.h"
#include "test_sockmap_update.skel.h"
#include "test_sockmap_invalid_update.skel.h"
+#include "test_sockmap_skb_verdict_attach.skel.h"
#include "bpf_iter_sockmap.skel.h"
#define TCP_REPAIR 19 /* TCP sock is under repair right now */
@@ -281,6 +282,39 @@ out:
bpf_iter_sockmap__destroy(skel);
}
+static void test_sockmap_skb_verdict_attach(enum bpf_attach_type first,
+ enum bpf_attach_type second)
+{
+ struct test_sockmap_skb_verdict_attach *skel;
+ int err, map, verdict;
+
+ skel = test_sockmap_skb_verdict_attach__open_and_load();
+ if (CHECK_FAIL(!skel)) {
+ perror("test_sockmap_skb_verdict_attach__open_and_load");
+ return;
+ }
+
+ verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ map = bpf_map__fd(skel->maps.sock_map);
+
+ err = bpf_prog_attach(verdict, map, first, 0);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_attach");
+ goto out;
+ }
+
+ err = bpf_prog_attach(verdict, map, second, 0);
+ assert(err == -1 && errno == EBUSY);
+
+ err = bpf_prog_detach2(verdict, map, first);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_detach2");
+ goto out;
+ }
+out:
+ test_sockmap_skb_verdict_attach__destroy(skel);
+}
+
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
@@ -301,4 +335,10 @@ void test_sockmap_basic(void)
test_sockmap_copy(BPF_MAP_TYPE_SOCKMAP);
if (test__start_subtest("sockhash copy"))
test_sockmap_copy(BPF_MAP_TYPE_SOCKHASH);
+ if (test__start_subtest("sockmap skb_verdict attach")) {
+ test_sockmap_skb_verdict_attach(BPF_SK_SKB_VERDICT,
+ BPF_SK_SKB_STREAM_VERDICT);
+ test_sockmap_skb_verdict_attach(BPF_SK_SKB_STREAM_VERDICT,
+ BPF_SK_SKB_VERDICT);
+ }
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index c26e6bf05e49..648d9ae898d2 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -1603,6 +1603,141 @@ static void test_reuseport(struct test_sockmap_listen *skel,
}
}
+static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
+ int verd_mapfd, enum redir_mode mode)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ struct sockaddr_storage addr;
+ int c0, c1, p0, p1;
+ unsigned int pass;
+ socklen_t len;
+ int err, n;
+ u64 value;
+ u32 key;
+ char b;
+
+ zero_verdict_count(verd_mapfd);
+
+ p0 = socket_loopback(family, sotype | SOCK_NONBLOCK);
+ if (p0 < 0)
+ return;
+ len = sizeof(addr);
+ err = xgetsockname(p0, sockaddr(&addr), &len);
+ if (err)
+ goto close_peer0;
+
+ c0 = xsocket(family, sotype | SOCK_NONBLOCK, 0);
+ if (c0 < 0)
+ goto close_peer0;
+ err = xconnect(c0, sockaddr(&addr), len);
+ if (err)
+ goto close_cli0;
+ err = xgetsockname(c0, sockaddr(&addr), &len);
+ if (err)
+ goto close_cli0;
+ err = xconnect(p0, sockaddr(&addr), len);
+ if (err)
+ goto close_cli0;
+
+ p1 = socket_loopback(family, sotype | SOCK_NONBLOCK);
+ if (p1 < 0)
+ goto close_cli0;
+ err = xgetsockname(p1, sockaddr(&addr), &len);
+ if (err)
+ goto close_cli0;
+
+ c1 = xsocket(family, sotype | SOCK_NONBLOCK, 0);
+ if (c1 < 0)
+ goto close_peer1;
+ err = xconnect(c1, sockaddr(&addr), len);
+ if (err)
+ goto close_cli1;
+ err = xgetsockname(c1, sockaddr(&addr), &len);
+ if (err)
+ goto close_cli1;
+ err = xconnect(p1, sockaddr(&addr), len);
+ if (err)
+ goto close_cli1;
+
+ key = 0;
+ value = p0;
+ err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ if (err)
+ goto close_cli1;
+
+ key = 1;
+ value = p1;
+ err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ if (err)
+ goto close_cli1;
+
+ n = write(c1, "a", 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: write", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete write", log_prefix);
+ if (n < 1)
+ goto close_cli1;
+
+ key = SK_PASS;
+ err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
+ if (err)
+ goto close_cli1;
+ if (pass != 1)
+ FAIL("%s: want pass count 1, have %d", log_prefix, pass);
+
+ n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: read", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete read", log_prefix);
+
+close_cli1:
+ xclose(c1);
+close_peer1:
+ xclose(p1);
+close_cli0:
+ xclose(c0);
+close_peer0:
+ xclose(p0);
+}
+
+static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
+ if (err)
+ return;
+
+ skel->bss->test_ingress = false;
+ udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+ REDIR_EGRESS);
+ skel->bss->test_ingress = true;
+ udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+ REDIR_INGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
+}
+
+static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
+ int family)
+{
+ const char *family_name, *map_name;
+ char s[MAX_TEST_NAME];
+
+ family_name = family_str(family);
+ map_name = map_type_str(map);
+ snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
+ if (!test__start_subtest(s))
+ return;
+ udp_skb_redir_to_connected(skel, map, family);
+}
+
static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
int family)
{
@@ -1611,6 +1746,7 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
test_redir(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_DGRAM);
+ test_udp_redir(skel, map, family);
}
void test_sockmap_listen(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c
index b54bc0c351b7..0252f61d611a 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_ima.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c
@@ -68,7 +68,8 @@ void test_test_ima(void)
goto close_prog;
snprintf(cmd, sizeof(cmd), "./ima_setup.sh setup %s", measured_dir);
- if (CHECK_FAIL(system(cmd)))
+ err = system(cmd);
+ if (CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno))
goto close_clean;
err = run_measured_process(measured_dir, &skel->bss->monitored_pid);
@@ -81,7 +82,8 @@ void test_test_ima(void)
close_clean:
snprintf(cmd, sizeof(cmd), "./ima_setup.sh cleanup %s", measured_dir);
- CHECK_FAIL(system(cmd));
+ err = system(cmd);
+ CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno);
close_prog:
ima__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c
index 6939bfd8690f..f62df4d023f9 100644
--- a/tools/testing/selftests/bpf/progs/bpf_cubic.c
+++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c
@@ -174,8 +174,8 @@ static __always_inline void bictcp_hystart_reset(struct sock *sk)
* as long as it is used in one of the func ptr
* under SEC(".struct_ops").
*/
-SEC("struct_ops/bictcp_init")
-void BPF_PROG(bictcp_init, struct sock *sk)
+SEC("struct_ops/bpf_cubic_init")
+void BPF_PROG(bpf_cubic_init, struct sock *sk)
{
struct bictcp *ca = inet_csk_ca(sk);
@@ -192,7 +192,7 @@ void BPF_PROG(bictcp_init, struct sock *sk)
* The remaining tcp-cubic functions have an easier way.
*/
SEC("no-sec-prefix-bictcp_cwnd_event")
-void BPF_PROG(bictcp_cwnd_event, struct sock *sk, enum tcp_ca_event event)
+void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event)
{
if (event == CA_EVENT_TX_START) {
struct bictcp *ca = inet_csk_ca(sk);
@@ -384,7 +384,7 @@ tcp_friendliness:
}
/* Or simply use the BPF_STRUCT_OPS to avoid the SEC boiler plate. */
-void BPF_STRUCT_OPS(bictcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
+void BPF_STRUCT_OPS(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
@@ -403,7 +403,7 @@ void BPF_STRUCT_OPS(bictcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
tcp_cong_avoid_ai(tp, ca->cnt, acked);
}
-__u32 BPF_STRUCT_OPS(bictcp_recalc_ssthresh, struct sock *sk)
+__u32 BPF_STRUCT_OPS(bpf_cubic_recalc_ssthresh, struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
@@ -420,7 +420,7 @@ __u32 BPF_STRUCT_OPS(bictcp_recalc_ssthresh, struct sock *sk)
return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
}
-void BPF_STRUCT_OPS(bictcp_state, struct sock *sk, __u8 new_state)
+void BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 new_state)
{
if (new_state == TCP_CA_Loss) {
bictcp_reset(inet_csk_ca(sk));
@@ -496,7 +496,7 @@ static __always_inline void hystart_update(struct sock *sk, __u32 delay)
}
}
-void BPF_STRUCT_OPS(bictcp_acked, struct sock *sk,
+void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk,
const struct ack_sample *sample)
{
const struct tcp_sock *tp = tcp_sk(sk);
@@ -525,21 +525,21 @@ void BPF_STRUCT_OPS(bictcp_acked, struct sock *sk,
hystart_update(sk, delay);
}
-__u32 BPF_STRUCT_OPS(tcp_reno_undo_cwnd, struct sock *sk)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
+extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym;
- return max(tp->snd_cwnd, tp->prior_cwnd);
+__u32 BPF_STRUCT_OPS(bpf_cubic_undo_cwnd, struct sock *sk)
+{
+ return tcp_reno_undo_cwnd(sk);
}
SEC(".struct_ops")
struct tcp_congestion_ops cubic = {
- .init = (void *)bictcp_init,
- .ssthresh = (void *)bictcp_recalc_ssthresh,
- .cong_avoid = (void *)bictcp_cong_avoid,
- .set_state = (void *)bictcp_state,
- .undo_cwnd = (void *)tcp_reno_undo_cwnd,
- .cwnd_event = (void *)bictcp_cwnd_event,
- .pkts_acked = (void *)bictcp_acked,
+ .init = (void *)bpf_cubic_init,
+ .ssthresh = (void *)bpf_cubic_recalc_ssthresh,
+ .cong_avoid = (void *)bpf_cubic_cong_avoid,
+ .set_state = (void *)bpf_cubic_state,
+ .undo_cwnd = (void *)bpf_cubic_undo_cwnd,
+ .cwnd_event = (void *)bpf_cubic_cwnd_event,
+ .pkts_acked = (void *)bpf_cubic_acked,
.name = "bpf_cubic",
};
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
index 4dc1a967776a..fd42247da8b4 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -194,22 +194,12 @@ __u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk)
return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
}
-SEC("struct_ops/tcp_reno_cong_avoid")
-void BPF_PROG(tcp_reno_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (!tcp_is_cwnd_limited(sk))
- return;
+extern void tcp_reno_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym;
- /* In "safe" area, increase. */
- if (tcp_in_slow_start(tp)) {
- acked = tcp_slow_start(tp, acked);
- if (!acked)
- return;
- }
- /* In dangerous area, increase slowly. */
- tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
+SEC("struct_ops/dctcp_reno_cong_avoid")
+void BPF_PROG(dctcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
+{
+ tcp_reno_cong_avoid(sk, ack, acked);
}
SEC(".struct_ops")
@@ -226,7 +216,7 @@ struct tcp_congestion_ops dctcp = {
.in_ack_event = (void *)dctcp_update_alpha,
.cwnd_event = (void *)dctcp_cwnd_event,
.ssthresh = (void *)dctcp_ssthresh,
- .cong_avoid = (void *)tcp_reno_cong_avoid,
+ .cong_avoid = (void *)dctcp_cong_avoid,
.undo_cwnd = (void *)dctcp_cwnd_undo,
.set_state = (void *)dctcp_state,
.flags = TCP_CONG_NEEDS_ECN,
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
new file mode 100644
index 000000000000..470f8723e463
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym;
+extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
+ __u32 c, __u64 d) __ksym;
+
+SEC("classifier")
+int kfunc_call_test2(struct __sk_buff *skb)
+{
+ struct bpf_sock *sk = skb->sk;
+
+ if (!sk)
+ return -1;
+
+ sk = bpf_sk_fullsock(sk);
+ if (!sk)
+ return -1;
+
+ return bpf_kfunc_call_test2((struct sock *)sk, 1, 2);
+}
+
+SEC("classifier")
+int kfunc_call_test1(struct __sk_buff *skb)
+{
+ struct bpf_sock *sk = skb->sk;
+ __u64 a = 1ULL << 32;
+ __u32 ret;
+
+ if (!sk)
+ return -1;
+
+ sk = bpf_sk_fullsock(sk);
+ if (!sk)
+ return -1;
+
+ a = bpf_kfunc_call_test1((struct sock *)sk, 1, a | 2, 3, a | 4);
+ ret = a >> 32; /* ret should be 2 */
+ ret += (__u32)a; /* ret should be 12 */
+
+ return ret;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
new file mode 100644
index 000000000000..b2dcb7d9cb03
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+extern const int bpf_prog_active __ksym;
+extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
+ __u32 c, __u64 d) __ksym;
+extern struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym;
+int active_res = -1;
+int sk_state = -1;
+
+int __noinline f1(struct __sk_buff *skb)
+{
+ struct bpf_sock *sk = skb->sk;
+ int *active;
+
+ if (!sk)
+ return -1;
+
+ sk = bpf_sk_fullsock(sk);
+ if (!sk)
+ return -1;
+
+ active = (int *)bpf_per_cpu_ptr(&bpf_prog_active,
+ bpf_get_smp_processor_id());
+ if (active)
+ active_res = *active;
+
+ sk_state = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state;
+
+ return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4);
+}
+
+SEC("classifier")
+int kfunc_call_test1(struct __sk_buff *skb)
+{
+ return f1(skb);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
index fa221141e9c1..a39eba9f5201 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
@@ -29,6 +29,7 @@ struct {
} verdict_map SEC(".maps");
static volatile bool test_sockmap; /* toggled by user-space */
+static volatile bool test_ingress; /* toggled by user-space */
SEC("sk_skb/stream_parser")
int prog_stream_parser(struct __sk_buff *skb)
@@ -55,6 +56,27 @@ int prog_stream_verdict(struct __sk_buff *skb)
return verdict;
}
+SEC("sk_skb/skb_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ unsigned int *count;
+ __u32 zero = 0;
+ int verdict;
+
+ if (test_sockmap)
+ verdict = bpf_sk_redirect_map(skb, &sock_map, zero,
+ test_ingress ? BPF_F_INGRESS : 0);
+ else
+ verdict = bpf_sk_redirect_hash(skb, &sock_hash, &zero,
+ test_ingress ? BPF_F_INGRESS : 0);
+
+ count = bpf_map_lookup_elem(&verdict_map, &verdict);
+ if (count)
+ (*count)++;
+
+ return verdict;
+}
+
SEC("sk_msg")
int prog_msg_verdict(struct sk_msg_md *msg)
{
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c
new file mode 100644
index 000000000000..2d31f66e4f23
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_map SEC(".maps");
+
+SEC("sk_skb/skb_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ return SK_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 56d4474e2c83..46633a3bfb0b 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -107,7 +107,7 @@ setup_vethPairs() {
echo "setting up ${VETH0}: namespace: ${NS0}"
fi
ip netns add ${NS1}
- ip link add ${VETH0} type veth peer name ${VETH1}
+ ip link add ${VETH0} numtxqueues 4 numrxqueues 4 type veth peer name ${VETH1} numtxqueues 4 numrxqueues 4
if [ -f /proc/net/if_inet6 ]; then
echo 1 > /proc/sys/net/ipv6/conf/${VETH0}/disable_ipv6
fi
@@ -118,6 +118,7 @@ setup_vethPairs() {
ip netns exec ${NS1} ip link set ${VETH1} mtu ${MTU}
ip link set ${VETH0} mtu ${MTU}
ip netns exec ${NS1} ip link set ${VETH1} up
+ ip netns exec ${NS1} ip link set dev lo up
ip link set ${VETH0} up
}
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index eb888c8479c3..336a749673d1 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -19,7 +19,7 @@
BPF_MOV64_IMM(BPF_REG_0, 2),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 1,
@@ -136,7 +136,7 @@
{
"calls: wrong src reg",
.insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 3, 0, 0),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
@@ -397,7 +397,7 @@
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.fixup_map_hash_48b = { 3 },
.result_unpriv = REJECT,
.result = ACCEPT,
@@ -1977,7 +1977,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
},
@@ -2003,7 +2003,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.errstr = "!read_ok",
.result = REJECT,
},
@@ -2028,7 +2028,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.errstr = "!read_ok",
.result = REJECT,
},
diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c
index 5cf361d8eb1c..17fe33a75034 100644
--- a/tools/testing/selftests/bpf/verifier/dead_code.c
+++ b/tools/testing/selftests/bpf/verifier/dead_code.c
@@ -85,7 +85,7 @@
BPF_MOV64_IMM(BPF_REG_0, 12),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
@@ -103,7 +103,7 @@
BPF_MOV64_IMM(BPF_REG_0, 12),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
@@ -121,7 +121,7 @@
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -5),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
@@ -137,7 +137,7 @@
BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
@@ -152,7 +152,7 @@
BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh
index 22554894db99..8889b3f55236 100755
--- a/tools/testing/selftests/bpf/vmtest.sh
+++ b/tools/testing/selftests/bpf/vmtest.sh
@@ -24,15 +24,15 @@ EXIT_STATUS_FILE="${LOG_FILE_BASE}.exit_status"
usage()
{
cat <<EOF
-Usage: $0 [-i] [-d <output_dir>] -- [<command>]
+Usage: $0 [-i] [-s] [-d <output_dir>] -- [<command>]
<command> is the command you would normally run when you are in
tools/testing/selftests/bpf. e.g:
$0 -- ./test_progs -t test_lsm
-If no command is specified, "${DEFAULT_COMMAND}" will be run by
-default.
+If no command is specified and a debug shell (-s) is not requested,
+"${DEFAULT_COMMAND}" will be run by default.
If you build your kernel using KBUILD_OUTPUT= or O= options, these
can be passed as environment variables to the script:
@@ -49,6 +49,9 @@ Options:
-d) Update the output directory (default: ${OUTPUT_DIR})
-j) Number of jobs for compilation, similar to -j in make
(default: ${NUM_COMPILE_JOBS})
+ -s) Instead of powering off the VM, start an interactive
+ shell. If <command> is specified, the shell runs after
+ the command finishes executing
EOF
}
@@ -149,6 +152,7 @@ update_init_script()
local init_script_dir="${OUTPUT_DIR}/${MOUNT_DIR}/etc/rcS.d"
local init_script="${init_script_dir}/S50-startup"
local command="$1"
+ local exit_command="$2"
mount_image
@@ -162,9 +166,10 @@ EOF
fi
- sudo bash -c "cat >${init_script}" <<EOF
-#!/bin/bash
+ sudo bash -c "echo '#!/bin/bash' > ${init_script}"
+ if [[ "${command}" != "" ]]; then
+ sudo bash -c "cat >>${init_script}" <<EOF
# Have a default value in the exit status file
# incase the VM is forcefully stopped.
echo "130" > "/root/${EXIT_STATUS_FILE}"
@@ -175,9 +180,12 @@ echo "130" > "/root/${EXIT_STATUS_FILE}"
stdbuf -oL -eL ${command}
echo "\$?" > "/root/${EXIT_STATUS_FILE}"
} 2>&1 | tee "/root/${LOG_FILE}"
-poweroff -f
+# Ensure that the logs are written to disk
+sync
EOF
+ fi
+ sudo bash -c "echo ${exit_command} >> ${init_script}"
sudo chmod a+x "${init_script}"
unmount_image
}
@@ -277,8 +285,10 @@ main()
local kernel_bzimage="${kernel_checkout}/${X86_BZIMAGE}"
local command="${DEFAULT_COMMAND}"
local update_image="no"
+ local exit_command="poweroff -f"
+ local debug_shell="no"
- while getopts 'hkid:j:' opt; do
+ while getopts 'hskid:j:' opt; do
case ${opt} in
i)
update_image="yes"
@@ -289,6 +299,11 @@ main()
j)
NUM_COMPILE_JOBS="$OPTARG"
;;
+ s)
+ command=""
+ debug_shell="yes"
+ exit_command="bash"
+ ;;
h)
usage
exit 0
@@ -307,7 +322,7 @@ main()
done
shift $((OPTIND -1))
- if [[ $# -eq 0 ]]; then
+ if [[ $# -eq 0 && "${debug_shell}" == "no" ]]; then
echo "No command specified, will run ${DEFAULT_COMMAND} in the vm"
else
command="$@"
@@ -355,10 +370,12 @@ main()
fi
update_selftests "${kernel_checkout}" "${make_command}"
- update_init_script "${command}"
+ update_init_script "${command}" "${exit_command}"
run_vm "${kernel_bzimage}"
- copy_logs
- echo "Logs saved in ${OUTPUT_DIR}/${LOG_FILE}"
+ if [[ "${command}" != "" ]]; then
+ copy_logs
+ echo "Logs saved in ${OUTPUT_DIR}/${LOG_FILE}"
+ fi
}
catch()
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 1e21a3172687..1135fb980814 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -41,8 +41,12 @@
* Reduce the size of the RX ring to a fraction of the fill ring size.
* iv. fill queue empty
* Do not populate the fill queue and then try to receive pkts.
+ * f. bpf_link resource persistence
+ * Configure sockets at indexes 0 and 1, run a traffic on queue ids 0,
+ * then remove xsk sockets from queue 0 on both veth interfaces and
+ * finally run a traffic on queues ids 1
*
- * Total tests: 10
+ * Total tests: 12
*
* Flow:
* -----
@@ -93,6 +97,13 @@ typedef __u16 __sum16;
#include "xdpxceiver.h"
#include "../kselftest.h"
+static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62";
+static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61";
+static const char *IP1 = "192.168.100.162";
+static const char *IP2 = "192.168.100.161";
+static const u16 UDP_PORT1 = 2020;
+static const u16 UDP_PORT2 = 2121;
+
static void __exit_with_error(int error, const char *file, const char *func, int line)
{
if (configured_mode == TEST_MODE_UNCONFIGURED) {
@@ -108,27 +119,12 @@ static void __exit_with_error(int error, const char *file, const char *func, int
#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
#define print_ksft_result(void)\
- (ksft_test_result_pass("PASS: %s %s %s%s%s\n", configured_mode ? "DRV" : "SKB",\
+ (ksft_test_result_pass("PASS: %s %s %s%s%s%s\n", configured_mode ? "DRV" : "SKB",\
test_type == TEST_TYPE_POLL ? "POLL" : "NOPOLL",\
test_type == TEST_TYPE_TEARDOWN ? "Socket Teardown" : "",\
test_type == TEST_TYPE_BIDI ? "Bi-directional Sockets" : "",\
- test_type == TEST_TYPE_STATS ? "Stats" : ""))
-
-static void pthread_init_mutex(void)
-{
- pthread_mutex_init(&sync_mutex, NULL);
- pthread_mutex_init(&sync_mutex_tx, NULL);
- pthread_cond_init(&signal_rx_condition, NULL);
- pthread_cond_init(&signal_tx_condition, NULL);
-}
-
-static void pthread_destroy_mutex(void)
-{
- pthread_mutex_destroy(&sync_mutex);
- pthread_mutex_destroy(&sync_mutex_tx);
- pthread_cond_destroy(&signal_rx_condition);
- pthread_cond_destroy(&signal_tx_condition);
-}
+ test_type == TEST_TYPE_STATS ? "Stats" : "",\
+ test_type == TEST_TYPE_BPF_RES ? "BPF RES" : ""))
static void *memset32_htonl(void *dest, u32 val, u32 size)
{
@@ -147,24 +143,11 @@ static void *memset32_htonl(void *dest, u32 val, u32 size)
}
/*
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-static inline unsigned short from32to16(unsigned int x)
-{
- /* add up 16-bit and 16-bit for 16+c bit */
- x = (x & 0xffff) + (x >> 16);
- /* add up carry.. */
- x = (x & 0xffff) + (x >> 16);
- return x;
-}
-
-/*
* Fold a partial checksum
* This function code has been taken from
* Linux kernel include/asm-generic/checksum.h
*/
-static inline __u16 csum_fold(__u32 csum)
+static __u16 csum_fold(__u32 csum)
{
u32 sum = (__force u32)csum;
@@ -177,7 +160,7 @@ static inline __u16 csum_fold(__u32 csum)
* This function code has been taken from
* Linux kernel lib/checksum.c
*/
-static inline u32 from64to32(u64 x)
+static u32 from64to32(u64 x)
{
/* add up 32-bit and 32-bit for 32+c bit */
x = (x & 0xffffffff) + (x >> 32);
@@ -186,13 +169,11 @@ static inline u32 from64to32(u64 x)
return (u32)x;
}
-__u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum);
-
/*
* This function code has been taken from
* Linux kernel lib/checksum.c
*/
-__u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
+static __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
{
unsigned long long s = (__force u32)sum;
@@ -210,13 +191,12 @@ __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u3
* This function has been taken from
* Linux kernel include/asm-generic/checksum.h
*/
-static inline __u16
-csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
+static __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
{
return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
}
-static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt)
+static u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt)
{
u32 csum = 0;
u32 cnt = 0;
@@ -271,9 +251,8 @@ static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, PKT_SIZE);
}
-static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size)
+static void xsk_configure_umem(struct ifobject *data, void *buffer, int idx)
{
- int ret;
struct xsk_umem_config cfg = {
.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
@@ -281,17 +260,22 @@ static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size)
.frame_headroom = frame_headroom,
.flags = XSK_UMEM__DEFAULT_FLAGS
};
+ int size = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
+ struct xsk_umem_info *umem;
+ int ret;
- data->umem = calloc(1, sizeof(struct xsk_umem_info));
- if (!data->umem)
+ umem = calloc(1, sizeof(struct xsk_umem_info));
+ if (!umem)
exit_with_error(errno);
- ret = xsk_umem__create(&data->umem->umem, buffer, size,
- &data->umem->fq, &data->umem->cq, &cfg);
+ ret = xsk_umem__create(&umem->umem, buffer, size,
+ &umem->fq, &umem->cq, &cfg);
if (ret)
exit_with_error(ret);
- data->umem->buffer = buffer;
+ umem->buffer = buffer;
+
+ data->umem_arr[idx] = umem;
}
static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
@@ -307,18 +291,19 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
}
-static int xsk_configure_socket(struct ifobject *ifobject)
+static int xsk_configure_socket(struct ifobject *ifobject, int idx)
{
struct xsk_socket_config cfg;
+ struct xsk_socket_info *xsk;
struct xsk_ring_cons *rxr;
struct xsk_ring_prod *txr;
int ret;
- ifobject->xsk = calloc(1, sizeof(struct xsk_socket_info));
- if (!ifobject->xsk)
+ xsk = calloc(1, sizeof(struct xsk_socket_info));
+ if (!xsk)
exit_with_error(errno);
- ifobject->xsk->umem = ifobject->umem;
+ xsk->umem = ifobject->umem;
cfg.rx_size = rxqsize;
cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
cfg.libbpf_flags = 0;
@@ -326,19 +311,20 @@ static int xsk_configure_socket(struct ifobject *ifobject)
cfg.bind_flags = xdp_bind_flags;
if (test_type != TEST_TYPE_BIDI) {
- rxr = (ifobject->fv.vector == rx) ? &ifobject->xsk->rx : NULL;
- txr = (ifobject->fv.vector == tx) ? &ifobject->xsk->tx : NULL;
+ rxr = (ifobject->fv.vector == rx) ? &xsk->rx : NULL;
+ txr = (ifobject->fv.vector == tx) ? &xsk->tx : NULL;
} else {
- rxr = &ifobject->xsk->rx;
- txr = &ifobject->xsk->tx;
+ rxr = &xsk->rx;
+ txr = &xsk->tx;
}
- ret = xsk_socket__create(&ifobject->xsk->xsk, ifobject->ifname,
- opt_queue, ifobject->umem->umem, rxr, txr, &cfg);
-
+ ret = xsk_socket__create(&xsk->xsk, ifobject->ifname, idx,
+ ifobject->umem->umem, rxr, txr, &cfg);
if (ret)
return 1;
+ ifobject->xsk_arr[idx] = xsk;
+
return 0;
}
@@ -364,12 +350,15 @@ static void usage(const char *prog)
ksft_print_msg(str, prog);
}
-static bool switch_namespace(int idx)
+static int switch_namespace(const char *nsname)
{
char fqns[26] = "/var/run/netns/";
int nsfd;
- strncat(fqns, ifdict[idx]->nsname, sizeof(fqns) - strlen(fqns) - 1);
+ if (!nsname || strlen(nsname) == 0)
+ return -1;
+
+ strncat(fqns, nsname, sizeof(fqns) - strlen(fqns) - 1);
nsfd = open(fqns, O_RDONLY);
if (nsfd == -1)
@@ -378,26 +367,9 @@ static bool switch_namespace(int idx)
if (setns(nsfd, 0) == -1)
exit_with_error(errno);
- return true;
-}
+ print_verbose("NS switched: %s\n", nsname);
-static void *nsswitchthread(void *args)
-{
- struct targs *targs = args;
-
- targs->retptr = false;
-
- if (switch_namespace(targs->idx)) {
- ifdict[targs->idx]->ifindex = if_nametoindex(ifdict[targs->idx]->ifname);
- if (!ifdict[targs->idx]->ifindex) {
- ksft_test_result_fail("ERROR: [%s] interface \"%s\" does not exist\n",
- __func__, ifdict[targs->idx]->ifname);
- } else {
- print_verbose("Interface found: %s\n", ifdict[targs->idx]->ifname);
- targs->retptr = true;
- }
- }
- pthread_exit(NULL);
+ return nsfd;
}
static int validate_interfaces(void)
@@ -409,33 +381,6 @@ static int validate_interfaces(void)
ret = false;
ksft_test_result_fail("ERROR: interfaces: -i <int>,<ns> -i <int>,<ns>.");
}
- if (strcmp(ifdict[i]->nsname, "")) {
- struct targs *targs;
-
- targs = malloc(sizeof(*targs));
- if (!targs)
- exit_with_error(errno);
-
- targs->idx = i;
- if (pthread_create(&ns_thread, NULL, nsswitchthread, targs))
- exit_with_error(errno);
-
- pthread_join(ns_thread, NULL);
-
- if (targs->retptr)
- print_verbose("NS switched: %s\n", ifdict[i]->nsname);
-
- free(targs);
- } else {
- ifdict[i]->ifindex = if_nametoindex(ifdict[i]->ifname);
- if (!ifdict[i]->ifindex) {
- ksft_test_result_fail
- ("ERROR: interface \"%s\" does not exist\n", ifdict[i]->ifname);
- ret = false;
- } else {
- print_verbose("Interface found: %s\n", ifdict[i]->ifname);
- }
- }
}
return ret;
}
@@ -447,7 +392,7 @@ static void parse_command_line(int argc, char **argv)
opterr = 0;
for (;;) {
- c = getopt_long(argc, argv, "i:q:DC:v", long_options, &option_index);
+ c = getopt_long(argc, argv, "i:DC:v", long_options, &option_index);
if (c == -1)
break;
@@ -467,9 +412,6 @@ static void parse_command_line(int argc, char **argv)
MAX_INTERFACES_NAMESPACE_CHARS);
interface_index++;
break;
- case 'q':
- opt_queue = atoi(optarg);
- break;
case 'D':
debug_pkt_dump = 1;
break;
@@ -506,7 +448,7 @@ static void kick_tx(struct xsk_socket_info *xsk)
exit_with_error(errno);
}
-static inline void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
+static void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
{
unsigned int rcvd;
u32 idx;
@@ -514,7 +456,7 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
if (!xsk->outstanding_tx)
return;
- if (!NEED_WAKEUP || xsk_ring_prod__needs_wakeup(&xsk->tx))
+ if (xsk_ring_prod__needs_wakeup(&xsk->tx))
kick_tx(xsk);
rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
@@ -602,16 +544,15 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size)
xsk_ring_prod__submit(&xsk->tx, batch_size);
if (!tx_invalid_test) {
xsk->outstanding_tx += batch_size;
- } else {
- if (!NEED_WAKEUP || xsk_ring_prod__needs_wakeup(&xsk->tx))
- kick_tx(xsk);
+ } else if (xsk_ring_prod__needs_wakeup(&xsk->tx)) {
+ kick_tx(xsk);
}
*frameptr += batch_size;
*frameptr %= num_frames;
complete_tx_only(xsk, batch_size);
}
-static inline int get_batch_size(int pkt_cnt)
+static int get_batch_size(int pkt_cnt)
{
if (!opt_pkt_count)
return BATCH_SIZE;
@@ -667,45 +608,40 @@ static void tx_only_all(struct ifobject *ifobject)
static void worker_pkt_dump(void)
{
- struct in_addr ipaddr;
+ struct ethhdr *ethhdr;
+ struct iphdr *iphdr;
+ struct udphdr *udphdr;
+ char s[128];
+ int payload;
+ void *ptr;
fprintf(stdout, "---------------------------------------\n");
for (int iter = 0; iter < num_frames - 1; iter++) {
+ ptr = pkt_buf[iter]->payload;
+ ethhdr = ptr;
+ iphdr = ptr + sizeof(*ethhdr);
+ udphdr = ptr + sizeof(*ethhdr) + sizeof(*iphdr);
+
/*extract L2 frame */
fprintf(stdout, "DEBUG>> L2: dst mac: ");
for (int i = 0; i < ETH_ALEN; i++)
- fprintf(stdout, "%02X", ((struct ethhdr *)
- pkt_buf[iter]->payload)->h_dest[i]);
+ fprintf(stdout, "%02X", ethhdr->h_dest[i]);
fprintf(stdout, "\nDEBUG>> L2: src mac: ");
for (int i = 0; i < ETH_ALEN; i++)
- fprintf(stdout, "%02X", ((struct ethhdr *)
- pkt_buf[iter]->payload)->h_source[i]);
+ fprintf(stdout, "%02X", ethhdr->h_source[i]);
/*extract L3 frame */
- fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n",
- ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->ihl);
-
- ipaddr.s_addr =
- ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->saddr;
- fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n", inet_ntoa(ipaddr));
-
- ipaddr.s_addr =
- ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->daddr;
- fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n", inet_ntoa(ipaddr));
-
+ fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl);
+ fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n",
+ inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s)));
+ fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n",
+ inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s)));
/*extract L4 frame */
- fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n",
- ntohs(((struct udphdr *)(pkt_buf[iter]->payload +
- sizeof(struct ethhdr) +
- sizeof(struct iphdr)))->source));
-
- fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n",
- ntohs(((struct udphdr *)(pkt_buf[iter]->payload +
- sizeof(struct ethhdr) +
- sizeof(struct iphdr)))->dest));
+ fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source));
+ fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest));
/*extract L5 frame */
- int payload = *((uint32_t *)(pkt_buf[iter]->payload + PKT_HDR_SIZE));
+ payload = *((uint32_t *)(ptr + PKT_HDR_SIZE));
if (payload == EOT) {
print_verbose("End-of-transmission frame received\n");
@@ -809,37 +745,69 @@ static void worker_pkt_validate(void)
}
}
-static void thread_common_ops(struct ifobject *ifobject, void *bufs, pthread_mutex_t *mutexptr,
- atomic_int *spinningptr)
+static void thread_common_ops(struct ifobject *ifobject, void *bufs)
{
+ int umem_sz = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
int ctr = 0;
int ret;
- xsk_configure_umem(ifobject, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE);
- ret = xsk_configure_socket(ifobject);
+ ifobject->ns_fd = switch_namespace(ifobject->nsname);
+
+ if (test_type == TEST_TYPE_BPF_RES)
+ umem_sz *= 2;
+
+ bufs = mmap(NULL, umem_sz,
+ PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (bufs == MAP_FAILED)
+ exit_with_error(errno);
+
+ xsk_configure_umem(ifobject, bufs, 0);
+ ifobject->umem = ifobject->umem_arr[0];
+ ret = xsk_configure_socket(ifobject, 0);
/* Retry Create Socket if it fails as xsk_socket__create()
* is asynchronous
- *
- * Essential to lock Mutex here to prevent Tx thread from
- * entering before Rx and causing a deadlock
*/
- pthread_mutex_lock(mutexptr);
while (ret && ctr < SOCK_RECONF_CTR) {
- atomic_store(spinningptr, 1);
- xsk_configure_umem(ifobject, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE);
- ret = xsk_configure_socket(ifobject);
+ xsk_configure_umem(ifobject, bufs, 0);
+ ifobject->umem = ifobject->umem_arr[0];
+ ret = xsk_configure_socket(ifobject, 0);
usleep(USLEEP_MAX);
ctr++;
}
- atomic_store(spinningptr, 0);
- pthread_mutex_unlock(mutexptr);
if (ctr >= SOCK_RECONF_CTR)
exit_with_error(ret);
+
+ ifobject->umem = ifobject->umem_arr[0];
+ ifobject->xsk = ifobject->xsk_arr[0];
+
+ if (test_type == TEST_TYPE_BPF_RES) {
+ xsk_configure_umem(ifobject, (u8 *)bufs + (umem_sz / 2), 1);
+ ifobject->umem = ifobject->umem_arr[1];
+ ret = xsk_configure_socket(ifobject, 1);
+ }
+
+ ifobject->umem = ifobject->umem_arr[0];
+ ifobject->xsk = ifobject->xsk_arr[0];
+ print_verbose("Interface [%s] vector [%s]\n",
+ ifobject->ifname, ifobject->fv.vector == tx ? "Tx" : "Rx");
+}
+
+static bool testapp_is_test_two_stepped(void)
+{
+ return (test_type != TEST_TYPE_BIDI && test_type != TEST_TYPE_BPF_RES) || second_step;
+}
+
+static void testapp_cleanup_xsk_res(struct ifobject *ifobj)
+{
+ if (testapp_is_test_two_stepped()) {
+ xsk_socket__delete(ifobj->xsk->xsk);
+ (void)xsk_umem__delete(ifobj->umem->umem);
+ }
}
-static void *worker_testapp_validate(void *arg)
+static void *worker_testapp_validate_tx(void *arg)
{
struct udphdr *udp_hdr =
(struct udphdr *)(pkt_data + sizeof(struct ethhdr) + sizeof(struct iphdr));
@@ -849,157 +817,97 @@ static void *worker_testapp_validate(void *arg)
struct generic_data data;
void *bufs = NULL;
- pthread_attr_setstacksize(&attr, THREAD_STACK);
-
- if (!bidi_pass) {
- bufs = mmap(NULL, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE,
- PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (bufs == MAP_FAILED)
- exit_with_error(errno);
-
- if (strcmp(ifobject->nsname, ""))
- switch_namespace(ifobject->ifdict_index);
+ if (!second_step)
+ thread_common_ops(ifobject, bufs);
+
+ for (int i = 0; i < num_frames; i++) {
+ /*send EOT frame */
+ if (i == (num_frames - 1))
+ data.seqnum = -1;
+ else
+ data.seqnum = i;
+ gen_udp_hdr(&data, ifobject, udp_hdr);
+ gen_ip_hdr(ifobject, ip_hdr);
+ gen_udp_csum(udp_hdr, ip_hdr);
+ gen_eth_hdr(ifobject, eth_hdr);
+ gen_eth_frame(ifobject->umem, i * XSK_UMEM__DEFAULT_FRAME_SIZE);
}
- if (ifobject->fv.vector == tx) {
- int spinningrxctr = 0;
+ print_verbose("Sending %d packets on interface %s\n",
+ (opt_pkt_count - 1), ifobject->ifname);
+ tx_only_all(ifobject);
- if (!bidi_pass)
- thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_tx);
-
- while (atomic_load(&spinning_rx) && spinningrxctr < SOCK_RECONF_CTR) {
- spinningrxctr++;
- usleep(USLEEP_MAX);
- }
+ testapp_cleanup_xsk_res(ifobject);
+ pthread_exit(NULL);
+}
- print_verbose("Interface [%s] vector [Tx]\n", ifobject->ifname);
- for (int i = 0; i < num_frames; i++) {
- /*send EOT frame */
- if (i == (num_frames - 1))
- data.seqnum = -1;
- else
- data.seqnum = i;
- gen_udp_hdr(&data, ifobject, udp_hdr);
- gen_ip_hdr(ifobject, ip_hdr);
- gen_udp_csum(udp_hdr, ip_hdr);
- gen_eth_hdr(ifobject, eth_hdr);
- gen_eth_frame(ifobject->umem, i * XSK_UMEM__DEFAULT_FRAME_SIZE);
- }
+static void *worker_testapp_validate_rx(void *arg)
+{
+ struct ifobject *ifobject = (struct ifobject *)arg;
+ struct pollfd fds[MAX_SOCKS] = { };
+ void *bufs = NULL;
- print_verbose("Sending %d packets on interface %s\n",
- (opt_pkt_count - 1), ifobject->ifname);
- tx_only_all(ifobject);
- } else if (ifobject->fv.vector == rx) {
- struct pollfd fds[MAX_SOCKS] = { };
- int ret;
-
- if (!bidi_pass)
- thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_rx);
-
- print_verbose("Interface [%s] vector [Rx]\n", ifobject->ifname);
- if (stat_test_type != STAT_TEST_RX_FILL_EMPTY)
- xsk_populate_fill_ring(ifobject->umem);
-
- TAILQ_INIT(&head);
- if (debug_pkt_dump) {
- pkt_buf = calloc(num_frames, sizeof(*pkt_buf));
- if (!pkt_buf)
- exit_with_error(errno);
- }
+ if (!second_step)
+ thread_common_ops(ifobject, bufs);
- fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
- fds[0].events = POLLIN;
+ if (stat_test_type != STAT_TEST_RX_FILL_EMPTY)
+ xsk_populate_fill_ring(ifobject->umem);
- pthread_mutex_lock(&sync_mutex);
- pthread_cond_signal(&signal_rx_condition);
- pthread_mutex_unlock(&sync_mutex);
+ TAILQ_INIT(&head);
+ if (debug_pkt_dump) {
+ pkt_buf = calloc(num_frames, sizeof(*pkt_buf));
+ if (!pkt_buf)
+ exit_with_error(errno);
+ }
- while (1) {
- if (test_type == TEST_TYPE_POLL) {
- ret = poll(fds, 1, POLL_TMOUT);
- if (ret <= 0)
- continue;
- }
+ fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
+ fds[0].events = POLLIN;
- if (test_type != TEST_TYPE_STATS) {
- rx_pkt(ifobject->xsk, fds);
- worker_pkt_validate();
- } else {
- worker_stats_validate(ifobject);
- }
+ pthread_barrier_wait(&barr);
- if (sigvar)
- break;
+ while (1) {
+ if (test_type != TEST_TYPE_STATS) {
+ rx_pkt(ifobject->xsk, fds);
+ worker_pkt_validate();
+ } else {
+ worker_stats_validate(ifobject);
}
+ if (sigvar)
+ break;
+ }
- if (test_type != TEST_TYPE_STATS)
- print_verbose("Received %d packets on interface %s\n",
- pkt_counter, ifobject->ifname);
+ print_verbose("Received %d packets on interface %s\n",
+ pkt_counter, ifobject->ifname);
- if (test_type == TEST_TYPE_TEARDOWN)
- print_verbose("Destroying socket\n");
- }
+ if (test_type == TEST_TYPE_TEARDOWN)
+ print_verbose("Destroying socket\n");
- if ((test_type != TEST_TYPE_BIDI) || bidi_pass) {
- xsk_socket__delete(ifobject->xsk->xsk);
- (void)xsk_umem__delete(ifobject->umem->umem);
- }
+ testapp_cleanup_xsk_res(ifobject);
pthread_exit(NULL);
}
static void testapp_validate(void)
{
- struct timespec max_wait = { 0, 0 };
bool bidi = test_type == TEST_TYPE_BIDI;
+ bool bpf = test_type == TEST_TYPE_BPF_RES;
- pthread_attr_init(&attr);
- pthread_attr_setstacksize(&attr, THREAD_STACK);
-
- if ((test_type == TEST_TYPE_BIDI) && bidi_pass) {
- pthread_init_mutex();
- if (!switching_notify) {
- print_verbose("Switching Tx/Rx vectors\n");
- switching_notify++;
- }
- }
-
- pthread_mutex_lock(&sync_mutex);
+ if (pthread_barrier_init(&barr, NULL, 2))
+ exit_with_error(errno);
/*Spawn RX thread */
- if (!bidi || !bidi_pass) {
- if (pthread_create(&t0, &attr, worker_testapp_validate, ifdict[1]))
- exit_with_error(errno);
- } else if (bidi && bidi_pass) {
- /*switch Tx/Rx vectors */
- ifdict[0]->fv.vector = rx;
- if (pthread_create(&t0, &attr, worker_testapp_validate, ifdict[0]))
- exit_with_error(errno);
- }
-
- if (clock_gettime(CLOCK_REALTIME, &max_wait))
- exit_with_error(errno);
- max_wait.tv_sec += TMOUT_SEC;
+ pthread_create(&t0, NULL, ifdict_rx->func_ptr, ifdict_rx);
- if (pthread_cond_timedwait(&signal_rx_condition, &sync_mutex, &max_wait) == ETIMEDOUT)
+ pthread_barrier_wait(&barr);
+ if (pthread_barrier_destroy(&barr))
exit_with_error(errno);
- pthread_mutex_unlock(&sync_mutex);
-
/*Spawn TX thread */
- if (!bidi || !bidi_pass) {
- if (pthread_create(&t1, &attr, worker_testapp_validate, ifdict[0]))
- exit_with_error(errno);
- } else if (bidi && bidi_pass) {
- /*switch Tx/Rx vectors */
- ifdict[1]->fv.vector = tx;
- if (pthread_create(&t1, &attr, worker_testapp_validate, ifdict[1]))
- exit_with_error(errno);
- }
+ pthread_create(&t1, NULL, ifdict_tx->func_ptr, ifdict_tx);
pthread_join(t1, NULL);
pthread_join(t0, NULL);
- if (debug_pkt_dump) {
+ if (debug_pkt_dump && test_type != TEST_TYPE_STATS) {
worker_pkt_dump();
for (int iter = 0; iter < num_frames - 1; iter++) {
free(pkt_buf[iter]->payload);
@@ -1008,20 +916,85 @@ static void testapp_validate(void)
free(pkt_buf);
}
- if (!(test_type == TEST_TYPE_TEARDOWN) && !bidi && !(test_type == TEST_TYPE_STATS))
+ if (!(test_type == TEST_TYPE_TEARDOWN) && !bidi && !bpf && !(test_type == TEST_TYPE_STATS))
print_ksft_result();
}
-static void testapp_sockets(void)
+static void testapp_teardown(void)
{
- for (int i = 0; i < ((test_type == TEST_TYPE_TEARDOWN) ? MAX_TEARDOWN_ITER : MAX_BIDI_ITER);
- i++) {
+ int i;
+
+ for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
pkt_counter = 0;
prev_pkt = -1;
sigvar = 0;
print_verbose("Creating socket\n");
testapp_validate();
- test_type == TEST_TYPE_BIDI ? bidi_pass++ : bidi_pass;
+ }
+
+ print_ksft_result();
+}
+
+static void swap_vectors(struct ifobject *ifobj1, struct ifobject *ifobj2)
+{
+ void *(*tmp_func_ptr)(void *) = ifobj1->func_ptr;
+ enum fvector tmp_vector = ifobj1->fv.vector;
+
+ ifobj1->func_ptr = ifobj2->func_ptr;
+ ifobj1->fv.vector = ifobj2->fv.vector;
+
+ ifobj2->func_ptr = tmp_func_ptr;
+ ifobj2->fv.vector = tmp_vector;
+
+ ifdict_tx = ifobj1;
+ ifdict_rx = ifobj2;
+}
+
+static void testapp_bidi(void)
+{
+ for (int i = 0; i < MAX_BIDI_ITER; i++) {
+ pkt_counter = 0;
+ prev_pkt = -1;
+ sigvar = 0;
+ print_verbose("Creating socket\n");
+ testapp_validate();
+ if (!second_step) {
+ print_verbose("Switching Tx/Rx vectors\n");
+ swap_vectors(ifdict[1], ifdict[0]);
+ }
+ second_step = true;
+ }
+
+ swap_vectors(ifdict[0], ifdict[1]);
+
+ print_ksft_result();
+}
+
+static void swap_xsk_res(void)
+{
+ xsk_socket__delete(ifdict_tx->xsk->xsk);
+ xsk_umem__delete(ifdict_tx->umem->umem);
+ xsk_socket__delete(ifdict_rx->xsk->xsk);
+ xsk_umem__delete(ifdict_rx->umem->umem);
+ ifdict_tx->umem = ifdict_tx->umem_arr[1];
+ ifdict_tx->xsk = ifdict_tx->xsk_arr[1];
+ ifdict_rx->umem = ifdict_rx->umem_arr[1];
+ ifdict_rx->xsk = ifdict_rx->xsk_arr[1];
+}
+
+static void testapp_bpf_res(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_BPF_ITER; i++) {
+ pkt_counter = 0;
+ prev_pkt = -1;
+ sigvar = 0;
+ print_verbose("Creating socket\n");
+ testapp_validate();
+ if (!second_step)
+ swap_xsk_res();
+ second_step = true;
}
print_ksft_result();
@@ -1053,78 +1026,33 @@ static void testapp_stats(void)
print_ksft_result();
}
-static void init_iface_config(struct ifaceconfigobj *ifaceconfig)
-{
- /*Init interface0 */
- ifdict[0]->fv.vector = tx;
- memcpy(ifdict[0]->dst_mac, ifaceconfig->dst_mac, ETH_ALEN);
- memcpy(ifdict[0]->src_mac, ifaceconfig->src_mac, ETH_ALEN);
- ifdict[0]->dst_ip = ifaceconfig->dst_ip.s_addr;
- ifdict[0]->src_ip = ifaceconfig->src_ip.s_addr;
- ifdict[0]->dst_port = ifaceconfig->dst_port;
- ifdict[0]->src_port = ifaceconfig->src_port;
-
- /*Init interface1 */
- ifdict[1]->fv.vector = rx;
- memcpy(ifdict[1]->dst_mac, ifaceconfig->src_mac, ETH_ALEN);
- memcpy(ifdict[1]->src_mac, ifaceconfig->dst_mac, ETH_ALEN);
- ifdict[1]->dst_ip = ifaceconfig->src_ip.s_addr;
- ifdict[1]->src_ip = ifaceconfig->dst_ip.s_addr;
- ifdict[1]->dst_port = ifaceconfig->src_port;
- ifdict[1]->src_port = ifaceconfig->dst_port;
-}
-
-static void *nsdisablemodethread(void *args)
+static void init_iface(struct ifobject *ifobj, const char *dst_mac,
+ const char *src_mac, const char *dst_ip,
+ const char *src_ip, const u16 dst_port,
+ const u16 src_port, enum fvector vector)
{
- struct targs *targs = args;
+ struct in_addr ip;
- targs->retptr = false;
+ memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN);
+ memcpy(ifobj->src_mac, src_mac, ETH_ALEN);
- if (switch_namespace(targs->idx)) {
- targs->retptr = bpf_set_link_xdp_fd(ifdict[targs->idx]->ifindex, -1, targs->flags);
- } else {
- targs->retptr = errno;
- print_verbose("Failed to switch namespace to %s\n", ifdict[targs->idx]->nsname);
- }
-
- pthread_exit(NULL);
-}
+ inet_aton(dst_ip, &ip);
+ ifobj->dst_ip = ip.s_addr;
-static void disable_xdp_mode(int mode)
-{
- int err = 0;
- __u32 flags = XDP_FLAGS_UPDATE_IF_NOEXIST | mode;
- char *mode_str = mode & XDP_FLAGS_SKB_MODE ? "skb" : "drv";
+ inet_aton(src_ip, &ip);
+ ifobj->src_ip = ip.s_addr;
- for (int i = 0; i < MAX_INTERFACES; i++) {
- if (strcmp(ifdict[i]->nsname, "")) {
- struct targs *targs;
-
- targs = malloc(sizeof(*targs));
- memset(targs, 0, sizeof(*targs));
- if (!targs)
- exit_with_error(errno);
-
- targs->idx = i;
- targs->flags = flags;
- if (pthread_create(&ns_thread, NULL, nsdisablemodethread, targs))
- exit_with_error(errno);
-
- pthread_join(ns_thread, NULL);
- err = targs->retptr;
- free(targs);
- } else {
- err = bpf_set_link_xdp_fd(ifdict[i]->ifindex, -1, flags);
- }
-
- if (err) {
- print_verbose("Failed to disable %s mode on interface %s\n",
- mode_str, ifdict[i]->ifname);
- exit_with_error(err);
- }
+ ifobj->dst_port = dst_port;
+ ifobj->src_port = src_port;
- print_verbose("Disabled %s mode for interface: %s\n", mode_str, ifdict[i]->ifname);
- configured_mode = mode & XDP_FLAGS_SKB_MODE ? TEST_MODE_DRV : TEST_MODE_SKB;
+ if (vector == tx) {
+ ifobj->fv.vector = tx;
+ ifobj->func_ptr = worker_testapp_validate_tx;
+ ifdict_tx = ifobj;
+ } else {
+ ifobj->fv.vector = rx;
+ ifobj->func_ptr = worker_testapp_validate_rx;
+ ifdict_rx = ifobj;
}
}
@@ -1135,72 +1063,70 @@ static void run_pkt_test(int mode, int type)
/* reset defaults after potential previous test */
xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
pkt_counter = 0;
- switching_notify = 0;
- bidi_pass = 0;
+ second_step = 0;
prev_pkt = -1;
- ifdict[0]->fv.vector = tx;
- ifdict[1]->fv.vector = rx;
sigvar = 0;
stat_test_type = -1;
rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
+ configured_mode = mode;
+
switch (mode) {
case (TEST_MODE_SKB):
- if (configured_mode == TEST_MODE_DRV)
- disable_xdp_mode(XDP_FLAGS_DRV_MODE);
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
case (TEST_MODE_DRV):
- if (configured_mode == TEST_MODE_SKB)
- disable_xdp_mode(XDP_FLAGS_SKB_MODE);
xdp_flags |= XDP_FLAGS_DRV_MODE;
break;
default:
break;
}
- pthread_init_mutex();
-
- if (test_type == TEST_TYPE_STATS)
+ switch (test_type) {
+ case TEST_TYPE_STATS:
testapp_stats();
- else if ((test_type != TEST_TYPE_TEARDOWN) && (test_type != TEST_TYPE_BIDI))
+ break;
+ case TEST_TYPE_TEARDOWN:
+ testapp_teardown();
+ break;
+ case TEST_TYPE_BIDI:
+ testapp_bidi();
+ break;
+ case TEST_TYPE_BPF_RES:
+ testapp_bpf_res();
+ break;
+ default:
testapp_validate();
- else
- testapp_sockets();
-
- pthread_destroy_mutex();
+ break;
+ }
}
int main(int argc, char **argv)
{
struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY };
+ bool failure = false;
+ int i, j;
if (setrlimit(RLIMIT_MEMLOCK, &_rlim))
exit_with_error(errno);
- const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62";
- const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61";
- const char *IP1 = "192.168.100.162";
- const char *IP2 = "192.168.100.161";
- u16 UDP_DST_PORT = 2020;
- u16 UDP_SRC_PORT = 2121;
- int i, j;
-
- ifaceconfig = malloc(sizeof(struct ifaceconfigobj));
- memcpy(ifaceconfig->dst_mac, MAC1, ETH_ALEN);
- memcpy(ifaceconfig->src_mac, MAC2, ETH_ALEN);
- inet_aton(IP1, &ifaceconfig->dst_ip);
- inet_aton(IP2, &ifaceconfig->src_ip);
- ifaceconfig->dst_port = UDP_DST_PORT;
- ifaceconfig->src_port = UDP_SRC_PORT;
-
for (int i = 0; i < MAX_INTERFACES; i++) {
ifdict[i] = malloc(sizeof(struct ifobject));
if (!ifdict[i])
exit_with_error(errno);
ifdict[i]->ifdict_index = i;
+ ifdict[i]->xsk_arr = calloc(2, sizeof(struct xsk_socket_info *));
+ if (!ifdict[i]->xsk_arr) {
+ failure = true;
+ goto cleanup;
+ }
+ ifdict[i]->umem_arr = calloc(2, sizeof(struct xsk_umem_info *));
+ if (!ifdict[i]->umem_arr) {
+ failure = true;
+ goto cleanup;
+ }
}
setlocale(LC_ALL, "");
@@ -1209,9 +1135,8 @@ int main(int argc, char **argv)
num_frames = ++opt_pkt_count;
- init_iface_config(ifaceconfig);
-
- disable_xdp_mode(XDP_FLAGS_DRV_MODE);
+ init_iface(ifdict[0], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx);
+ init_iface(ifdict[1], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx);
ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX);
@@ -1220,8 +1145,17 @@ int main(int argc, char **argv)
run_pkt_test(i, j);
}
- for (int i = 0; i < MAX_INTERFACES; i++)
+cleanup:
+ for (int i = 0; i < MAX_INTERFACES; i++) {
+ if (ifdict[i]->ns_fd != -1)
+ close(ifdict[i]->ns_fd);
+ free(ifdict[i]->xsk_arr);
+ free(ifdict[i]->umem_arr);
free(ifdict[i]);
+ }
+
+ if (failure)
+ exit_with_error(errno);
ksft_exit_pass();
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 30314ef305c2..6c428b276ab6 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -23,6 +23,7 @@
#define MAX_SOCKS 1
#define MAX_TEARDOWN_ITER 10
#define MAX_BIDI_ITER 2
+#define MAX_BPF_ITER 2
#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
sizeof(struct udphdr))
#define MIN_PKT_SIZE 64
@@ -33,14 +34,11 @@
#define IP_PKT_TOS 0x9
#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
-#define TMOUT_SEC (3)
#define EOT (-1)
#define USLEEP_MAX 200000
-#define THREAD_STACK 60000000
#define SOCK_RECONF_CTR 10
#define BATCH_SIZE 64
#define POLL_TMOUT 1000
-#define NEED_WAKEUP true
#define DEFAULT_PKT_CNT 10000
#define RX_FULL_RXQSIZE 32
@@ -63,6 +61,7 @@ enum TEST_TYPES {
TEST_TYPE_TEARDOWN,
TEST_TYPE_BIDI,
TEST_TYPE_STATS,
+ TEST_TYPE_BPF_RES,
TEST_TYPE_MAX
};
@@ -77,11 +76,9 @@ enum STAT_TEST_TYPES {
static int configured_mode = TEST_MODE_UNCONFIGURED;
static u8 debug_pkt_dump;
static u32 num_frames;
-static u8 switching_notify;
-static u8 bidi_pass;
+static bool second_step;
static int test_type;
-static int opt_queue;
static int opt_pkt_count;
static u8 opt_verbose;
@@ -125,48 +122,32 @@ struct generic_data {
u32 seqnum;
};
-struct ifaceconfigobj {
- u8 dst_mac[ETH_ALEN];
- u8 src_mac[ETH_ALEN];
- struct in_addr dst_ip;
- struct in_addr src_ip;
- u16 src_port;
- u16 dst_port;
-} *ifaceconfig;
-
struct ifobject {
- int ifindex;
- int ifdict_index;
char ifname[MAX_INTERFACE_NAME_CHARS];
char nsname[MAX_INTERFACES_NAMESPACE_CHARS];
- struct flow_vector fv;
struct xsk_socket_info *xsk;
+ struct xsk_socket_info **xsk_arr;
+ struct xsk_umem_info **umem_arr;
struct xsk_umem_info *umem;
- u8 dst_mac[ETH_ALEN];
- u8 src_mac[ETH_ALEN];
+ void *(*func_ptr)(void *arg);
+ struct flow_vector fv;
+ int ns_fd;
+ int ifdict_index;
u32 dst_ip;
u32 src_ip;
u16 src_port;
u16 dst_port;
+ u8 dst_mac[ETH_ALEN];
+ u8 src_mac[ETH_ALEN];
};
static struct ifobject *ifdict[MAX_INTERFACES];
+static struct ifobject *ifdict_rx;
+static struct ifobject *ifdict_tx;
/*threads*/
-atomic_int spinning_tx;
-atomic_int spinning_rx;
-pthread_mutex_t sync_mutex;
-pthread_mutex_t sync_mutex_tx;
-pthread_cond_t signal_rx_condition;
-pthread_cond_t signal_tx_condition;
-pthread_t t0, t1, ns_thread;
-pthread_attr_t attr;
-
-struct targs {
- u8 retptr;
- int idx;
- u32 flags;
-};
+pthread_barrier_t barr;
+pthread_t t0, t1;
TAILQ_HEAD(head_s, pkt) head = TAILQ_HEAD_INITIALIZER(head);
struct head_s *head_p;