summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2020-06-01 15:53:08 -0700
committerDavid S. Miller <davem@davemloft.net>2020-06-01 15:53:08 -0700
commit9a25c1df24a6fea9dc79eec950453c4e00f707fd (patch)
tree1188078f9838a3b6a60a3923ed31df142ffc8ed6 /tools
parentefd7ed0f5f2d07ccbb1853c5d46656cdfa1371fb (diff)
parentcf51abcded837ef209faa03a62b2ea44e45995e8 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says: ==================== pull-request: bpf-next 2020-06-01 The following pull-request contains BPF updates for your *net-next* tree. We've added 55 non-merge commits during the last 1 day(s) which contain a total of 91 files changed, 4986 insertions(+), 463 deletions(-). The main changes are: 1) Add rx_queue_mapping to bpf_sock from Amritha. 2) Add BPF ring buffer, from Andrii. 3) Attach and run programs through devmap, from David. 4) Allow SO_BINDTODEVICE opt in bpf_setsockopt, from Ferenc. 5) link based flow_dissector, from Jakub. 6) Use tracing helpers for lsm programs, from Jiri. 7) Several sk_msg fixes and extensions, from John. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'tools')
-rw-r--r--tools/bpf/bpftool/btf.c10
-rw-r--r--tools/bpf/bpftool/cgroup.c14
-rw-r--r--tools/bpf/bpftool/feature.c91
-rw-r--r--tools/bpf/bpftool/gen.c6
-rw-r--r--tools/bpf/bpftool/iter.c8
-rw-r--r--tools/bpf/bpftool/link.c55
-rw-r--r--tools/bpf/bpftool/map.c41
-rw-r--r--tools/bpf/bpftool/net.c12
-rw-r--r--tools/bpf/bpftool/perf.c2
-rw-r--r--tools/bpf/bpftool/prog.c27
-rw-r--r--tools/bpf/bpftool/struct_ops.c15
-rw-r--r--tools/include/uapi/linux/bpf.h95
-rw-r--r--tools/lib/bpf/Build2
-rw-r--r--tools/lib/bpf/Makefile6
-rw-r--r--tools/lib/bpf/libbpf.c49
-rw-r--r--tools/lib/bpf/libbpf.h24
-rw-r--r--tools/lib/bpf/libbpf.map7
-rw-r--r--tools/lib/bpf/libbpf_probes.c5
-rw-r--r--tools/lib/bpf/ringbuf.c288
-rw-r--r--tools/testing/selftests/bpf/Makefile5
-rw-r--r--tools/testing/selftests/bpf/bench.c16
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_ringbufs.c566
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh75
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector.c166
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c588
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf.c211
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c102
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skb_helpers.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c35
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c97
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_flow.c20
-rw-r--r--tools/testing/selftests/bpf/progs/connect4_prog.c33
-rw-r--r--tools/testing/selftests/bpf/progs/perfbuf_bench.c33
-rw-r--r--tools/testing/selftests/bpf/progs/ringbuf_bench.c60
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf.c78
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_multi.c77
-rw-r--r--tools/testing/selftests/bpf/progs/test_skb_helpers.c28
-rw-r--r--tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c47
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_kern.h46
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c22
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c44
-rw-r--r--tools/testing/selftests/bpf/test_maps.c52
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c163
-rw-r--r--tools/testing/selftests/bpf/verifier/and.c4
-rw-r--r--tools/testing/selftests/bpf/verifier/array_access.c4
-rw-r--r--tools/testing/selftests/bpf/verifier/bounds.c6
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/direct_value_access.c4
-rw-r--r--tools/testing/selftests/bpf/verifier/helper_access_var_len.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/helper_value_access.c6
-rw-r--r--tools/testing/selftests/bpf/verifier/value_ptr_arith.c8
51 files changed, 3127 insertions, 260 deletions
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 41a1346934a1..faac8189b285 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -553,7 +553,7 @@ static int do_dump(int argc, char **argv)
btf = btf__parse_elf(*argv, NULL);
if (IS_ERR(btf)) {
- err = PTR_ERR(btf);
+ err = -PTR_ERR(btf);
btf = NULL;
p_err("failed to load BTF from %s: %s",
*argv, strerror(err));
@@ -951,9 +951,9 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %s btf { show | list } [id BTF_ID]\n"
- " %s btf dump BTF_SRC [format FORMAT]\n"
- " %s btf help\n"
+ "Usage: %1$s %2$s { show | list } [id BTF_ID]\n"
+ " %1$s %2$s dump BTF_SRC [format FORMAT]\n"
+ " %1$s %2$s help\n"
"\n"
" BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] | file FILE }\n"
" FORMAT := { raw | c }\n"
@@ -961,7 +961,7 @@ static int do_help(int argc, char **argv)
" " HELP_SPEC_PROGRAM "\n"
" " HELP_SPEC_OPTIONS "\n"
"",
- bin_name, bin_name, bin_name);
+ bin_name, "btf");
return 0;
}
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 27931db421d8..d901cc1b904a 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -491,20 +491,18 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %s %s { show | list } CGROUP [**effective**]\n"
- " %s %s tree [CGROUP_ROOT] [**effective**]\n"
- " %s %s attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS]\n"
- " %s %s detach CGROUP ATTACH_TYPE PROG\n"
- " %s %s help\n"
+ "Usage: %1$s %2$s { show | list } CGROUP [**effective**]\n"
+ " %1$s %2$s tree [CGROUP_ROOT] [**effective**]\n"
+ " %1$s %2$s attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS]\n"
+ " %1$s %2$s detach CGROUP ATTACH_TYPE PROG\n"
+ " %1$s %2$s help\n"
"\n"
HELP_SPEC_ATTACH_TYPES "\n"
" " HELP_SPEC_ATTACH_FLAGS "\n"
" " HELP_SPEC_PROGRAM "\n"
" " HELP_SPEC_OPTIONS "\n"
"",
- bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2]);
+ bin_name, argv[-2]);
return 0;
}
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 1b73e63274b5..768bf77df886 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -758,11 +758,29 @@ static void section_misc(const char *define_prefix, __u32 ifindex)
print_end_section();
}
+#ifdef USE_LIBCAP
+#define capability(c) { c, false, #c }
+#define capability_msg(a, i) a[i].set ? "" : a[i].name, a[i].set ? "" : ", "
+#endif
+
static int handle_perms(void)
{
#ifdef USE_LIBCAP
- cap_value_t cap_list[1] = { CAP_SYS_ADMIN };
- bool has_sys_admin_cap = false;
+ struct {
+ cap_value_t cap;
+ bool set;
+ char name[14]; /* strlen("CAP_SYS_ADMIN") */
+ } bpf_caps[] = {
+ capability(CAP_SYS_ADMIN),
+#ifdef CAP_BPF
+ capability(CAP_BPF),
+ capability(CAP_NET_ADMIN),
+ capability(CAP_PERFMON),
+#endif
+ };
+ cap_value_t cap_list[ARRAY_SIZE(bpf_caps)];
+ unsigned int i, nb_bpf_caps = 0;
+ bool cap_sys_admin_only = true;
cap_flag_value_t val;
int res = -1;
cap_t caps;
@@ -774,35 +792,64 @@ static int handle_perms(void)
return -1;
}
- if (cap_get_flag(caps, CAP_SYS_ADMIN, CAP_EFFECTIVE, &val)) {
- p_err("bug: failed to retrieve CAP_SYS_ADMIN status");
- goto exit_free;
- }
- if (val == CAP_SET)
- has_sys_admin_cap = true;
+#ifdef CAP_BPF
+ if (CAP_IS_SUPPORTED(CAP_BPF))
+ cap_sys_admin_only = false;
+#endif
- if (!run_as_unprivileged && !has_sys_admin_cap) {
- p_err("full feature probing requires CAP_SYS_ADMIN, run as root or use 'unprivileged'");
- goto exit_free;
+ for (i = 0; i < ARRAY_SIZE(bpf_caps); i++) {
+ const char *cap_name = bpf_caps[i].name;
+ cap_value_t cap = bpf_caps[i].cap;
+
+ if (cap_get_flag(caps, cap, CAP_EFFECTIVE, &val)) {
+ p_err("bug: failed to retrieve %s status: %s", cap_name,
+ strerror(errno));
+ goto exit_free;
+ }
+
+ if (val == CAP_SET) {
+ bpf_caps[i].set = true;
+ cap_list[nb_bpf_caps++] = cap;
+ }
+
+ if (cap_sys_admin_only)
+ /* System does not know about CAP_BPF, meaning that
+ * CAP_SYS_ADMIN is the only capability required. We
+ * just checked it, break.
+ */
+ break;
}
- if ((run_as_unprivileged && !has_sys_admin_cap) ||
- (!run_as_unprivileged && has_sys_admin_cap)) {
+ if ((run_as_unprivileged && !nb_bpf_caps) ||
+ (!run_as_unprivileged && nb_bpf_caps == ARRAY_SIZE(bpf_caps)) ||
+ (!run_as_unprivileged && cap_sys_admin_only && nb_bpf_caps)) {
/* We are all good, exit now */
res = 0;
goto exit_free;
}
- /* if (run_as_unprivileged && has_sys_admin_cap), drop CAP_SYS_ADMIN */
+ if (!run_as_unprivileged) {
+ if (cap_sys_admin_only)
+ p_err("missing %s, required for full feature probing; run as root or use 'unprivileged'",
+ bpf_caps[0].name);
+ else
+ p_err("missing %s%s%s%s%s%s%s%srequired for full feature probing; run as root or use 'unprivileged'",
+ capability_msg(bpf_caps, 0),
+ capability_msg(bpf_caps, 1),
+ capability_msg(bpf_caps, 2),
+ capability_msg(bpf_caps, 3));
+ goto exit_free;
+ }
- if (cap_set_flag(caps, CAP_EFFECTIVE, ARRAY_SIZE(cap_list), cap_list,
+ /* if (run_as_unprivileged && nb_bpf_caps > 0), drop capabilities. */
+ if (cap_set_flag(caps, CAP_EFFECTIVE, nb_bpf_caps, cap_list,
CAP_CLEAR)) {
- p_err("bug: failed to clear CAP_SYS_ADMIN from capabilities");
+ p_err("bug: failed to clear capabilities: %s", strerror(errno));
goto exit_free;
}
if (cap_set_proc(caps)) {
- p_err("failed to drop CAP_SYS_ADMIN: %s", strerror(errno));
+ p_err("failed to drop capabilities: %s", strerror(errno));
goto exit_free;
}
@@ -817,7 +864,7 @@ exit_free:
return res;
#else
- /* Detection assumes user has sufficient privileges (CAP_SYS_ADMIN).
+ /* Detection assumes user has specific privileges.
* We do not use libpcap so let's approximate, and restrict usage to
* root user only.
*/
@@ -901,7 +948,7 @@ static int do_probe(int argc, char **argv)
}
}
- /* Full feature detection requires CAP_SYS_ADMIN privilege.
+ /* Full feature detection requires specific privileges.
* Let's approximate, and warn if user is not root.
*/
if (handle_perms())
@@ -937,12 +984,12 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %s %s probe [COMPONENT] [full] [unprivileged] [macros [prefix PREFIX]]\n"
- " %s %s help\n"
+ "Usage: %1$s %2$s probe [COMPONENT] [full] [unprivileged] [macros [prefix PREFIX]]\n"
+ " %1$s %2$s help\n"
"\n"
" COMPONENT := { kernel | dev NAME }\n"
"",
- bin_name, argv[-2], bin_name, argv[-2]);
+ bin_name, argv[-2]);
return 0;
}
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 0e5f0236cc76..a3c4bb86c05a 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -586,12 +586,12 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %1$s gen skeleton FILE\n"
- " %1$s gen help\n"
+ "Usage: %1$s %2$s skeleton FILE\n"
+ " %1$s %2$s help\n"
"\n"
" " HELP_SPEC_OPTIONS "\n"
"",
- bin_name);
+ bin_name, "gen");
return 0;
}
diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c
index eb5987a0c3b6..33240fcc6319 100644
--- a/tools/bpf/bpftool/iter.c
+++ b/tools/bpf/bpftool/iter.c
@@ -68,10 +68,10 @@ close_obj:
static int do_help(int argc, char **argv)
{
fprintf(stderr,
- "Usage: %s %s pin OBJ PATH\n"
- " %s %s help\n"
- "\n",
- bin_name, argv[-2], bin_name, argv[-2]);
+ "Usage: %1$s %2$s pin OBJ PATH\n"
+ " %1$s %2$s help\n"
+ "",
+ bin_name, "iter");
return 0;
}
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index b6a0b35c78ae..fca57ee8fafe 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -17,6 +17,7 @@ static const char * const link_type_name[] = {
[BPF_LINK_TYPE_TRACING] = "tracing",
[BPF_LINK_TYPE_CGROUP] = "cgroup",
[BPF_LINK_TYPE_ITER] = "iter",
+ [BPF_LINK_TYPE_NETNS] = "netns",
};
static int link_parse_fd(int *argc, char ***argv)
@@ -62,6 +63,15 @@ show_link_header_json(struct bpf_link_info *info, json_writer_t *wtr)
jsonw_uint_field(json_wtr, "prog_id", info->prog_id);
}
+static void show_link_attach_type_json(__u32 attach_type, json_writer_t *wtr)
+{
+ if (attach_type < ARRAY_SIZE(attach_type_name))
+ jsonw_string_field(wtr, "attach_type",
+ attach_type_name[attach_type]);
+ else
+ jsonw_uint_field(wtr, "attach_type", attach_type);
+}
+
static int get_prog_info(int prog_id, struct bpf_prog_info *info)
{
__u32 len = sizeof(*info);
@@ -105,22 +115,18 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
jsonw_uint_field(json_wtr, "prog_type",
prog_info.type);
- if (info->tracing.attach_type < ARRAY_SIZE(attach_type_name))
- jsonw_string_field(json_wtr, "attach_type",
- attach_type_name[info->tracing.attach_type]);
- else
- jsonw_uint_field(json_wtr, "attach_type",
- info->tracing.attach_type);
+ show_link_attach_type_json(info->tracing.attach_type,
+ json_wtr);
break;
case BPF_LINK_TYPE_CGROUP:
jsonw_lluint_field(json_wtr, "cgroup_id",
info->cgroup.cgroup_id);
- if (info->cgroup.attach_type < ARRAY_SIZE(attach_type_name))
- jsonw_string_field(json_wtr, "attach_type",
- attach_type_name[info->cgroup.attach_type]);
- else
- jsonw_uint_field(json_wtr, "attach_type",
- info->cgroup.attach_type);
+ show_link_attach_type_json(info->cgroup.attach_type, json_wtr);
+ break;
+ case BPF_LINK_TYPE_NETNS:
+ jsonw_uint_field(json_wtr, "netns_ino",
+ info->netns.netns_ino);
+ show_link_attach_type_json(info->netns.attach_type, json_wtr);
break;
default:
break;
@@ -153,6 +159,14 @@ static void show_link_header_plain(struct bpf_link_info *info)
printf("prog %u ", info->prog_id);
}
+static void show_link_attach_type_plain(__u32 attach_type)
+{
+ if (attach_type < ARRAY_SIZE(attach_type_name))
+ printf("attach_type %s ", attach_type_name[attach_type]);
+ else
+ printf("attach_type %u ", attach_type);
+}
+
static int show_link_close_plain(int fd, struct bpf_link_info *info)
{
struct bpf_prog_info prog_info;
@@ -176,19 +190,15 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info)
else
printf("\n\tprog_type %u ", prog_info.type);
- if (info->tracing.attach_type < ARRAY_SIZE(attach_type_name))
- printf("attach_type %s ",
- attach_type_name[info->tracing.attach_type]);
- else
- printf("attach_type %u ", info->tracing.attach_type);
+ show_link_attach_type_plain(info->tracing.attach_type);
break;
case BPF_LINK_TYPE_CGROUP:
printf("\n\tcgroup_id %zu ", (size_t)info->cgroup.cgroup_id);
- if (info->cgroup.attach_type < ARRAY_SIZE(attach_type_name))
- printf("attach_type %s ",
- attach_type_name[info->cgroup.attach_type]);
- else
- printf("attach_type %u ", info->cgroup.attach_type);
+ show_link_attach_type_plain(info->cgroup.attach_type);
+ break;
+ case BPF_LINK_TYPE_NETNS:
+ printf("\n\tnetns_ino %u ", info->netns.netns_ino);
+ show_link_attach_type_plain(info->netns.attach_type);
break;
default:
break;
@@ -312,7 +322,6 @@ static int do_help(int argc, char **argv)
" %1$s %2$s help\n"
"\n"
" " HELP_SPEC_LINK "\n"
- " " HELP_SPEC_PROGRAM "\n"
" " HELP_SPEC_OPTIONS "\n"
"",
bin_name, argv[-2]);
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 85cbe9a19170..c5fac8068ba1 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -1561,24 +1561,24 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %s %s { show | list } [MAP]\n"
- " %s %s create FILE type TYPE key KEY_SIZE value VALUE_SIZE \\\n"
- " entries MAX_ENTRIES name NAME [flags FLAGS] \\\n"
- " [dev NAME]\n"
- " %s %s dump MAP\n"
- " %s %s update MAP [key DATA] [value VALUE] [UPDATE_FLAGS]\n"
- " %s %s lookup MAP [key DATA]\n"
- " %s %s getnext MAP [key DATA]\n"
- " %s %s delete MAP key DATA\n"
- " %s %s pin MAP FILE\n"
- " %s %s event_pipe MAP [cpu N index M]\n"
- " %s %s peek MAP\n"
- " %s %s push MAP value VALUE\n"
- " %s %s pop MAP\n"
- " %s %s enqueue MAP value VALUE\n"
- " %s %s dequeue MAP\n"
- " %s %s freeze MAP\n"
- " %s %s help\n"
+ "Usage: %1$s %2$s { show | list } [MAP]\n"
+ " %1$s %2$s create FILE type TYPE key KEY_SIZE value VALUE_SIZE \\\n"
+ " entries MAX_ENTRIES name NAME [flags FLAGS] \\\n"
+ " [dev NAME]\n"
+ " %1$s %2$s dump MAP\n"
+ " %1$s %2$s update MAP [key DATA] [value VALUE] [UPDATE_FLAGS]\n"
+ " %1$s %2$s lookup MAP [key DATA]\n"
+ " %1$s %2$s getnext MAP [key DATA]\n"
+ " %1$s %2$s delete MAP key DATA\n"
+ " %1$s %2$s pin MAP FILE\n"
+ " %1$s %2$s event_pipe MAP [cpu N index M]\n"
+ " %1$s %2$s peek MAP\n"
+ " %1$s %2$s push MAP value VALUE\n"
+ " %1$s %2$s pop MAP\n"
+ " %1$s %2$s enqueue MAP value VALUE\n"
+ " %1$s %2$s dequeue MAP\n"
+ " %1$s %2$s freeze MAP\n"
+ " %1$s %2$s help\n"
"\n"
" " HELP_SPEC_MAP "\n"
" DATA := { [hex] BYTES }\n"
@@ -1593,11 +1593,6 @@ static int do_help(int argc, char **argv)
" queue | stack | sk_storage | struct_ops }\n"
" " HELP_SPEC_OPTIONS "\n"
"",
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
bin_name, argv[-2]);
return 0;
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index c5e3895b7c8b..56c3a2bae3ef 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -458,10 +458,10 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %s %s { show | list } [dev <devname>]\n"
- " %s %s attach ATTACH_TYPE PROG dev <devname> [ overwrite ]\n"
- " %s %s detach ATTACH_TYPE dev <devname>\n"
- " %s %s help\n"
+ "Usage: %1$s %2$s { show | list } [dev <devname>]\n"
+ " %1$s %2$s attach ATTACH_TYPE PROG dev <devname> [ overwrite ]\n"
+ " %1$s %2$s detach ATTACH_TYPE dev <devname>\n"
+ " %1$s %2$s help\n"
"\n"
" " HELP_SPEC_PROGRAM "\n"
" ATTACH_TYPE := { xdp | xdpgeneric | xdpdrv | xdpoffload }\n"
@@ -470,8 +470,8 @@ static int do_help(int argc, char **argv)
" For progs attached to cgroups, use \"bpftool cgroup\"\n"
" to dump program attachments. For program types\n"
" sk_{filter,skb,msg,reuseport} and lwt/seg6, please\n"
- " consult iproute2.\n",
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
+ " consult iproute2.\n"
+ "",
bin_name, argv[-2]);
return 0;
diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c
index 3341aa14acda..ad23934819c7 100644
--- a/tools/bpf/bpftool/perf.c
+++ b/tools/bpf/bpftool/perf.c
@@ -231,7 +231,7 @@ static int do_show(int argc, char **argv)
static int do_help(int argc, char **argv)
{
fprintf(stderr,
- "Usage: %s %s { show | list | help }\n"
+ "Usage: %1$s %2$s { show | list | help }\n"
"",
bin_name, argv[-2]);
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 245f941fdbcf..a5eff83496f2 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -1984,24 +1984,24 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %s %s { show | list } [PROG]\n"
- " %s %s dump xlated PROG [{ file FILE | opcodes | visual | linum }]\n"
- " %s %s dump jited PROG [{ file FILE | opcodes | linum }]\n"
- " %s %s pin PROG FILE\n"
- " %s %s { load | loadall } OBJ PATH \\\n"
+ "Usage: %1$s %2$s { show | list } [PROG]\n"
+ " %1$s %2$s dump xlated PROG [{ file FILE | opcodes | visual | linum }]\n"
+ " %1$s %2$s dump jited PROG [{ file FILE | opcodes | linum }]\n"
+ " %1$s %2$s pin PROG FILE\n"
+ " %1$s %2$s { load | loadall } OBJ PATH \\\n"
" [type TYPE] [dev NAME] \\\n"
" [map { idx IDX | name NAME } MAP]\\\n"
" [pinmaps MAP_DIR]\n"
- " %s %s attach PROG ATTACH_TYPE [MAP]\n"
- " %s %s detach PROG ATTACH_TYPE [MAP]\n"
- " %s %s run PROG \\\n"
+ " %1$s %2$s attach PROG ATTACH_TYPE [MAP]\n"
+ " %1$s %2$s detach PROG ATTACH_TYPE [MAP]\n"
+ " %1$s %2$s run PROG \\\n"
" data_in FILE \\\n"
" [data_out FILE [data_size_out L]] \\\n"
" [ctx_in FILE [ctx_out FILE [ctx_size_out M]]] \\\n"
" [repeat N]\n"
- " %s %s profile PROG [duration DURATION] METRICs\n"
- " %s %s tracelog\n"
- " %s %s help\n"
+ " %1$s %2$s profile PROG [duration DURATION] METRICs\n"
+ " %1$s %2$s tracelog\n"
+ " %1$s %2$s help\n"
"\n"
" " HELP_SPEC_MAP "\n"
" " HELP_SPEC_PROGRAM "\n"
@@ -2022,10 +2022,7 @@ static int do_help(int argc, char **argv)
" METRIC := { cycles | instructions | l1d_loads | llc_misses }\n"
" " HELP_SPEC_OPTIONS "\n"
"",
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2]);
+ bin_name, argv[-2]);
return 0;
}
diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c
index e17738479edc..b58b91f62ffb 100644
--- a/tools/bpf/bpftool/struct_ops.c
+++ b/tools/bpf/bpftool/struct_ops.c
@@ -566,16 +566,15 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %s %s { show | list } [STRUCT_OPS_MAP]\n"
- " %s %s dump [STRUCT_OPS_MAP]\n"
- " %s %s register OBJ\n"
- " %s %s unregister STRUCT_OPS_MAP\n"
- " %s %s help\n"
+ "Usage: %1$s %2$s { show | list } [STRUCT_OPS_MAP]\n"
+ " %1$s %2$s dump [STRUCT_OPS_MAP]\n"
+ " %1$s %2$s register OBJ\n"
+ " %1$s %2$s unregister STRUCT_OPS_MAP\n"
+ " %1$s %2$s help\n"
"\n"
" OPTIONS := { {-j|--json} [{-p|--pretty}] }\n"
- " STRUCT_OPS_MAP := [ id STRUCT_OPS_MAP_ID | name STRUCT_OPS_MAP_NAME ]\n",
- bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2],
+ " STRUCT_OPS_MAP := [ id STRUCT_OPS_MAP_ID | name STRUCT_OPS_MAP_NAME ]\n"
+ "",
bin_name, argv[-2]);
return 0;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 97e1fd19ff58..b9ed9f14f2a2 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -147,6 +147,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_SK_STORAGE,
BPF_MAP_TYPE_DEVMAP_HASH,
BPF_MAP_TYPE_STRUCT_OPS,
+ BPF_MAP_TYPE_RINGBUF,
};
/* Note that tracing related programs such as
@@ -224,6 +225,7 @@ enum bpf_attach_type {
BPF_CGROUP_INET6_GETPEERNAME,
BPF_CGROUP_INET4_GETSOCKNAME,
BPF_CGROUP_INET6_GETSOCKNAME,
+ BPF_XDP_DEVMAP,
__MAX_BPF_ATTACH_TYPE
};
@@ -235,6 +237,7 @@ enum bpf_link_type {
BPF_LINK_TYPE_TRACING = 2,
BPF_LINK_TYPE_CGROUP = 3,
BPF_LINK_TYPE_ITER = 4,
+ BPF_LINK_TYPE_NETNS = 5,
MAX_BPF_LINK_TYPE,
};
@@ -3157,6 +3160,59 @@ union bpf_attr {
* **bpf_sk_cgroup_id**\ ().
* Return
* The id is returned or 0 in case the id could not be retrieved.
+ *
+ * void *bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
+ * Description
+ * Copy *size* bytes from *data* into a ring buffer *ringbuf*.
+ * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
+ * new data availability is sent.
+ * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
+ * new data availability is sent unconditionally.
+ * Return
+ * 0, on success;
+ * < 0, on error.
+ *
+ * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags)
+ * Description
+ * Reserve *size* bytes of payload in a ring buffer *ringbuf*.
+ * Return
+ * Valid pointer with *size* bytes of memory available; NULL,
+ * otherwise.
+ *
+ * void bpf_ringbuf_submit(void *data, u64 flags)
+ * Description
+ * Submit reserved ring buffer sample, pointed to by *data*.
+ * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
+ * new data availability is sent.
+ * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
+ * new data availability is sent unconditionally.
+ * Return
+ * Nothing. Always succeeds.
+ *
+ * void bpf_ringbuf_discard(void *data, u64 flags)
+ * Description
+ * Discard reserved ring buffer sample, pointed to by *data*.
+ * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
+ * new data availability is sent.
+ * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
+ * new data availability is sent unconditionally.
+ * Return
+ * Nothing. Always succeeds.
+ *
+ * u64 bpf_ringbuf_query(void *ringbuf, u64 flags)
+ * Description
+ * Query various characteristics of provided ring buffer. What
+ * exactly is queries is determined by *flags*:
+ * - BPF_RB_AVAIL_DATA - amount of data not yet consumed;
+ * - BPF_RB_RING_SIZE - the size of ring buffer;
+ * - BPF_RB_CONS_POS - consumer position (can wrap around);
+ * - BPF_RB_PROD_POS - producer(s) position (can wrap around);
+ * Data returned is just a momentary snapshots of actual values
+ * and could be inaccurate, so this facility should be used to
+ * power heuristics and for reporting, not to make 100% correct
+ * calculation.
+ * Return
+ * Requested value, or 0, if flags are not recognized.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3288,7 +3344,12 @@ union bpf_attr {
FN(seq_printf), \
FN(seq_write), \
FN(sk_cgroup_id), \
- FN(sk_ancestor_cgroup_id),
+ FN(sk_ancestor_cgroup_id), \
+ FN(ringbuf_output), \
+ FN(ringbuf_reserve), \
+ FN(ringbuf_submit), \
+ FN(ringbuf_discard), \
+ FN(ringbuf_query),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -3398,6 +3459,29 @@ enum {
BPF_F_GET_BRANCH_RECORDS_SIZE = (1ULL << 0),
};
+/* BPF_FUNC_bpf_ringbuf_commit, BPF_FUNC_bpf_ringbuf_discard, and
+ * BPF_FUNC_bpf_ringbuf_output flags.
+ */
+enum {
+ BPF_RB_NO_WAKEUP = (1ULL << 0),
+ BPF_RB_FORCE_WAKEUP = (1ULL << 1),
+};
+
+/* BPF_FUNC_bpf_ringbuf_query flags */
+enum {
+ BPF_RB_AVAIL_DATA = 0,
+ BPF_RB_RING_SIZE = 1,
+ BPF_RB_CONS_POS = 2,
+ BPF_RB_PROD_POS = 3,
+};
+
+/* BPF ring buffer constants */
+enum {
+ BPF_RINGBUF_BUSY_BIT = (1U << 31),
+ BPF_RINGBUF_DISCARD_BIT = (1U << 30),
+ BPF_RINGBUF_HDR_SZ = 8,
+};
+
/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
BPF_ADJ_ROOM_NET,
@@ -3530,6 +3614,7 @@ struct bpf_sock {
__u32 dst_ip4;
__u32 dst_ip6[4];
__u32 state;
+ __s32 rx_queue_mapping;
};
struct bpf_tcp_sock {
@@ -3623,6 +3708,8 @@ struct xdp_md {
/* Below access go through struct xdp_rxq_info */
__u32 ingress_ifindex; /* rxq->dev->ifindex */
__u32 rx_queue_index; /* rxq->queue_index */
+
+ __u32 egress_ifindex; /* txq->dev->ifindex */
};
enum sk_action {
@@ -3645,6 +3732,8 @@ struct sk_msg_md {
__u32 remote_port; /* Stored in network byte order */
__u32 local_port; /* stored in host byte order */
__u32 size; /* Total size of sk_msg */
+
+ __bpf_md_ptr(struct bpf_sock *, sk); /* current socket */
};
struct sk_reuseport_md {
@@ -3751,6 +3840,10 @@ struct bpf_link_info {
__u64 cgroup_id;
__u32 attach_type;
} cgroup;
+ struct {
+ __u32 netns_ino;
+ __u32 attach_type;
+ } netns;
};
} __attribute__((aligned(8)));
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index e3962cfbc9a6..190366d05588 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,3 +1,3 @@
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
- btf_dump.o
+ btf_dump.o ringbuf.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index aee7f1a83c77..bf8ed134cb8a 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -151,7 +151,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
sed 's/\[.*\]//' | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
sort -u | wc -l)
-VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \
+VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
CMD_TARGETS = $(LIB_TARGET) $(PC_FILE)
@@ -218,7 +218,7 @@ check_abi: $(OUTPUT)libbpf.so
sed 's/\[.*\]//' | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \
sort -u > $(OUTPUT)libbpf_global_syms.tmp; \
- readelf -s --wide $(OUTPUT)libbpf.so | \
+ readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \
sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \
diff -u $(OUTPUT)libbpf_global_syms.tmp \
@@ -264,7 +264,7 @@ install_pkgconfig: $(PC_FILE)
$(call QUIET_INSTALL, $(PC_FILE)) \
$(call do_install,$(PC_FILE),$(libdir_SQ)/pkgconfig,644)
-install: install_lib install_pkgconfig
+install: install_lib install_pkgconfig install_headers
### Cleaning rules
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index fa04cbe547ed..7f01be2b88b8 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6657,6 +6657,8 @@ static const struct bpf_sec_def section_defs[] = {
.expected_attach_type = BPF_TRACE_ITER,
.is_attach_btf = true,
.attach_fn = attach_iter),
+ BPF_EAPROG_SEC("xdp_devmap", BPF_PROG_TYPE_XDP,
+ BPF_XDP_DEVMAP),
BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
@@ -7894,8 +7896,9 @@ static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
return bpf_program__attach_iter(prog, NULL);
}
-struct bpf_link *
-bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
+static struct bpf_link *
+bpf_program__attach_fd(struct bpf_program *prog, int target_fd,
+ const char *target_name)
{
enum bpf_attach_type attach_type;
char errmsg[STRERR_BUFSIZE];
@@ -7915,12 +7918,12 @@ bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
link->detach = &bpf_link__detach_fd;
attach_type = bpf_program__get_expected_attach_type(prog);
- link_fd = bpf_link_create(prog_fd, cgroup_fd, attach_type, NULL);
+ link_fd = bpf_link_create(prog_fd, target_fd, attach_type, NULL);
if (link_fd < 0) {
link_fd = -errno;
free(link);
- pr_warn("program '%s': failed to attach to cgroup: %s\n",
- bpf_program__title(prog, false),
+ pr_warn("program '%s': failed to attach to %s: %s\n",
+ bpf_program__title(prog, false), target_name,
libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
return ERR_PTR(link_fd);
}
@@ -7929,6 +7932,18 @@ bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
}
struct bpf_link *
+bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
+{
+ return bpf_program__attach_fd(prog, cgroup_fd, "cgroup");
+}
+
+struct bpf_link *
+bpf_program__attach_netns(struct bpf_program *prog, int netns_fd)
+{
+ return bpf_program__attach_fd(prog, netns_fd, "netns");
+}
+
+struct bpf_link *
bpf_program__attach_iter(struct bpf_program *prog,
const struct bpf_iter_attach_opts *opts)
{
@@ -8137,9 +8152,12 @@ void perf_buffer__free(struct perf_buffer *pb)
if (!pb)
return;
if (pb->cpu_bufs) {
- for (i = 0; i < pb->cpu_cnt && pb->cpu_bufs[i]; i++) {
+ for (i = 0; i < pb->cpu_cnt; i++) {
struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
+ if (!cpu_buf)
+ continue;
+
bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
perf_buffer__free_cpu_buf(pb, cpu_buf);
}
@@ -8456,6 +8474,25 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
return cnt < 0 ? -errno : cnt;
}
+int perf_buffer__consume(struct perf_buffer *pb)
+{
+ int i, err;
+
+ for (i = 0; i < pb->cpu_cnt; i++) {
+ struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
+
+ if (!cpu_buf)
+ continue;
+
+ err = perf_buffer__process_records(pb, cpu_buf);
+ if (err) {
+ pr_warn("error while processing records: %d\n", err);
+ return err;
+ }
+ }
+ return 0;
+}
+
struct bpf_prog_info_array_desc {
int array_offset; /* e.g. offset of jited_prog_insns */
int count_offset; /* e.g. offset of jited_prog_len */
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 8ea69558f0a8..334437af3014 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -253,6 +253,8 @@ LIBBPF_API struct bpf_link *
bpf_program__attach_lsm(struct bpf_program *prog);
LIBBPF_API struct bpf_link *
bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_netns(struct bpf_program *prog, int netns_fd);
struct bpf_map;
@@ -478,6 +480,27 @@ LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
size_t info_size, __u32 flags);
+/* Ring buffer APIs */
+struct ring_buffer;
+
+typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size);
+
+struct ring_buffer_opts {
+ size_t sz; /* size of this struct, for forward/backward compatiblity */
+};
+
+#define ring_buffer_opts__last_field sz
+
+LIBBPF_API struct ring_buffer *
+ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
+ const struct ring_buffer_opts *opts);
+LIBBPF_API void ring_buffer__free(struct ring_buffer *rb);
+LIBBPF_API int ring_buffer__add(struct ring_buffer *rb, int map_fd,
+ ring_buffer_sample_fn sample_cb, void *ctx);
+LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms);
+LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb);
+
+/* Perf buffer APIs */
struct perf_buffer;
typedef void (*perf_buffer_sample_fn)(void *ctx, int cpu,
@@ -533,6 +556,7 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt,
LIBBPF_API void perf_buffer__free(struct perf_buffer *pb);
LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms);
+LIBBPF_API int perf_buffer__consume(struct perf_buffer *pb);
typedef enum bpf_perf_event_ret
(*bpf_perf_event_print_t)(struct perf_event_header *hdr,
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 0133d469d30b..f732c77b7ed0 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -262,4 +262,11 @@ LIBBPF_0.0.9 {
bpf_link_get_fd_by_id;
bpf_link_get_next_id;
bpf_program__attach_iter;
+ bpf_program__attach_netns;
+ perf_buffer__consume;
+ ring_buffer__add;
+ ring_buffer__consume;
+ ring_buffer__free;
+ ring_buffer__new;
+ ring_buffer__poll;
} LIBBPF_0.0.8;
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 2c92059c0c90..10cd8d1891f5 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -238,6 +238,11 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
if (btf_fd < 0)
return false;
break;
+ case BPF_MAP_TYPE_RINGBUF:
+ key_size = 0;
+ value_size = 0;
+ max_entries = 4096;
+ break;
case BPF_MAP_TYPE_UNSPEC:
case BPF_MAP_TYPE_HASH:
case BPF_MAP_TYPE_ARRAY:
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
new file mode 100644
index 000000000000..4fc6c6cbb4eb
--- /dev/null
+++ b/tools/lib/bpf/ringbuf.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/*
+ * Ring buffer operations.
+ *
+ * Copyright (C) 2020 Facebook, Inc.
+ */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <linux/err.h>
+#include <linux/bpf.h>
+#include <asm/barrier.h>
+#include <sys/mman.h>
+#include <sys/epoll.h>
+#include <tools/libc_compat.h>
+
+#include "libbpf.h"
+#include "libbpf_internal.h"
+#include "bpf.h"
+
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
+struct ring {
+ ring_buffer_sample_fn sample_cb;
+ void *ctx;
+ void *data;
+ unsigned long *consumer_pos;
+ unsigned long *producer_pos;
+ unsigned long mask;
+ int map_fd;
+};
+
+struct ring_buffer {
+ struct epoll_event *events;
+ struct ring *rings;
+ size_t page_size;
+ int epoll_fd;
+ int ring_cnt;
+};
+
+static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r)
+{
+ if (r->consumer_pos) {
+ munmap(r->consumer_pos, rb->page_size);
+ r->consumer_pos = NULL;
+ }
+ if (r->producer_pos) {
+ munmap(r->producer_pos, rb->page_size + 2 * (r->mask + 1));
+ r->producer_pos = NULL;
+ }
+}
+
+/* Add extra RINGBUF maps to this ring buffer manager */
+int ring_buffer__add(struct ring_buffer *rb, int map_fd,
+ ring_buffer_sample_fn sample_cb, void *ctx)
+{
+ struct bpf_map_info info;
+ __u32 len = sizeof(info);
+ struct epoll_event *e;
+ struct ring *r;
+ void *tmp;
+ int err;
+
+ memset(&info, 0, sizeof(info));
+
+ err = bpf_obj_get_info_by_fd(map_fd, &info, &len);
+ if (err) {
+ err = -errno;
+ pr_warn("ringbuf: failed to get map info for fd=%d: %d\n",
+ map_fd, err);
+ return err;
+ }
+
+ if (info.type != BPF_MAP_TYPE_RINGBUF) {
+ pr_warn("ringbuf: map fd=%d is not BPF_MAP_TYPE_RINGBUF\n",
+ map_fd);
+ return -EINVAL;
+ }
+
+ tmp = reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings));
+ if (!tmp)
+ return -ENOMEM;
+ rb->rings = tmp;
+
+ tmp = reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events));
+ if (!tmp)
+ return -ENOMEM;
+ rb->events = tmp;
+
+ r = &rb->rings[rb->ring_cnt];
+ memset(r, 0, sizeof(*r));
+
+ r->map_fd = map_fd;
+ r->sample_cb = sample_cb;
+ r->ctx = ctx;
+ r->mask = info.max_entries - 1;
+
+ /* Map writable consumer page */
+ tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ map_fd, 0);
+ if (tmp == MAP_FAILED) {
+ err = -errno;
+ pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n",
+ map_fd, err);
+ return err;
+ }
+ r->consumer_pos = tmp;
+
+ /* Map read-only producer page and data pages. We map twice as big
+ * data size to allow simple reading of samples that wrap around the
+ * end of a ring buffer. See kernel implementation for details.
+ * */
+ tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, PROT_READ,
+ MAP_SHARED, map_fd, rb->page_size);
+ if (tmp == MAP_FAILED) {
+ err = -errno;
+ ringbuf_unmap_ring(rb, r);
+ pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n",
+ map_fd, err);
+ return err;
+ }
+ r->producer_pos = tmp;
+ r->data = tmp + rb->page_size;
+
+ e = &rb->events[rb->ring_cnt];
+ memset(e, 0, sizeof(*e));
+
+ e->events = EPOLLIN;
+ e->data.fd = rb->ring_cnt;
+ if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, e) < 0) {
+ err = -errno;
+ ringbuf_unmap_ring(rb, r);
+ pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n",
+ map_fd, err);
+ return err;
+ }
+
+ rb->ring_cnt++;
+ return 0;
+}
+
+void ring_buffer__free(struct ring_buffer *rb)
+{
+ int i;
+
+ if (!rb)
+ return;
+
+ for (i = 0; i < rb->ring_cnt; ++i)
+ ringbuf_unmap_ring(rb, &rb->rings[i]);
+ if (rb->epoll_fd >= 0)
+ close(rb->epoll_fd);
+
+ free(rb->events);
+ free(rb->rings);
+ free(rb);
+}
+
+struct ring_buffer *
+ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
+ const struct ring_buffer_opts *opts)
+{
+ struct ring_buffer *rb;
+ int err;
+
+ if (!OPTS_VALID(opts, ring_buffer_opts))
+ return NULL;
+
+ rb = calloc(1, sizeof(*rb));
+ if (!rb)
+ return NULL;
+
+ rb->page_size = getpagesize();
+
+ rb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+ if (rb->epoll_fd < 0) {
+ err = -errno;
+ pr_warn("ringbuf: failed to create epoll instance: %d\n", err);
+ goto err_out;
+ }
+
+ err = ring_buffer__add(rb, map_fd, sample_cb, ctx);
+ if (err)
+ goto err_out;
+
+ return rb;
+
+err_out:
+ ring_buffer__free(rb);
+ return NULL;
+}
+
+static inline int roundup_len(__u32 len)
+{
+ /* clear out top 2 bits (discard and busy, if set) */
+ len <<= 2;
+ len >>= 2;
+ /* add length prefix */
+ len += BPF_RINGBUF_HDR_SZ;
+ /* round up to 8 byte alignment */
+ return (len + 7) / 8 * 8;
+}
+
+static int ringbuf_process_ring(struct ring* r)
+{
+ int *len_ptr, len, err, cnt = 0;
+ unsigned long cons_pos, prod_pos;
+ bool got_new_data;
+ void *sample;
+
+ cons_pos = smp_load_acquire(r->consumer_pos);
+ do {
+ got_new_data = false;
+ prod_pos = smp_load_acquire(r->producer_pos);
+ while (cons_pos < prod_pos) {
+ len_ptr = r->data + (cons_pos & r->mask);
+ len = smp_load_acquire(len_ptr);
+
+ /* sample not committed yet, bail out for now */
+ if (len & BPF_RINGBUF_BUSY_BIT)
+ goto done;
+
+ got_new_data = true;
+ cons_pos += roundup_len(len);
+
+ if ((len & BPF_RINGBUF_DISCARD_BIT) == 0) {
+ sample = (void *)len_ptr + BPF_RINGBUF_HDR_SZ;
+ err = r->sample_cb(r->ctx, sample, len);
+ if (err) {
+ /* update consumer pos and bail out */
+ smp_store_release(r->consumer_pos,
+ cons_pos);
+ return err;
+ }
+ cnt++;
+ }
+
+ smp_store_release(r->consumer_pos, cons_pos);
+ }
+ } while (got_new_data);
+done:
+ return cnt;
+}
+
+/* Consume available ring buffer(s) data without event polling.
+ * Returns number of records consumed across all registered ring buffers, or
+ * negative number if any of the callbacks return error.
+ */
+int ring_buffer__consume(struct ring_buffer *rb)
+{
+ int i, err, res = 0;
+
+ for (i = 0; i < rb->ring_cnt; i++) {
+ struct ring *ring = &rb->rings[i];
+
+ err = ringbuf_process_ring(ring);
+ if (err < 0)
+ return err;
+ res += err;
+ }
+ return res;
+}
+
+/* Poll for available data and consume records, if any are available.
+ * Returns number of records consumed, or negative number, if any of the
+ * registered callbacks returned error.
+ */
+int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
+{
+ int i, cnt, err, res = 0;
+
+ cnt = epoll_wait(rb->epoll_fd, rb->events, rb->ring_cnt, timeout_ms);
+ for (i = 0; i < cnt; i++) {
+ __u32 ring_id = rb->events[i].data.fd;
+ struct ring *ring = &rb->rings[ring_id];
+
+ err = ringbuf_process_ring(ring);
+ if (err < 0)
+ return err;
+ res += cnt;
+ }
+ return cnt < 0 ? -errno : res;
+}
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index e716e931d0c9..3ce548eff8a8 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -413,12 +413,15 @@ $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h
$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
$(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h
$(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h
+$(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \
+ $(OUTPUT)/perfbuf_bench.skel.h
$(OUTPUT)/bench.o: bench.h testing_helpers.h
$(OUTPUT)/bench: LDLIBS += -lm
$(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
$(OUTPUT)/bench_count.o \
$(OUTPUT)/bench_rename.o \
- $(OUTPUT)/bench_trigger.o
+ $(OUTPUT)/bench_trigger.o \
+ $(OUTPUT)/bench_ringbufs.o
$(call msg,BINARY,,$@)
$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS)
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 14390689ef90..944ad4721c83 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -130,6 +130,13 @@ static const struct argp_option opts[] = {
{},
};
+extern struct argp bench_ringbufs_argp;
+
+static const struct argp_child bench_parsers[] = {
+ { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
+ {},
+};
+
static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
static int pos_args;
@@ -208,6 +215,7 @@ static void parse_cmdline_args(int argc, char **argv)
.options = opts,
.parser = parse_arg,
.doc = argp_program_doc,
+ .children = bench_parsers,
};
if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
exit(1);
@@ -310,6 +318,10 @@ extern const struct bench bench_trig_rawtp;
extern const struct bench bench_trig_kprobe;
extern const struct bench bench_trig_fentry;
extern const struct bench bench_trig_fmodret;
+extern const struct bench bench_rb_libbpf;
+extern const struct bench bench_rb_custom;
+extern const struct bench bench_pb_libbpf;
+extern const struct bench bench_pb_custom;
static const struct bench *benchs[] = {
&bench_count_global,
@@ -327,6 +339,10 @@ static const struct bench *benchs[] = {
&bench_trig_kprobe,
&bench_trig_fentry,
&bench_trig_fmodret,
+ &bench_rb_libbpf,
+ &bench_rb_custom,
+ &bench_pb_libbpf,
+ &bench_pb_custom,
};
static void setup_benchmark()
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
new file mode 100644
index 000000000000..da87c7f31891
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -0,0 +1,566 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <asm/barrier.h>
+#include <linux/perf_event.h>
+#include <linux/ring_buffer.h>
+#include <sys/epoll.h>
+#include <sys/mman.h>
+#include <argp.h>
+#include <stdlib.h>
+#include "bench.h"
+#include "ringbuf_bench.skel.h"
+#include "perfbuf_bench.skel.h"
+
+static struct {
+ bool back2back;
+ int batch_cnt;
+ bool sampled;
+ int sample_rate;
+ int ringbuf_sz; /* per-ringbuf, in bytes */
+ bool ringbuf_use_output; /* use slower output API */
+ int perfbuf_sz; /* per-CPU size, in pages */
+} args = {
+ .back2back = false,
+ .batch_cnt = 500,
+ .sampled = false,
+ .sample_rate = 500,
+ .ringbuf_sz = 512 * 1024,
+ .ringbuf_use_output = false,
+ .perfbuf_sz = 128,
+};
+
+enum {
+ ARG_RB_BACK2BACK = 2000,
+ ARG_RB_USE_OUTPUT = 2001,
+ ARG_RB_BATCH_CNT = 2002,
+ ARG_RB_SAMPLED = 2003,
+ ARG_RB_SAMPLE_RATE = 2004,
+};
+
+static const struct argp_option opts[] = {
+ { "rb-b2b", ARG_RB_BACK2BACK, NULL, 0, "Back-to-back mode"},
+ { "rb-use-output", ARG_RB_USE_OUTPUT, NULL, 0, "Use bpf_ringbuf_output() instead of bpf_ringbuf_reserve()"},
+ { "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
+ { "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
+ { "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ switch (key) {
+ case ARG_RB_BACK2BACK:
+ args.back2back = true;
+ break;
+ case ARG_RB_USE_OUTPUT:
+ args.ringbuf_use_output = true;
+ break;
+ case ARG_RB_BATCH_CNT:
+ args.batch_cnt = strtol(arg, NULL, 10);
+ if (args.batch_cnt < 0) {
+ fprintf(stderr, "Invalid batch count.");
+ argp_usage(state);
+ }
+ break;
+ case ARG_RB_SAMPLED:
+ args.sampled = true;
+ break;
+ case ARG_RB_SAMPLE_RATE:
+ args.sample_rate = strtol(arg, NULL, 10);
+ if (args.sample_rate < 0) {
+ fprintf(stderr, "Invalid perfbuf sample rate.");
+ argp_usage(state);
+ }
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+ return 0;
+}
+
+/* exported into benchmark runner */
+const struct argp bench_ringbufs_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+/* RINGBUF-LIBBPF benchmark */
+
+static struct counter buf_hits;
+
+static inline void bufs_trigger_batch()
+{
+ (void)syscall(__NR_getpgid);
+}
+
+static void bufs_validate()
+{
+ if (env.consumer_cnt != 1) {
+ fprintf(stderr, "rb-libbpf benchmark doesn't support multi-consumer!\n");
+ exit(1);
+ }
+
+ if (args.back2back && env.producer_cnt > 1) {
+ fprintf(stderr, "back-to-back mode makes sense only for single-producer case!\n");
+ exit(1);
+ }
+}
+
+static void *bufs_sample_producer(void *input)
+{
+ if (args.back2back) {
+ /* initial batch to get everything started */
+ bufs_trigger_batch();
+ return NULL;
+ }
+
+ while (true)
+ bufs_trigger_batch();
+ return NULL;
+}
+
+static struct ringbuf_libbpf_ctx {
+ struct ringbuf_bench *skel;
+ struct ring_buffer *ringbuf;
+} ringbuf_libbpf_ctx;
+
+static void ringbuf_libbpf_measure(struct bench_res *res)
+{
+ struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
+
+ res->hits = atomic_swap(&buf_hits.value, 0);
+ res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
+}
+
+static struct ringbuf_bench *ringbuf_setup_skeleton()
+{
+ struct ringbuf_bench *skel;
+
+ setup_libbpf();
+
+ skel = ringbuf_bench__open();
+ if (!skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ skel->rodata->batch_cnt = args.batch_cnt;
+ skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0;
+
+ if (args.sampled)
+ /* record data + header take 16 bytes */
+ skel->rodata->wakeup_data_size = args.sample_rate * 16;
+
+ bpf_map__resize(skel->maps.ringbuf, args.ringbuf_sz);
+
+ if (ringbuf_bench__load(skel)) {
+ fprintf(stderr, "failed to load skeleton\n");
+ exit(1);
+ }
+
+ return skel;
+}
+
+static int buf_process_sample(void *ctx, void *data, size_t len)
+{
+ atomic_inc(&buf_hits.value);
+ return 0;
+}
+
+static void ringbuf_libbpf_setup()
+{
+ struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
+ struct bpf_link *link;
+
+ ctx->skel = ringbuf_setup_skeleton();
+ ctx->ringbuf = ring_buffer__new(bpf_map__fd(ctx->skel->maps.ringbuf),
+ buf_process_sample, NULL, NULL);
+ if (!ctx->ringbuf) {
+ fprintf(stderr, "failed to create ringbuf\n");
+ exit(1);
+ }
+
+ link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
+ if (IS_ERR(link)) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+}
+
+static void *ringbuf_libbpf_consumer(void *input)
+{
+ struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
+
+ while (ring_buffer__poll(ctx->ringbuf, -1) >= 0) {
+ if (args.back2back)
+ bufs_trigger_batch();
+ }
+ fprintf(stderr, "ringbuf polling failed!\n");
+ return NULL;
+}
+
+/* RINGBUF-CUSTOM benchmark */
+struct ringbuf_custom {
+ __u64 *consumer_pos;
+ __u64 *producer_pos;
+ __u64 mask;
+ void *data;
+ int map_fd;
+};
+
+static struct ringbuf_custom_ctx {
+ struct ringbuf_bench *skel;
+ struct ringbuf_custom ringbuf;
+ int epoll_fd;
+ struct epoll_event event;
+} ringbuf_custom_ctx;
+
+static void ringbuf_custom_measure(struct bench_res *res)
+{
+ struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
+
+ res->hits = atomic_swap(&buf_hits.value, 0);
+ res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
+}
+
+static void ringbuf_custom_setup()
+{
+ struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
+ const size_t page_size = getpagesize();
+ struct bpf_link *link;
+ struct ringbuf_custom *r;
+ void *tmp;
+ int err;
+
+ ctx->skel = ringbuf_setup_skeleton();
+
+ ctx->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+ if (ctx->epoll_fd < 0) {
+ fprintf(stderr, "failed to create epoll fd: %d\n", -errno);
+ exit(1);
+ }
+
+ r = &ctx->ringbuf;
+ r->map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
+ r->mask = args.ringbuf_sz - 1;
+
+ /* Map writable consumer page */
+ tmp = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ r->map_fd, 0);
+ if (tmp == MAP_FAILED) {
+ fprintf(stderr, "failed to mmap consumer page: %d\n", -errno);
+ exit(1);
+ }
+ r->consumer_pos = tmp;
+
+ /* Map read-only producer page and data pages. */
+ tmp = mmap(NULL, page_size + 2 * args.ringbuf_sz, PROT_READ, MAP_SHARED,
+ r->map_fd, page_size);
+ if (tmp == MAP_FAILED) {
+ fprintf(stderr, "failed to mmap data pages: %d\n", -errno);
+ exit(1);
+ }
+ r->producer_pos = tmp;
+ r->data = tmp + page_size;
+
+ ctx->event.events = EPOLLIN;
+ err = epoll_ctl(ctx->epoll_fd, EPOLL_CTL_ADD, r->map_fd, &ctx->event);
+ if (err < 0) {
+ fprintf(stderr, "failed to epoll add ringbuf: %d\n", -errno);
+ exit(1);
+ }
+
+ link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
+ if (IS_ERR(link)) {
+ fprintf(stderr, "failed to attach program\n");
+ exit(1);
+ }
+}
+
+#define RINGBUF_BUSY_BIT (1 << 31)
+#define RINGBUF_DISCARD_BIT (1 << 30)
+#define RINGBUF_META_LEN 8
+
+static inline int roundup_len(__u32 len)
+{
+ /* clear out top 2 bits */
+ len <<= 2;
+ len >>= 2;
+ /* add length prefix */
+ len += RINGBUF_META_LEN;
+ /* round up to 8 byte alignment */
+ return (len + 7) / 8 * 8;
+}
+
+static void ringbuf_custom_process_ring(struct ringbuf_custom *r)
+{
+ unsigned long cons_pos, prod_pos;
+ int *len_ptr, len;
+ bool got_new_data;
+
+ cons_pos = smp_load_acquire(r->consumer_pos);
+ while (true) {
+ got_new_data = false;
+ prod_pos = smp_load_acquire(r->producer_pos);
+ while (cons_pos < prod_pos) {
+ len_ptr = r->data + (cons_pos & r->mask);
+ len = smp_load_acquire(len_ptr);
+
+ /* sample not committed yet, bail out for now */
+ if (len & RINGBUF_BUSY_BIT)
+ return;
+
+ got_new_data = true;
+ cons_pos += roundup_len(len);
+
+ atomic_inc(&buf_hits.value);
+ }
+ if (got_new_data)
+ smp_store_release(r->consumer_pos, cons_pos);
+ else
+ break;
+ };
+}
+
+static void *ringbuf_custom_consumer(void *input)
+{
+ struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
+ int cnt;
+
+ do {
+ if (args.back2back)
+ bufs_trigger_batch();
+ cnt = epoll_wait(ctx->epoll_fd, &ctx->event, 1, -1);
+ if (cnt > 0)
+ ringbuf_custom_process_ring(&ctx->ringbuf);
+ } while (cnt >= 0);
+ fprintf(stderr, "ringbuf polling failed!\n");
+ return 0;
+}
+
+/* PERFBUF-LIBBPF benchmark */
+static struct perfbuf_libbpf_ctx {
+ struct perfbuf_bench *skel;
+ struct perf_buffer *perfbuf;
+} perfbuf_libbpf_ctx;
+
+static void perfbuf_measure(struct bench_res *res)
+{
+ struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+
+ res->hits = atomic_swap(&buf_hits.value, 0);
+ res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
+}
+
+static struct perfbuf_bench *perfbuf_setup_skeleton()
+{
+ struct perfbuf_bench *skel;
+
+ setup_libbpf();
+
+ skel = perfbuf_bench__open();
+ if (!skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ skel->rodata->batch_cnt = args.batch_cnt;
+
+ if (perfbuf_bench__load(skel)) {
+ fprintf(stderr, "failed to load skeleton\n");
+ exit(1);
+ }
+
+ return skel;
+}
+
+static enum bpf_perf_event_ret
+perfbuf_process_sample_raw(void *input_ctx, int cpu,
+ struct perf_event_header *e)
+{
+ switch (e->type) {
+ case PERF_RECORD_SAMPLE:
+ atomic_inc(&buf_hits.value);
+ break;
+ case PERF_RECORD_LOST:
+ break;
+ default:
+ return LIBBPF_PERF_EVENT_ERROR;
+ }
+ return LIBBPF_PERF_EVENT_CONT;
+}
+
+static void perfbuf_libbpf_setup()
+{
+ struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+ struct perf_event_attr attr;
+ struct perf_buffer_raw_opts pb_opts = {
+ .event_cb = perfbuf_process_sample_raw,
+ .ctx = (void *)(long)0,
+ .attr = &attr,
+ };
+ struct bpf_link *link;
+
+ ctx->skel = perfbuf_setup_skeleton();
+
+ memset(&attr, 0, sizeof(attr));
+ attr.config = PERF_COUNT_SW_BPF_OUTPUT,
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.sample_type = PERF_SAMPLE_RAW;
+ /* notify only every Nth sample */
+ if (args.sampled) {
+ attr.sample_period = args.sample_rate;
+ attr.wakeup_events = args.sample_rate;
+ } else {
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+ }
+
+ if (args.sample_rate > args.batch_cnt) {
+ fprintf(stderr, "sample rate %d is too high for given batch count %d\n",
+ args.sample_rate, args.batch_cnt);
+ exit(1);
+ }
+
+ ctx->perfbuf = perf_buffer__new_raw(bpf_map__fd(ctx->skel->maps.perfbuf),
+ args.perfbuf_sz, &pb_opts);
+ if (!ctx->perfbuf) {
+ fprintf(stderr, "failed to create perfbuf\n");
+ exit(1);
+ }
+
+ link = bpf_program__attach(ctx->skel->progs.bench_perfbuf);
+ if (IS_ERR(link)) {
+ fprintf(stderr, "failed to attach program\n");
+ exit(1);
+ }
+}
+
+static void *perfbuf_libbpf_consumer(void *input)
+{
+ struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+
+ while (perf_buffer__poll(ctx->perfbuf, -1) >= 0) {
+ if (args.back2back)
+ bufs_trigger_batch();
+ }
+ fprintf(stderr, "perfbuf polling failed!\n");
+ return NULL;
+}
+
+/* PERFBUF-CUSTOM benchmark */
+
+/* copies of internal libbpf definitions */
+struct perf_cpu_buf {
+ struct perf_buffer *pb;
+ void *base; /* mmap()'ed memory */
+ void *buf; /* for reconstructing segmented data */
+ size_t buf_size;
+ int fd;
+ int cpu;
+ int map_key;
+};
+
+struct perf_buffer {
+ perf_buffer_event_fn event_cb;
+ perf_buffer_sample_fn sample_cb;
+ perf_buffer_lost_fn lost_cb;
+ void *ctx; /* passed into callbacks */
+
+ size_t page_size;
+ size_t mmap_size;
+ struct perf_cpu_buf **cpu_bufs;
+ struct epoll_event *events;
+ int cpu_cnt; /* number of allocated CPU buffers */
+ int epoll_fd; /* perf event FD */
+ int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
+};
+
+static void *perfbuf_custom_consumer(void *input)
+{
+ struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+ struct perf_buffer *pb = ctx->perfbuf;
+ struct perf_cpu_buf *cpu_buf;
+ struct perf_event_mmap_page *header;
+ size_t mmap_mask = pb->mmap_size - 1;
+ struct perf_event_header *ehdr;
+ __u64 data_head, data_tail;
+ size_t ehdr_size;
+ void *base;
+ int i, cnt;
+
+ while (true) {
+ if (args.back2back)
+ bufs_trigger_batch();
+ cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, -1);
+ if (cnt <= 0) {
+ fprintf(stderr, "perf epoll failed: %d\n", -errno);
+ exit(1);
+ }
+
+ for (i = 0; i < cnt; ++i) {
+ cpu_buf = pb->events[i].data.ptr;
+ header = cpu_buf->base;
+ base = ((void *)header) + pb->page_size;
+
+ data_head = ring_buffer_read_head(header);
+ data_tail = header->data_tail;
+ while (data_head != data_tail) {
+ ehdr = base + (data_tail & mmap_mask);
+ ehdr_size = ehdr->size;
+
+ if (ehdr->type == PERF_RECORD_SAMPLE)
+ atomic_inc(&buf_hits.value);
+
+ data_tail += ehdr_size;
+ }
+ ring_buffer_write_tail(header, data_tail);
+ }
+ }
+ return NULL;
+}
+
+const struct bench bench_rb_libbpf = {
+ .name = "rb-libbpf",
+ .validate = bufs_validate,
+ .setup = ringbuf_libbpf_setup,
+ .producer_thread = bufs_sample_producer,
+ .consumer_thread = ringbuf_libbpf_consumer,
+ .measure = ringbuf_libbpf_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rb_custom = {
+ .name = "rb-custom",
+ .validate = bufs_validate,
+ .setup = ringbuf_custom_setup,
+ .producer_thread = bufs_sample_producer,
+ .consumer_thread = ringbuf_custom_consumer,
+ .measure = ringbuf_custom_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_pb_libbpf = {
+ .name = "pb-libbpf",
+ .validate = bufs_validate,
+ .setup = perfbuf_libbpf_setup,
+ .producer_thread = bufs_sample_producer,
+ .consumer_thread = perfbuf_libbpf_consumer,
+ .measure = perfbuf_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_pb_custom = {
+ .name = "pb-custom",
+ .validate = bufs_validate,
+ .setup = perfbuf_libbpf_setup,
+ .producer_thread = bufs_sample_producer,
+ .consumer_thread = perfbuf_custom_consumer,
+ .measure = perfbuf_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
new file mode 100755
index 000000000000..af4aa04caba6
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+set -eufo pipefail
+
+RUN_BENCH="sudo ./bench -w3 -d10 -a"
+
+function hits()
+{
+ echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
+}
+
+function drops()
+{
+ echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
+}
+
+function header()
+{
+ local len=${#1}
+
+ printf "\n%s\n" "$1"
+ for i in $(seq 1 $len); do printf '='; done
+ printf '\n'
+}
+
+function summarize()
+{
+ bench="$1"
+ summary=$(echo $2 | tail -n1)
+ printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)"
+}
+
+header "Single-producer, parallel producer"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+ summarize $b "$($RUN_BENCH $b)"
+done
+
+header "Single-producer, parallel producer, sampled notification"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+ summarize $b "$($RUN_BENCH --rb-sampled $b)"
+done
+
+header "Single-producer, back-to-back mode"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+ summarize $b "$($RUN_BENCH --rb-b2b $b)"
+ summarize $b-sampled "$($RUN_BENCH --rb-sampled --rb-b2b $b)"
+done
+
+header "Ringbuf back-to-back, effect of sample rate"
+for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do
+ summarize "rb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b rb-custom)"
+done
+header "Perfbuf back-to-back, effect of sample rate"
+for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do
+ summarize "pb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b pb-custom)"
+done
+
+header "Ringbuf back-to-back, reserve+commit vs output"
+summarize "reserve" "$($RUN_BENCH --rb-b2b rb-custom)"
+summarize "output" "$($RUN_BENCH --rb-b2b --rb-use-output rb-custom)"
+
+header "Ringbuf sampled, reserve+commit vs output"
+summarize "reserve-sampled" "$($RUN_BENCH --rb-sampled rb-custom)"
+summarize "output-sampled" "$($RUN_BENCH --rb-sampled --rb-use-output rb-custom)"
+
+header "Single-producer, consumer/producer competing on the same CPU, low batch count"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+ summarize $b "$($RUN_BENCH --rb-batch-cnt 1 --rb-sample-rate 1 --prod-affinity 0 --cons-affinity 0 $b)"
+done
+
+header "Ringbuf, multi-producer contention"
+for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
+ summarize "rb-libbpf nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
+done
+
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index 2301c4d3ecec..ea14e3ece812 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -6,6 +6,8 @@
#include <linux/if_tun.h>
#include <sys/uio.h>
+#include "bpf_flow.skel.h"
+
#ifndef IP_MF
#define IP_MF 0x2000
#endif
@@ -101,6 +103,7 @@ struct test {
#define VLAN_HLEN 4
+static __u32 duration;
struct test tests[] = {
{
.name = "ipv4",
@@ -444,17 +447,130 @@ static int ifup(const char *ifname)
return 0;
}
+static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array)
+{
+ int i, err, map_fd, prog_fd;
+ struct bpf_program *prog;
+ char prog_name[32];
+
+ map_fd = bpf_map__fd(prog_array);
+ if (map_fd < 0)
+ return -1;
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ snprintf(prog_name, sizeof(prog_name), "flow_dissector/%i", i);
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (!prog)
+ return -1;
+
+ prog_fd = bpf_program__fd(prog);
+ if (prog_fd < 0)
+ return -1;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (err)
+ return -1;
+ }
+ return 0;
+}
+
+static void run_tests_skb_less(int tap_fd, struct bpf_map *keys)
+{
+ int i, err, keys_fd;
+
+ keys_fd = bpf_map__fd(keys);
+ if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ /* Keep in sync with 'flags' from eth_get_headlen. */
+ __u32 eth_get_headlen_flags =
+ BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
+ struct bpf_prog_test_run_attr tattr = {};
+ struct bpf_flow_keys flow_keys = {};
+ __u32 key = (__u32)(tests[i].keys.sport) << 16 |
+ tests[i].keys.dport;
+
+ /* For skb-less case we can't pass input flags; run
+ * only the tests that have a matching set of flags.
+ */
+
+ if (tests[i].flags != eth_get_headlen_flags)
+ continue;
+
+ err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt));
+ CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno);
+
+ err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys);
+ CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err);
+
+ CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err);
+ CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
+
+ err = bpf_map_delete_elem(keys_fd, &key);
+ CHECK_ATTR(err, tests[i].name, "bpf_map_delete_elem %d\n", err);
+ }
+}
+
+static void test_skb_less_prog_attach(struct bpf_flow *skel, int tap_fd)
+{
+ int err, prog_fd;
+
+ prog_fd = bpf_program__fd(skel->progs._dissect);
+ if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd))
+ return;
+
+ err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
+ if (CHECK(err, "bpf_prog_attach", "err %d errno %d\n", err, errno))
+ return;
+
+ run_tests_skb_less(tap_fd, skel->maps.last_dissection);
+
+ err = bpf_prog_detach(prog_fd, BPF_FLOW_DISSECTOR);
+ CHECK(err, "bpf_prog_detach", "err %d errno %d\n", err, errno);
+}
+
+static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd)
+{
+ struct bpf_link *link;
+ int err, net_fd;
+
+ net_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (CHECK(net_fd < 0, "open(/proc/self/ns/net)", "err %d\n", errno))
+ return;
+
+ link = bpf_program__attach_netns(skel->progs._dissect, net_fd);
+ if (CHECK(IS_ERR(link), "attach_netns", "err %ld\n", PTR_ERR(link)))
+ goto out_close;
+
+ run_tests_skb_less(tap_fd, skel->maps.last_dissection);
+
+ err = bpf_link__destroy(link);
+ CHECK(err, "bpf_link__destroy", "err %d\n", err);
+out_close:
+ close(net_fd);
+}
+
void test_flow_dissector(void)
{
int i, err, prog_fd, keys_fd = -1, tap_fd;
- struct bpf_object *obj;
- __u32 duration = 0;
+ struct bpf_flow *skel;
- err = bpf_flow_load(&obj, "./bpf_flow.o", "flow_dissector",
- "jmp_table", "last_dissection", &prog_fd, &keys_fd);
- if (CHECK_FAIL(err))
+ skel = bpf_flow__open_and_load();
+ if (CHECK(!skel, "skel", "failed to open/load skeleton\n"))
return;
+ prog_fd = bpf_program__fd(skel->progs._dissect);
+ if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd))
+ goto out_destroy_skel;
+ keys_fd = bpf_map__fd(skel->maps.last_dissection);
+ if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd))
+ goto out_destroy_skel;
+ err = init_prog_array(skel->obj, skel->maps.jmp_table);
+ if (CHECK(err, "init_prog_array", "err %d\n", err))
+ goto out_destroy_skel;
+
for (i = 0; i < ARRAY_SIZE(tests); i++) {
struct bpf_flow_keys flow_keys;
struct bpf_prog_test_run_attr tattr = {
@@ -487,43 +603,17 @@ void test_flow_dissector(void)
* via BPF map in this case.
*/
- err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
- CHECK(err, "bpf_prog_attach", "err %d errno %d\n", err, errno);
-
tap_fd = create_tap("tap0");
CHECK(tap_fd < 0, "create_tap", "tap_fd %d errno %d\n", tap_fd, errno);
err = ifup("tap0");
CHECK(err, "ifup", "err %d errno %d\n", err, errno);
- for (i = 0; i < ARRAY_SIZE(tests); i++) {
- /* Keep in sync with 'flags' from eth_get_headlen. */
- __u32 eth_get_headlen_flags =
- BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
- struct bpf_prog_test_run_attr tattr = {};
- struct bpf_flow_keys flow_keys = {};
- __u32 key = (__u32)(tests[i].keys.sport) << 16 |
- tests[i].keys.dport;
-
- /* For skb-less case we can't pass input flags; run
- * only the tests that have a matching set of flags.
- */
-
- if (tests[i].flags != eth_get_headlen_flags)
- continue;
-
- err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt));
- CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno);
-
- err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys);
- CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err);
-
- CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err);
- CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
-
- err = bpf_map_delete_elem(keys_fd, &key);
- CHECK_ATTR(err, tests[i].name, "bpf_map_delete_elem %d\n", err);
- }
+ /* Test direct prog attachment */
+ test_skb_less_prog_attach(skel, tap_fd);
+ /* Test indirect prog attachment via link */
+ test_skb_less_link_create(skel, tap_fd);
- bpf_prog_detach(prog_fd, BPF_FLOW_DISSECTOR);
- bpf_object__close(obj);
+ close(tap_fd);
+out_destroy_skel:
+ bpf_flow__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
index 1f51ba66b98b..15cb554a66d8 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
@@ -11,6 +11,7 @@
#include <fcntl.h>
#include <sched.h>
#include <stdbool.h>
+#include <sys/stat.h>
#include <unistd.h>
#include <linux/bpf.h>
@@ -18,21 +19,30 @@
#include "test_progs.h"
-static bool is_attached(int netns)
+static int init_net = -1;
+
+static __u32 query_attached_prog_id(int netns)
{
- __u32 cnt;
+ __u32 prog_ids[1] = {};
+ __u32 prog_cnt = ARRAY_SIZE(prog_ids);
int err;
- err = bpf_prog_query(netns, BPF_FLOW_DISSECTOR, 0, NULL, NULL, &cnt);
+ err = bpf_prog_query(netns, BPF_FLOW_DISSECTOR, 0, NULL,
+ prog_ids, &prog_cnt);
if (CHECK_FAIL(err)) {
perror("bpf_prog_query");
- return true; /* fail-safe */
+ return 0;
}
- return cnt > 0;
+ return prog_cnt == 1 ? prog_ids[0] : 0;
+}
+
+static bool prog_is_attached(int netns)
+{
+ return query_attached_prog_id(netns) > 0;
}
-static int load_prog(void)
+static int load_prog(enum bpf_prog_type type)
{
struct bpf_insn prog[] = {
BPF_MOV64_IMM(BPF_REG_0, BPF_OK),
@@ -40,61 +50,566 @@ static int load_prog(void)
};
int fd;
- fd = bpf_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog,
- ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+ fd = bpf_load_program(type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
if (CHECK_FAIL(fd < 0))
perror("bpf_load_program");
return fd;
}
-static void do_flow_dissector_reattach(void)
+static __u32 query_prog_id(int prog)
{
- int prog_fd[2] = { -1, -1 };
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
int err;
- prog_fd[0] = load_prog();
- if (prog_fd[0] < 0)
- return;
+ err = bpf_obj_get_info_by_fd(prog, &info, &info_len);
+ if (CHECK_FAIL(err || info_len != sizeof(info))) {
+ perror("bpf_obj_get_info_by_fd");
+ return 0;
+ }
- prog_fd[1] = load_prog();
- if (prog_fd[1] < 0)
- goto out_close;
+ return info.id;
+}
+
+static int unshare_net(int old_net)
+{
+ int err, new_net;
- err = bpf_prog_attach(prog_fd[0], 0, BPF_FLOW_DISSECTOR, 0);
+ err = unshare(CLONE_NEWNET);
if (CHECK_FAIL(err)) {
- perror("bpf_prog_attach-0");
- goto out_close;
+ perror("unshare(CLONE_NEWNET)");
+ return -1;
+ }
+ new_net = open("/proc/self/ns/net", O_RDONLY);
+ if (CHECK_FAIL(new_net < 0)) {
+ perror("open(/proc/self/ns/net)");
+ setns(old_net, CLONE_NEWNET);
+ return -1;
}
+ return new_net;
+}
+
+static void test_prog_attach_prog_attach(int netns, int prog1, int prog2)
+{
+ int err;
+
+ err = bpf_prog_attach(prog1, 0, BPF_FLOW_DISSECTOR, 0);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_attach(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
/* Expect success when attaching a different program */
- err = bpf_prog_attach(prog_fd[1], 0, BPF_FLOW_DISSECTOR, 0);
+ err = bpf_prog_attach(prog2, 0, BPF_FLOW_DISSECTOR, 0);
if (CHECK_FAIL(err)) {
- perror("bpf_prog_attach-1");
+ perror("bpf_prog_attach(prog2) #1");
goto out_detach;
}
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2));
/* Expect failure when attaching the same program twice */
- err = bpf_prog_attach(prog_fd[1], 0, BPF_FLOW_DISSECTOR, 0);
+ err = bpf_prog_attach(prog2, 0, BPF_FLOW_DISSECTOR, 0);
if (CHECK_FAIL(!err || errno != EINVAL))
- perror("bpf_prog_attach-2");
+ perror("bpf_prog_attach(prog2) #2");
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2));
out_detach:
err = bpf_prog_detach(0, BPF_FLOW_DISSECTOR);
if (CHECK_FAIL(err))
perror("bpf_prog_detach");
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_create_link_create(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+ int link1, link2;
+
+ link1 = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect failure creating link when another link exists */
+ errno = 0;
+ link2 = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
+ if (CHECK_FAIL(link2 != -1 || errno != E2BIG))
+ perror("bpf_prog_attach(prog2) expected E2BIG");
+ if (link2 != -1)
+ close(link2);
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ close(link1);
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_prog_attach_link_create(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+ int err, link;
+
+ err = bpf_prog_attach(prog1, -1, BPF_FLOW_DISSECTOR, 0);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_attach(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect failure creating link when prog attached */
+ errno = 0;
+ link = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
+ if (CHECK_FAIL(link != -1 || errno != EEXIST))
+ perror("bpf_link_create(prog2) expected EEXIST");
+ if (link != -1)
+ close(link);
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ err = bpf_prog_detach(-1, BPF_FLOW_DISSECTOR);
+ if (CHECK_FAIL(err))
+ perror("bpf_prog_detach");
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_create_prog_attach(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+ int err, link;
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect failure attaching prog when link exists */
+ errno = 0;
+ err = bpf_prog_attach(prog2, -1, BPF_FLOW_DISSECTOR, 0);
+ if (CHECK_FAIL(!err || errno != EEXIST))
+ perror("bpf_prog_attach(prog2) expected EEXIST");
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ close(link);
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_create_prog_detach(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+ int err, link;
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect failure detaching prog when link exists */
+ errno = 0;
+ err = bpf_prog_detach(-1, BPF_FLOW_DISSECTOR);
+ if (CHECK_FAIL(!err || errno != EINVAL))
+ perror("bpf_prog_detach expected EINVAL");
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ close(link);
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_prog_attach_detach_query(int netns, int prog1, int prog2)
+{
+ int err;
+
+ err = bpf_prog_attach(prog1, 0, BPF_FLOW_DISSECTOR, 0);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_attach(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ err = bpf_prog_detach(0, BPF_FLOW_DISSECTOR);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_detach");
+ return;
+ }
+
+ /* Expect no prog attached after successful detach */
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_create_close_query(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+ int link;
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ close(link);
+ /* Expect no prog attached after closing last link FD */
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_no_old_prog(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+ int err, link;
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect success replacing the prog when old prog not specified */
+ update_opts.flags = 0;
+ update_opts.old_prog_fd = 0;
+ err = bpf_link_update(link, prog2, &update_opts);
+ if (CHECK_FAIL(err))
+ perror("bpf_link_update");
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2));
+
+ close(link);
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_replace_old_prog(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+ int err, link;
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect success F_REPLACE and old prog specified to succeed */
+ update_opts.flags = BPF_F_REPLACE;
+ update_opts.old_prog_fd = prog1;
+ err = bpf_link_update(link, prog2, &update_opts);
+ if (CHECK_FAIL(err))
+ perror("bpf_link_update");
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2));
+
+ close(link);
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_invalid_opts(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+ int err, link;
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect update to fail w/ old prog FD but w/o F_REPLACE*/
+ errno = 0;
+ update_opts.flags = 0;
+ update_opts.old_prog_fd = prog1;
+ err = bpf_link_update(link, prog2, &update_opts);
+ if (CHECK_FAIL(!err || errno != EINVAL)) {
+ perror("bpf_link_update expected EINVAL");
+ goto out_close;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect update to fail on old prog FD mismatch */
+ errno = 0;
+ update_opts.flags = BPF_F_REPLACE;
+ update_opts.old_prog_fd = prog2;
+ err = bpf_link_update(link, prog2, &update_opts);
+ if (CHECK_FAIL(!err || errno != EPERM)) {
+ perror("bpf_link_update expected EPERM");
+ goto out_close;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect update to fail for invalid old prog FD */
+ errno = 0;
+ update_opts.flags = BPF_F_REPLACE;
+ update_opts.old_prog_fd = -1;
+ err = bpf_link_update(link, prog2, &update_opts);
+ if (CHECK_FAIL(!err || errno != EBADF)) {
+ perror("bpf_link_update expected EBADF");
+ goto out_close;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect update to fail with invalid flags */
+ errno = 0;
+ update_opts.flags = BPF_F_ALLOW_MULTI;
+ update_opts.old_prog_fd = 0;
+ err = bpf_link_update(link, prog2, &update_opts);
+ if (CHECK_FAIL(!err || errno != EINVAL))
+ perror("bpf_link_update expected EINVAL");
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+out_close:
+ close(link);
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_invalid_prog(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+ int err, link, prog3;
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect failure when new prog FD is not valid */
+ errno = 0;
+ update_opts.flags = 0;
+ update_opts.old_prog_fd = 0;
+ err = bpf_link_update(link, -1, &update_opts);
+ if (CHECK_FAIL(!err || errno != EBADF)) {
+ perror("bpf_link_update expected EINVAL");
+ goto out_close_link;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ prog3 = load_prog(BPF_PROG_TYPE_SOCKET_FILTER);
+ if (prog3 < 0)
+ goto out_close_link;
+
+ /* Expect failure when new prog FD type doesn't match */
+ errno = 0;
+ update_opts.flags = 0;
+ update_opts.old_prog_fd = 0;
+ err = bpf_link_update(link, prog3, &update_opts);
+ if (CHECK_FAIL(!err || errno != EINVAL))
+ perror("bpf_link_update expected EINVAL");
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ close(prog3);
+out_close_link:
+ close(link);
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_netns_gone(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+ int err, link, old_net;
+
+ old_net = netns;
+ netns = unshare_net(old_net);
+ if (netns < 0)
+ return;
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ close(netns);
+ err = setns(old_net, CLONE_NEWNET);
+ if (CHECK_FAIL(err)) {
+ perror("setns(CLONE_NEWNET)");
+ close(link);
+ return;
+ }
+
+ /* Expect failure when netns destroyed */
+ errno = 0;
+ update_opts.flags = 0;
+ update_opts.old_prog_fd = 0;
+ err = bpf_link_update(link, prog2, &update_opts);
+ if (CHECK_FAIL(!err || errno != ENOLINK))
+ perror("bpf_link_update");
+
+ close(link);
+}
+
+static void test_link_get_info(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+ struct bpf_link_info info = {};
+ struct stat netns_stat = {};
+ __u32 info_len, link_id;
+ int err, link, old_net;
+
+ old_net = netns;
+ netns = unshare_net(old_net);
+ if (netns < 0)
+ return;
+
+ err = fstat(netns, &netns_stat);
+ if (CHECK_FAIL(err)) {
+ perror("stat(netns)");
+ goto out_resetns;
+ }
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ goto out_resetns;
+ }
+
+ info_len = sizeof(info);
+ err = bpf_obj_get_info_by_fd(link, &info, &info_len);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_obj_get_info");
+ goto out_unlink;
+ }
+ CHECK_FAIL(info_len != sizeof(info));
+
+ /* Expect link info to be sane and match prog and netns details */
+ CHECK_FAIL(info.type != BPF_LINK_TYPE_NETNS);
+ CHECK_FAIL(info.id == 0);
+ CHECK_FAIL(info.prog_id != query_prog_id(prog1));
+ CHECK_FAIL(info.netns.netns_ino != netns_stat.st_ino);
+ CHECK_FAIL(info.netns.attach_type != BPF_FLOW_DISSECTOR);
+
+ update_opts.flags = 0;
+ update_opts.old_prog_fd = 0;
+ err = bpf_link_update(link, prog2, &update_opts);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_link_update(prog2)");
+ goto out_unlink;
+ }
+
+ link_id = info.id;
+ info_len = sizeof(info);
+ err = bpf_obj_get_info_by_fd(link, &info, &info_len);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_obj_get_info");
+ goto out_unlink;
+ }
+ CHECK_FAIL(info_len != sizeof(info));
+
+ /* Expect no info change after update except in prog id */
+ CHECK_FAIL(info.type != BPF_LINK_TYPE_NETNS);
+ CHECK_FAIL(info.id != link_id);
+ CHECK_FAIL(info.prog_id != query_prog_id(prog2));
+ CHECK_FAIL(info.netns.netns_ino != netns_stat.st_ino);
+ CHECK_FAIL(info.netns.attach_type != BPF_FLOW_DISSECTOR);
+
+ /* Leave netns link is attached to and close last FD to it */
+ err = setns(old_net, CLONE_NEWNET);
+ if (CHECK_FAIL(err)) {
+ perror("setns(NEWNET)");
+ goto out_unlink;
+ }
+ close(netns);
+ old_net = -1;
+ netns = -1;
+
+ info_len = sizeof(info);
+ err = bpf_obj_get_info_by_fd(link, &info, &info_len);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_obj_get_info");
+ goto out_unlink;
+ }
+ CHECK_FAIL(info_len != sizeof(info));
+
+ /* Expect netns_ino to change to 0 */
+ CHECK_FAIL(info.type != BPF_LINK_TYPE_NETNS);
+ CHECK_FAIL(info.id != link_id);
+ CHECK_FAIL(info.prog_id != query_prog_id(prog2));
+ CHECK_FAIL(info.netns.netns_ino != 0);
+ CHECK_FAIL(info.netns.attach_type != BPF_FLOW_DISSECTOR);
+
+out_unlink:
+ close(link);
+out_resetns:
+ if (old_net != -1)
+ setns(old_net, CLONE_NEWNET);
+ if (netns != -1)
+ close(netns);
+}
+
+static void run_tests(int netns)
+{
+ struct test {
+ const char *test_name;
+ void (*test_func)(int netns, int prog1, int prog2);
+ } tests[] = {
+ { "prog attach, prog attach",
+ test_prog_attach_prog_attach },
+ { "link create, link create",
+ test_link_create_link_create },
+ { "prog attach, link create",
+ test_prog_attach_link_create },
+ { "link create, prog attach",
+ test_link_create_prog_attach },
+ { "link create, prog detach",
+ test_link_create_prog_detach },
+ { "prog attach, detach, query",
+ test_prog_attach_detach_query },
+ { "link create, close, query",
+ test_link_create_close_query },
+ { "link update no old prog",
+ test_link_update_no_old_prog },
+ { "link update with replace old prog",
+ test_link_update_replace_old_prog },
+ { "link update invalid opts",
+ test_link_update_invalid_opts },
+ { "link update invalid prog",
+ test_link_update_invalid_prog },
+ { "link update netns gone",
+ test_link_update_netns_gone },
+ { "link get info",
+ test_link_get_info },
+ };
+ int i, progs[2] = { -1, -1 };
+ char test_name[80];
+
+ for (i = 0; i < ARRAY_SIZE(progs); i++) {
+ progs[i] = load_prog(BPF_PROG_TYPE_FLOW_DISSECTOR);
+ if (progs[i] < 0)
+ goto out_close;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ snprintf(test_name, sizeof(test_name),
+ "flow dissector %s%s",
+ tests[i].test_name,
+ netns == init_net ? " (init_net)" : "");
+ if (test__start_subtest(test_name))
+ tests[i].test_func(netns, progs[0], progs[1]);
+ }
out_close:
- close(prog_fd[1]);
- close(prog_fd[0]);
+ for (i = 0; i < ARRAY_SIZE(progs); i++) {
+ if (progs[i] != -1)
+ CHECK_FAIL(close(progs[i]));
+ }
}
void test_flow_dissector_reattach(void)
{
- int init_net, self_net, err;
+ int err, new_net, saved_net;
- self_net = open("/proc/self/ns/net", O_RDONLY);
- if (CHECK_FAIL(self_net < 0)) {
+ saved_net = open("/proc/self/ns/net", O_RDONLY);
+ if (CHECK_FAIL(saved_net < 0)) {
perror("open(/proc/self/ns/net");
return;
}
@@ -111,30 +626,29 @@ void test_flow_dissector_reattach(void)
goto out_close;
}
- if (is_attached(init_net)) {
+ if (prog_is_attached(init_net)) {
test__skip();
printf("Can't test with flow dissector attached to init_net\n");
goto out_setns;
}
/* First run tests in root network namespace */
- do_flow_dissector_reattach();
+ run_tests(init_net);
/* Then repeat tests in a non-root namespace */
- err = unshare(CLONE_NEWNET);
- if (CHECK_FAIL(err)) {
- perror("unshare(CLONE_NEWNET)");
+ new_net = unshare_net(init_net);
+ if (new_net < 0)
goto out_setns;
- }
- do_flow_dissector_reattach();
+ run_tests(new_net);
+ close(new_net);
out_setns:
/* Move back to netns we started in. */
- err = setns(self_net, CLONE_NEWNET);
+ err = setns(saved_net, CLONE_NEWNET);
if (CHECK_FAIL(err))
perror("setns(/proc/self/ns/net)");
out_close:
close(init_net);
- close(self_net);
+ close(saved_net);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
new file mode 100644
index 000000000000..bb8541f240e2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <linux/compiler.h>
+#include <asm/barrier.h>
+#include <test_progs.h>
+#include <sys/mman.h>
+#include <sys/epoll.h>
+#include <time.h>
+#include <sched.h>
+#include <signal.h>
+#include <pthread.h>
+#include <sys/sysinfo.h>
+#include <linux/perf_event.h>
+#include <linux/ring_buffer.h>
+#include "test_ringbuf.skel.h"
+
+#define EDONE 7777
+
+static int duration = 0;
+
+struct sample {
+ int pid;
+ int seq;
+ long value;
+ char comm[16];
+};
+
+static int sample_cnt;
+
+static int process_sample(void *ctx, void *data, size_t len)
+{
+ struct sample *s = data;
+
+ sample_cnt++;
+
+ switch (s->seq) {
+ case 0:
+ CHECK(s->value != 333, "sample1_value", "exp %ld, got %ld\n",
+ 333L, s->value);
+ return 0;
+ case 1:
+ CHECK(s->value != 777, "sample2_value", "exp %ld, got %ld\n",
+ 777L, s->value);
+ return -EDONE;
+ default:
+ /* we don't care about the rest */
+ return 0;
+ }
+}
+
+static struct test_ringbuf *skel;
+static struct ring_buffer *ringbuf;
+
+static void trigger_samples()
+{
+ skel->bss->dropped = 0;
+ skel->bss->total = 0;
+ skel->bss->discarded = 0;
+
+ /* trigger exactly two samples */
+ skel->bss->value = 333;
+ syscall(__NR_getpgid);
+ skel->bss->value = 777;
+ syscall(__NR_getpgid);
+}
+
+static void *poll_thread(void *input)
+{
+ long timeout = (long)input;
+
+ return (void *)(long)ring_buffer__poll(ringbuf, timeout);
+}
+
+void test_ringbuf(void)
+{
+ const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample);
+ pthread_t thread;
+ long bg_ret = -1;
+ int err;
+
+ skel = test_ringbuf__open_and_load();
+ if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n"))
+ return;
+
+ /* only trigger BPF program for current process */
+ skel->bss->pid = getpid();
+
+ ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf),
+ process_sample, NULL, NULL);
+ if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n"))
+ goto cleanup;
+
+ err = test_ringbuf__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err))
+ goto cleanup;
+
+ trigger_samples();
+
+ /* 2 submitted + 1 discarded records */
+ CHECK(skel->bss->avail_data != 3 * rec_sz,
+ "err_avail_size", "exp %ld, got %ld\n",
+ 3L * rec_sz, skel->bss->avail_data);
+ CHECK(skel->bss->ring_size != 4096,
+ "err_ring_size", "exp %ld, got %ld\n",
+ 4096L, skel->bss->ring_size);
+ CHECK(skel->bss->cons_pos != 0,
+ "err_cons_pos", "exp %ld, got %ld\n",
+ 0L, skel->bss->cons_pos);
+ CHECK(skel->bss->prod_pos != 3 * rec_sz,
+ "err_prod_pos", "exp %ld, got %ld\n",
+ 3L * rec_sz, skel->bss->prod_pos);
+
+ /* poll for samples */
+ err = ring_buffer__poll(ringbuf, -1);
+
+ /* -EDONE is used as an indicator that we are done */
+ if (CHECK(err != -EDONE, "err_done", "done err: %d\n", err))
+ goto cleanup;
+
+ /* we expect extra polling to return nothing */
+ err = ring_buffer__poll(ringbuf, 0);
+ if (CHECK(err != 0, "extra_samples", "poll result: %d\n", err))
+ goto cleanup;
+
+ CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n",
+ 0L, skel->bss->dropped);
+ CHECK(skel->bss->total != 2, "err_total", "exp %ld, got %ld\n",
+ 2L, skel->bss->total);
+ CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n",
+ 1L, skel->bss->discarded);
+
+ /* now validate consumer position is updated and returned */
+ trigger_samples();
+ CHECK(skel->bss->cons_pos != 3 * rec_sz,
+ "err_cons_pos", "exp %ld, got %ld\n",
+ 3L * rec_sz, skel->bss->cons_pos);
+ err = ring_buffer__poll(ringbuf, -1);
+ CHECK(err <= 0, "poll_err", "err %d\n", err);
+
+ /* start poll in background w/ long timeout */
+ err = pthread_create(&thread, NULL, poll_thread, (void *)(long)10000);
+ if (CHECK(err, "bg_poll", "pthread_create failed: %d\n", err))
+ goto cleanup;
+
+ /* turn off notifications now */
+ skel->bss->flags = BPF_RB_NO_WAKEUP;
+
+ /* give background thread a bit of a time */
+ usleep(50000);
+ trigger_samples();
+ /* sleeping arbitrarily is bad, but no better way to know that
+ * epoll_wait() **DID NOT** unblock in background thread
+ */
+ usleep(50000);
+ /* background poll should still be blocked */
+ err = pthread_tryjoin_np(thread, (void **)&bg_ret);
+ if (CHECK(err != EBUSY, "try_join", "err %d\n", err))
+ goto cleanup;
+
+ /* BPF side did everything right */
+ CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n",
+ 0L, skel->bss->dropped);
+ CHECK(skel->bss->total != 2, "err_total", "exp %ld, got %ld\n",
+ 2L, skel->bss->total);
+ CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n",
+ 1L, skel->bss->discarded);
+
+ /* clear flags to return to "adaptive" notification mode */
+ skel->bss->flags = 0;
+
+ /* produce new samples, no notification should be triggered, because
+ * consumer is now behind
+ */
+ trigger_samples();
+
+ /* background poll should still be blocked */
+ err = pthread_tryjoin_np(thread, (void **)&bg_ret);
+ if (CHECK(err != EBUSY, "try_join", "err %d\n", err))
+ goto cleanup;
+
+ /* now force notifications */
+ skel->bss->flags = BPF_RB_FORCE_WAKEUP;
+ sample_cnt = 0;
+ trigger_samples();
+
+ /* now we should get a pending notification */
+ usleep(50000);
+ err = pthread_tryjoin_np(thread, (void **)&bg_ret);
+ if (CHECK(err, "join_bg", "err %d\n", err))
+ goto cleanup;
+
+ if (CHECK(bg_ret != 1, "bg_ret", "epoll_wait result: %ld", bg_ret))
+ goto cleanup;
+
+ /* 3 rounds, 2 samples each */
+ CHECK(sample_cnt != 6, "wrong_sample_cnt",
+ "expected to see %d samples, got %d\n", 6, sample_cnt);
+
+ /* BPF side did everything right */
+ CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n",
+ 0L, skel->bss->dropped);
+ CHECK(skel->bss->total != 2, "err_total", "exp %ld, got %ld\n",
+ 2L, skel->bss->total);
+ CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n",
+ 1L, skel->bss->discarded);
+
+ test_ringbuf__detach(skel);
+cleanup:
+ ring_buffer__free(ringbuf);
+ test_ringbuf__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
new file mode 100644
index 000000000000..78e450609803
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <sys/epoll.h>
+#include "test_ringbuf_multi.skel.h"
+
+static int duration = 0;
+
+struct sample {
+ int pid;
+ int seq;
+ long value;
+ char comm[16];
+};
+
+static int process_sample(void *ctx, void *data, size_t len)
+{
+ int ring = (unsigned long)ctx;
+ struct sample *s = data;
+
+ switch (s->seq) {
+ case 0:
+ CHECK(ring != 1, "sample1_ring", "exp %d, got %d\n", 1, ring);
+ CHECK(s->value != 333, "sample1_value", "exp %ld, got %ld\n",
+ 333L, s->value);
+ break;
+ case 1:
+ CHECK(ring != 2, "sample2_ring", "exp %d, got %d\n", 2, ring);
+ CHECK(s->value != 777, "sample2_value", "exp %ld, got %ld\n",
+ 777L, s->value);
+ break;
+ default:
+ CHECK(true, "extra_sample", "unexpected sample seq %d, val %ld\n",
+ s->seq, s->value);
+ return -1;
+ }
+
+ return 0;
+}
+
+void test_ringbuf_multi(void)
+{
+ struct test_ringbuf_multi *skel;
+ struct ring_buffer *ringbuf;
+ int err;
+
+ skel = test_ringbuf_multi__open_and_load();
+ if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n"))
+ return;
+
+ /* only trigger BPF program for current process */
+ skel->bss->pid = getpid();
+
+ ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf1),
+ process_sample, (void *)(long)1, NULL);
+ if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n"))
+ goto cleanup;
+
+ err = ring_buffer__add(ringbuf, bpf_map__fd(skel->maps.ringbuf2),
+ process_sample, (void *)(long)2);
+ if (CHECK(err, "ringbuf_add", "failed to add another ring\n"))
+ goto cleanup;
+
+ err = test_ringbuf_multi__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err))
+ goto cleanup;
+
+ /* trigger few samples, some will be skipped */
+ skel->bss->target_ring = 0;
+ skel->bss->value = 333;
+ syscall(__NR_getpgid);
+
+ /* skipped, no ringbuf in slot 1 */
+ skel->bss->target_ring = 1;
+ skel->bss->value = 555;
+ syscall(__NR_getpgid);
+
+ skel->bss->target_ring = 2;
+ skel->bss->value = 777;
+ syscall(__NR_getpgid);
+
+ /* poll for samples, should get 2 ringbufs back */
+ err = ring_buffer__poll(ringbuf, -1);
+ if (CHECK(err != 4, "poll_res", "expected 4 records, got %d\n", err))
+ goto cleanup;
+
+ /* expect extra polling to return nothing */
+ err = ring_buffer__poll(ringbuf, 0);
+ if (CHECK(err < 0, "extra_samples", "poll result: %d\n", err))
+ goto cleanup;
+
+ CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n",
+ 0L, skel->bss->dropped);
+ CHECK(skel->bss->skipped != 1, "err_skipped", "exp %ld, got %ld\n",
+ 1L, skel->bss->skipped);
+ CHECK(skel->bss->total != 2, "err_total", "exp %ld, got %ld\n",
+ 2L, skel->bss->total);
+
+cleanup:
+ ring_buffer__free(ringbuf);
+ test_ringbuf_multi__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
new file mode 100644
index 000000000000..f302ad84a298
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+void test_skb_helpers(void)
+{
+ struct __sk_buff skb = {
+ .wire_len = 100,
+ .gso_segs = 8,
+ .gso_size = 10,
+ };
+ struct bpf_prog_test_run_attr tattr = {
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .ctx_in = &skb,
+ .ctx_size_in = sizeof(skb),
+ .ctx_out = &skb,
+ .ctx_size_out = sizeof(skb),
+ };
+ struct bpf_object *obj;
+ int err;
+
+ err = bpf_prog_load("./test_skb_helpers.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ &tattr.prog_fd);
+ if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+ return;
+ err = bpf_prog_test_run_xattr(&tattr);
+ CHECK_ATTR(err, "len", "err %d errno %d\n", err, errno);
+ bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index aa43e0bd210c..96e7b7f84c65 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Cloudflare
+#include <error.h>
#include "test_progs.h"
+#include "test_skmsg_load_helpers.skel.h"
#define TCP_REPAIR 19 /* TCP sock is under repair right now */
@@ -70,10 +72,43 @@ out:
close(s);
}
+static void test_skmsg_helpers(enum bpf_map_type map_type)
+{
+ struct test_skmsg_load_helpers *skel;
+ int err, map, verdict;
+
+ skel = test_skmsg_load_helpers__open_and_load();
+ if (CHECK_FAIL(!skel)) {
+ perror("test_skmsg_load_helpers__open_and_load");
+ return;
+ }
+
+ verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
+ map = bpf_map__fd(skel->maps.sock_map);
+
+ err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_attach");
+ goto out;
+ }
+
+ err = bpf_prog_detach2(verdict, map, BPF_SK_MSG_VERDICT);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_detach2");
+ goto out;
+ }
+out:
+ test_skmsg_load_helpers__destroy(skel);
+}
+
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKMAP);
if (test__start_subtest("sockhash create_update_free"))
test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKHASH);
+ if (test__start_subtest("sockmap sk_msg load helpers"))
+ test_skmsg_helpers(BPF_MAP_TYPE_SOCKMAP);
+ if (test__start_subtest("sockhash sk_msg load helpers"))
+ test_skmsg_helpers(BPF_MAP_TYPE_SOCKHASH);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
new file mode 100644
index 000000000000..d19dbd668f6a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <uapi/linux/bpf.h>
+#include <linux/if_link.h>
+#include <test_progs.h>
+
+#include "test_xdp_devmap_helpers.skel.h"
+#include "test_xdp_with_devmap_helpers.skel.h"
+
+#define IFINDEX_LO 1
+
+struct bpf_devmap_val {
+ u32 ifindex; /* device index */
+ union {
+ int fd; /* prog fd on map write */
+ u32 id; /* prog id on map read */
+ } bpf_prog;
+};
+
+void test_xdp_with_devmap_helpers(void)
+{
+ struct test_xdp_with_devmap_helpers *skel;
+ struct bpf_prog_info info = {};
+ struct bpf_devmap_val val = {
+ .ifindex = IFINDEX_LO,
+ };
+ __u32 len = sizeof(info);
+ __u32 duration = 0, idx = 0;
+ int err, dm_fd, map_fd;
+
+
+ skel = test_xdp_with_devmap_helpers__open_and_load();
+ if (CHECK_FAIL(!skel)) {
+ perror("test_xdp_with_devmap_helpers__open_and_load");
+ return;
+ }
+
+ /* can not attach program with DEVMAPs that allow programs
+ * as xdp generic
+ */
+ dm_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
+ err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
+ CHECK(err == 0, "Generic attach of program with 8-byte devmap",
+ "should have failed\n");
+
+ dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
+ map_fd = bpf_map__fd(skel->maps.dm_ports);
+ err = bpf_obj_get_info_by_fd(dm_fd, &info, &len);
+ if (CHECK_FAIL(err))
+ goto out_close;
+
+ val.bpf_prog.fd = dm_fd;
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ CHECK(err, "Add program to devmap entry",
+ "err %d errno %d\n", err, errno);
+
+ err = bpf_map_lookup_elem(map_fd, &idx, &val);
+ CHECK(err, "Read devmap entry", "err %d errno %d\n", err, errno);
+ CHECK(info.id != val.bpf_prog.id, "Expected program id in devmap entry",
+ "expected %u read %u\n", info.id, val.bpf_prog.id);
+
+ /* can not attach BPF_XDP_DEVMAP program to a device */
+ err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
+ CHECK(err == 0, "Attach of BPF_XDP_DEVMAP program",
+ "should have failed\n");
+
+ val.ifindex = 1;
+ val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ CHECK(err == 0, "Add non-BPF_XDP_DEVMAP program to devmap entry",
+ "should have failed\n");
+
+out_close:
+ test_xdp_with_devmap_helpers__destroy(skel);
+}
+
+void test_neg_xdp_devmap_helpers(void)
+{
+ struct test_xdp_devmap_helpers *skel;
+ __u32 duration = 0;
+
+ skel = test_xdp_devmap_helpers__open_and_load();
+ if (CHECK(skel,
+ "Load of XDP program accessing egress ifindex without attach type",
+ "should have failed\n")) {
+ test_xdp_devmap_helpers__destroy(skel);
+ }
+}
+
+
+void test_xdp_devmap_attach(void)
+{
+ if (test__start_subtest("DEVMAP with programs in entries"))
+ test_xdp_with_devmap_helpers();
+
+ if (test__start_subtest("Verifier check of DEVMAP programs"))
+ test_neg_xdp_devmap_helpers();
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index 9941f0ba471e..de6de9221518 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -20,20 +20,20 @@
#include <bpf/bpf_endian.h>
int _version SEC("version") = 1;
-#define PROG(F) SEC(#F) int bpf_func_##F
+#define PROG(F) PROG_(F, _##F)
+#define PROG_(NUM, NAME) SEC("flow_dissector/"#NUM) int bpf_func##NAME
/* These are the identifiers of the BPF programs that will be used in tail
* calls. Name is limited to 16 characters, with the terminating character and
* bpf_func_ above, we have only 6 to work with, anything after will be cropped.
*/
-enum {
- IP,
- IPV6,
- IPV6OP, /* Destination/Hop-by-Hop Options IPv6 Extension header */
- IPV6FR, /* Fragmentation IPv6 Extension Header */
- MPLS,
- VLAN,
-};
+#define IP 0
+#define IPV6 1
+#define IPV6OP 2 /* Destination/Hop-by-Hop Options IPv6 Ext. Header */
+#define IPV6FR 3 /* Fragmentation IPv6 Extension Header */
+#define MPLS 4
+#define VLAN 5
+#define MAX_PROG 6
#define IP_MF 0x2000
#define IP_OFFSET 0x1FFF
@@ -59,7 +59,7 @@ struct frag_hdr {
struct {
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
- __uint(max_entries, 8);
+ __uint(max_entries, MAX_PROG);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index c2c85c31cffd..1ab2c5eba86c 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -9,6 +9,8 @@
#include <linux/in6.h>
#include <sys/socket.h>
#include <netinet/tcp.h>
+#include <linux/if.h>
+#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
@@ -21,6 +23,10 @@
#define TCP_CA_NAME_MAX 16
#endif
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+
int _version SEC("version") = 1;
__attribute__ ((noinline))
@@ -75,6 +81,29 @@ static __inline int set_cc(struct bpf_sock_addr *ctx)
return 0;
}
+static __inline int bind_to_device(struct bpf_sock_addr *ctx)
+{
+ char veth1[IFNAMSIZ] = "test_sock_addr1";
+ char veth2[IFNAMSIZ] = "test_sock_addr2";
+ char missing[IFNAMSIZ] = "nonexistent_dev";
+ char del_bind[IFNAMSIZ] = "";
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &veth1, sizeof(veth1)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &veth2, sizeof(veth2)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &missing, sizeof(missing)) != -ENODEV)
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &del_bind, sizeof(del_bind)))
+ return 1;
+
+ return 0;
+}
+
SEC("cgroup/connect4")
int connect_v4_prog(struct bpf_sock_addr *ctx)
{
@@ -88,6 +117,10 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
tuple.ipv4.daddr = bpf_htonl(DST_REWRITE_IP4);
tuple.ipv4.dport = bpf_htons(DST_REWRITE_PORT4);
+ /* Bind to device and unbind it. */
+ if (bind_to_device(ctx))
+ return 0;
+
if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
return 0;
else if (ctx->type == SOCK_STREAM)
diff --git a/tools/testing/selftests/bpf/progs/perfbuf_bench.c b/tools/testing/selftests/bpf/progs/perfbuf_bench.c
new file mode 100644
index 000000000000..e5ab4836a641
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/perfbuf_bench.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(value_size, sizeof(int));
+ __uint(key_size, sizeof(int));
+} perfbuf SEC(".maps");
+
+const volatile int batch_cnt = 0;
+
+long sample_val = 42;
+long dropped __attribute__((aligned(128))) = 0;
+
+SEC("fentry/__x64_sys_getpgid")
+int bench_perfbuf(void *ctx)
+{
+ __u64 *sample;
+ int i;
+
+ for (i = 0; i < batch_cnt; i++) {
+ if (bpf_perf_event_output(ctx, &perfbuf, BPF_F_CURRENT_CPU,
+ &sample_val, sizeof(sample_val)))
+ __sync_add_and_fetch(&dropped, 1);
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/ringbuf_bench.c b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
new file mode 100644
index 000000000000..123607d314d6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+} ringbuf SEC(".maps");
+
+const volatile int batch_cnt = 0;
+const volatile long use_output = 0;
+
+long sample_val = 42;
+long dropped __attribute__((aligned(128))) = 0;
+
+const volatile long wakeup_data_size = 0;
+
+static __always_inline long get_flags()
+{
+ long sz;
+
+ if (!wakeup_data_size)
+ return 0;
+
+ sz = bpf_ringbuf_query(&ringbuf, BPF_RB_AVAIL_DATA);
+ return sz >= wakeup_data_size ? BPF_RB_FORCE_WAKEUP : BPF_RB_NO_WAKEUP;
+}
+
+SEC("fentry/__x64_sys_getpgid")
+int bench_ringbuf(void *ctx)
+{
+ long *sample, flags;
+ int i;
+
+ if (!use_output) {
+ for (i = 0; i < batch_cnt; i++) {
+ sample = bpf_ringbuf_reserve(&ringbuf,
+ sizeof(sample_val), 0);
+ if (!sample) {
+ __sync_add_and_fetch(&dropped, 1);
+ } else {
+ *sample = sample_val;
+ flags = get_flags();
+ bpf_ringbuf_submit(sample, flags);
+ }
+ }
+ } else {
+ for (i = 0; i < batch_cnt; i++) {
+ flags = get_flags();
+ if (bpf_ringbuf_output(&ringbuf, &sample_val,
+ sizeof(sample_val), flags))
+ __sync_add_and_fetch(&dropped, 1);
+ }
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c
new file mode 100644
index 000000000000..8ba9959b036b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct sample {
+ int pid;
+ int seq;
+ long value;
+ char comm[16];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 1 << 12);
+} ringbuf SEC(".maps");
+
+/* inputs */
+int pid = 0;
+long value = 0;
+long flags = 0;
+
+/* outputs */
+long total = 0;
+long discarded = 0;
+long dropped = 0;
+
+long avail_data = 0;
+long ring_size = 0;
+long cons_pos = 0;
+long prod_pos = 0;
+
+/* inner state */
+long seq = 0;
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int test_ringbuf(void *ctx)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+ struct sample *sample;
+ int zero = 0;
+
+ if (cur_pid != pid)
+ return 0;
+
+ sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0);
+ if (!sample) {
+ __sync_fetch_and_add(&dropped, 1);
+ return 1;
+ }
+
+ sample->pid = pid;
+ bpf_get_current_comm(sample->comm, sizeof(sample->comm));
+ sample->value = value;
+
+ sample->seq = seq++;
+ __sync_fetch_and_add(&total, 1);
+
+ if (sample->seq & 1) {
+ /* copy from reserved sample to a new one... */
+ bpf_ringbuf_output(&ringbuf, sample, sizeof(*sample), flags);
+ /* ...and then discard reserved sample */
+ bpf_ringbuf_discard(sample, flags);
+ __sync_fetch_and_add(&discarded, 1);
+ } else {
+ bpf_ringbuf_submit(sample, flags);
+ }
+
+ avail_data = bpf_ringbuf_query(&ringbuf, BPF_RB_AVAIL_DATA);
+ ring_size = bpf_ringbuf_query(&ringbuf, BPF_RB_RING_SIZE);
+ cons_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_CONS_POS);
+ prod_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_PROD_POS);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
new file mode 100644
index 000000000000..edf3b6953533
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct sample {
+ int pid;
+ int seq;
+ long value;
+ char comm[16];
+};
+
+struct ringbuf_map {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 1 << 12);
+} ringbuf1 SEC(".maps"),
+ ringbuf2 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 4);
+ __type(key, int);
+ __array(values, struct ringbuf_map);
+} ringbuf_arr SEC(".maps") = {
+ .values = {
+ [0] = &ringbuf1,
+ [2] = &ringbuf2,
+ },
+};
+
+/* inputs */
+int pid = 0;
+int target_ring = 0;
+long value = 0;
+
+/* outputs */
+long total = 0;
+long dropped = 0;
+long skipped = 0;
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int test_ringbuf(void *ctx)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+ struct sample *sample;
+ void *rb;
+ int zero = 0;
+
+ if (cur_pid != pid)
+ return 0;
+
+ rb = bpf_map_lookup_elem(&ringbuf_arr, &target_ring);
+ if (!rb) {
+ skipped += 1;
+ return 1;
+ }
+
+ sample = bpf_ringbuf_reserve(rb, sizeof(*sample), 0);
+ if (!sample) {
+ dropped += 1;
+ return 1;
+ }
+
+ sample->pid = pid;
+ bpf_get_current_comm(sample->comm, sizeof(sample->comm));
+ sample->value = value;
+
+ sample->seq = total;
+ total += 1;
+
+ bpf_ringbuf_submit(sample, 0);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_skb_helpers.c b/tools/testing/selftests/bpf/progs/test_skb_helpers.c
new file mode 100644
index 000000000000..bb3fbf1a29e3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_skb_helpers.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define TEST_COMM_LEN 16
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, u32);
+ __type(value, u32);
+} cgroup_map SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+SEC("classifier/test_skb_helpers")
+int test_skb_helpers(struct __sk_buff *skb)
+{
+ struct task_struct *task;
+ char comm[TEST_COMM_LEN];
+ __u32 tpid;
+
+ task = (struct task_struct *)bpf_get_current_task();
+ bpf_probe_read_kernel(&tpid , sizeof(tpid), &task->tgid);
+ bpf_probe_read_kernel_str(&comm, sizeof(comm), &task->comm);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c b/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c
new file mode 100644
index 000000000000..45e8fc75a739
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Isovalent, Inc.
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_hash SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, __u32);
+ __type(value, __u64);
+} socket_storage SEC(".maps");
+
+SEC("sk_msg")
+int prog_msg_verdict(struct sk_msg_md *msg)
+{
+ struct task_struct *task = (struct task_struct *)bpf_get_current_task();
+ int verdict = SK_PASS;
+ __u32 pid, tpid;
+ __u64 *sk_stg;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ sk_stg = bpf_sk_storage_get(&socket_storage, msg->sk, 0, BPF_SK_STORAGE_GET_F_CREATE);
+ if (!sk_stg)
+ return SK_DROP;
+ *sk_stg = pid;
+ bpf_probe_read_kernel(&tpid , sizeof(tpid), &task->tgid);
+ if (pid != tpid)
+ verdict = SK_DROP;
+ bpf_sk_storage_delete(&socket_storage, (void *)msg->sk);
+ return verdict;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
index a443d3637db3..057036ca1111 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
@@ -79,11 +79,18 @@ struct {
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 1);
+ __uint(max_entries, 2);
__type(key, int);
__type(value, int);
} sock_skb_opts SEC(".maps");
+struct {
+ __uint(type, TEST_MAP_TYPE);
+ __uint(max_entries, 20);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+} tls_sock_map SEC(".maps");
+
SEC("sk_skb1")
int bpf_prog1(struct __sk_buff *skb)
{
@@ -118,6 +125,43 @@ int bpf_prog2(struct __sk_buff *skb)
}
+SEC("sk_skb3")
+int bpf_prog3(struct __sk_buff *skb)
+{
+ const int one = 1;
+ int err, *f, ret = SK_PASS;
+ void *data_end;
+ char *c;
+
+ err = bpf_skb_pull_data(skb, 19);
+ if (err)
+ goto tls_out;
+
+ c = (char *)(long)skb->data;
+ data_end = (void *)(long)skb->data_end;
+
+ if (c + 18 < data_end)
+ memcpy(&c[13], "PASS", 4);
+ f = bpf_map_lookup_elem(&sock_skb_opts, &one);
+ if (f && *f) {
+ __u64 flags = 0;
+
+ ret = 0;
+ flags = *f;
+#ifdef SOCKMAP
+ return bpf_sk_redirect_map(skb, &tls_sock_map, ret, flags);
+#else
+ return bpf_sk_redirect_hash(skb, &tls_sock_map, &ret, flags);
+#endif
+ }
+
+ f = bpf_map_lookup_elem(&sock_skb_opts, &one);
+ if (f && *f)
+ ret = SK_DROP;
+tls_out:
+ return ret;
+}
+
SEC("sockops")
int bpf_sockmap(struct bpf_sock_ops *skops)
{
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
new file mode 100644
index 000000000000..e5c0f131c8a7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+/* fails to load without expected_attach_type = BPF_XDP_DEVMAP
+ * because of access to egress_ifindex
+ */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+SEC("xdp_dm_log")
+int xdpdm_devlog(struct xdp_md *ctx)
+{
+ char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n";
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ unsigned int len = data_end - data;
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+ ctx->ingress_ifindex, ctx->egress_ifindex, len);
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
new file mode 100644
index 000000000000..deef0e050863
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 4);
+} dm_ports SEC(".maps");
+
+SEC("xdp_redir")
+int xdp_redir_prog(struct xdp_md *ctx)
+{
+ return bpf_redirect_map(&dm_ports, 1, 0);
+}
+
+/* invalid program on DEVMAP entry;
+ * SEC name means expected attach type not set
+ */
+SEC("xdp_dummy")
+int xdp_dummy_prog(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+/* valid program on DEVMAP entry via SEC name;
+ * has access to egress and ingress ifindex
+ */
+SEC("xdp_devmap")
+int xdp_dummy_dm(struct xdp_md *ctx)
+{
+ char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n";
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ unsigned int len = data_end - data;
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+ ctx->ingress_ifindex, ctx->egress_ifindex, len);
+
+ return XDP_PASS;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index c6766b2cff85..6a12a0e01e07 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1394,23 +1394,25 @@ static void test_map_rdonly(void)
key = 1;
value = 1234;
- /* Insert key=1 element. */
+ /* Try to insert key=1 element. */
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == -1 &&
errno == EPERM);
- /* Check that key=2 is not found. */
+ /* Check that key=1 is not found. */
assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == ENOENT);
+
+ close(fd);
}
-static void test_map_wronly(void)
+static void test_map_wronly_hash(void)
{
int fd, key = 0, value = 0;
fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
MAP_SIZE, map_flags | BPF_F_WRONLY);
if (fd < 0) {
- printf("Failed to create map for read only test '%s'!\n",
+ printf("Failed to create map for write only test '%s'!\n",
strerror(errno));
exit(1);
}
@@ -1420,9 +1422,49 @@ static void test_map_wronly(void)
/* Insert key=1 element. */
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
- /* Check that key=2 is not found. */
+ /* Check that reading elements and keys from the map is not allowed. */
assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM);
assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM);
+
+ close(fd);
+}
+
+static void test_map_wronly_stack_or_queue(enum bpf_map_type map_type)
+{
+ int fd, value = 0;
+
+ assert(map_type == BPF_MAP_TYPE_QUEUE ||
+ map_type == BPF_MAP_TYPE_STACK);
+ fd = bpf_create_map(map_type, 0, sizeof(value), MAP_SIZE,
+ map_flags | BPF_F_WRONLY);
+ /* Stack/Queue maps do not support BPF_F_NO_PREALLOC */
+ if (map_flags & BPF_F_NO_PREALLOC) {
+ assert(fd < 0 && errno == EINVAL);
+ return;
+ }
+ if (fd < 0) {
+ printf("Failed to create map '%s'!\n", strerror(errno));
+ exit(1);
+ }
+
+ value = 1234;
+ assert(bpf_map_update_elem(fd, NULL, &value, BPF_ANY) == 0);
+
+ /* Peek element should fail */
+ assert(bpf_map_lookup_elem(fd, NULL, &value) == -1 && errno == EPERM);
+
+ /* Pop element should fail */
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) == -1 &&
+ errno == EPERM);
+
+ close(fd);
+}
+
+static void test_map_wronly(void)
+{
+ test_map_wronly_hash();
+ test_map_wronly_stack_or_queue(BPF_MAP_TYPE_STACK);
+ test_map_wronly_stack_or_queue(BPF_MAP_TYPE_QUEUE);
}
static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size,
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index c80643828b82..37695fc8096a 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -63,8 +63,8 @@ int s1, s2, c1, c2, p1, p2;
int test_cnt;
int passed;
int failed;
-int map_fd[8];
-struct bpf_map *maps[8];
+int map_fd[9];
+struct bpf_map *maps[9];
int prog_fd[11];
int txmsg_pass;
@@ -79,7 +79,10 @@ int txmsg_end_push;
int txmsg_start_pop;
int txmsg_pop;
int txmsg_ingress;
-int txmsg_skb;
+int txmsg_redir_skb;
+int txmsg_ktls_skb;
+int txmsg_ktls_skb_drop;
+int txmsg_ktls_skb_redir;
int ktls;
int peek_flag;
@@ -104,7 +107,7 @@ static const struct option long_options[] = {
{"txmsg_start_pop", required_argument, NULL, 'w'},
{"txmsg_pop", required_argument, NULL, 'x'},
{"txmsg_ingress", no_argument, &txmsg_ingress, 1 },
- {"txmsg_skb", no_argument, &txmsg_skb, 1 },
+ {"txmsg_redir_skb", no_argument, &txmsg_redir_skb, 1 },
{"ktls", no_argument, &ktls, 1 },
{"peek", no_argument, &peek_flag, 1 },
{"whitelist", required_argument, NULL, 'n' },
@@ -169,7 +172,8 @@ static void test_reset(void)
txmsg_start_push = txmsg_end_push = 0;
txmsg_pass = txmsg_drop = txmsg_redir = 0;
txmsg_apply = txmsg_cork = 0;
- txmsg_ingress = txmsg_skb = 0;
+ txmsg_ingress = txmsg_redir_skb = 0;
+ txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0;
}
static int test_start_subtest(const struct _test *t, struct sockmap_options *o)
@@ -502,14 +506,41 @@ unwind_iov:
static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz)
{
- int i, j, bytes_cnt = 0;
+ int i, j = 0, bytes_cnt = 0;
unsigned char k = 0;
for (i = 0; i < msg->msg_iovlen; i++) {
unsigned char *d = msg->msg_iov[i].iov_base;
- for (j = 0;
- j < msg->msg_iov[i].iov_len && size; j++) {
+ /* Special case test for skb ingress + ktls */
+ if (i == 0 && txmsg_ktls_skb) {
+ if (msg->msg_iov[i].iov_len < 4)
+ return -EIO;
+ if (txmsg_ktls_skb_redir) {
+ if (memcmp(&d[13], "PASS", 4) != 0) {
+ fprintf(stderr,
+ "detected redirect ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[13], d[14], d[15], d[16]);
+ return -EIO;
+ }
+ d[13] = 0;
+ d[14] = 1;
+ d[15] = 2;
+ d[16] = 3;
+ j = 13;
+ } else if (txmsg_ktls_skb) {
+ if (memcmp(d, "PASS", 4) != 0) {
+ fprintf(stderr,
+ "detected ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[0], d[1], d[2], d[3]);
+ return -EIO;
+ }
+ d[0] = 0;
+ d[1] = 1;
+ d[2] = 2;
+ d[3] = 3;
+ }
+ }
+
+ for (; j < msg->msg_iov[i].iov_len && size; j++) {
if (d[j] != k++) {
fprintf(stderr,
"detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
@@ -724,7 +755,7 @@ static int sendmsg_test(struct sockmap_options *opt)
rxpid = fork();
if (rxpid == 0) {
iov_buf -= (txmsg_pop - txmsg_start_pop + 1);
- if (opt->drop_expected)
+ if (opt->drop_expected || txmsg_ktls_skb_drop)
_exit(0);
if (!iov_buf) /* zero bytes sent case */
@@ -911,8 +942,28 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test)
return err;
}
+ /* Attach programs to TLS sockmap */
+ if (txmsg_ktls_skb) {
+ err = bpf_prog_attach(prog_fd[0], map_fd[8],
+ BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
+ prog_fd[0], map_fd[8], err, strerror(errno));
+ return err;
+ }
+
+ err = bpf_prog_attach(prog_fd[2], map_fd[8],
+ BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err) {
+ fprintf(stderr, "ERROR: bpf_prog_attach (TLS sockmap): %d (%s)\n",
+ err, strerror(errno));
+ return err;
+ }
+ }
+
/* Attach to cgroups */
- err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+ err = bpf_prog_attach(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
if (err) {
fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
err, strerror(errno));
@@ -928,15 +979,15 @@ run:
/* Attach txmsg program to sockmap */
if (txmsg_pass)
- tx_prog_fd = prog_fd[3];
- else if (txmsg_redir)
tx_prog_fd = prog_fd[4];
- else if (txmsg_apply)
+ else if (txmsg_redir)
tx_prog_fd = prog_fd[5];
- else if (txmsg_cork)
+ else if (txmsg_apply)
tx_prog_fd = prog_fd[6];
- else if (txmsg_drop)
+ else if (txmsg_cork)
tx_prog_fd = prog_fd[7];
+ else if (txmsg_drop)
+ tx_prog_fd = prog_fd[8];
else
tx_prog_fd = 0;
@@ -1108,7 +1159,35 @@ run:
}
}
- if (txmsg_skb) {
+ if (txmsg_ktls_skb) {
+ int ingress = BPF_F_INGRESS;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd[8], &i, &p2, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
+ err, strerror(errno));
+ }
+
+ if (txmsg_ktls_skb_redir) {
+ i = 1;
+ err = bpf_map_update_elem(map_fd[7],
+ &i, &ingress, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+ err, strerror(errno));
+ }
+ }
+
+ if (txmsg_ktls_skb_drop) {
+ i = 1;
+ err = bpf_map_update_elem(map_fd[7], &i, &i, BPF_ANY);
+ }
+ }
+
+ if (txmsg_redir_skb) {
int skb_fd = (test == SENDMSG || test == SENDPAGE) ?
p2 : p1;
int ingress = BPF_F_INGRESS;
@@ -1123,8 +1202,7 @@ run:
}
i = 3;
- err = bpf_map_update_elem(map_fd[0],
- &i, &skb_fd, BPF_ANY);
+ err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
@@ -1158,9 +1236,12 @@ run:
fprintf(stderr, "unknown test\n");
out:
/* Detatch and zero all the maps */
- bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
+ bpf_prog_detach2(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS);
bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER);
bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT);
+ bpf_prog_detach2(prog_fd[0], map_fd[8], BPF_SK_SKB_STREAM_PARSER);
+ bpf_prog_detach2(prog_fd[2], map_fd[8], BPF_SK_SKB_STREAM_VERDICT);
+
if (tx_prog_fd >= 0)
bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT);
@@ -1229,8 +1310,10 @@ static void test_options(char *options)
}
if (txmsg_ingress)
strncat(options, "ingress,", OPTSTRING);
- if (txmsg_skb)
- strncat(options, "skb,", OPTSTRING);
+ if (txmsg_redir_skb)
+ strncat(options, "redir_skb,", OPTSTRING);
+ if (txmsg_ktls_skb)
+ strncat(options, "ktls_skb,", OPTSTRING);
if (ktls)
strncat(options, "ktls,", OPTSTRING);
if (peek_flag)
@@ -1362,6 +1445,40 @@ static void test_txmsg_ingress_redir(int cgrp, struct sockmap_options *opt)
test_send(opt, cgrp);
}
+static void test_txmsg_skb(int cgrp, struct sockmap_options *opt)
+{
+ bool data = opt->data_test;
+ int k = ktls;
+
+ opt->data_test = true;
+ ktls = 1;
+
+ txmsg_pass = txmsg_drop = 0;
+ txmsg_ingress = txmsg_redir = 0;
+ txmsg_ktls_skb = 1;
+ txmsg_pass = 1;
+
+ /* Using data verification so ensure iov layout is
+ * expected from test receiver side. e.g. has enough
+ * bytes to write test code.
+ */
+ opt->iov_length = 100;
+ opt->iov_count = 1;
+ opt->rate = 1;
+ test_exec(cgrp, opt);
+
+ txmsg_ktls_skb_drop = 1;
+ test_exec(cgrp, opt);
+
+ txmsg_ktls_skb_drop = 0;
+ txmsg_ktls_skb_redir = 1;
+ test_exec(cgrp, opt);
+
+ opt->data_test = data;
+ ktls = k;
+}
+
+
/* Test cork with hung data. This tests poor usage patterns where
* cork can leave data on the ring if user program is buggy and
* doesn't flush them somehow. They do take some time however
@@ -1542,11 +1659,13 @@ char *map_names[] = {
"sock_bytes",
"sock_redir_flags",
"sock_skb_opts",
+ "tls_sock_map",
};
int prog_attach_type[] = {
BPF_SK_SKB_STREAM_PARSER,
BPF_SK_SKB_STREAM_VERDICT,
+ BPF_SK_SKB_STREAM_VERDICT,
BPF_CGROUP_SOCK_OPS,
BPF_SK_MSG_VERDICT,
BPF_SK_MSG_VERDICT,
@@ -1560,6 +1679,7 @@ int prog_attach_type[] = {
int prog_type[] = {
BPF_PROG_TYPE_SK_SKB,
BPF_PROG_TYPE_SK_SKB,
+ BPF_PROG_TYPE_SK_SKB,
BPF_PROG_TYPE_SOCK_OPS,
BPF_PROG_TYPE_SK_MSG,
BPF_PROG_TYPE_SK_MSG,
@@ -1620,6 +1740,7 @@ struct _test test[] = {
{"txmsg test redirect", test_txmsg_redir},
{"txmsg test drop", test_txmsg_drop},
{"txmsg test ingress redirect", test_txmsg_ingress_redir},
+ {"txmsg test skb", test_txmsg_skb},
{"txmsg test apply", test_txmsg_apply},
{"txmsg test cork", test_txmsg_cork},
{"txmsg test hanging corks", test_txmsg_cork_hangs},
diff --git a/tools/testing/selftests/bpf/verifier/and.c b/tools/testing/selftests/bpf/verifier/and.c
index e0fad1548737..d781bc86e100 100644
--- a/tools/testing/selftests/bpf/verifier/and.c
+++ b/tools/testing/selftests/bpf/verifier/and.c
@@ -15,7 +15,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
- .errstr = "R0 max value is outside of the array range",
+ .errstr = "R0 max value is outside of the allowed memory range",
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
@@ -44,7 +44,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
- .errstr = "R0 max value is outside of the array range",
+ .errstr = "R0 max value is outside of the allowed memory range",
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c
index f3c33e128709..1c4b1939f5a8 100644
--- a/tools/testing/selftests/bpf/verifier/array_access.c
+++ b/tools/testing/selftests/bpf/verifier/array_access.c
@@ -117,7 +117,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
- .errstr = "R0 min value is outside of the array range",
+ .errstr = "R0 min value is outside of the allowed memory range",
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
@@ -137,7 +137,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
- .errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map",
+ .errstr = "R0 unbounded memory access, make sure to bounds check any such access",
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c
index 58f4aa593b1b..4d6645f2874c 100644
--- a/tools/testing/selftests/bpf/verifier/bounds.c
+++ b/tools/testing/selftests/bpf/verifier/bounds.c
@@ -20,7 +20,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 3 },
- .errstr = "R0 max value is outside of the array range",
+ .errstr = "R0 max value is outside of the allowed memory range",
.result = REJECT,
},
{
@@ -146,7 +146,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 3 },
- .errstr = "R0 min value is outside of the array range",
+ .errstr = "R0 min value is outside of the allowed memory range",
.result = REJECT
},
{
@@ -354,7 +354,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 3 },
- .errstr = "R0 max value is outside of the array range",
+ .errstr = "R0 max value is outside of the allowed memory range",
.result = REJECT
},
{
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 7629a0cebb9b..94258c6b5235 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -105,7 +105,7 @@
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.fixup_map_hash_8b = { 16 },
.result = REJECT,
- .errstr = "R0 min value is outside of the array range",
+ .errstr = "R0 min value is outside of the allowed memory range",
},
{
"calls: overlapping caller/callee",
diff --git a/tools/testing/selftests/bpf/verifier/direct_value_access.c b/tools/testing/selftests/bpf/verifier/direct_value_access.c
index b9fb28e8e224..988f46a1a4c7 100644
--- a/tools/testing/selftests/bpf/verifier/direct_value_access.c
+++ b/tools/testing/selftests/bpf/verifier/direct_value_access.c
@@ -68,7 +68,7 @@
},
.fixup_map_array_48b = { 1 },
.result = REJECT,
- .errstr = "R1 min value is outside of the array range",
+ .errstr = "R1 min value is outside of the allowed memory range",
},
{
"direct map access, write test 7",
@@ -220,7 +220,7 @@
},
.fixup_map_array_small = { 1 },
.result = REJECT,
- .errstr = "R1 min value is outside of the array range",
+ .errstr = "R1 min value is outside of the allowed memory range",
},
{
"direct map access, write test 19",
diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
index 67ab12410050..5a605ae131a9 100644
--- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
+++ b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
@@ -318,7 +318,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 4 },
- .errstr = "R1 min value is outside of the array range",
+ .errstr = "R1 min value is outside of the allowed memory range",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
diff --git a/tools/testing/selftests/bpf/verifier/helper_value_access.c b/tools/testing/selftests/bpf/verifier/helper_value_access.c
index 7572e403ddb9..961f28139b96 100644
--- a/tools/testing/selftests/bpf/verifier/helper_value_access.c
+++ b/tools/testing/selftests/bpf/verifier/helper_value_access.c
@@ -280,7 +280,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
- .errstr = "R1 min value is outside of the array range",
+ .errstr = "R1 min value is outside of the allowed memory range",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -415,7 +415,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
- .errstr = "R1 min value is outside of the array range",
+ .errstr = "R1 min value is outside of the allowed memory range",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -926,7 +926,7 @@
},
.fixup_map_hash_16b = { 3, 10 },
.result = REJECT,
- .errstr = "R2 unbounded memory access, make sure to bounds check any array access into a map",
+ .errstr = "R2 unbounded memory access, make sure to bounds check any such access",
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
{
diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
index a53d99cebd9f..97ee658e1242 100644
--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
@@ -50,7 +50,7 @@
.fixup_map_array_48b = { 8 },
.result = ACCEPT,
.result_unpriv = REJECT,
- .errstr_unpriv = "R0 min value is outside of the array range",
+ .errstr_unpriv = "R0 min value is outside of the allowed memory range",
.retval = 1,
},
{
@@ -325,7 +325,7 @@
},
.fixup_map_array_48b = { 3 },
.result = REJECT,
- .errstr = "R0 min value is outside of the array range",
+ .errstr = "R0 min value is outside of the allowed memory range",
.result_unpriv = REJECT,
.errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
},
@@ -601,7 +601,7 @@
},
.fixup_map_array_48b = { 3 },
.result = REJECT,
- .errstr = "R1 max value is outside of the array range",
+ .errstr = "R1 max value is outside of the allowed memory range",
.errstr_unpriv = "R1 pointer arithmetic of map value goes out of range",
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
@@ -726,7 +726,7 @@
},
.fixup_map_array_48b = { 3 },
.result = REJECT,
- .errstr = "R0 min value is outside of the array range",
+ .errstr = "R0 min value is outside of the allowed memory range",
},
{
"map access: value_ptr -= known scalar, 2",