summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorDaniel Borkmann <daniel@iogearbox.net>2016-07-14 18:08:05 +0200
committerDavid S. Miller <davem@davemloft.net>2016-07-15 14:23:56 -0700
commit555c8a8623a3a87b3c990ba30b7fd2e5914e41d2 (patch)
tree51e4fcdea68602c29e21fdd23519a214b2208ed6 /kernel
parent8e7a3920ac277dd4e690c0e70c9750176e3acb83 (diff)
bpf: avoid stack copy and use skb ctx for event output
This work addresses a couple of issues bpf_skb_event_output() helper currently has: i) We need two copies instead of just a single one for the skb data when it should be part of a sample. The data can be non-linear and thus needs to be extracted via bpf_skb_load_bytes() helper first, and then copied once again into the ring buffer slot. ii) Since bpf_skb_load_bytes() currently needs to be used first, the helper needs to see a constant size on the passed stack buffer to make sure BPF verifier can do sanity checks on it during verification time. Thus, just passing skb->len (or any other non-constant value) wouldn't work, but changing bpf_skb_load_bytes() is also not the proper solution, since the two copies are generally still needed. iii) bpf_skb_load_bytes() is just for rather small buffers like headers, since they need to sit on the limited BPF stack anyway. Instead of working around in bpf_skb_load_bytes(), this work improves the bpf_skb_event_output() helper to address all 3 at once. We can make use of the passed in skb context that we have in the helper anyway, and use some of the reserved flag bits as a length argument. The helper will use the new __output_custom() facility from perf side with bpf_skb_copy() as callback helper to walk and extract the data. It will pass the data for setup to bpf_event_output(), which generates and pushes the raw record with an additional frag part. The linear data used in the first frag of the record serves as programmatically defined meta data passed along with the appended sample. Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/core.c6
-rw-r--r--kernel/trace/bpf_trace.c33
2 files changed, 19 insertions, 20 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index d638062f66d6..03fd23d4d587 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1054,9 +1054,11 @@ const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
return NULL;
}
-const struct bpf_func_proto * __weak bpf_get_event_output_proto(void)
+u64 __weak
+bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
+ void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
{
- return NULL;
+ return -ENOTSUPP;
}
/* Always built-in helper functions. */
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index c35883a9bc11..ebfbb7dd7033 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -298,29 +298,26 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = {
static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
-static u64 bpf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
+u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
+ void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
{
struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
+ struct perf_raw_frag frag = {
+ .copy = ctx_copy,
+ .size = ctx_size,
+ .data = ctx,
+ };
+ struct perf_raw_record raw = {
+ .frag = {
+ .next = ctx_size ? &frag : NULL,
+ .size = meta_size,
+ .data = meta,
+ },
+ };
perf_fetch_caller_regs(regs);
- return bpf_perf_event_output((long)regs, r2, flags, r4, size);
-}
-
-static const struct bpf_func_proto bpf_event_output_proto = {
- .func = bpf_event_output,
- .gpl_only = true,
- .ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_CONST_MAP_PTR,
- .arg3_type = ARG_ANYTHING,
- .arg4_type = ARG_PTR_TO_STACK,
- .arg5_type = ARG_CONST_STACK_SIZE,
-};
-
-const struct bpf_func_proto *bpf_get_event_output_proto(void)
-{
- return &bpf_event_output_proto;
+ return __bpf_perf_event_output(regs, map, flags, &raw);
}
static u64 bpf_get_current_task(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)