From 283ca526a9bd75aed7350220d7b1f8027d99c3fd Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Tue, 12 Dec 2017 02:25:30 +0100
Subject: bpf: fix corruption on concurrent perf_event_output calls

When tracing and networking programs are both attached in the system and
both use event-output helpers that eventually call into
perf_event_output(), then we could end up in a situation where the
tracing attached program runs in user context while a cls_bpf program is
triggered on that same CPU out of softirq context. Since both rely on
the same per-cpu perf_sample_data, we could potentially corrupt it. This
can only ever happen in a combination of the two types; all tracing
programs use a bpf_prog_active counter to bail out in case a program is
already running on that CPU out of a different context. XDP and cls_bpf
programs by themselves don't have this issue as they run in the same
context only. Therefore, split both perf_sample_data so they cannot be
accessed from each other.

Fixes: 20b9d7ac4852 ("bpf: avoid excessive stack usage for perf_sample_data")
Reported-by: Alexei Starovoitov
Signed-off-by: Daniel Borkmann
Tested-by: Song Liu
Acked-by: Alexei Starovoitov
Signed-off-by: Alexei Starovoitov
---
 kernel/trace/bpf_trace.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

(limited to 'kernel')

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0ce99c379c30..40207c2a4113 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -343,14 +343,13 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
 	.arg4_type	= ARG_CONST_SIZE,
 };
 
-static DEFINE_PER_CPU(struct perf_sample_data, bpf_sd);
+static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd);
 
 static __always_inline u64
 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
-			u64 flags, struct perf_raw_record *raw)
+			u64 flags, struct perf_sample_data *sd)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
-	struct perf_sample_data *sd = this_cpu_ptr(&bpf_sd);
 	unsigned int cpu = smp_processor_id();
 	u64 index = flags & BPF_F_INDEX_MASK;
 	struct bpf_event_entry *ee;
@@ -373,8 +372,6 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
 	if (unlikely(event->oncpu != cpu))
 		return -EOPNOTSUPP;
 
-	perf_sample_data_init(sd, 0, 0);
-	sd->raw = raw;
 	perf_event_output(event, sd, regs);
 	return 0;
 }
@@ -382,6 +379,7 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
 BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
 	   u64, flags, void *, data, u64, size)
 {
+	struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd);
 	struct perf_raw_record raw = {
 		.frag = {
 			.size = size,
@@ -392,7 +390,10 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
 	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
 		return -EINVAL;
 
-	return __bpf_perf_event_output(regs, map, flags, &raw);
+	perf_sample_data_init(sd, 0, 0);
+	sd->raw = &raw;
+
+	return __bpf_perf_event_output(regs, map, flags, sd);
 }
 
 static const struct bpf_func_proto bpf_perf_event_output_proto = {
@@ -407,10 +408,12 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = {
 };
 
 static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
+static DEFINE_PER_CPU(struct perf_sample_data, bpf_misc_sd);
 
 u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
 {
+	struct perf_sample_data *sd = this_cpu_ptr(&bpf_misc_sd);
 	struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
 	struct perf_raw_frag frag = {
 		.copy		= ctx_copy,
@@ -428,8 +431,10 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 	};
 
 	perf_fetch_caller_regs(regs);
+	perf_sample_data_init(sd, 0, 0);
+	sd->raw = &raw;
 
-	return __bpf_perf_event_output(regs, map, flags, &raw);
+	return __bpf_perf_event_output(regs, map, flags, sd);
 }
 
 BPF_CALL_0(bpf_get_current_task)
-- 
cgit v1.2.3
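The fix above boils down to one rule: every class of caller that can run on the
same CPU at the same time gets its own per-CPU scratch perf_sample_data
(bpf_trace_sd for the tracing helper, bpf_misc_sd for bpf_event_output()), so a
softirq-context user can no longer overwrite data a process-context user has
already initialized. The user-space sketch below illustrates that split with
made-up names (process_ctx_sd, softirq_ctx_sd) and a SIGALRM handler standing
in for softirq context; it is only an analogy, not kernel code.

/* User-space sketch (not kernel code): two contexts that can interrupt
 * each other on the same CPU each get their own scratch buffer, the way
 * the patch splits bpf_trace_sd from bpf_misc_sd.  A SIGALRM handler
 * stands in for softirq context; all names here are made up. */
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

struct sample { char payload[64]; };

static struct sample process_ctx_sd;   /* "tracing" caller's buffer    */
static struct sample softirq_ctx_sd;   /* "networking" caller's buffer */

static void fake_softirq(int sig)
{
	(void)sig;
	/* Writes its own buffer; would corrupt process_ctx_sd if shared. */
	memcpy(softirq_ctx_sd.payload, "softirq event", 14);
}

int main(void)
{
	signal(SIGALRM, fake_softirq);
	alarm(1);

	for (int i = 0; i < 5; i++) {
		snprintf(process_ctx_sd.payload, sizeof(process_ctx_sd.payload),
			 "trace event %d", i);
		usleep(300000);                 /* window for the "softirq" */
		puts(process_ctx_sd.payload);   /* remains intact           */
	}
	return 0;
}

Within each class at most one program can be active per CPU (tracing programs
check bpf_prog_active, and cls_bpf/XDP run in a single context), which is why
one scratch buffer per class is enough.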
From 9147efcbe0b7cc96b18eb64b1a3f0d4bba81443c Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Tue, 12 Dec 2017 14:22:39 -0800
Subject: bpf: add schedule points to map alloc/free

While using large percpu maps, htab_map_alloc() can hold the CPU for
hundreds of ms. This patch adds cond_resched() calls to the percpu
alloc/free call sites, all of which run in process context.

Signed-off-by: Eric Dumazet
Signed-off-by: Alexei Starovoitov
---
 kernel/bpf/hashtab.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel')

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index e469e05c8e83..3905d4bc5b80 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -114,6 +114,7 @@ static void htab_free_elems(struct bpf_htab *htab)
 		pptr = htab_elem_get_ptr(get_htab_elem(htab, i),
 					 htab->map.key_size);
 		free_percpu(pptr);
+		cond_resched();
 	}
 free_elems:
 	bpf_map_area_free(htab->elems);
@@ -159,6 +160,7 @@ static int prealloc_init(struct bpf_htab *htab)
 			goto free_elems;
 		htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
 				  pptr);
+		cond_resched();
 	}
 
 skip_percpu_elems:
-- 
cgit v1.2.3
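The reasoning behind the second patch generalizes: any loop that touches every
element of a potentially huge map runs for a long time, and since these
alloc/free paths execute in process context where sleeping is allowed, a
cond_resched() per iteration lets the scheduler run other tasks instead of
stalling the CPU for hundreds of milliseconds. A kernel-style sketch of the
pattern follows, using a made-up helper name (prealloc_percpu_bufs() is not
part of the patch):

/* Kernel-style sketch, not from the patch: a long preallocation loop in
 * process context drops in cond_resched() each iteration so it cannot
 * monopolize the CPU.  prealloc_percpu_bufs() is a hypothetical helper
 * used only for illustration. */
#include <linux/errno.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/types.h>

static int prealloc_percpu_bufs(void __percpu **bufs, u32 nr, u32 size)
{
	u32 i;

	for (i = 0; i < nr; i++) {
		bufs[i] = __alloc_percpu(size, 8);
		if (!bufs[i])
			return -ENOMEM;         /* caller unwinds what exists */
		cond_resched();                 /* allow other tasks to run   */
	}
	return 0;
}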