diff options
author | wuqiang.matt <wuqiang.matt@bytedance.com> | 2023-10-17 21:56:52 +0800 |
---|---|---|
committer | Masami Hiramatsu (Google) <mhiramat@kernel.org> | 2023-10-18 23:59:54 +0900 |
commit | 4bbd9345565933823f38a419df65661f12adbe5e (patch) | |
tree | debaf6ea237ce00ce92be26ce3ef8d6999cbd24e /include/linux | |
parent | 92f90d3b0d5e384f218c8068138ed1b3afa025af (diff) |
kprobes: kretprobe scalability improvement
kretprobe is using freelist to manage return-instances, but freelist,
as LIFO queue based on singly linked list, scales badly and reduces
the overall throughput of kretprobed routines, especially for high
contention scenarios.
Here's a typical throughput test of sys_prctl (counts in 10 seconds,
measured with perf stat -a -I 10000 -e syscalls:sys_enter_prctl):
OS: Debian 10 X86_64, Linux 6.5rc7 with freelist
HW: XEON 8336C x 2, 64 cores/128 threads, DDR4 3200MT/s
1T 2T 4T 8T 16T 24T
24150045 29317964 15446741 12494489 18287272 17708768
32T 48T 64T 72T 96T 128T
16200682 13737658 11645677 11269858 10470118 9931051
This patch introduces objpool to replace freelist. objpool is a
high performance queue, which can bring near-linear scalability
to kretprobed routines. Tests of kretprobe throughput show the
biggest ratio as 159x of original freelist. Here's the result:
1T 2T 4T 8T 16T
native: 41186213 82336866 164250978 328662645 658810299
freelist: 24150045 29317964 15446741 12494489 18287272
objpool: 23926730 48010314 96125218 191782984 385091769
32T 48T 64T 96T 128T
native: 1330338351 1969957941 2512291791 2615754135 2671040914
freelist: 16200682 13737658 11645677 10470118 9931051
objpool: 764481096 1147149781 1456220214 1502109662 1579015050
Testings on 96-core ARM64 output similarly, but with the biggest
ratio up to 448x:
OS: Debian 10 AARCH64, Linux 6.5rc7
HW: Kunpeng-920 96 cores/2 sockets/4 NUMA nodes, DDR4 2933 MT/s
1T 2T 4T 8T 16T
native: . 30066096 63569843 126194076 257447289 505800181
freelist: 16152090 11064397 11124068 7215768 5663013
objpool: 13997541 28032100 55726624 110099926 221498787
24T 32T 48T 64T 96T
native: 763305277 1015925192 1521075123 2033009392 3021013752
freelist: 5015810 4602893 3766792 3382478 2945292
objpool: 328192025 439439564 668534502 887401381 1319972072
Link: https://lore.kernel.org/all/20231017135654.82270-4-wuqiang.matt@bytedance.com/
Signed-off-by: wuqiang.matt <wuqiang.matt@bytedance.com>
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/kprobes.h | 11 | ||||
-rw-r--r-- | include/linux/rethook.h | 16 |
2 files changed, 7 insertions, 20 deletions
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 85a64cb95d75..365eb092e9c4 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -26,8 +26,7 @@ #include <linux/rcupdate.h> #include <linux/mutex.h> #include <linux/ftrace.h> -#include <linux/refcount.h> -#include <linux/freelist.h> +#include <linux/objpool.h> #include <linux/rethook.h> #include <asm/kprobes.h> @@ -141,7 +140,7 @@ static inline bool kprobe_ftrace(struct kprobe *p) */ struct kretprobe_holder { struct kretprobe *rp; - refcount_t ref; + struct objpool_head pool; }; struct kretprobe { @@ -154,7 +153,6 @@ struct kretprobe { #ifdef CONFIG_KRETPROBE_ON_RETHOOK struct rethook *rh; #else - struct freelist_head freelist; struct kretprobe_holder *rph; #endif }; @@ -165,10 +163,7 @@ struct kretprobe_instance { #ifdef CONFIG_KRETPROBE_ON_RETHOOK struct rethook_node node; #else - union { - struct freelist_node freelist; - struct rcu_head rcu; - }; + struct rcu_head rcu; struct llist_node llist; struct kretprobe_holder *rph; kprobe_opcode_t *ret_addr; diff --git a/include/linux/rethook.h b/include/linux/rethook.h index 26b6f3c81a76..ce69b2b7bc35 100644 --- a/include/linux/rethook.h +++ b/include/linux/rethook.h @@ -6,11 +6,10 @@ #define _LINUX_RETHOOK_H #include <linux/compiler.h> -#include <linux/freelist.h> +#include <linux/objpool.h> #include <linux/kallsyms.h> #include <linux/llist.h> #include <linux/rcupdate.h> -#include <linux/refcount.h> struct rethook_node; @@ -30,14 +29,12 @@ typedef void (*rethook_handler_t) (struct rethook_node *, void *, unsigned long, struct rethook { void *data; rethook_handler_t handler; - struct freelist_head pool; - refcount_t ref; + struct objpool_head pool; struct rcu_head rcu; }; /** * struct rethook_node - The rethook shadow-stack entry node. - * @freelist: The freelist, linked to struct rethook::pool. * @rcu: The rcu_head for deferred freeing. * @llist: The llist, linked to a struct task_struct::rethooks. * @rethook: The pointer to the struct rethook. @@ -48,20 +45,16 @@ struct rethook { * on each entry of the shadow stack. */ struct rethook_node { - union { - struct freelist_node freelist; - struct rcu_head rcu; - }; + struct rcu_head rcu; struct llist_node llist; struct rethook *rethook; unsigned long ret_addr; unsigned long frame; }; -struct rethook *rethook_alloc(void *data, rethook_handler_t handler); +struct rethook *rethook_alloc(void *data, rethook_handler_t handler, int size, int num); void rethook_stop(struct rethook *rh); void rethook_free(struct rethook *rh); -void rethook_add_node(struct rethook *rh, struct rethook_node *node); struct rethook_node *rethook_try_get(struct rethook *rh); void rethook_recycle(struct rethook_node *node); void rethook_hook(struct rethook_node *node, struct pt_regs *regs, bool mcount); @@ -98,4 +91,3 @@ void rethook_flush_task(struct task_struct *tk); #endif #endif - |