diff options
author | Thomas Zimmermann <tzimmermann@suse.de> | 2021-12-16 14:47:38 +0100 |
---|---|---|
committer | Thomas Zimmermann <tzimmermann@suse.de> | 2021-12-16 14:48:27 +0100 |
commit | 9758ff2fa240173e9a45613b07774b7a78b7653e (patch) | |
tree | 44c1951f9fe4ded7f18d26ca677d006c0e191569 /tools | |
parent | 0b665d4af35837f0a0ae63135b84a3c187c1db3b (diff) | |
parent | 244a36e50da05c33b860d20638ee4628017a5334 (diff) |
Merge drm/drm-next into drm-misc-nextdrm-misc-next-2021-12-16
Backmerging for v5.16-rc5. Resolves a conflict between drm-misc-next
and drm-misc-fixes in the vc4 driver.
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Diffstat (limited to 'tools')
121 files changed, 3547 insertions, 1066 deletions
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index d0ce5cfd3ac1..d5b5f2ab87a0 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -277,6 +277,7 @@ #define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */ #define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ #define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ +#define X86_FEATURE_XFD (10*32+ 4) /* "" eXtended Feature Disabling */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -298,6 +299,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ +#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 2ef1f6513c68..5a776a08f78c 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -504,4 +504,8 @@ struct kvm_pmu_event_filter { #define KVM_PMU_EVENT_ALLOW 0 #define KVM_PMU_EVENT_DENY 1 +/* for KVM_{GET,SET,HAS}_DEVICE_ATTR */ +#define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */ +#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */ + #endif /* _ASM_X86_KVM_H */ diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index a59cb0ee609c..73409e27be01 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -83,6 +83,7 @@ struct btf_id { int cnt; }; int addr_cnt; + bool is_set; Elf64_Addr addr[ADDR_CNT]; }; @@ -451,8 +452,10 @@ static int symbols_collect(struct object *obj) * in symbol's size, together with 'cnt' field hence * that - 1. */ - if (id) + if (id) { id->cnt = sym.st_size / sizeof(int) - 1; + id->is_set = true; + } } else { pr_err("FAILED unsupported prefix %s\n", prefix); return -1; @@ -568,9 +571,8 @@ static int id_patch(struct object *obj, struct btf_id *id) int *ptr = data->d_buf; int i; - if (!id->id) { + if (!id->id && !id->is_set) pr_err("WARN: resolve_btfids: unresolved symbol %s\n", id->name); - } for (i = 0; i < id->addr_cnt; i++) { unsigned long addr = id->addr[i]; diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile index bbd1150578f7..8791d0e2762b 100644 --- a/tools/bpf/runqslower/Makefile +++ b/tools/bpf/runqslower/Makefile @@ -88,5 +88,4 @@ $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OU $(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BPFTOOL_OUTPUT) $(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) \ - LIBBPF_OUTPUT=$(BPFOBJ_OUTPUT) \ - LIBBPF_DESTDIR=$(BPF_DESTDIR) CC=$(HOSTCC) LD=$(HOSTLD) + CC=$(HOSTCC) LD=$(HOSTLD) diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 45a9a59828c3..ae61f464043a 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -48,7 +48,6 @@ FEATURE_TESTS_BASIC := \ numa_num_possible_cpus \ libperl \ libpython \ - libpython-version \ libslang \ libslang-include-subdir \ libtraceevent \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 0a3244ad9673..1480910c792e 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -32,7 +32,6 @@ FILES= \ test-numa_num_possible_cpus.bin \ test-libperl.bin \ test-libpython.bin \ - test-libpython-version.bin \ test-libslang.bin \ test-libslang-include-subdir.bin \ test-libtraceevent.bin \ @@ -227,9 +226,6 @@ $(OUTPUT)test-libperl.bin: $(OUTPUT)test-libpython.bin: $(BUILD) $(FLAGS_PYTHON_EMBED) -$(OUTPUT)test-libpython-version.bin: - $(BUILD) - $(OUTPUT)test-libbfd.bin: $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 920439527291..5ffafb967b6e 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -14,10 +14,6 @@ # include "test-libpython.c" #undef main -#define main main_test_libpython_version -# include "test-libpython-version.c" -#undef main - #define main main_test_libperl # include "test-libperl.c" #undef main @@ -177,7 +173,6 @@ int main(int argc, char *argv[]) { main_test_libpython(); - main_test_libpython_version(); main_test_libperl(); main_test_hello(); main_test_libelf(); @@ -200,7 +195,6 @@ int main(int argc, char *argv[]) main_test_timerfd(); main_test_stackprotector_all(); main_test_libdw_dwarf_unwind(); - main_test_sync_compare_and_swap(argc, argv); main_test_zlib(); main_test_pthread_attr_setaffinity_np(); main_test_pthread_barrier(); diff --git a/tools/build/feature/test-libpython-version.c b/tools/build/feature/test-libpython-version.c deleted file mode 100644 index 47714b942d4d..000000000000 --- a/tools/build/feature/test-libpython-version.c +++ /dev/null @@ -1,11 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <Python.h> - -#if PY_VERSION_HEX >= 0x03000000 - #error -#endif - -int main(void) -{ - return 0; -} diff --git a/tools/include/linux/debug_locks.h b/tools/include/linux/debug_locks.h deleted file mode 100644 index 72d595ce764a..000000000000 --- a/tools/include/linux/debug_locks.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_DEBUG_LOCKS_H_ -#define _LIBLOCKDEP_DEBUG_LOCKS_H_ - -#include <stddef.h> -#include <linux/compiler.h> -#include <asm/bug.h> - -#define DEBUG_LOCKS_WARN_ON(x) WARN_ON(x) - -extern bool debug_locks; -extern bool debug_locks_silent; - -#endif diff --git a/tools/include/linux/hardirq.h b/tools/include/linux/hardirq.h deleted file mode 100644 index b25580b6a9be..000000000000 --- a/tools/include/linux/hardirq.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_LINUX_HARDIRQ_H_ -#define _LIBLOCKDEP_LINUX_HARDIRQ_H_ - -#define SOFTIRQ_BITS 0UL -#define HARDIRQ_BITS 0UL -#define SOFTIRQ_SHIFT 0UL -#define HARDIRQ_SHIFT 0UL -#define hardirq_count() 0UL -#define softirq_count() 0UL - -#endif diff --git a/tools/include/linux/irqflags.h b/tools/include/linux/irqflags.h deleted file mode 100644 index 501262aee8ff..000000000000 --- a/tools/include/linux/irqflags.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_ -#define _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_ - -# define lockdep_hardirq_context() 0 -# define lockdep_softirq_context(p) 0 -# define lockdep_hardirqs_enabled() 0 -# define lockdep_softirqs_enabled(p) 0 -# define lockdep_hardirq_enter() do { } while (0) -# define lockdep_hardirq_exit() do { } while (0) -# define lockdep_softirq_enter() do { } while (0) -# define lockdep_softirq_exit() do { } while (0) -# define INIT_TRACE_IRQFLAGS - -# define stop_critical_timings() do { } while (0) -# define start_critical_timings() do { } while (0) - -#define raw_local_irq_disable() do { } while (0) -#define raw_local_irq_enable() do { } while (0) -#define raw_local_irq_save(flags) ((flags) = 0) -#define raw_local_irq_restore(flags) ((void)(flags)) -#define raw_local_save_flags(flags) ((flags) = 0) -#define raw_irqs_disabled_flags(flags) ((void)(flags)) -#define raw_irqs_disabled() 0 -#define raw_safe_halt() - -#define local_irq_enable() do { } while (0) -#define local_irq_disable() do { } while (0) -#define local_irq_save(flags) ((flags) = 0) -#define local_irq_restore(flags) ((void)(flags)) -#define local_save_flags(flags) ((flags) = 0) -#define irqs_disabled() (1) -#define irqs_disabled_flags(flags) ((void)(flags), 0) -#define safe_halt() do { } while (0) - -#define trace_lock_release(x, y) -#define trace_lock_acquire(a, b, c, d, e, f, g) - -#endif diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h index a7e54a08fb54..3e8df500cfbd 100644 --- a/tools/include/linux/kernel.h +++ b/tools/include/linux/kernel.h @@ -7,6 +7,7 @@ #include <assert.h> #include <linux/build_bug.h> #include <linux/compiler.h> +#include <linux/math.h> #include <endian.h> #include <byteswap.h> @@ -14,8 +15,6 @@ #define UINT_MAX (~0U) #endif -#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) - #define PERF_ALIGN(x, a) __PERF_ALIGN_MASK(x, (typeof(x))(a)-1) #define __PERF_ALIGN_MASK(x, mask) (((x)+(mask))&~(mask)) @@ -52,15 +51,6 @@ _min1 < _min2 ? _min1 : _min2; }) #endif -#ifndef roundup -#define roundup(x, y) ( \ -{ \ - const typeof(y) __y = y; \ - (((x) + (__y - 1)) / __y) * __y; \ -} \ -) -#endif - #ifndef BUG_ON #ifdef NDEBUG #define BUG_ON(cond) do { if (cond) {} } while (0) @@ -104,16 +94,6 @@ int scnprintf_pad(char * buf, size_t size, const char * fmt, ...); #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) -/* - * This looks more complex than it should be. But we need to - * get the type for the ~ right in round_down (it needs to be - * as wide as the result!), and we want to evaluate the macro - * arguments just once each. - */ -#define __round_mask(x, y) ((__typeof__(x))((y)-1)) -#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) -#define round_down(x, y) ((x) & ~__round_mask(x, y)) - #define current_gfp_context(k) 0 #define synchronize_rcu() diff --git a/tools/include/linux/lockdep.h b/tools/include/linux/lockdep.h deleted file mode 100644 index e56997288f2b..000000000000 --- a/tools/include/linux/lockdep.h +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_LOCKDEP_H_ -#define _LIBLOCKDEP_LOCKDEP_H_ - -#include <sys/prctl.h> -#include <sys/syscall.h> -#include <string.h> -#include <limits.h> -#include <linux/utsname.h> -#include <linux/compiler.h> -#include <linux/export.h> -#include <linux/kern_levels.h> -#include <linux/err.h> -#include <linux/rcu.h> -#include <linux/list.h> -#include <linux/hardirq.h> -#include <unistd.h> - -#define MAX_LOCK_DEPTH 63UL - -#define asmlinkage -#define __visible - -#include "../../../include/linux/lockdep.h" - -struct task_struct { - u64 curr_chain_key; - int lockdep_depth; - unsigned int lockdep_recursion; - struct held_lock held_locks[MAX_LOCK_DEPTH]; - gfp_t lockdep_reclaim_gfp; - int pid; - int state; - char comm[17]; -}; - -#define TASK_RUNNING 0 - -extern struct task_struct *__curr(void); - -#define current (__curr()) - -static inline int debug_locks_off(void) -{ - return 1; -} - -#define task_pid_nr(tsk) ((tsk)->pid) - -#define KSYM_NAME_LEN 128 -#define printk(...) dprintf(STDOUT_FILENO, __VA_ARGS__) -#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__) -#define pr_warn pr_err -#define pr_cont pr_err - -#define list_del_rcu list_del - -#define atomic_t unsigned long -#define atomic_inc(x) ((*(x))++) - -#define print_tainted() "" -#define static_obj(x) 1 - -#define debug_show_all_locks() -extern void debug_check_no_locks_held(void); - -static __used bool __is_kernel_percpu_address(unsigned long addr, void *can_addr) -{ - return false; -} - -#endif diff --git a/tools/include/linux/math.h b/tools/include/linux/math.h new file mode 100644 index 000000000000..4e7af99ec9eb --- /dev/null +++ b/tools/include/linux/math.h @@ -0,0 +1,25 @@ +#ifndef _TOOLS_MATH_H +#define _TOOLS_MATH_H + +/* + * This looks more complex than it should be. But we need to + * get the type for the ~ right in round_down (it needs to be + * as wide as the result!), and we want to evaluate the macro + * arguments just once each. + */ +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) +#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) +#define round_down(x, y) ((x) & ~__round_mask(x, y)) + +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) + +#ifndef roundup +#define roundup(x, y) ( \ +{ \ + const typeof(y) __y = y; \ + (((x) + (__y - 1)) / __y) * __y; \ +} \ +) +#endif + +#endif diff --git a/tools/include/linux/proc_fs.h b/tools/include/linux/proc_fs.h deleted file mode 100644 index 8b3b03b64fda..000000000000 --- a/tools/include/linux/proc_fs.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef _TOOLS_INCLUDE_LINUX_PROC_FS_H -#define _TOOLS_INCLUDE_LINUX_PROC_FS_H - -#endif /* _TOOLS_INCLUDE_LINUX_PROC_FS_H */ diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h index c934572d935c..622266b197d0 100644 --- a/tools/include/linux/spinlock.h +++ b/tools/include/linux/spinlock.h @@ -37,6 +37,4 @@ static inline bool arch_spin_is_locked(arch_spinlock_t *mutex) return true; } -#include <linux/lockdep.h> - #endif diff --git a/tools/include/linux/stacktrace.h b/tools/include/linux/stacktrace.h deleted file mode 100644 index ae343ac35bfa..000000000000 --- a/tools/include/linux/stacktrace.h +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_LINUX_STACKTRACE_H_ -#define _LIBLOCKDEP_LINUX_STACKTRACE_H_ - -#include <execinfo.h> - -struct stack_trace { - unsigned int nr_entries, max_entries; - unsigned long *entries; - int skip; -}; - -static inline void print_stack_trace(struct stack_trace *trace, int spaces) -{ - backtrace_symbols_fd((void **)trace->entries, trace->nr_entries, 1); -} - -#define save_stack_trace(trace) \ - ((trace)->nr_entries = \ - backtrace((void **)(trace)->entries, (trace)->max_entries)) - -static inline int dump_stack(void) -{ - void *array[64]; - size_t size; - - size = backtrace(array, 64); - backtrace_symbols_fd(array, size, 1); - - return 0; -} - -#endif diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index b3610fdd1fee..eebd3894fe89 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -7,24 +7,23 @@ /* This struct should be in sync with struct rtnl_link_stats64 */ struct rtnl_link_stats { - __u32 rx_packets; /* total packets received */ - __u32 tx_packets; /* total packets transmitted */ - __u32 rx_bytes; /* total bytes received */ - __u32 tx_bytes; /* total bytes transmitted */ - __u32 rx_errors; /* bad packets received */ - __u32 tx_errors; /* packet transmit problems */ - __u32 rx_dropped; /* no space in linux buffers */ - __u32 tx_dropped; /* no space available in linux */ - __u32 multicast; /* multicast packets received */ + __u32 rx_packets; + __u32 tx_packets; + __u32 rx_bytes; + __u32 tx_bytes; + __u32 rx_errors; + __u32 tx_errors; + __u32 rx_dropped; + __u32 tx_dropped; + __u32 multicast; __u32 collisions; - /* detailed rx_errors: */ __u32 rx_length_errors; - __u32 rx_over_errors; /* receiver ring buff overflow */ - __u32 rx_crc_errors; /* recved pkt with crc error */ - __u32 rx_frame_errors; /* recv'd frame alignment error */ - __u32 rx_fifo_errors; /* recv'r fifo overrun */ - __u32 rx_missed_errors; /* receiver missed packet */ + __u32 rx_over_errors; + __u32 rx_crc_errors; + __u32 rx_frame_errors; + __u32 rx_fifo_errors; + __u32 rx_missed_errors; /* detailed tx_errors */ __u32 tx_aborted_errors; @@ -37,29 +36,201 @@ struct rtnl_link_stats { __u32 rx_compressed; __u32 tx_compressed; - __u32 rx_nohandler; /* dropped, no handler found */ + __u32 rx_nohandler; }; -/* The main device statistics structure */ +/** + * struct rtnl_link_stats64 - The main device statistics structure. + * + * @rx_packets: Number of good packets received by the interface. + * For hardware interfaces counts all good packets received from the device + * by the host, including packets which host had to drop at various stages + * of processing (even in the driver). + * + * @tx_packets: Number of packets successfully transmitted. + * For hardware interfaces counts packets which host was able to successfully + * hand over to the device, which does not necessarily mean that packets + * had been successfully transmitted out of the device, only that device + * acknowledged it copied them out of host memory. + * + * @rx_bytes: Number of good received bytes, corresponding to @rx_packets. + * + * For IEEE 802.3 devices should count the length of Ethernet Frames + * excluding the FCS. + * + * @tx_bytes: Number of good transmitted bytes, corresponding to @tx_packets. + * + * For IEEE 802.3 devices should count the length of Ethernet Frames + * excluding the FCS. + * + * @rx_errors: Total number of bad packets received on this network device. + * This counter must include events counted by @rx_length_errors, + * @rx_crc_errors, @rx_frame_errors and other errors not otherwise + * counted. + * + * @tx_errors: Total number of transmit problems. + * This counter must include events counter by @tx_aborted_errors, + * @tx_carrier_errors, @tx_fifo_errors, @tx_heartbeat_errors, + * @tx_window_errors and other errors not otherwise counted. + * + * @rx_dropped: Number of packets received but not processed, + * e.g. due to lack of resources or unsupported protocol. + * For hardware interfaces this counter may include packets discarded + * due to L2 address filtering but should not include packets dropped + * by the device due to buffer exhaustion which are counted separately in + * @rx_missed_errors (since procfs folds those two counters together). + * + * @tx_dropped: Number of packets dropped on their way to transmission, + * e.g. due to lack of resources. + * + * @multicast: Multicast packets received. + * For hardware interfaces this statistic is commonly calculated + * at the device level (unlike @rx_packets) and therefore may include + * packets which did not reach the host. + * + * For IEEE 802.3 devices this counter may be equivalent to: + * + * - 30.3.1.1.21 aMulticastFramesReceivedOK + * + * @collisions: Number of collisions during packet transmissions. + * + * @rx_length_errors: Number of packets dropped due to invalid length. + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter should be equivalent to a sum + * of the following attributes: + * + * - 30.3.1.1.23 aInRangeLengthErrors + * - 30.3.1.1.24 aOutOfRangeLengthField + * - 30.3.1.1.25 aFrameTooLongErrors + * + * @rx_over_errors: Receiver FIFO overflow event counter. + * + * Historically the count of overflow events. Such events may be + * reported in the receive descriptors or via interrupts, and may + * not correspond one-to-one with dropped packets. + * + * The recommended interpretation for high speed interfaces is - + * number of packets dropped because they did not fit into buffers + * provided by the host, e.g. packets larger than MTU or next buffer + * in the ring was not available for a scatter transfer. + * + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * This statistics was historically used interchangeably with + * @rx_fifo_errors. + * + * This statistic corresponds to hardware events and is not commonly used + * on software devices. + * + * @rx_crc_errors: Number of packets received with a CRC error. + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter must be equivalent to: + * + * - 30.3.1.1.6 aFrameCheckSequenceErrors + * + * @rx_frame_errors: Receiver frame alignment errors. + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter should be equivalent to: + * + * - 30.3.1.1.7 aAlignmentErrors + * + * @rx_fifo_errors: Receiver FIFO error counter. + * + * Historically the count of overflow events. Those events may be + * reported in the receive descriptors or via interrupts, and may + * not correspond one-to-one with dropped packets. + * + * This statistics was used interchangeably with @rx_over_errors. + * Not recommended for use in drivers for high speed interfaces. + * + * This statistic is used on software devices, e.g. to count software + * packet queue overflow (can) or sequencing errors (GRE). + * + * @rx_missed_errors: Count of packets missed by the host. + * Folded into the "drop" counter in `/proc/net/dev`. + * + * Counts number of packets dropped by the device due to lack + * of buffer space. This usually indicates that the host interface + * is slower than the network interface, or host is not keeping up + * with the receive packet rate. + * + * This statistic corresponds to hardware events and is not used + * on software devices. + * + * @tx_aborted_errors: + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * For IEEE 802.3 devices capable of half-duplex operation this counter + * must be equivalent to: + * + * - 30.3.1.1.11 aFramesAbortedDueToXSColls + * + * High speed interfaces may use this counter as a general device + * discard counter. + * + * @tx_carrier_errors: Number of frame transmission errors due to loss + * of carrier during transmission. + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter must be equivalent to: + * + * - 30.3.1.1.13 aCarrierSenseErrors + * + * @tx_fifo_errors: Number of frame transmission errors due to device + * FIFO underrun / underflow. This condition occurs when the device + * begins transmission of a frame but is unable to deliver the + * entire frame to the transmitter in time for transmission. + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * @tx_heartbeat_errors: Number of Heartbeat / SQE Test errors for + * old half-duplex Ethernet. + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices possibly equivalent to: + * + * - 30.3.2.1.4 aSQETestErrors + * + * @tx_window_errors: Number of frame transmission errors due + * to late collisions (for Ethernet - after the first 64B of transmission). + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter must be equivalent to: + * + * - 30.3.1.1.10 aLateCollisions + * + * @rx_compressed: Number of correctly received compressed packets. + * This counters is only meaningful for interfaces which support + * packet compression (e.g. CSLIP, PPP). + * + * @tx_compressed: Number of transmitted compressed packets. + * This counters is only meaningful for interfaces which support + * packet compression (e.g. CSLIP, PPP). + * + * @rx_nohandler: Number of packets received on the interface + * but dropped by the networking stack because the device is + * not designated to receive packets (e.g. backup link in a bond). + */ struct rtnl_link_stats64 { - __u64 rx_packets; /* total packets received */ - __u64 tx_packets; /* total packets transmitted */ - __u64 rx_bytes; /* total bytes received */ - __u64 tx_bytes; /* total bytes transmitted */ - __u64 rx_errors; /* bad packets received */ - __u64 tx_errors; /* packet transmit problems */ - __u64 rx_dropped; /* no space in linux buffers */ - __u64 tx_dropped; /* no space available in linux */ - __u64 multicast; /* multicast packets received */ + __u64 rx_packets; + __u64 tx_packets; + __u64 rx_bytes; + __u64 tx_bytes; + __u64 rx_errors; + __u64 tx_errors; + __u64 rx_dropped; + __u64 tx_dropped; + __u64 multicast; __u64 collisions; /* detailed rx_errors: */ __u64 rx_length_errors; - __u64 rx_over_errors; /* receiver ring buff overflow */ - __u64 rx_crc_errors; /* recved pkt with crc error */ - __u64 rx_frame_errors; /* recv'd frame alignment error */ - __u64 rx_fifo_errors; /* recv'r fifo overrun */ - __u64 rx_missed_errors; /* receiver missed packet */ + __u64 rx_over_errors; + __u64 rx_crc_errors; + __u64 rx_frame_errors; + __u64 rx_fifo_errors; + __u64 rx_missed_errors; /* detailed tx_errors */ __u64 tx_aborted_errors; @@ -71,8 +242,7 @@ struct rtnl_link_stats64 { /* for cslip etc */ __u64 rx_compressed; __u64 tx_compressed; - - __u64 rx_nohandler; /* dropped, no handler found */ + __u64 rx_nohandler; }; /* The struct should be in sync with struct ifmap */ @@ -170,12 +340,29 @@ enum { IFLA_PROP_LIST, IFLA_ALT_IFNAME, /* Alternative ifname */ IFLA_PERM_ADDRESS, + IFLA_PROTO_DOWN_REASON, + + /* device (sysfs) name as parent, used instead + * of IFLA_LINK where there's no parent netdev + */ + IFLA_PARENT_DEV_NAME, + IFLA_PARENT_DEV_BUS_NAME, + __IFLA_MAX }; #define IFLA_MAX (__IFLA_MAX - 1) +enum { + IFLA_PROTO_DOWN_REASON_UNSPEC, + IFLA_PROTO_DOWN_REASON_MASK, /* u32, mask for reason bits */ + IFLA_PROTO_DOWN_REASON_VALUE, /* u32, reason bit value */ + + __IFLA_PROTO_DOWN_REASON_CNT, + IFLA_PROTO_DOWN_REASON_MAX = __IFLA_PROTO_DOWN_REASON_CNT - 1 +}; + /* backwards compatibility for userspace */ #ifndef __KERNEL__ #define IFLA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg)))) @@ -293,6 +480,7 @@ enum { IFLA_BR_MCAST_MLD_VERSION, IFLA_BR_VLAN_STATS_PER_PORT, IFLA_BR_MULTI_BOOLOPT, + IFLA_BR_MCAST_QUERIER_STATE, __IFLA_BR_MAX, }; @@ -346,6 +534,8 @@ enum { IFLA_BRPORT_BACKUP_PORT, IFLA_BRPORT_MRP_RING_OPEN, IFLA_BRPORT_MRP_IN_OPEN, + IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT, + IFLA_BRPORT_MCAST_EHT_HOSTS_CNT, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) @@ -433,6 +623,7 @@ enum macvlan_macaddr_mode { }; #define MACVLAN_FLAG_NOPROMISC 1 +#define MACVLAN_FLAG_NODST 2 /* skip dst macvlan if matching src macvlan */ /* VRF section */ enum { @@ -597,6 +788,18 @@ enum ifla_geneve_df { GENEVE_DF_MAX = __GENEVE_DF_END - 1, }; +/* Bareudp section */ +enum { + IFLA_BAREUDP_UNSPEC, + IFLA_BAREUDP_PORT, + IFLA_BAREUDP_ETHERTYPE, + IFLA_BAREUDP_SRCPORT_MIN, + IFLA_BAREUDP_MULTIPROTO_MODE, + __IFLA_BAREUDP_MAX +}; + +#define IFLA_BAREUDP_MAX (__IFLA_BAREUDP_MAX - 1) + /* PPP section */ enum { IFLA_PPP_UNSPEC, @@ -899,7 +1102,14 @@ enum { #define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1) -/* HSR section */ +/* HSR/PRP section, both uses same interface */ + +/* Different redundancy protocols for hsr device */ +enum { + HSR_PROTOCOL_HSR, + HSR_PROTOCOL_PRP, + HSR_PROTOCOL_MAX, +}; enum { IFLA_HSR_UNSPEC, @@ -909,6 +1119,9 @@ enum { IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */ IFLA_HSR_SEQ_NR, IFLA_HSR_VERSION, /* HSR version */ + IFLA_HSR_PROTOCOL, /* Indicate different protocol than + * HSR. For example PRP. + */ __IFLA_HSR_MAX, }; @@ -1033,6 +1246,8 @@ enum { #define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1) #define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2) #define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3) +#define RMNET_FLAGS_INGRESS_MAP_CKSUMV5 (1U << 4) +#define RMNET_FLAGS_EGRESS_MAP_CKSUMV5 (1U << 5) enum { IFLA_RMNET_UNSPEC, @@ -1048,4 +1263,14 @@ struct ifla_rmnet_flags { __u32 mask; }; +/* MCTP section */ + +enum { + IFLA_MCTP_UNSPEC, + IFLA_MCTP_NET, + __IFLA_MCTP_MAX, +}; + +#define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1) + #endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index a067410ebea5..1daa45268de2 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -269,6 +269,7 @@ struct kvm_xen_exit { #define KVM_EXIT_AP_RESET_HOLD 32 #define KVM_EXIT_X86_BUS_LOCK 33 #define KVM_EXIT_XEN 34 +#define KVM_EXIT_RISCV_SBI 35 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -397,13 +398,23 @@ struct kvm_run { * "ndata" is correct, that new fields are enumerated in "flags", * and that each flag enumerates fields that are 64-bit aligned * and sized (so that ndata+internal.data[] is valid/accurate). + * + * Space beyond the defined fields may be used to store arbitrary + * debug information relating to the emulation failure. It is + * accounted for in "ndata" but the format is unspecified and is + * not represented in "flags". Any such information is *not* ABI! */ struct { __u32 suberror; __u32 ndata; __u64 flags; - __u8 insn_size; - __u8 insn_bytes[15]; + union { + struct { + __u8 insn_size; + __u8 insn_bytes[15]; + }; + }; + /* Arbitrary debug data may follow. */ } emulation_failure; /* KVM_EXIT_OSI */ struct { @@ -469,6 +480,13 @@ struct kvm_run { } msr; /* KVM_EXIT_XEN */ struct kvm_xen_exit xen; + /* KVM_EXIT_RISCV_SBI */ + struct { + unsigned long extension_id; + unsigned long function_id; + unsigned long args[6]; + unsigned long ret[2]; + } riscv_sbi; /* Fix the size of the union. */ char padding[256]; }; @@ -1112,6 +1130,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_BINARY_STATS_FD 203 #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 #define KVM_CAP_ARM_MTE 205 +#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 #ifdef KVM_CAP_IRQ_ROUTING @@ -1223,11 +1242,16 @@ struct kvm_irqfd { /* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */ #define KVM_CLOCK_TSC_STABLE 2 +#define KVM_CLOCK_REALTIME (1 << 2) +#define KVM_CLOCK_HOST_TSC (1 << 3) struct kvm_clock_data { __u64 clock; __u32 flags; - __u32 pad[9]; + __u32 pad0; + __u64 realtime; + __u64 host_tsc; + __u32 pad[4]; }; /* For KVM_CAP_SW_TLB */ diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h index d26e5472fe50..6f3df004479b 100644 --- a/tools/lib/bpf/bpf_gen_internal.h +++ b/tools/lib/bpf/bpf_gen_internal.h @@ -45,8 +45,8 @@ struct bpf_gen { int nr_fd_array; }; -void bpf_gen__init(struct bpf_gen *gen, int log_level); -int bpf_gen__finish(struct bpf_gen *gen); +void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps); +int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps); void bpf_gen__free(struct bpf_gen *gen); void bpf_gen__load_btf(struct bpf_gen *gen, const void *raw_data, __u32 raw_size); void bpf_gen__map_create(struct bpf_gen *gen, struct bpf_create_map_params *map_attr, int map_idx); diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 502dea53a742..9934851ccde7 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -18,7 +18,7 @@ #define MAX_USED_MAPS 64 #define MAX_USED_PROGS 32 #define MAX_KFUNC_DESCS 256 -#define MAX_FD_ARRAY_SZ (MAX_USED_PROGS + MAX_KFUNC_DESCS) +#define MAX_FD_ARRAY_SZ (MAX_USED_MAPS + MAX_KFUNC_DESCS) /* The following structure describes the stack layout of the loader program. * In addition R6 contains the pointer to context. @@ -33,8 +33,8 @@ */ struct loader_stack { __u32 btf_fd; - __u32 prog_fd[MAX_USED_PROGS]; __u32 inner_map_fd; + __u32 prog_fd[MAX_USED_PROGS]; }; #define stack_off(field) \ @@ -42,6 +42,11 @@ struct loader_stack { #define attr_field(attr, field) (attr + offsetof(union bpf_attr, field)) +static int blob_fd_array_off(struct bpf_gen *gen, int index) +{ + return gen->fd_array + index * sizeof(int); +} + static int realloc_insn_buf(struct bpf_gen *gen, __u32 size) { size_t off = gen->insn_cur - gen->insn_start; @@ -102,11 +107,15 @@ static void emit2(struct bpf_gen *gen, struct bpf_insn insn1, struct bpf_insn in emit(gen, insn2); } -void bpf_gen__init(struct bpf_gen *gen, int log_level) +static int add_data(struct bpf_gen *gen, const void *data, __u32 size); +static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off); + +void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps) { - size_t stack_sz = sizeof(struct loader_stack); + size_t stack_sz = sizeof(struct loader_stack), nr_progs_sz; int i; + gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int)); gen->log_level = log_level; /* save ctx pointer into R6 */ emit(gen, BPF_MOV64_REG(BPF_REG_6, BPF_REG_1)); @@ -118,19 +127,27 @@ void bpf_gen__init(struct bpf_gen *gen, int log_level) emit(gen, BPF_MOV64_IMM(BPF_REG_3, 0)); emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel)); + /* amount of stack actually used, only used to calculate iterations, not stack offset */ + nr_progs_sz = offsetof(struct loader_stack, prog_fd[nr_progs]); /* jump over cleanup code */ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, - /* size of cleanup code below */ - (stack_sz / 4) * 3 + 2)); + /* size of cleanup code below (including map fd cleanup) */ + (nr_progs_sz / 4) * 3 + 2 + + /* 6 insns for emit_sys_close_blob, + * 6 insns for debug_regs in emit_sys_close_blob + */ + nr_maps * (6 + (gen->log_level ? 6 : 0)))); /* remember the label where all error branches will jump to */ gen->cleanup_label = gen->insn_cur - gen->insn_start; /* emit cleanup code: close all temp FDs */ - for (i = 0; i < stack_sz; i += 4) { + for (i = 0; i < nr_progs_sz; i += 4) { emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -stack_sz + i)); emit(gen, BPF_JMP_IMM(BPF_JSLE, BPF_REG_1, 0, 1)); emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_close)); } + for (i = 0; i < nr_maps; i++) + emit_sys_close_blob(gen, blob_fd_array_off(gen, i)); /* R7 contains the error code from sys_bpf. Copy it into R0 and exit. */ emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); emit(gen, BPF_EXIT_INSN()); @@ -160,8 +177,6 @@ static int add_data(struct bpf_gen *gen, const void *data, __u32 size) */ static int add_map_fd(struct bpf_gen *gen) { - if (!gen->fd_array) - gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int)); if (gen->nr_maps == MAX_USED_MAPS) { pr_warn("Total maps exceeds %d\n", MAX_USED_MAPS); gen->error = -E2BIG; @@ -174,8 +189,6 @@ static int add_kfunc_btf_fd(struct bpf_gen *gen) { int cur; - if (!gen->fd_array) - gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int)); if (gen->nr_fd_array == MAX_KFUNC_DESCS) { cur = add_data(gen, NULL, sizeof(int)); return (cur - gen->fd_array) / sizeof(int); @@ -183,11 +196,6 @@ static int add_kfunc_btf_fd(struct bpf_gen *gen) return MAX_USED_MAPS + gen->nr_fd_array++; } -static int blob_fd_array_off(struct bpf_gen *gen, int index) -{ - return gen->fd_array + index * sizeof(int); -} - static int insn_bytes_to_bpf_size(__u32 sz) { switch (sz) { @@ -359,10 +367,15 @@ static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off) __emit_sys_close(gen); } -int bpf_gen__finish(struct bpf_gen *gen) +int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps) { int i; + if (nr_progs != gen->nr_progs || nr_maps != gen->nr_maps) { + pr_warn("progs/maps mismatch\n"); + gen->error = -EFAULT; + return gen->error; + } emit_sys_close_stack(gen, stack_off(btf_fd)); for (i = 0; i < gen->nr_progs; i++) move_stack2ctx(gen, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a1bea1953df6..7c74342bb668 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -7258,7 +7258,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) } if (obj->gen_loader) - bpf_gen__init(obj->gen_loader, attr->log_level); + bpf_gen__init(obj->gen_loader, attr->log_level, obj->nr_programs, obj->nr_maps); err = bpf_object__probe_loading(obj); err = err ? : bpf_object__load_vmlinux_btf(obj, false); @@ -7277,7 +7277,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) for (i = 0; i < obj->nr_maps; i++) obj->maps[i].fd = -1; if (!err) - err = bpf_gen__finish(obj->gen_loader); + err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps); } /* clean up fd_array */ diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 81a4c543ff7e..4b384c907027 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -375,6 +375,7 @@ static int read_symbols(struct elf *elf) return -1; } memset(sym, 0, sizeof(*sym)); + INIT_LIST_HEAD(&sym->pv_target); sym->alias = sym; sym->idx = i; diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index c90c7084e45a..bdf699f6552b 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -153,6 +153,10 @@ void objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func) !strcmp(func->name, "_paravirt_ident_64")) return; + /* already added this function */ + if (!list_empty(&func->pv_target)) + return; + list_add(&func->pv_target, &f->pv_ops[idx].targets); f->pv_ops[idx].clean = false; } diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 07e65a061fd3..3df74cf5651a 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -271,8 +271,6 @@ endif FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS) FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS) -FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS) -FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS) FEATURE_CHECK_LDFLAGS-libaio = -lrt @@ -1010,6 +1008,9 @@ ifndef NO_AUXTRACE ifndef NO_AUXTRACE $(call detected,CONFIG_AUXTRACE) CFLAGS += -DHAVE_AUXTRACE_SUPPORT + ifeq ($(feature-reallocarray), 0) + CFLAGS += -DCOMPAT_NEED_REALLOCARRAY + endif endif endif diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index 1ca7bc337932..e2c481fcede6 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -363,3 +363,4 @@ 446 n64 landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 n64 process_mrelease sys_process_mrelease +449 n64 futex_waitv sys_futex_waitv diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 7bef917cc84e..15109af9d075 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -528,3 +528,4 @@ 446 common landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index df5261e5cfe1..ed9c5c2eafad 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -451,3 +451,4 @@ 446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv sys_futex_waitv diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index bc5259db5fd9..b9d6306cc14e 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -820,7 +820,7 @@ static int __cmd_inject(struct perf_inject *inject) inject->tool.ordered_events = true; inject->tool.ordering_requires_timestamps = true; /* Allow space in the header for new attributes */ - output_data_offset = 4096; + output_data_offset = roundup(8192 + session->header.data_offset, 4096); if (inject->strip) strip_init(inject); } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8167ebfe776a..8ae400429870 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -619,14 +619,17 @@ static int report__browse_hists(struct report *rep) int ret; struct perf_session *session = rep->session; struct evlist *evlist = session->evlist; - const char *help = perf_tip(system_path(TIPDIR)); + char *help = NULL, *path = NULL; - if (help == NULL) { + path = system_path(TIPDIR); + if (perf_tip(&help, path) || help == NULL) { /* fallback for people who don't install perf ;-) */ - help = perf_tip(DOCDIR); - if (help == NULL) - help = "Cannot load tips.txt file, please install perf!"; + free(path); + path = system_path(DOCDIR); + if (perf_tip(&help, path) || help == NULL) + help = strdup("Cannot load tips.txt file, please install perf!"); } + free(path); switch (use_browser) { case 1: @@ -651,7 +654,7 @@ static int report__browse_hists(struct report *rep) ret = evlist__tty_browse_hists(evlist, rep, help); break; } - + free(help); return ret; } diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index fbb68deba59f..d01532d40acb 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -88,7 +88,6 @@ static int test__event_update(struct test_suite *test __maybe_unused, int subtes struct evsel *evsel; struct event_name tmp; struct evlist *evlist = evlist__new_default(); - char *unit = strdup("KRAVA"); TEST_ASSERT_VAL("failed to get evlist", evlist); @@ -99,7 +98,8 @@ static int test__event_update(struct test_suite *test __maybe_unused, int subtes perf_evlist__id_add(&evlist->core, &evsel->core, 0, 0, 123); - evsel->unit = unit; + free((char *)evsel->unit); + evsel->unit = strdup("KRAVA"); TEST_ASSERT_VAL("failed to synthesize attr update unit", !perf_event__synthesize_event_update_unit(NULL, evsel, process_event_unit)); @@ -119,7 +119,6 @@ static int test__event_update(struct test_suite *test __maybe_unused, int subtes TEST_ASSERT_VAL("failed to synthesize attr update cpus", !perf_event__synthesize_event_update_cpus(&tmp.tool, evsel, process_event_cpus)); - free(unit); evlist__delete(evlist); return 0; } diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index c895de481fe1..d54c5371c6a6 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -169,7 +169,9 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u TEST_ASSERT_VAL("#num_dies", expr__parse(&num_dies, ctx, "#num_dies") == 0); TEST_ASSERT_VAL("#num_cores >= #num_dies", num_cores >= num_dies); TEST_ASSERT_VAL("#num_packages", expr__parse(&num_packages, ctx, "#num_packages") == 0); - TEST_ASSERT_VAL("#num_dies >= #num_packages", num_dies >= num_packages); + + if (num_dies) // Some platforms do not have CPU die support, for example s390 + TEST_ASSERT_VAL("#num_dies >= #num_packages", num_dies >= num_packages); /* * Source count returns the number of events aggregating in a leader diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index 574b7e4efd3a..07b6f4ec024f 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -109,6 +109,7 @@ static void load_runtime_stat(struct runtime_stat *st, struct evlist *evlist, struct evsel *evsel; u64 count; + perf_stat__reset_shadow_stats(); evlist__for_each_entry(evlist, evsel) { count = find_value(evsel->name, vals); perf_stat__update_shadow_stats(evsel, count, 0, st); diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index b669d22f2b13..07f2411b0ad4 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -36,7 +36,7 @@ * These are based on the input value (213) specified * in branch_stack variable. */ -#define BS_EXPECTED_BE 0xa00d000000000000 +#define BS_EXPECTED_BE 0xa000d00000000000 #define BS_EXPECTED_LE 0xd5000000 #define FLAG(s) s->branch_stack->entries[i].flags diff --git a/tools/perf/tests/wp.c b/tools/perf/tests/wp.c index 820d942b30c3..9d4c45184e71 100644 --- a/tools/perf/tests/wp.c +++ b/tools/perf/tests/wp.c @@ -21,6 +21,7 @@ do { \ volatile u64 data1; volatile u8 data2[3]; +#ifndef __s390x__ static int wp_read(int fd, long long *count, int size) { int ret = read(fd, count, size); @@ -48,7 +49,6 @@ static void get__perf_event_attr(struct perf_event_attr *attr, int wp_type, attr->exclude_hv = 1; } -#ifndef __s390x__ static int __event(int wp_type, void *wp_addr, unsigned long wp_len) { int fd; diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index c1f24d004852..5075ecead5f3 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -535,6 +535,18 @@ struct perf_hpp_list perf_hpp_list = { #undef __HPP_SORT_ACC_FN #undef __HPP_SORT_RAW_FN +static void fmt_free(struct perf_hpp_fmt *fmt) +{ + /* + * At this point fmt should be completely + * unhooked, if not it's a bug. + */ + BUG_ON(!list_empty(&fmt->list)); + BUG_ON(!list_empty(&fmt->sort_list)); + + if (fmt->free) + fmt->free(fmt); +} void perf_hpp__init(void) { @@ -598,9 +610,10 @@ void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list, list_add(&format->sort_list, &list->sorts); } -void perf_hpp__column_unregister(struct perf_hpp_fmt *format) +static void perf_hpp__column_unregister(struct perf_hpp_fmt *format) { list_del_init(&format->list); + fmt_free(format); } void perf_hpp__cancel_cumulate(void) @@ -672,19 +685,6 @@ next: } -static void fmt_free(struct perf_hpp_fmt *fmt) -{ - /* - * At this point fmt should be completely - * unhooked, if not it's a bug. - */ - BUG_ON(!list_empty(&fmt->list)); - BUG_ON(!list_empty(&fmt->sort_list)); - - if (fmt->free) - fmt->free(fmt); -} - void perf_hpp__reset_output_field(struct perf_hpp_list *list) { struct perf_hpp_fmt *fmt, *tmp; diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 4748bcfe61de..fccac06b573a 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -51,6 +51,7 @@ struct arm_spe { u8 timeless_decoding; u8 data_queued; + u64 sample_type; u8 sample_flc; u8 sample_llc; u8 sample_tlb; @@ -287,6 +288,12 @@ static void arm_spe_prep_sample(struct arm_spe *spe, event->sample.header.size = sizeof(struct perf_event_header); } +static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type) +{ + event->header.size = perf_event__sample_event_size(sample, type, 0); + return perf_event__synthesize_sample(event, type, 0, sample); +} + static inline int arm_spe_deliver_synth_event(struct arm_spe *spe, struct arm_spe_queue *speq __maybe_unused, @@ -295,6 +302,12 @@ arm_spe_deliver_synth_event(struct arm_spe *spe, { int ret; + if (spe->synth_opts.inject) { + ret = arm_spe__inject_event(event, sample, spe->sample_type); + if (ret) + return ret; + } + ret = perf_session__deliver_synth_event(spe->session, event, sample); if (ret) pr_err("ARM SPE: failed to deliver event, error %d\n", ret); @@ -986,6 +999,8 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) else attr.sample_type |= PERF_SAMPLE_TIME; + spe->sample_type = attr.sample_type; + attr.exclude_user = evsel->core.attr.exclude_user; attr.exclude_kernel = evsel->core.attr.exclude_kernel; attr.exclude_hv = evsel->core.attr.exclude_hv; diff --git a/tools/perf/util/bpf_skel/bperf.h b/tools/perf/util/bpf_skel/bperf.h deleted file mode 100644 index 186a5551ddb9..000000000000 --- a/tools/perf/util/bpf_skel/bperf.h +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -// Copyright (c) 2021 Facebook - -#ifndef __BPERF_STAT_H -#define __BPERF_STAT_H - -typedef struct { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __uint(key_size, sizeof(__u32)); - __uint(value_size, sizeof(struct bpf_perf_event_value)); - __uint(max_entries, 1); -} reading_map; - -#endif /* __BPERF_STAT_H */ diff --git a/tools/perf/util/bpf_skel/bperf_follower.bpf.c b/tools/perf/util/bpf_skel/bperf_follower.bpf.c index b8fa3cb2da23..f193998530d4 100644 --- a/tools/perf/util/bpf_skel/bperf_follower.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_follower.bpf.c @@ -1,14 +1,23 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (c) 2021 Facebook -#include <linux/bpf.h> -#include <linux/perf_event.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include "bperf.h" #include "bperf_u.h" -reading_map diff_readings SEC(".maps"); -reading_map accum_readings SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(max_entries, 1); +} diff_readings SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(max_entries, 1); +} accum_readings SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_HASH); diff --git a/tools/perf/util/bpf_skel/bperf_leader.bpf.c b/tools/perf/util/bpf_skel/bperf_leader.bpf.c index 4f70d1459e86..e2a2d4cd7779 100644 --- a/tools/perf/util/bpf_skel/bperf_leader.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_leader.bpf.c @@ -1,10 +1,8 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (c) 2021 Facebook -#include <linux/bpf.h> -#include <linux/perf_event.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include "bperf.h" struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); @@ -13,8 +11,19 @@ struct { __uint(map_flags, BPF_F_PRESERVE_ELEMS); } events SEC(".maps"); -reading_map prev_readings SEC(".maps"); -reading_map diff_readings SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(max_entries, 1); +} prev_readings SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(max_entries, 1); +} diff_readings SEC(".maps"); SEC("raw_tp/sched_switch") int BPF_PROG(on_switch) diff --git a/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c index ab12b4c4ece2..97037d3b3d9f 100644 --- a/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c +++ b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (c) 2020 Facebook -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 95ffed66369c..c59331eea1d9 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -44,13 +44,16 @@ struct perf_event_attr; /* perf sample has 16 bits size limit */ #define PERF_SAMPLE_MAX_SIZE (1 << 16) +/* number of register is bound by the number of bits in regs_dump::mask (64) */ +#define PERF_SAMPLE_REGS_CACHE_SIZE (8 * sizeof(u64)) + struct regs_dump { u64 abi; u64 mask; u64 *regs; /* Cached values/mask filled by first register access. */ - u64 cache_regs[PERF_REGS_MAX]; + u64 cache_regs[PERF_SAMPLE_REGS_CACHE_SIZE]; u64 cache_mask; }; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a59fb2ecb84e..ac0127be0459 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -241,7 +241,7 @@ void evsel__init(struct evsel *evsel, { perf_evsel__init(&evsel->core, attr, idx); evsel->tracking = !idx; - evsel->unit = ""; + evsel->unit = strdup(""); evsel->scale = 1.0; evsel->max_events = ULONG_MAX; evsel->evlist = NULL; @@ -276,13 +276,8 @@ struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx) } if (evsel__is_clock(evsel)) { - /* - * The evsel->unit points to static alias->unit - * so it's ok to use static string in here. - */ - static const char *unit = "msec"; - - evsel->unit = unit; + free((char *)evsel->unit); + evsel->unit = strdup("msec"); evsel->scale = 1e-6; } @@ -420,7 +415,11 @@ struct evsel *evsel__clone(struct evsel *orig) evsel->max_events = orig->max_events; evsel->tool_event = orig->tool_event; - evsel->unit = orig->unit; + free((char *)evsel->unit); + evsel->unit = strdup(orig->unit); + if (evsel->unit == NULL) + goto out_err; + evsel->scale = orig->scale; evsel->snapshot = orig->snapshot; evsel->per_pkg = orig->per_pkg; @@ -1441,6 +1440,7 @@ void evsel__exit(struct evsel *evsel) zfree(&evsel->group_name); zfree(&evsel->name); zfree(&evsel->pmu_name); + zfree(&evsel->unit); zfree(&evsel->metric_id); evsel__zero_per_pkg(evsel); hashmap__free(evsel->per_pkg_mask); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index fda8d14c891f..e3c1a532d059 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2321,6 +2321,7 @@ out: #define FEAT_PROCESS_STR_FUN(__feat, __feat_env) \ static int process_##__feat(struct feat_fd *ff, void *data __maybe_unused) \ {\ + free(ff->ph->env.__feat_env); \ ff->ph->env.__feat_env = do_read_string(ff); \ return ff->ph->env.__feat_env ? 0 : -ENOMEM; \ } @@ -4124,6 +4125,7 @@ int perf_event__process_feature(struct perf_session *session, struct perf_record_header_feature *fe = (struct perf_record_header_feature *)event; int type = fe->header.type; u64 feat = fe->feat_id; + int ret = 0; if (type < 0 || type >= PERF_RECORD_HEADER_MAX) { pr_warning("invalid record type %d in pipe-mode\n", type); @@ -4141,11 +4143,13 @@ int perf_event__process_feature(struct perf_session *session, ff.size = event->header.size - sizeof(*fe); ff.ph = &session->header; - if (feat_ops[feat].process(&ff, NULL)) - return -1; + if (feat_ops[feat].process(&ff, NULL)) { + ret = -1; + goto out; + } if (!feat_ops[feat].print || !tool->show_feat_hdr) - return 0; + goto out; if (!feat_ops[feat].full_only || tool->show_feat_hdr >= SHOW_FEAT_HEADER_FULL_INFO) { @@ -4154,8 +4158,9 @@ int perf_event__process_feature(struct perf_session *session, fprintf(stdout, "# %s info available, use -I to display\n", feat_ops[feat].name); } - - return 0; +out: + free_event_desc(ff.events); + return ret; } size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp) @@ -4257,9 +4262,11 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, switch (ev->type) { case PERF_EVENT_UPDATE__UNIT: + free((char *)evsel->unit); evsel->unit = strdup(ev->data); break; case PERF_EVENT_UPDATE__NAME: + free(evsel->name); evsel->name = strdup(ev->data); break; case PERF_EVENT_UPDATE__SCALE: @@ -4268,11 +4275,11 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, break; case PERF_EVENT_UPDATE__CPUS: ev_cpus = (struct perf_record_event_update_cpus *)ev->data; - map = cpu_map__new_data(&ev_cpus->cpus); - if (map) + if (map) { + perf_cpu_map__put(evsel->core.own_cpus); evsel->core.own_cpus = map; - else + } else pr_err("failed to get event_update cpus\n"); default: break; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 65fe65ba03c2..b776465e04ef 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -289,15 +289,10 @@ static long hist_time(unsigned long htime) return htime; } -static void he_stat__add_period(struct he_stat *he_stat, u64 period, - u64 weight, u64 ins_lat, u64 p_stage_cyc) +static void he_stat__add_period(struct he_stat *he_stat, u64 period) { - he_stat->period += period; - he_stat->weight += weight; he_stat->nr_events += 1; - he_stat->ins_lat += ins_lat; - he_stat->p_stage_cyc += p_stage_cyc; } static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) @@ -308,9 +303,6 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) dest->period_guest_sys += src->period_guest_sys; dest->period_guest_us += src->period_guest_us; dest->nr_events += src->nr_events; - dest->weight += src->weight; - dest->ins_lat += src->ins_lat; - dest->p_stage_cyc += src->p_stage_cyc; } static void he_stat__decay(struct he_stat *he_stat) @@ -598,9 +590,6 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, struct hist_entry *he; int64_t cmp; u64 period = entry->stat.period; - u64 weight = entry->stat.weight; - u64 ins_lat = entry->stat.ins_lat; - u64 p_stage_cyc = entry->stat.p_stage_cyc; bool leftmost = true; p = &hists->entries_in->rb_root.rb_node; @@ -619,11 +608,11 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, if (!cmp) { if (sample_self) { - he_stat__add_period(&he->stat, period, weight, ins_lat, p_stage_cyc); + he_stat__add_period(&he->stat, period); hist_entry__add_callchain_period(he, period); } if (symbol_conf.cumulate_callchain) - he_stat__add_period(he->stat_acc, period, weight, ins_lat, p_stage_cyc); + he_stat__add_period(he->stat_acc, period); /* * This mem info was allocated from sample__resolve_mem @@ -733,9 +722,6 @@ __hists__add_entry(struct hists *hists, .stat = { .nr_events = 1, .period = sample->period, - .weight = sample->weight, - .ins_lat = sample->ins_lat, - .p_stage_cyc = sample->p_stage_cyc, }, .parent = sym_parent, .filtered = symbol__parent_filter(sym_parent) | al->filtered, @@ -748,6 +734,9 @@ __hists__add_entry(struct hists *hists, .raw_size = sample->raw_size, .ops = ops, .time = hist_time(sample->time), + .weight = sample->weight, + .ins_lat = sample->ins_lat, + .p_stage_cyc = sample->p_stage_cyc, }, *he = hists__findnew_entry(hists, &entry, al, sample_self); if (!hists->has_callchains && he && he->callchain_size != 0) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 5343b62476e6..621f35ae1efa 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -369,7 +369,6 @@ enum { }; void perf_hpp__init(void); -void perf_hpp__column_unregister(struct perf_hpp_fmt *format); void perf_hpp__cancel_cumulate(void); void perf_hpp__setup_output_field(struct perf_hpp_list *list); void perf_hpp__reset_output_field(struct perf_hpp_list *list); diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 5f83937bf8f3..0e013c2d9eb4 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1205,61 +1205,69 @@ out_no_progress: static bool intel_pt_fup_event(struct intel_pt_decoder *decoder) { + enum intel_pt_sample_type type = decoder->state.type; bool ret = false; + decoder->state.type &= ~INTEL_PT_BRANCH; + if (decoder->set_fup_tx_flags) { decoder->set_fup_tx_flags = false; decoder->tx_flags = decoder->fup_tx_flags; - decoder->state.type = INTEL_PT_TRANSACTION; + decoder->state.type |= INTEL_PT_TRANSACTION; if (decoder->fup_tx_flags & INTEL_PT_ABORT_TX) decoder->state.type |= INTEL_PT_BRANCH; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; decoder->state.flags = decoder->fup_tx_flags; - return true; + ret = true; } if (decoder->set_fup_ptw) { decoder->set_fup_ptw = false; - decoder->state.type = INTEL_PT_PTW; + decoder->state.type |= INTEL_PT_PTW; decoder->state.flags |= INTEL_PT_FUP_IP; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; decoder->state.ptw_payload = decoder->fup_ptw_payload; - return true; + ret = true; } if (decoder->set_fup_mwait) { decoder->set_fup_mwait = false; - decoder->state.type = INTEL_PT_MWAIT_OP; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; + decoder->state.type |= INTEL_PT_MWAIT_OP; decoder->state.mwait_payload = decoder->fup_mwait_payload; ret = true; } if (decoder->set_fup_pwre) { decoder->set_fup_pwre = false; decoder->state.type |= INTEL_PT_PWR_ENTRY; - decoder->state.type &= ~INTEL_PT_BRANCH; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; decoder->state.pwre_payload = decoder->fup_pwre_payload; ret = true; } if (decoder->set_fup_exstop) { decoder->set_fup_exstop = false; decoder->state.type |= INTEL_PT_EX_STOP; - decoder->state.type &= ~INTEL_PT_BRANCH; decoder->state.flags |= INTEL_PT_FUP_IP; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; ret = true; } if (decoder->set_fup_bep) { decoder->set_fup_bep = false; decoder->state.type |= INTEL_PT_BLK_ITEMS; - decoder->state.type &= ~INTEL_PT_BRANCH; + ret = true; + } + if (decoder->overflow) { + decoder->overflow = false; + if (!ret && !decoder->pge) { + if (decoder->hop) { + decoder->state.type = 0; + decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; + } + decoder->pge = true; + decoder->state.type |= INTEL_PT_BRANCH | INTEL_PT_TRACE_BEGIN; + decoder->state.from_ip = 0; + decoder->state.to_ip = decoder->ip; + return true; + } + } + if (ret) { decoder->state.from_ip = decoder->ip; decoder->state.to_ip = 0; - ret = true; + } else { + decoder->state.type = type; } return ret; } @@ -1608,7 +1616,16 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder) intel_pt_clear_tx_flags(decoder); intel_pt_set_nr(decoder); decoder->timestamp_insn_cnt = 0; - decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->state.from_ip = decoder->ip; + decoder->ip = 0; + decoder->pge = false; + decoder->set_fup_tx_flags = false; + decoder->set_fup_ptw = false; + decoder->set_fup_mwait = false; + decoder->set_fup_pwre = false; + decoder->set_fup_exstop = false; + decoder->set_fup_bep = false; decoder->overflow = true; return -EOVERFLOW; } @@ -2666,6 +2683,8 @@ static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder); /* Hop mode: Ignore TNT, do not walk code, but get ip from FUPs and TIPs */ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, int *err) { + *err = 0; + /* Leap from PSB to PSB, getting ip from FUP within PSB+ */ if (decoder->leap && !decoder->in_psb && decoder->packet.type != INTEL_PT_PSB) { *err = intel_pt_scan_for_psb(decoder); @@ -2678,6 +2697,7 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in return HOP_IGNORE; case INTEL_PT_TIP_PGD: + decoder->pge = false; if (!decoder->packet.count) { intel_pt_set_nr(decoder); return HOP_IGNORE; @@ -2705,18 +2725,21 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in if (!decoder->packet.count) return HOP_IGNORE; intel_pt_set_ip(decoder); - if (intel_pt_fup_event(decoder)) - return HOP_RETURN; - if (!decoder->branch_enable) + if (decoder->set_fup_mwait || decoder->set_fup_pwre) + *no_tip = true; + if (!decoder->branch_enable || !decoder->pge) *no_tip = true; if (*no_tip) { decoder->state.type = INTEL_PT_INSTRUCTION; decoder->state.from_ip = decoder->ip; decoder->state.to_ip = 0; + intel_pt_fup_event(decoder); return HOP_RETURN; } + intel_pt_fup_event(decoder); + decoder->state.type |= INTEL_PT_INSTRUCTION | INTEL_PT_BRANCH; *err = intel_pt_walk_fup_tip(decoder); - if (!*err) + if (!*err && decoder->state.to_ip) decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; return HOP_RETURN; @@ -2897,7 +2920,7 @@ static bool intel_pt_psb_with_fup(struct intel_pt_decoder *decoder, int *err) { struct intel_pt_psb_info data = { .fup = false }; - if (!decoder->branch_enable || !decoder->pge) + if (!decoder->branch_enable) return false; intel_pt_pkt_lookahead(decoder, intel_pt_psb_lookahead_cb, &data); @@ -2924,6 +2947,7 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder) if (err) return err; next: + err = 0; if (decoder->cyc_threshold) { if (decoder->sample_cyc && last_packet_type != INTEL_PT_CYC) decoder->sample_cyc = false; @@ -2962,6 +2986,7 @@ next: case INTEL_PT_TIP_PGE: { decoder->pge = true; + decoder->overflow = false; intel_pt_mtc_cyc_cnt_pge(decoder); intel_pt_set_nr(decoder); if (decoder->packet.count == 0) { @@ -2999,7 +3024,7 @@ next: break; } intel_pt_set_last_ip(decoder); - if (!decoder->branch_enable) { + if (!decoder->branch_enable || !decoder->pge) { decoder->ip = decoder->last_ip; if (intel_pt_fup_event(decoder)) return 0; @@ -3467,10 +3492,10 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder) decoder->set_fup_pwre = false; decoder->set_fup_exstop = false; decoder->set_fup_bep = false; + decoder->overflow = false; if (!decoder->branch_enable) { decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; - decoder->overflow = false; decoder->state.type = 0; /* Do not have a sample */ return 0; } @@ -3485,7 +3510,6 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder) decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; else decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; - decoder->overflow = false; decoder->state.from_ip = 0; decoder->state.to_ip = decoder->ip; @@ -3607,7 +3631,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder) } decoder->have_last_ip = true; - decoder->pkt_state = INTEL_PT_STATE_NO_IP; + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; err = intel_pt_walk_psb(decoder); if (err) @@ -3704,7 +3728,8 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) if (err) { decoder->state.err = intel_pt_ext_err(err); - decoder->state.from_ip = decoder->ip; + if (err != -EOVERFLOW) + decoder->state.from_ip = decoder->ip; intel_pt_update_sample_time(decoder); decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt; intel_pt_set_nr(decoder); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 556a893508da..10c3187e4c5a 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -2565,6 +2565,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) ptq->sync_switch = false; intel_pt_next_tid(pt, ptq); } + ptq->timestamp = state->est_timestamp; if (pt->synth_opts.errors) { err = intel_ptq_synth_error(ptq, state); if (err) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 5bfb6f892489..ba74fdf74af9 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -402,8 +402,10 @@ static int add_event_tool(struct list_head *list, int *idx, if (!evsel) return -ENOMEM; evsel->tool_event = tool_event; - if (tool_event == PERF_TOOL_DURATION_TIME) - evsel->unit = "ns"; + if (tool_event == PERF_TOOL_DURATION_TIME) { + free((char *)evsel->unit); + evsel->unit = strdup("ns"); + } return 0; } @@ -1630,7 +1632,8 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (parse_state->fake_pmu) return 0; - evsel->unit = info.unit; + free((char *)evsel->unit); + evsel->unit = strdup(info.unit); evsel->scale = info.scale; evsel->per_pkg = info.per_pkg; evsel->snapshot = info.snapshot; diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index 5ee47ae1509c..06a7461ba864 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -25,6 +25,9 @@ int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) int i, idx = 0; u64 mask = regs->mask; + if ((u64)id >= PERF_SAMPLE_REGS_CACHE_SIZE) + return -EINVAL; + if (regs->cache_mask & (1ULL << id)) goto out; diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 563a9ba8954f..7f782a31bda3 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -461,7 +461,7 @@ get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name) struct tep_event *tp_format; tp_format = trace_event__tp_format_id(evsel->core.attr.config); - if (!tp_format) + if (IS_ERR_OR_NULL(tp_format)) return NULL; evsel->tp_format = tp_format; diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c index 20bacd5972ad..34f1b1b1176c 100644 --- a/tools/perf/util/smt.c +++ b/tools/perf/util/smt.c @@ -15,7 +15,7 @@ int smt_on(void) if (cached) return cached_result; - if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) > 0) + if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) >= 0) goto done; ncpu = sysconf(_SC_NPROCESSORS_CONF); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 568a88c001c6..a111065b484e 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1325,88 +1325,68 @@ struct sort_entry sort_mispredict = { .se_width_idx = HISTC_MISPREDICT, }; -static u64 he_weight(struct hist_entry *he) -{ - return he->stat.nr_events ? he->stat.weight / he->stat.nr_events : 0; -} - static int64_t -sort__local_weight_cmp(struct hist_entry *left, struct hist_entry *right) +sort__weight_cmp(struct hist_entry *left, struct hist_entry *right) { - return he_weight(left) - he_weight(right); + return left->weight - right->weight; } static int hist_entry__local_weight_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*llu", width, he_weight(he)); + return repsep_snprintf(bf, size, "%-*llu", width, he->weight); } struct sort_entry sort_local_weight = { .se_header = "Local Weight", - .se_cmp = sort__local_weight_cmp, + .se_cmp = sort__weight_cmp, .se_snprintf = hist_entry__local_weight_snprintf, .se_width_idx = HISTC_LOCAL_WEIGHT, }; -static int64_t -sort__global_weight_cmp(struct hist_entry *left, struct hist_entry *right) -{ - return left->stat.weight - right->stat.weight; -} - static int hist_entry__global_weight_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*llu", width, he->stat.weight); + return repsep_snprintf(bf, size, "%-*llu", width, + he->weight * he->stat.nr_events); } struct sort_entry sort_global_weight = { .se_header = "Weight", - .se_cmp = sort__global_weight_cmp, + .se_cmp = sort__weight_cmp, .se_snprintf = hist_entry__global_weight_snprintf, .se_width_idx = HISTC_GLOBAL_WEIGHT, }; -static u64 he_ins_lat(struct hist_entry *he) -{ - return he->stat.nr_events ? he->stat.ins_lat / he->stat.nr_events : 0; -} - static int64_t -sort__local_ins_lat_cmp(struct hist_entry *left, struct hist_entry *right) +sort__ins_lat_cmp(struct hist_entry *left, struct hist_entry *right) { - return he_ins_lat(left) - he_ins_lat(right); + return left->ins_lat - right->ins_lat; } static int hist_entry__local_ins_lat_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*u", width, he_ins_lat(he)); + return repsep_snprintf(bf, size, "%-*u", width, he->ins_lat); } struct sort_entry sort_local_ins_lat = { .se_header = "Local INSTR Latency", - .se_cmp = sort__local_ins_lat_cmp, + .se_cmp = sort__ins_lat_cmp, .se_snprintf = hist_entry__local_ins_lat_snprintf, .se_width_idx = HISTC_LOCAL_INS_LAT, }; -static int64_t -sort__global_ins_lat_cmp(struct hist_entry *left, struct hist_entry *right) -{ - return left->stat.ins_lat - right->stat.ins_lat; -} - static int hist_entry__global_ins_lat_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*u", width, he->stat.ins_lat); + return repsep_snprintf(bf, size, "%-*u", width, + he->ins_lat * he->stat.nr_events); } struct sort_entry sort_global_ins_lat = { .se_header = "INSTR Latency", - .se_cmp = sort__global_ins_lat_cmp, + .se_cmp = sort__ins_lat_cmp, .se_snprintf = hist_entry__global_ins_lat_snprintf, .se_width_idx = HISTC_GLOBAL_INS_LAT, }; @@ -1414,13 +1394,13 @@ struct sort_entry sort_global_ins_lat = { static int64_t sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right) { - return left->stat.p_stage_cyc - right->stat.p_stage_cyc; + return left->p_stage_cyc - right->p_stage_cyc; } static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*u", width, he->stat.p_stage_cyc); + return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc); } struct sort_entry sort_p_stage_cyc = { diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index b67c469aba79..7b7145501933 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -49,9 +49,6 @@ struct he_stat { u64 period_us; u64 period_guest_sys; u64 period_guest_us; - u64 weight; - u64 ins_lat; - u64 p_stage_cyc; u32 nr_events; }; @@ -109,6 +106,9 @@ struct hist_entry { s32 socket; s32 cpu; u64 code_page_size; + u64 weight; + u64 ins_lat; + u64 p_stage_cyc; u8 cpumode; u8 depth; diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 37a9492edb3e..df3c4671be72 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -379,32 +379,32 @@ fetch_kernel_version(unsigned int *puint, char *str, return 0; } -const char *perf_tip(const char *dirpath) +int perf_tip(char **strp, const char *dirpath) { struct strlist *tips; struct str_node *node; - char *tip = NULL; struct strlist_config conf = { .dirname = dirpath, .file_only = true, }; + int ret = 0; + *strp = NULL; tips = strlist__new("tips.txt", &conf); if (tips == NULL) - return errno == ENOENT ? NULL : - "Tip: check path of tips.txt or get more memory! ;-p"; + return -errno; if (strlist__nr_entries(tips) == 0) goto out; node = strlist__entry(tips, random() % strlist__nr_entries(tips)); - if (asprintf(&tip, "Tip: %s", node->s) < 0) - tip = (char *)"Tip: get more memory! ;-)"; + if (asprintf(strp, "Tip: %s", node->s) < 0) + ret = -ENOMEM; out: strlist__delete(tips); - return tip; + return ret; } char *perf_exe(char *buf, int len) diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index ad737052e597..9f0d36ba77f2 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -39,7 +39,7 @@ int fetch_kernel_version(unsigned int *puint, #define KVER_FMT "%d.%d.%d" #define KVER_PARAM(x) KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x) -const char *perf_tip(const char *dirpath); +int perf_tip(char **strp, const char *dirpath); #ifndef HAVE_SCHED_GETCPU_SUPPORT int sched_getcpu(void); diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config index 331f6d30f472..cd7106876a5f 100644 --- a/tools/power/acpi/Makefile.config +++ b/tools/power/acpi/Makefile.config @@ -69,6 +69,7 @@ KERNEL_INCLUDE := $(OUTPUT)include ACPICA_INCLUDE := $(srctree)/../../../drivers/acpi/acpica CFLAGS += -D_LINUX -I$(KERNEL_INCLUDE) -I$(ACPICA_INCLUDE) CFLAGS += $(WARNINGS) +MKDIR = mkdir ifeq ($(strip $(V)),false) QUIET=@ diff --git a/tools/power/acpi/Makefile.rules b/tools/power/acpi/Makefile.rules index 2a6c170b57cd..1d7616f5d0ae 100644 --- a/tools/power/acpi/Makefile.rules +++ b/tools/power/acpi/Makefile.rules @@ -21,6 +21,7 @@ $(KERNEL_INCLUDE): $(objdir)%.o: %.c $(KERNEL_INCLUDE) $(ECHO) " CC " $(subst $(OUTPUT),,$@) + $(QUIET) $(MKDIR) -p $(objdir) 2>/dev/null $(QUIET) $(CC) -c $(CFLAGS) -o $@ $< all: $(OUTPUT)$(TOOL) diff --git a/tools/testing/radix-tree/linux/lockdep.h b/tools/testing/radix-tree/linux/lockdep.h index 565fccdfe6e9..016cff473cfc 100644 --- a/tools/testing/radix-tree/linux/lockdep.h +++ b/tools/testing/radix-tree/linux/lockdep.h @@ -1,5 +1,8 @@ #ifndef _LINUX_LOCKDEP_H #define _LINUX_LOCKDEP_H + +#include <linux/spinlock.h> + struct lock_class_key { unsigned int a; }; diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 54b0a41a3775..62fafbeb4672 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -187,7 +187,7 @@ DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT) $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ OUTPUT=$(RUNQSLOWER_OUTPUT) VMLINUX_BTF=$(VMLINUX_BTF) \ - BPFTOOL_OUTPUT=$(BUILD_DIR)/bpftool/ \ + BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf \ BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \ cp $(RUNQSLOWER_OUTPUT)runqslower $@ diff --git a/tools/testing/selftests/bpf/prog_tests/helper_restricted.c b/tools/testing/selftests/bpf/prog_tests/helper_restricted.c new file mode 100644 index 000000000000..e1de5f80c3b2 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/helper_restricted.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include "test_helper_restricted.skel.h" + +void test_helper_restricted(void) +{ + int prog_i = 0, prog_cnt; + int duration = 0; + + do { + struct test_helper_restricted *test; + int maybeOK; + + test = test_helper_restricted__open(); + if (!ASSERT_OK_PTR(test, "open")) + return; + + prog_cnt = test->skeleton->prog_cnt; + + for (int j = 0; j < prog_cnt; ++j) { + struct bpf_program *prog = *test->skeleton->progs[j].prog; + + maybeOK = bpf_program__set_autoload(prog, prog_i == j); + ASSERT_OK(maybeOK, "set autoload"); + } + + maybeOK = test_helper_restricted__load(test); + CHECK(!maybeOK, test->skeleton->progs[prog_i].name, "helper isn't restricted"); + + test_helper_restricted__destroy(test); + } while (++prog_i < prog_cnt); +} diff --git a/tools/testing/selftests/bpf/progs/test_helper_restricted.c b/tools/testing/selftests/bpf/progs/test_helper_restricted.c new file mode 100644 index 000000000000..68d64c365f90 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_helper_restricted.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <time.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct timer { + struct bpf_timer t; +}; + +struct lock { + struct bpf_spin_lock l; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct timer); +} timers SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct lock); +} locks SEC(".maps"); + +static int timer_cb(void *map, int *key, struct timer *timer) +{ + return 0; +} + +static void timer_work(void) +{ + struct timer *timer; + const int key = 0; + + timer = bpf_map_lookup_elem(&timers, &key); + if (timer) { + bpf_timer_init(&timer->t, &timers, CLOCK_MONOTONIC); + bpf_timer_set_callback(&timer->t, timer_cb); + bpf_timer_start(&timer->t, 10E9, 0); + bpf_timer_cancel(&timer->t); + } +} + +static void spin_lock_work(void) +{ + const int key = 0; + struct lock *lock; + + lock = bpf_map_lookup_elem(&locks, &key); + if (lock) { + bpf_spin_lock(&lock->l); + bpf_spin_unlock(&lock->l); + } +} + +SEC("raw_tp/sys_enter") +int raw_tp_timer(void *ctx) +{ + timer_work(); + + return 0; +} + +SEC("tp/syscalls/sys_enter_nanosleep") +int tp_timer(void *ctx) +{ + timer_work(); + + return 0; +} + +SEC("kprobe/sys_nanosleep") +int kprobe_timer(void *ctx) +{ + timer_work(); + + return 0; +} + +SEC("perf_event") +int perf_event_timer(void *ctx) +{ + timer_work(); + + return 0; +} + +SEC("raw_tp/sys_enter") +int raw_tp_spin_lock(void *ctx) +{ + spin_lock_work(); + + return 0; +} + +SEC("tp/syscalls/sys_enter_nanosleep") +int tp_spin_lock(void *ctx) +{ + spin_lock_work(); + + return 0; +} + +SEC("kprobe/sys_nanosleep") +int kprobe_spin_lock(void *ctx) +{ + spin_lock_work(); + + return 0; +} + +SEC("perf_event") +int perf_event_spin_lock(void *ctx) +{ + spin_lock_work(); + + return 0; +} + +const char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 25afe423b3f0..465ef3f112c0 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -92,6 +92,7 @@ struct bpf_test { int fixup_map_event_output[MAX_FIXUPS]; int fixup_map_reuseport_array[MAX_FIXUPS]; int fixup_map_ringbuf[MAX_FIXUPS]; + int fixup_map_timer[MAX_FIXUPS]; /* Expected verifier log output for result REJECT or VERBOSE_ACCEPT. * Can be a tab-separated sequence of expected strings. An empty string * means no log verification. @@ -604,8 +605,15 @@ static int create_cgroup_storage(bool percpu) * int cnt; * struct bpf_spin_lock l; * }; + * struct bpf_timer { + * __u64 :64; + * __u64 :64; + * } __attribute__((aligned(8))); + * struct timer { + * struct bpf_timer t; + * }; */ -static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l"; +static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l\0bpf_timer\0timer\0t"; static __u32 btf_raw_types[] = { /* int */ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ @@ -616,6 +624,11 @@ static __u32 btf_raw_types[] = { BTF_TYPE_ENC(15, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), BTF_MEMBER_ENC(19, 1, 0), /* int cnt; */ BTF_MEMBER_ENC(23, 2, 32),/* struct bpf_spin_lock l; */ + /* struct bpf_timer */ /* [4] */ + BTF_TYPE_ENC(25, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0), 16), + /* struct timer */ /* [5] */ + BTF_TYPE_ENC(35, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 16), + BTF_MEMBER_ENC(41, 4, 0), /* struct bpf_timer t; */ }; static int load_btf(void) @@ -696,6 +709,29 @@ static int create_sk_storage_map(void) return fd; } +static int create_map_timer(void) +{ + struct bpf_create_map_attr attr = { + .name = "test_map", + .map_type = BPF_MAP_TYPE_ARRAY, + .key_size = 4, + .value_size = 16, + .max_entries = 1, + .btf_key_type_id = 1, + .btf_value_type_id = 5, + }; + int fd, btf_fd; + + btf_fd = load_btf(); + if (btf_fd < 0) + return -1; + attr.btf_fd = btf_fd; + fd = bpf_create_map_xattr(&attr); + if (fd < 0) + printf("Failed to create map with timer\n"); + return fd; +} + static char bpf_vlog[UINT_MAX >> 8]; static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, @@ -722,6 +758,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, int *fixup_map_event_output = test->fixup_map_event_output; int *fixup_map_reuseport_array = test->fixup_map_reuseport_array; int *fixup_map_ringbuf = test->fixup_map_ringbuf; + int *fixup_map_timer = test->fixup_map_timer; if (test->fill_helper) { test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn)); @@ -907,6 +944,13 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, fixup_map_ringbuf++; } while (*fixup_map_ringbuf); } + if (*fixup_map_timer) { + map_fds[21] = create_map_timer(); + do { + prog[*fixup_map_timer].imm = map_fds[21]; + fixup_map_timer++; + } while (*fixup_map_timer); + } } struct libcap { diff --git a/tools/testing/selftests/bpf/verifier/helper_restricted.c b/tools/testing/selftests/bpf/verifier/helper_restricted.c new file mode 100644 index 000000000000..a067b7098b97 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/helper_restricted.c @@ -0,0 +1,196 @@ +{ + "bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_KPROBE", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ktime_get_coarse_ns), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "unknown func bpf_ktime_get_coarse_ns", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_KPROBE, +}, +{ + "bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_TRACEPOINT", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ktime_get_coarse_ns), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "unknown func bpf_ktime_get_coarse_ns", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_PERF_EVENT", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ktime_get_coarse_ns), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "unknown func bpf_ktime_get_coarse_ns", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_PERF_EVENT, +}, +{ + "bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ktime_get_coarse_ns), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "unknown func bpf_ktime_get_coarse_ns", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT, +}, +{ + "bpf_timer_init isn restricted in BPF_PROG_TYPE_KPROBE", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_EMIT_CALL(BPF_FUNC_timer_init), + BPF_EXIT_INSN(), + }, + .fixup_map_timer = { 3, 8 }, + .errstr = "tracing progs cannot use bpf_timer yet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_KPROBE, +}, +{ + "bpf_timer_init is forbidden in BPF_PROG_TYPE_PERF_EVENT", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_EMIT_CALL(BPF_FUNC_timer_init), + BPF_EXIT_INSN(), + }, + .fixup_map_timer = { 3, 8 }, + .errstr = "tracing progs cannot use bpf_timer yet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_PERF_EVENT, +}, +{ + "bpf_timer_init is forbidden in BPF_PROG_TYPE_TRACEPOINT", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_EMIT_CALL(BPF_FUNC_timer_init), + BPF_EXIT_INSN(), + }, + .fixup_map_timer = { 3, 8 }, + .errstr = "tracing progs cannot use bpf_timer yet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "bpf_timer_init is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_EMIT_CALL(BPF_FUNC_timer_init), + BPF_EXIT_INSN(), + }, + .fixup_map_timer = { 3, 8 }, + .errstr = "tracing progs cannot use bpf_timer yet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT, +}, +{ + "bpf_spin_lock is forbidden in BPF_PROG_TYPE_KPROBE", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_spin_lock), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 3 }, + .errstr = "tracing progs cannot use bpf_spin_lock yet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_KPROBE, +}, +{ + "bpf_spin_lock is forbidden in BPF_PROG_TYPE_TRACEPOINT", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_spin_lock), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 3 }, + .errstr = "tracing progs cannot use bpf_spin_lock yet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "bpf_spin_lock is forbidden in BPF_PROG_TYPE_PERF_EVENT", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_spin_lock), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 3 }, + .errstr = "tracing progs cannot use bpf_spin_lock yet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_PERF_EVENT, +}, +{ + "bpf_spin_lock is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_spin_lock), + BPF_EXIT_INSN(), + }, + .fixup_map_spin_lock = { 3 }, + .errstr = "tracing progs cannot use bpf_spin_lock yet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT, +}, diff --git a/tools/testing/selftests/bpf/verifier/map_in_map.c b/tools/testing/selftests/bpf/verifier/map_in_map.c index 2798927ee9ff..128a348b762d 100644 --- a/tools/testing/selftests/bpf/verifier/map_in_map.c +++ b/tools/testing/selftests/bpf/verifier/map_in_map.c @@ -19,6 +19,40 @@ .result = ACCEPT, }, { + "map in map state pruning", + .insns = { + BPF_ST_MEM(0, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -4), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_6), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 11), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_6), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_in_map = { 4, 14 }, + .flags = BPF_F_TEST_STATE_FREQ, + .result = VERBOSE_ACCEPT, + .errstr = "processed 25 insns", + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ "invalid inner map pointer", .insns = { BPF_ST_MEM(0, BPF_REG_10, -4, 0), diff --git a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c index bfb97383e6b5..b4ec228eb95d 100644 --- a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c +++ b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c @@ -35,7 +35,7 @@ .prog_type = BPF_PROG_TYPE_XDP, }, { - "XDP pkt read, pkt_data' > pkt_end, good access", + "XDP pkt read, pkt_data' > pkt_end, corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -88,6 +88,41 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { + "XDP pkt read, pkt_data' > pkt_end, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' > pkt_end, corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ "XDP pkt read, pkt_end > pkt_data', good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), @@ -106,16 +141,16 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_end > pkt_data', bad access 1", + "XDP pkt read, pkt_end > pkt_data', corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -143,6 +178,42 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { + "XDP pkt read, pkt_end > pkt_data', corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end > pkt_data', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ "XDP pkt read, pkt_data' < pkt_end, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), @@ -161,16 +232,16 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data' < pkt_end, bad access 1", + "XDP pkt read, pkt_data' < pkt_end, corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -198,7 +269,43 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_end < pkt_data', good access", + "XDP pkt read, pkt_data' < pkt_end, corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' < pkt_end, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end < pkt_data', corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -251,6 +358,41 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { + "XDP pkt read, pkt_end < pkt_data', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end < pkt_data', corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ "XDP pkt read, pkt_data' >= pkt_end, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), @@ -268,15 +410,15 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data' >= pkt_end, bad access 1", + "XDP pkt read, pkt_data' >= pkt_end, corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -304,7 +446,41 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_end >= pkt_data', good access", + "XDP pkt read, pkt_data' >= pkt_end, corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' >= pkt_end, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end >= pkt_data', corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -359,7 +535,44 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data' <= pkt_end, good access", + "XDP pkt read, pkt_end >= pkt_data', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end >= pkt_data', corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' <= pkt_end, corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -414,6 +627,43 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { + "XDP pkt read, pkt_data' <= pkt_end, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' <= pkt_end, corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ "XDP pkt read, pkt_end <= pkt_data', good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), @@ -431,15 +681,15 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_end <= pkt_data', bad access 1", + "XDP pkt read, pkt_end <= pkt_data', corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -467,7 +717,41 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_meta' > pkt_data, good access", + "XDP pkt read, pkt_end <= pkt_data', corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end <= pkt_data', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' > pkt_data, corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -520,6 +804,41 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { + "XDP pkt read, pkt_meta' > pkt_data, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' > pkt_data, corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ "XDP pkt read, pkt_data > pkt_meta', good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, @@ -538,16 +857,16 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data > pkt_meta', bad access 1", + "XDP pkt read, pkt_data > pkt_meta', corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -575,6 +894,42 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { + "XDP pkt read, pkt_data > pkt_meta', corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data > pkt_meta', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ "XDP pkt read, pkt_meta' < pkt_data, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, @@ -593,16 +948,16 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_meta' < pkt_data, bad access 1", + "XDP pkt read, pkt_meta' < pkt_data, corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -630,7 +985,43 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data < pkt_meta', good access", + "XDP pkt read, pkt_meta' < pkt_data, corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' < pkt_data, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data < pkt_meta', corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -683,6 +1074,41 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { + "XDP pkt read, pkt_data < pkt_meta', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data < pkt_meta', corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ "XDP pkt read, pkt_meta' >= pkt_data, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, @@ -700,15 +1126,15 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_meta' >= pkt_data, bad access 1", + "XDP pkt read, pkt_meta' >= pkt_data, corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -736,7 +1162,41 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data >= pkt_meta', good access", + "XDP pkt read, pkt_meta' >= pkt_data, corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' >= pkt_data, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data >= pkt_meta', corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -791,7 +1251,44 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_meta' <= pkt_data, good access", + "XDP pkt read, pkt_data >= pkt_meta', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data >= pkt_meta', corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' <= pkt_data, corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -846,6 +1343,43 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { + "XDP pkt read, pkt_meta' <= pkt_data, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' <= pkt_data, corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ "XDP pkt read, pkt_data <= pkt_meta', good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, @@ -863,15 +1397,15 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data <= pkt_meta', bad access 1", + "XDP pkt read, pkt_data <= pkt_meta', corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -898,3 +1432,37 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_data <= pkt_meta', corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data <= pkt_meta', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, diff --git a/tools/testing/selftests/damon/.gitignore b/tools/testing/selftests/damon/.gitignore new file mode 100644 index 000000000000..c6c2965a6607 --- /dev/null +++ b/tools/testing/selftests/damon/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +huge_count_read_write diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index 8a3f2cd9fec0..937d36ae9a69 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -1,7 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for damon selftests -TEST_FILES = _chk_dependency.sh -TEST_PROGS = debugfs_attrs.sh +TEST_GEN_FILES += huge_count_read_write + +TEST_FILES = _chk_dependency.sh _debugfs_common.sh +TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh +TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh include ../lib.mk diff --git a/tools/testing/selftests/damon/_debugfs_common.sh b/tools/testing/selftests/damon/_debugfs_common.sh new file mode 100644 index 000000000000..48989d4813ae --- /dev/null +++ b/tools/testing/selftests/damon/_debugfs_common.sh @@ -0,0 +1,52 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +test_write_result() { + file=$1 + content=$2 + orig_content=$3 + expect_reason=$4 + expected=$5 + + echo "$content" > "$file" + if [ $? -ne "$expected" ] + then + echo "writing $content to $file doesn't return $expected" + echo "expected because: $expect_reason" + echo "$orig_content" > "$file" + exit 1 + fi +} + +test_write_succ() { + test_write_result "$1" "$2" "$3" "$4" 0 +} + +test_write_fail() { + test_write_result "$1" "$2" "$3" "$4" 1 +} + +test_content() { + file=$1 + orig_content=$2 + expected=$3 + expect_reason=$4 + + content=$(cat "$file") + if [ "$content" != "$expected" ] + then + echo "reading $file expected $expected but $content" + echo "expected because: $expect_reason" + echo "$orig_content" > "$file" + exit 1 + fi +} + +source ./_chk_dependency.sh + +damon_onoff="$DBGFS/monitor_on" +if [ $(cat "$damon_onoff") = "on" ] +then + echo "monitoring is on" + exit $ksft_skip +fi diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh index 196b6640bf37..902e312bca89 100644 --- a/tools/testing/selftests/damon/debugfs_attrs.sh +++ b/tools/testing/selftests/damon/debugfs_attrs.sh @@ -1,48 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -test_write_result() { - file=$1 - content=$2 - orig_content=$3 - expect_reason=$4 - expected=$5 - - echo "$content" > "$file" - if [ $? -ne "$expected" ] - then - echo "writing $content to $file doesn't return $expected" - echo "expected because: $expect_reason" - echo "$orig_content" > "$file" - exit 1 - fi -} - -test_write_succ() { - test_write_result "$1" "$2" "$3" "$4" 0 -} - -test_write_fail() { - test_write_result "$1" "$2" "$3" "$4" 1 -} - -test_content() { - file=$1 - orig_content=$2 - expected=$3 - expect_reason=$4 - - content=$(cat "$file") - if [ "$content" != "$expected" ] - then - echo "reading $file expected $expected but $content" - echo "expected because: $expect_reason" - echo "$orig_content" > "$file" - exit 1 - fi -} - -source ./_chk_dependency.sh +source _debugfs_common.sh # Test attrs file # =============== @@ -56,33 +15,3 @@ test_write_fail "$file" "1 2 3 5 4" "$orig_content" \ "min_nr_regions > max_nr_regions" test_content "$file" "$orig_content" "1 2 3 4 5" "successfully written" echo "$orig_content" > "$file" - -# Test schemes file -# ================= - -file="$DBGFS/schemes" -orig_content=$(cat "$file") - -test_write_succ "$file" "1 2 3 4 5 6 4 0 0 0 1 2 3 1 100 3 2 1" \ - "$orig_content" "valid input" -test_write_fail "$file" "1 2 -3 4 5 6 3 0 0 0 1 2 3 1 100 3 2 1" "$orig_content" "multi lines" -test_write_succ "$file" "" "$orig_content" "disabling" -echo "$orig_content" > "$file" - -# Test target_ids file -# ==================== - -file="$DBGFS/target_ids" -orig_content=$(cat "$file") - -test_write_succ "$file" "1 2 3 4" "$orig_content" "valid input" -test_write_succ "$file" "1 2 abc 4" "$orig_content" "still valid input" -test_content "$file" "$orig_content" "1 2" "non-integer was there" -test_write_succ "$file" "abc 2 3" "$orig_content" "the file allows wrong input" -test_content "$file" "$orig_content" "" "wrong input written" -test_write_succ "$file" "" "$orig_content" "empty input" -test_content "$file" "$orig_content" "" "empty input written" -echo "$orig_content" > "$file" - -echo "PASS" diff --git a/tools/testing/selftests/damon/debugfs_empty_targets.sh b/tools/testing/selftests/damon/debugfs_empty_targets.sh new file mode 100644 index 000000000000..87aff8083822 --- /dev/null +++ b/tools/testing/selftests/damon/debugfs_empty_targets.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source _debugfs_common.sh + +# Test empty targets case +# ======================= + +orig_target_ids=$(cat "$DBGFS/target_ids") +echo "" > "$DBGFS/target_ids" +orig_monitor_on=$(cat "$DBGFS/monitor_on") +test_write_fail "$DBGFS/monitor_on" "on" "orig_monitor_on" "empty target ids" +echo "$orig_target_ids" > "$DBGFS/target_ids" diff --git a/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh b/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh new file mode 100644 index 000000000000..922cadac2950 --- /dev/null +++ b/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source _debugfs_common.sh + +# Test huge count read write +# ========================== + +dmesg -C + +for file in "$DBGFS/"* +do + ./huge_count_read_write "$file" +done + +if dmesg | grep -q WARNING +then + dmesg + exit 1 +else + exit 0 +fi diff --git a/tools/testing/selftests/damon/debugfs_schemes.sh b/tools/testing/selftests/damon/debugfs_schemes.sh new file mode 100644 index 000000000000..5b39ab44731c --- /dev/null +++ b/tools/testing/selftests/damon/debugfs_schemes.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source _debugfs_common.sh + +# Test schemes file +# ================= + +file="$DBGFS/schemes" +orig_content=$(cat "$file") + +test_write_succ "$file" "1 2 3 4 5 6 4 0 0 0 1 2 3 1 100 3 2 1" \ + "$orig_content" "valid input" +test_write_fail "$file" "1 2 +3 4 5 6 3 0 0 0 1 2 3 1 100 3 2 1" "$orig_content" "multi lines" +test_write_succ "$file" "" "$orig_content" "disabling" +test_write_fail "$file" "2 1 2 1 10 1 3 10 1 1 1 1 1 1 1 1 2 3" \ + "$orig_content" "wrong condition ranges" +echo "$orig_content" > "$file" diff --git a/tools/testing/selftests/damon/debugfs_target_ids.sh b/tools/testing/selftests/damon/debugfs_target_ids.sh new file mode 100644 index 000000000000..49aeabdb0aae --- /dev/null +++ b/tools/testing/selftests/damon/debugfs_target_ids.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source _debugfs_common.sh + +# Test target_ids file +# ==================== + +file="$DBGFS/target_ids" +orig_content=$(cat "$file") + +test_write_succ "$file" "1 2 3 4" "$orig_content" "valid input" +test_write_succ "$file" "1 2 abc 4" "$orig_content" "still valid input" +test_content "$file" "$orig_content" "1 2" "non-integer was there" +test_write_succ "$file" "abc 2 3" "$orig_content" "the file allows wrong input" +test_content "$file" "$orig_content" "" "wrong input written" +test_write_succ "$file" "" "$orig_content" "empty input" +test_content "$file" "$orig_content" "" "empty input written" +echo "$orig_content" > "$file" diff --git a/tools/testing/selftests/damon/huge_count_read_write.c b/tools/testing/selftests/damon/huge_count_read_write.c new file mode 100644 index 000000000000..ad7a6b4cf338 --- /dev/null +++ b/tools/testing/selftests/damon/huge_count_read_write.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: SeongJae Park <sj@kernel.org> + */ + +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdio.h> + +void write_read_with_huge_count(char *file) +{ + int filedesc = open(file, O_RDWR); + char buf[25]; + int ret; + + printf("%s %s\n", __func__, file); + if (filedesc < 0) { + fprintf(stderr, "failed opening %s\n", file); + exit(1); + } + + write(filedesc, "", 0xfffffffful); + perror("after write: "); + ret = read(filedesc, buf, 0xfffffffful); + perror("after read: "); + close(filedesc); +} + +int main(int argc, char *argv[]) +{ + if (argc != 2) { + fprintf(stderr, "Usage: %s <file>\n", argv[0]); + exit(1); + } + write_read_with_huge_count(argv[1]); + + return 0; +} diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile index 39f2bbe8dd3d..d7b312b44a62 100644 --- a/tools/testing/selftests/gpio/Makefile +++ b/tools/testing/selftests/gpio/Makefile @@ -3,5 +3,6 @@ TEST_PROGS := gpio-mockup.sh TEST_FILES := gpio-mockup-sysfs.sh TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev +CFLAGS += -O2 -g -Wall -I../../../../usr/include/ include ../lib.mk diff --git a/tools/testing/selftests/gpio/gpio-mockup-cdev.c b/tools/testing/selftests/gpio/gpio-mockup-cdev.c index e83eac71621a..d1640f44f8ac 100644 --- a/tools/testing/selftests/gpio/gpio-mockup-cdev.c +++ b/tools/testing/selftests/gpio/gpio-mockup-cdev.c @@ -117,7 +117,7 @@ int main(int argc, char *argv[]) { char *chip; int opt, ret, cfd, lfd; - unsigned int offset, val, abiv; + unsigned int offset, val = 0, abiv; uint32_t flags_v1; uint64_t flags_v2; diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index d4a830139683..00814c0f87a6 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -23,12 +23,14 @@ /x86_64/platform_info_test /x86_64/set_boot_cpu_id /x86_64/set_sregs_test +/x86_64/sev_migrate_tests /x86_64/smm_test /x86_64/state_test /x86_64/svm_vmcall_test /x86_64/svm_int_ctl_test /x86_64/sync_regs_test /x86_64/tsc_msrs_test +/x86_64/userspace_io_test /x86_64/userspace_msr_exit_test /x86_64/vmx_apic_access_test /x86_64/vmx_close_while_nested_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index c4e34717826a..f307c9f61981 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -59,6 +59,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test +TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index 5d95113c7b7c..d8909032317a 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -47,7 +47,7 @@ #include "guest_modes.h" /* Global variable used to synchronize all of the vCPU threads. */ -static int iteration = -1; +static int iteration; /* Defines what vCPU threads should do during a given iteration. */ static enum { @@ -215,12 +215,11 @@ static bool spin_wait_for_next_iteration(int *current_iteration) return true; } -static void *vcpu_thread_main(void *arg) +static void vcpu_thread_main(struct perf_test_vcpu_args *vcpu_args) { - struct perf_test_vcpu_args *vcpu_args = arg; struct kvm_vm *vm = perf_test_args.vm; int vcpu_id = vcpu_args->vcpu_id; - int current_iteration = -1; + int current_iteration = 0; while (spin_wait_for_next_iteration(¤t_iteration)) { switch (READ_ONCE(iteration_work)) { @@ -235,8 +234,6 @@ static void *vcpu_thread_main(void *arg) vcpu_last_completed_iteration[vcpu_id] = current_iteration; } - - return NULL; } static void spin_wait_for_vcpu(int vcpu_id, int target_iteration) @@ -277,8 +274,7 @@ static void run_iteration(struct kvm_vm *vm, int vcpus, const char *description) static void access_memory(struct kvm_vm *vm, int vcpus, enum access_type access, const char *description) { - perf_test_args.wr_fract = (access == ACCESS_READ) ? INT_MAX : 1; - sync_global_to_guest(vm, perf_test_args); + perf_test_set_wr_fract(vm, (access == ACCESS_READ) ? INT_MAX : 1); iteration_work = ITERATION_ACCESS_MEMORY; run_iteration(vm, vcpus, description); } @@ -296,48 +292,16 @@ static void mark_memory_idle(struct kvm_vm *vm, int vcpus) run_iteration(vm, vcpus, "Mark memory idle"); } -static pthread_t *create_vcpu_threads(int vcpus) -{ - pthread_t *vcpu_threads; - int i; - - vcpu_threads = malloc(vcpus * sizeof(vcpu_threads[0])); - TEST_ASSERT(vcpu_threads, "Failed to allocate vcpu_threads."); - - for (i = 0; i < vcpus; i++) { - vcpu_last_completed_iteration[i] = iteration; - pthread_create(&vcpu_threads[i], NULL, vcpu_thread_main, - &perf_test_args.vcpu_args[i]); - } - - return vcpu_threads; -} - -static void terminate_vcpu_threads(pthread_t *vcpu_threads, int vcpus) -{ - int i; - - /* Set done to signal the vCPU threads to exit */ - done = true; - - for (i = 0; i < vcpus; i++) - pthread_join(vcpu_threads[i], NULL); -} - static void run_test(enum vm_guest_mode mode, void *arg) { struct test_params *params = arg; struct kvm_vm *vm; - pthread_t *vcpu_threads; int vcpus = params->vcpus; vm = perf_test_create_vm(mode, vcpus, params->vcpu_memory_bytes, 1, - params->backing_src); + params->backing_src, !overlap_memory_access); - perf_test_setup_vcpus(vm, vcpus, params->vcpu_memory_bytes, - !overlap_memory_access); - - vcpu_threads = create_vcpu_threads(vcpus); + perf_test_start_vcpu_threads(vcpus, vcpu_thread_main); pr_info("\n"); access_memory(vm, vcpus, ACCESS_WRITE, "Populating memory"); @@ -352,8 +316,10 @@ static void run_test(enum vm_guest_mode mode, void *arg) mark_memory_idle(vm, vcpus); access_memory(vm, vcpus, ACCESS_READ, "Reading from idle memory"); - terminate_vcpu_threads(vcpu_threads, vcpus); - free(vcpu_threads); + /* Set done to signal the vCPU threads to exit */ + done = true; + + perf_test_join_vcpu_threads(vcpus); perf_test_destroy_vm(vm); } diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 1510b21e6306..6a719d065599 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -42,10 +42,9 @@ static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; static size_t demand_paging_size; static char *guest_data_prototype; -static void *vcpu_worker(void *data) +static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) { int ret; - struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data; int vcpu_id = vcpu_args->vcpu_id; struct kvm_vm *vm = perf_test_args.vm; struct kvm_run *run; @@ -68,8 +67,6 @@ static void *vcpu_worker(void *data) ts_diff = timespec_elapsed(start); PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id, ts_diff.tv_sec, ts_diff.tv_nsec); - - return NULL; } static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr) @@ -282,7 +279,6 @@ struct test_params { static void run_test(enum vm_guest_mode mode, void *arg) { struct test_params *p = arg; - pthread_t *vcpu_threads; pthread_t *uffd_handler_threads = NULL; struct uffd_handler_args *uffd_args = NULL; struct timespec start; @@ -293,9 +289,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) int r; vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, - p->src_type); - - perf_test_args.wr_fract = 1; + p->src_type, p->partition_vcpu_memory_access); demand_paging_size = get_backing_src_pagesz(p->src_type); @@ -304,12 +298,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) "Failed to allocate buffer for guest data pattern"); memset(guest_data_prototype, 0xAB, demand_paging_size); - vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); - TEST_ASSERT(vcpu_threads, "Memory allocation failed"); - - perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size, - p->partition_vcpu_memory_access); - if (p->uffd_mode) { uffd_handler_threads = malloc(nr_vcpus * sizeof(*uffd_handler_threads)); @@ -322,26 +310,15 @@ static void run_test(enum vm_guest_mode mode, void *arg) TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd"); for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { - vm_paddr_t vcpu_gpa; + struct perf_test_vcpu_args *vcpu_args; void *vcpu_hva; void *vcpu_alias; - uint64_t vcpu_mem_size; - - if (p->partition_vcpu_memory_access) { - vcpu_gpa = guest_test_phys_mem + - (vcpu_id * guest_percpu_mem_size); - vcpu_mem_size = guest_percpu_mem_size; - } else { - vcpu_gpa = guest_test_phys_mem; - vcpu_mem_size = guest_percpu_mem_size * nr_vcpus; - } - PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", - vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_mem_size); + vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; /* Cache the host addresses of the region */ - vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); - vcpu_alias = addr_gpa2alias(vm, vcpu_gpa); + vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa); + vcpu_alias = addr_gpa2alias(vm, vcpu_args->gpa); /* * Set up user fault fd to handle demand paging @@ -355,32 +332,18 @@ static void run_test(enum vm_guest_mode mode, void *arg) pipefds[vcpu_id * 2], p->uffd_mode, p->uffd_delay, &uffd_args[vcpu_id], vcpu_hva, vcpu_alias, - vcpu_mem_size); + vcpu_args->pages * perf_test_args.guest_page_size); } } - /* Export the shared variables to the guest */ - sync_global_to_guest(vm, perf_test_args); - pr_info("Finished creating vCPUs and starting uffd threads\n"); clock_gettime(CLOCK_MONOTONIC, &start); - - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { - pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, - &perf_test_args.vcpu_args[vcpu_id]); - } - + perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); pr_info("Started all vCPUs\n"); - /* Wait for the vcpu threads to quit */ - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { - pthread_join(vcpu_threads[vcpu_id], NULL); - PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id); - } - + perf_test_join_vcpu_threads(nr_vcpus); ts_diff = timespec_elapsed(start); - pr_info("All vCPU threads joined\n"); if (p->uffd_mode) { @@ -404,7 +367,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) perf_test_destroy_vm(vm); free(guest_data_prototype); - free(vcpu_threads); if (p->uffd_mode) { free(uffd_handler_threads); free(uffd_args); diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 7ffab5bd5ce5..1954b964d1cf 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -31,7 +31,7 @@ static bool host_quit; static int iteration; static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; -static void *vcpu_worker(void *data) +static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) { int ret; struct kvm_vm *vm = perf_test_args.vm; @@ -41,7 +41,6 @@ static void *vcpu_worker(void *data) struct timespec ts_diff; struct timespec total = (struct timespec){0}; struct timespec avg; - struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data; int vcpu_id = vcpu_args->vcpu_id; run = vcpu_state(vm, vcpu_id); @@ -83,8 +82,6 @@ static void *vcpu_worker(void *data) pr_debug("\nvCPU %d dirtied 0x%lx pages over %d iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", vcpu_id, pages_count, vcpu_last_completed_iteration[vcpu_id], total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec); - - return NULL; } struct test_params { @@ -170,7 +167,6 @@ static void free_bitmaps(unsigned long *bitmaps[], int slots) static void run_test(enum vm_guest_mode mode, void *arg) { struct test_params *p = arg; - pthread_t *vcpu_threads; struct kvm_vm *vm; unsigned long **bitmaps; uint64_t guest_num_pages; @@ -186,9 +182,10 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct timespec clear_dirty_log_total = (struct timespec){0}; vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, - p->slots, p->backing_src); + p->slots, p->backing_src, + p->partition_vcpu_memory_access); - perf_test_args.wr_fract = p->wr_fract; + perf_test_set_wr_fract(vm, p->wr_fract); guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm); guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); @@ -203,25 +200,15 @@ static void run_test(enum vm_guest_mode mode, void *arg) vm_enable_cap(vm, &cap); } - vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); - TEST_ASSERT(vcpu_threads, "Memory allocation failed"); - - perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size, - p->partition_vcpu_memory_access); - - sync_global_to_guest(vm, perf_test_args); - /* Start the iterations */ iteration = 0; host_quit = false; clock_gettime(CLOCK_MONOTONIC, &start); - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) vcpu_last_completed_iteration[vcpu_id] = -1; - pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, - &perf_test_args.vcpu_args[vcpu_id]); - } + perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); /* Allow the vCPUs to populate memory */ pr_debug("Starting iteration %d - Populating\n", iteration); @@ -290,8 +277,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Tell the vcpu thread to quit */ host_quit = true; - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) - pthread_join(vcpu_threads[vcpu_id], NULL); + perf_test_join_vcpu_threads(nr_vcpus); avg = timespec_div(get_dirty_log_total, p->iterations); pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", @@ -306,7 +292,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) } free_bitmaps(bitmaps, p->slots); - free(vcpu_threads); perf_test_destroy_vm(vm); } diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 792c60e1b17d..3fcd89e195c7 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -115,7 +115,7 @@ static void guest_code(void) addr = guest_test_virt_mem; addr += (READ_ONCE(random_array[i]) % guest_num_pages) * guest_page_size; - addr &= ~(host_page_size - 1); + addr = align_down(addr, host_page_size); *(uint64_t *)addr = READ_ONCE(iteration); } @@ -737,14 +737,14 @@ static void run_test(enum vm_guest_mode mode, void *arg) if (!p->phys_offset) { guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * guest_page_size; - guest_test_phys_mem &= ~(host_page_size - 1); + guest_test_phys_mem = align_down(guest_test_phys_mem, host_page_size); } else { guest_test_phys_mem = p->phys_offset; } #ifdef __s390x__ /* Align to 1M (segment size) */ - guest_test_phys_mem &= ~((1 << 20) - 1); + guest_test_phys_mem = align_down(guest_test_phys_mem, 1 << 20); #endif pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 6a1a37f30494..da2b702da71a 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -71,6 +71,15 @@ enum vm_guest_mode { #endif +#if defined(__x86_64__) +unsigned long vm_compute_max_gfn(struct kvm_vm *vm); +#else +static inline unsigned long vm_compute_max_gfn(struct kvm_vm *vm) +{ + return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1; +} +#endif + #define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT) #define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE) diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index df9f1a3a3ffb..a86f953d8d36 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -8,6 +8,8 @@ #ifndef SELFTEST_KVM_PERF_TEST_UTIL_H #define SELFTEST_KVM_PERF_TEST_UTIL_H +#include <pthread.h> + #include "kvm_util.h" /* Default guest test virtual memory offset */ @@ -18,6 +20,7 @@ #define PERF_TEST_MEM_SLOT_INDEX 1 struct perf_test_vcpu_args { + uint64_t gpa; uint64_t gva; uint64_t pages; @@ -27,7 +30,7 @@ struct perf_test_vcpu_args { struct perf_test_args { struct kvm_vm *vm; - uint64_t host_page_size; + uint64_t gpa; uint64_t guest_page_size; int wr_fract; @@ -36,19 +39,15 @@ struct perf_test_args { extern struct perf_test_args perf_test_args; -/* - * Guest physical memory offset of the testing memory slot. - * This will be set to the topmost valid physical address minus - * the test memory size. - */ -extern uint64_t guest_test_phys_mem; - struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, uint64_t vcpu_memory_bytes, int slots, - enum vm_mem_backing_src_type backing_src); + enum vm_mem_backing_src_type backing_src, + bool partition_vcpu_memory_access); void perf_test_destroy_vm(struct kvm_vm *vm); -void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, - uint64_t vcpu_memory_bytes, - bool partition_vcpu_memory_access); + +void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract); + +void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *)); +void perf_test_join_vcpu_threads(int vcpus); #endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index f8fddc84c0d3..99e0dcdc923f 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -104,6 +104,7 @@ size_t get_trans_hugepagesz(void); size_t get_def_hugetlb_pagesz(void); const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i); size_t get_backing_src_pagesz(uint32_t i); +bool is_backing_src_hugetlb(uint32_t i); void backing_src_help(const char *flag); enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name); long get_run_delay(void); @@ -117,4 +118,29 @@ static inline bool backing_src_is_shared(enum vm_mem_backing_src_type t) return vm_mem_backing_src_alias(t)->flag & MAP_SHARED; } +/* Aligns x up to the next multiple of size. Size must be a power of 2. */ +static inline uint64_t align_up(uint64_t x, uint64_t size) +{ + uint64_t mask = size - 1; + + TEST_ASSERT(size != 0 && !(size & (size - 1)), + "size not a power of 2: %lu", size); + return ((x + mask) & ~mask); +} + +static inline uint64_t align_down(uint64_t x, uint64_t size) +{ + uint64_t x_aligned_up = align_up(x, size); + + if (x == x_aligned_up) + return x; + else + return x_aligned_up - size; +} + +static inline void *align_ptr_up(void *x, size_t size) +{ + return (void *)align_up((unsigned long)x, size); +} + #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c index f968dfd4ee88..aed9dc3ca1e9 100644 --- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c +++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c @@ -12,6 +12,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <sys/resource.h> #include "test_util.h" @@ -40,11 +41,40 @@ int main(int argc, char *argv[]) { int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID); int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); + /* + * Number of file descriptors reqired, KVM_CAP_MAX_VCPUS for vCPU fds + + * an arbitrary number for everything else. + */ + int nr_fds_wanted = kvm_max_vcpus + 100; + struct rlimit rl; pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id); pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus); /* + * Check that we're allowed to open nr_fds_wanted file descriptors and + * try raising the limits if needed. + */ + TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!"); + + if (rl.rlim_cur < nr_fds_wanted) { + rl.rlim_cur = nr_fds_wanted; + if (rl.rlim_max < nr_fds_wanted) { + int old_rlim_max = rl.rlim_max; + rl.rlim_max = nr_fds_wanted; + + int r = setrlimit(RLIMIT_NOFILE, &rl); + if (r < 0) { + printf("RLIMIT_NOFILE hard limit is too low (%d, wanted %d)\n", + old_rlim_max, nr_fds_wanted); + exit(KSFT_SKIP); + } + } else { + TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!"); + } + } + + /* * Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID. * Userspace is supposed to use KVM_CAP_MAX_VCPUS as the maximum ID * in this case. diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index 36407cb0ec85..ba1fdc3dcf4a 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -280,7 +280,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) #ifdef __s390x__ alignment = max(0x100000, alignment); #endif - guest_test_phys_mem &= ~(alignment - 1); + guest_test_phys_mem = align_down(guest_test_phys_mem, alignment); /* Set up the shared data structure test_args */ test_args.vm = vm; diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c index eac44f5d0db0..13e8e3dcf984 100644 --- a/tools/testing/selftests/kvm/lib/elf.c +++ b/tools/testing/selftests/kvm/lib/elf.c @@ -157,8 +157,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename) "memsize of 0,\n" " phdr index: %u p_memsz: 0x%" PRIx64, n1, (uint64_t) phdr.p_memsz); - vm_vaddr_t seg_vstart = phdr.p_vaddr; - seg_vstart &= ~(vm_vaddr_t)(vm->page_size - 1); + vm_vaddr_t seg_vstart = align_down(phdr.p_vaddr, vm->page_size); vm_vaddr_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1; seg_vend |= vm->page_size - 1; size_t seg_size = seg_vend - seg_vstart + 1; diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 14bb4d5b6bb7..daf6fdb217a7 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -22,15 +22,6 @@ static int vcpu_mmap_sz(void); -/* Aligns x up to the next multiple of size. Size must be a power of 2. */ -static void *align(void *x, size_t size) -{ - size_t mask = size - 1; - TEST_ASSERT(size != 0 && !(size & (size - 1)), - "size not a power of 2: %lu", size); - return (void *) (((size_t) x + mask) & ~mask); -} - int open_path_or_exit(const char *path, int flags) { int fd; @@ -191,15 +182,15 @@ const char *vm_guest_mode_string(uint32_t i) } const struct vm_guest_mode_params vm_guest_mode_params[] = { - { 52, 48, 0x1000, 12 }, - { 52, 48, 0x10000, 16 }, - { 48, 48, 0x1000, 12 }, - { 48, 48, 0x10000, 16 }, - { 40, 48, 0x1000, 12 }, - { 40, 48, 0x10000, 16 }, - { 0, 0, 0x1000, 12 }, - { 47, 64, 0x1000, 12 }, - { 44, 64, 0x1000, 12 }, + [VM_MODE_P52V48_4K] = { 52, 48, 0x1000, 12 }, + [VM_MODE_P52V48_64K] = { 52, 48, 0x10000, 16 }, + [VM_MODE_P48V48_4K] = { 48, 48, 0x1000, 12 }, + [VM_MODE_P48V48_64K] = { 48, 48, 0x10000, 16 }, + [VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 }, + [VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 }, + [VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 }, + [VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 }, + [VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 }, }; _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, "Missing new mode params?"); @@ -311,7 +302,7 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) (1ULL << (vm->va_bits - 1)) >> vm->page_shift); /* Limit physical addresses to PA-bits. */ - vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1; + vm->max_gfn = vm_compute_max_gfn(vm); /* Allocate and setup memory for guest. */ vm->vpages_mapped = sparsebit_alloc(); @@ -879,9 +870,17 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, alignment = 1; #endif + /* + * When using THP mmap is not guaranteed to returned a hugepage aligned + * address so we have to pad the mmap. Padding is not needed for HugeTLB + * because mmap will always return an address aligned to the HugeTLB + * page size. + */ if (src_type == VM_MEM_SRC_ANONYMOUS_THP) alignment = max(backing_src_pagesz, alignment); + ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz)); + /* Add enough memory to align up if necessary */ if (alignment > 1) region->mmap_size += alignment; @@ -914,8 +913,13 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, "test_malloc failed, mmap_start: %p errno: %i", region->mmap_start, errno); + TEST_ASSERT(!is_backing_src_hugetlb(src_type) || + region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz), + "mmap_start %p is not aligned to HugeTLB page size 0x%lx", + region->mmap_start, backing_src_pagesz); + /* Align host address */ - region->host_mem = align(region->mmap_start, alignment); + region->host_mem = align_ptr_up(region->mmap_start, alignment); /* As needed perform madvise */ if ((src_type == VM_MEM_SRC_ANONYMOUS || @@ -958,7 +962,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, "mmap of alias failed, errno: %i", errno); /* Align host alias address */ - region->host_alias = align(region->mmap_alias, alignment); + region->host_alias = align_ptr_up(region->mmap_alias, alignment); } } diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c index 0ef80dbdc116..722df3a28791 100644 --- a/tools/testing/selftests/kvm/lib/perf_test_util.c +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c @@ -10,21 +10,40 @@ struct perf_test_args perf_test_args; -uint64_t guest_test_phys_mem; - /* * Guest virtual memory offset of the testing memory slot. * Must not conflict with identity mapped test code. */ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; +struct vcpu_thread { + /* The id of the vCPU. */ + int vcpu_id; + + /* The pthread backing the vCPU. */ + pthread_t thread; + + /* Set to true once the vCPU thread is up and running. */ + bool running; +}; + +/* The vCPU threads involved in this test. */ +static struct vcpu_thread vcpu_threads[KVM_MAX_VCPUS]; + +/* The function run by each vCPU thread, as provided by the test. */ +static void (*vcpu_thread_fn)(struct perf_test_vcpu_args *); + +/* Set to true once all vCPU threads are up and running. */ +static bool all_vcpu_threads_running; + /* * Continuously write to the first 8 bytes of each page in the * specified region. */ static void guest_code(uint32_t vcpu_id) { - struct perf_test_vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + struct perf_test_args *pta = &perf_test_args; + struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_id]; uint64_t gva; uint64_t pages; int i; @@ -37,9 +56,9 @@ static void guest_code(uint32_t vcpu_id) while (true) { for (i = 0; i < pages; i++) { - uint64_t addr = gva + (i * perf_test_args.guest_page_size); + uint64_t addr = gva + (i * pta->guest_page_size); - if (i % perf_test_args.wr_fract == 0) + if (i % pta->wr_fract == 0) *(uint64_t *)addr = 0x0123456789ABCDEF; else READ_ONCE(*(uint64_t *)addr); @@ -49,35 +68,81 @@ static void guest_code(uint32_t vcpu_id) } } +void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, + uint64_t vcpu_memory_bytes, + bool partition_vcpu_memory_access) +{ + struct perf_test_args *pta = &perf_test_args; + struct perf_test_vcpu_args *vcpu_args; + int vcpu_id; + + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + vcpu_args = &pta->vcpu_args[vcpu_id]; + + vcpu_args->vcpu_id = vcpu_id; + if (partition_vcpu_memory_access) { + vcpu_args->gva = guest_test_virt_mem + + (vcpu_id * vcpu_memory_bytes); + vcpu_args->pages = vcpu_memory_bytes / + pta->guest_page_size; + vcpu_args->gpa = pta->gpa + (vcpu_id * vcpu_memory_bytes); + } else { + vcpu_args->gva = guest_test_virt_mem; + vcpu_args->pages = (vcpus * vcpu_memory_bytes) / + pta->guest_page_size; + vcpu_args->gpa = pta->gpa; + } + + vcpu_args_set(vm, vcpu_id, 1, vcpu_id); + + pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", + vcpu_id, vcpu_args->gpa, vcpu_args->gpa + + (vcpu_args->pages * pta->guest_page_size)); + } +} + struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, uint64_t vcpu_memory_bytes, int slots, - enum vm_mem_backing_src_type backing_src) + enum vm_mem_backing_src_type backing_src, + bool partition_vcpu_memory_access) { + struct perf_test_args *pta = &perf_test_args; struct kvm_vm *vm; uint64_t guest_num_pages; + uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src); int i; pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - perf_test_args.host_page_size = getpagesize(); - perf_test_args.guest_page_size = vm_guest_mode_params[mode].page_size; + /* By default vCPUs will write to memory. */ + pta->wr_fract = 1; + + /* + * Snapshot the non-huge page size. This is used by the guest code to + * access/dirty pages at the logging granularity. + */ + pta->guest_page_size = vm_guest_mode_params[mode].page_size; guest_num_pages = vm_adjust_num_guest_pages(mode, - (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size); + (vcpus * vcpu_memory_bytes) / pta->guest_page_size); - TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, + TEST_ASSERT(vcpu_memory_bytes % getpagesize() == 0, "Guest memory size is not host page size aligned."); - TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0, + TEST_ASSERT(vcpu_memory_bytes % pta->guest_page_size == 0, "Guest memory size is not guest page size aligned."); TEST_ASSERT(guest_num_pages % slots == 0, "Guest memory cannot be evenly divided into %d slots.", slots); + /* + * Pass guest_num_pages to populate the page tables for test memory. + * The memory is also added to memslot 0, but that's a benign side + * effect as KVM allows aliasing HVAs in meslots. + */ vm = vm_create_with_vcpus(mode, vcpus, DEFAULT_GUEST_PHY_PAGES, - (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size, - 0, guest_code, NULL); + guest_num_pages, 0, guest_code, NULL); - perf_test_args.vm = vm; + pta->vm = vm; /* * If there should be more memory in the guest test region than there @@ -90,20 +155,18 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, guest_num_pages, vm_get_max_gfn(vm), vcpus, vcpu_memory_bytes); - guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * - perf_test_args.guest_page_size; - guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1); + pta->gpa = (vm_get_max_gfn(vm) - guest_num_pages) * pta->guest_page_size; + pta->gpa = align_down(pta->gpa, backing_src_pagesz); #ifdef __s390x__ /* Align to 1M (segment size) */ - guest_test_phys_mem &= ~((1 << 20) - 1); + pta->gpa = align_down(pta->gpa, 1 << 20); #endif - pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); + pr_info("guest physical test memory offset: 0x%lx\n", pta->gpa); /* Add extra memory slots for testing */ for (i = 0; i < slots; i++) { uint64_t region_pages = guest_num_pages / slots; - vm_paddr_t region_start = guest_test_phys_mem + - region_pages * perf_test_args.guest_page_size * i; + vm_paddr_t region_start = pta->gpa + region_pages * pta->guest_page_size * i; vm_userspace_mem_region_add(vm, backing_src, region_start, PERF_TEST_MEM_SLOT_INDEX + i, @@ -111,10 +174,15 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, } /* Do mapping for the demand paging memory slot */ - virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages); + virt_map(vm, guest_test_virt_mem, pta->gpa, guest_num_pages); + + perf_test_setup_vcpus(vm, vcpus, vcpu_memory_bytes, partition_vcpu_memory_access); ucall_init(vm, NULL); + /* Export the shared variables to the guest. */ + sync_global_to_guest(vm, perf_test_args); + return vm; } @@ -124,36 +192,60 @@ void perf_test_destroy_vm(struct kvm_vm *vm) kvm_vm_free(vm); } -void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, - uint64_t vcpu_memory_bytes, - bool partition_vcpu_memory_access) +void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract) +{ + perf_test_args.wr_fract = wr_fract; + sync_global_to_guest(vm, perf_test_args); +} + +static void *vcpu_thread_main(void *data) +{ + struct vcpu_thread *vcpu = data; + + WRITE_ONCE(vcpu->running, true); + + /* + * Wait for all vCPU threads to be up and running before calling the test- + * provided vCPU thread function. This prevents thread creation (which + * requires taking the mmap_sem in write mode) from interfering with the + * guest faulting in its memory. + */ + while (!READ_ONCE(all_vcpu_threads_running)) + ; + + vcpu_thread_fn(&perf_test_args.vcpu_args[vcpu->vcpu_id]); + + return NULL; +} + +void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *)) { - vm_paddr_t vcpu_gpa; - struct perf_test_vcpu_args *vcpu_args; int vcpu_id; + vcpu_thread_fn = vcpu_fn; + WRITE_ONCE(all_vcpu_threads_running, false); + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { - vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + struct vcpu_thread *vcpu = &vcpu_threads[vcpu_id]; - vcpu_args->vcpu_id = vcpu_id; - if (partition_vcpu_memory_access) { - vcpu_args->gva = guest_test_virt_mem + - (vcpu_id * vcpu_memory_bytes); - vcpu_args->pages = vcpu_memory_bytes / - perf_test_args.guest_page_size; - vcpu_gpa = guest_test_phys_mem + - (vcpu_id * vcpu_memory_bytes); - } else { - vcpu_args->gva = guest_test_virt_mem; - vcpu_args->pages = (vcpus * vcpu_memory_bytes) / - perf_test_args.guest_page_size; - vcpu_gpa = guest_test_phys_mem; - } + vcpu->vcpu_id = vcpu_id; + WRITE_ONCE(vcpu->running, false); - vcpu_args_set(vm, vcpu_id, 1, vcpu_id); + pthread_create(&vcpu->thread, NULL, vcpu_thread_main, vcpu); + } - pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", - vcpu_id, vcpu_gpa, vcpu_gpa + - (vcpu_args->pages * perf_test_args.guest_page_size)); + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + while (!READ_ONCE(vcpu_threads[vcpu_id].running)) + ; } + + WRITE_ONCE(all_vcpu_threads_running, true); +} + +void perf_test_join_vcpu_threads(int vcpus) +{ + int vcpu_id; + + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) + pthread_join(vcpu_threads[vcpu_id].thread, NULL); } diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index b72429108993..6d23878bbfe1 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -283,6 +283,11 @@ size_t get_backing_src_pagesz(uint32_t i) } } +bool is_backing_src_hugetlb(uint32_t i) +{ + return !!(vm_mem_backing_src_alias(i)->flag & MAP_HUGETLB); +} + static void print_available_backing_src_types(const char *prefix) { int i; diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 82c39db91369..eef7b34756d5 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1431,3 +1431,71 @@ struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpui return cpuid; } + +#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541 +#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163 +#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65 + +static inline unsigned x86_family(unsigned int eax) +{ + unsigned int x86; + + x86 = (eax >> 8) & 0xf; + + if (x86 == 0xf) + x86 += (eax >> 20) & 0xff; + + return x86; +} + +unsigned long vm_compute_max_gfn(struct kvm_vm *vm) +{ + const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ + unsigned long ht_gfn, max_gfn, max_pfn; + uint32_t eax, ebx, ecx, edx, max_ext_leaf; + + max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1; + + /* Avoid reserved HyperTransport region on AMD processors. */ + eax = ecx = 0; + cpuid(&eax, &ebx, &ecx, &edx); + if (ebx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx || + ecx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx || + edx != X86EMUL_CPUID_VENDOR_AuthenticAMD_edx) + return max_gfn; + + /* On parts with <40 physical address bits, the area is fully hidden */ + if (vm->pa_bits < 40) + return max_gfn; + + /* Before family 17h, the HyperTransport area is just below 1T. */ + ht_gfn = (1 << 28) - num_ht_pages; + eax = 1; + cpuid(&eax, &ebx, &ecx, &edx); + if (x86_family(eax) < 0x17) + goto done; + + /* + * Otherwise it's at the top of the physical address space, possibly + * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use + * the old conservative value if MAXPHYADDR is not enumerated. + */ + eax = 0x80000000; + cpuid(&eax, &ebx, &ecx, &edx); + max_ext_leaf = eax; + if (max_ext_leaf < 0x80000008) + goto done; + + eax = 0x80000008; + cpuid(&eax, &ebx, &ecx, &edx); + max_pfn = (1ULL << ((eax & 0xff) - vm->page_shift)) - 1; + if (max_ext_leaf >= 0x8000001f) { + eax = 0x8000001f; + cpuid(&eax, &ebx, &ecx, &edx); + max_pfn >>= (ebx >> 6) & 0x3f; + } + + ht_gfn = max_pfn - num_ht_pages; +done: + return min(max_gfn, ht_gfn - 1); +} diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index 4cfcafea9f5a..1410d0a9141a 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -36,11 +36,9 @@ static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; static bool run_vcpus = true; -static void *vcpu_worker(void *data) +static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) { int ret; - struct perf_test_vcpu_args *vcpu_args = - (struct perf_test_vcpu_args *)data; int vcpu_id = vcpu_args->vcpu_id; struct kvm_vm *vm = perf_test_args.vm; struct kvm_run *run; @@ -59,8 +57,6 @@ static void *vcpu_worker(void *data) "Invalid guest sync status: exit_reason=%s\n", exit_reason_str(run->exit_reason)); } - - return NULL; } struct memslot_antagonist_args { @@ -80,7 +76,7 @@ static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, * Add the dummy memslot just below the perf_test_util memslot, which is * at the top of the guest physical address space. */ - gpa = guest_test_phys_mem - pages * vm_get_page_size(vm); + gpa = perf_test_args.gpa - pages * vm_get_page_size(vm); for (i = 0; i < nr_modifications; i++) { usleep(delay); @@ -100,29 +96,15 @@ struct test_params { static void run_test(enum vm_guest_mode mode, void *arg) { struct test_params *p = arg; - pthread_t *vcpu_threads; struct kvm_vm *vm; - int vcpu_id; vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, - VM_MEM_SRC_ANONYMOUS); - - perf_test_args.wr_fract = 1; - - vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); - TEST_ASSERT(vcpu_threads, "Memory allocation failed"); - - perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size, - p->partition_vcpu_memory_access); - - /* Export the shared variables to the guest */ - sync_global_to_guest(vm, perf_test_args); + VM_MEM_SRC_ANONYMOUS, + p->partition_vcpu_memory_access); pr_info("Finished creating vCPUs\n"); - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) - pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, - &perf_test_args.vcpu_args[vcpu_id]); + perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); pr_info("Started all vCPUs\n"); @@ -131,16 +113,10 @@ static void run_test(enum vm_guest_mode mode, void *arg) run_vcpus = false; - /* Wait for the vcpu threads to quit */ - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) - pthread_join(vcpu_threads[vcpu_id], NULL); - + perf_test_join_vcpu_threads(nr_vcpus); pr_info("All vCPU threads joined\n"); - ucall_uninit(vm); - kvm_vm_free(vm); - - free(vcpu_threads); + perf_test_destroy_vm(vm); } static void help(char *name) diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 91d88aaa9899..672915ce73d8 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -165,10 +165,10 @@ static void hv_set_cpuid(struct kvm_vm *vm, struct kvm_cpuid2 *cpuid, vcpu_set_cpuid(vm, VCPU_ID, cpuid); } -static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr, - struct kvm_cpuid2 *best) +static void guest_test_msrs_access(void) { struct kvm_run *run; + struct kvm_vm *vm; struct ucall uc; int stage = 0, r; struct kvm_cpuid_entry2 feat = { @@ -180,11 +180,34 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr, struct kvm_cpuid_entry2 dbg = { .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES }; - struct kvm_enable_cap cap = {0}; - - run = vcpu_state(vm, VCPU_ID); + struct kvm_cpuid2 *best; + vm_vaddr_t msr_gva; + struct kvm_enable_cap cap = { + .cap = KVM_CAP_HYPERV_ENFORCE_CPUID, + .args = {1} + }; + struct msr_data *msr; while (true) { + vm = vm_create_default(VCPU_ID, 0, guest_msr); + + msr_gva = vm_vaddr_alloc_page(vm); + memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize()); + msr = addr_gva2hva(vm, msr_gva); + + vcpu_args_set(vm, VCPU_ID, 1, msr_gva); + vcpu_enable_cap(vm, VCPU_ID, &cap); + + vcpu_set_hv_cpuid(vm, VCPU_ID); + + best = kvm_get_supported_hv_cpuid(); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); + + run = vcpu_state(vm, VCPU_ID); + switch (stage) { case 0: /* @@ -315,6 +338,7 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr, * capability enabled and guest visible CPUID bit unset. */ cap.cap = KVM_CAP_HYPERV_SYNIC2; + cap.args[0] = 0; vcpu_enable_cap(vm, VCPU_ID, &cap); break; case 22: @@ -461,9 +485,9 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr, switch (get_ucall(vm, VCPU_ID, &uc)) { case UCALL_SYNC: - TEST_ASSERT(uc.args[1] == stage, - "Unexpected stage: %ld (%d expected)\n", - uc.args[1], stage); + TEST_ASSERT(uc.args[1] == 0, + "Unexpected stage: %ld (0 expected)\n", + uc.args[1]); break; case UCALL_ABORT: TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], @@ -474,13 +498,14 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr, } stage++; + kvm_vm_free(vm); } } -static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall, - void *input, void *output, struct kvm_cpuid2 *best) +static void guest_test_hcalls_access(void) { struct kvm_run *run; + struct kvm_vm *vm; struct ucall uc; int stage = 0, r; struct kvm_cpuid_entry2 feat = { @@ -493,10 +518,38 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall struct kvm_cpuid_entry2 dbg = { .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES }; - - run = vcpu_state(vm, VCPU_ID); + struct kvm_enable_cap cap = { + .cap = KVM_CAP_HYPERV_ENFORCE_CPUID, + .args = {1} + }; + vm_vaddr_t hcall_page, hcall_params; + struct hcall_data *hcall; + struct kvm_cpuid2 *best; while (true) { + vm = vm_create_default(VCPU_ID, 0, guest_hcall); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); + + /* Hypercall input/output */ + hcall_page = vm_vaddr_alloc_pages(vm, 2); + hcall = addr_gva2hva(vm, hcall_page); + memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); + + hcall_params = vm_vaddr_alloc_page(vm); + memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize()); + + vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params); + vcpu_enable_cap(vm, VCPU_ID, &cap); + + vcpu_set_hv_cpuid(vm, VCPU_ID); + + best = kvm_get_supported_hv_cpuid(); + + run = vcpu_state(vm, VCPU_ID); + switch (stage) { case 0: hcall->control = 0xdeadbeef; @@ -606,9 +659,9 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall switch (get_ucall(vm, VCPU_ID, &uc)) { case UCALL_SYNC: - TEST_ASSERT(uc.args[1] == stage, - "Unexpected stage: %ld (%d expected)\n", - uc.args[1], stage); + TEST_ASSERT(uc.args[1] == 0, + "Unexpected stage: %ld (0 expected)\n", + uc.args[1]); break; case UCALL_ABORT: TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], @@ -619,66 +672,15 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall } stage++; + kvm_vm_free(vm); } } int main(void) { - struct kvm_cpuid2 *best; - struct kvm_vm *vm; - vm_vaddr_t msr_gva, hcall_page, hcall_params; - struct kvm_enable_cap cap = { - .cap = KVM_CAP_HYPERV_ENFORCE_CPUID, - .args = {1} - }; - - /* Test MSRs */ - vm = vm_create_default(VCPU_ID, 0, guest_msr); - - msr_gva = vm_vaddr_alloc_page(vm); - memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize()); - vcpu_args_set(vm, VCPU_ID, 1, msr_gva); - vcpu_enable_cap(vm, VCPU_ID, &cap); - - vcpu_set_hv_cpuid(vm, VCPU_ID); - - best = kvm_get_supported_hv_cpuid(); - - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); - pr_info("Testing access to Hyper-V specific MSRs\n"); - guest_test_msrs_access(vm, addr_gva2hva(vm, msr_gva), - best); - kvm_vm_free(vm); - - /* Test hypercalls */ - vm = vm_create_default(VCPU_ID, 0, guest_hcall); - - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); - - /* Hypercall input/output */ - hcall_page = vm_vaddr_alloc_pages(vm, 2); - memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); - - hcall_params = vm_vaddr_alloc_page(vm); - memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize()); - - vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params); - vcpu_enable_cap(vm, VCPU_ID, &cap); - - vcpu_set_hv_cpuid(vm, VCPU_ID); - - best = kvm_get_supported_hv_cpuid(); + guest_test_msrs_access(); pr_info("Testing access to Hyper-V hypercalls\n"); - guest_test_hcalls_access(vm, addr_gva2hva(vm, hcall_params), - addr_gva2hva(vm, hcall_page), - addr_gva2hva(vm, hcall_page) + getpagesize(), - best); - - kvm_vm_free(vm); + guest_test_hcalls_access(); } diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c index 5ba325cd64bf..29b18d565cf4 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c +++ b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c @@ -54,12 +54,15 @@ static struct kvm_vm *sev_vm_create(bool es) return vm; } -static struct kvm_vm *__vm_create(void) +static struct kvm_vm *aux_vm_create(bool with_vcpus) { struct kvm_vm *vm; int i; vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR); + if (!with_vcpus) + return vm; + for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i) vm_vcpu_add(vm, i); @@ -89,11 +92,11 @@ static void test_sev_migrate_from(bool es) { struct kvm_vm *src_vm; struct kvm_vm *dst_vms[NR_MIGRATE_TEST_VMS]; - int i; + int i, ret; src_vm = sev_vm_create(es); for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i) - dst_vms[i] = __vm_create(); + dst_vms[i] = aux_vm_create(true); /* Initial migration from the src to the first dst. */ sev_migrate_from(dst_vms[0]->fd, src_vm->fd); @@ -102,7 +105,10 @@ static void test_sev_migrate_from(bool es) sev_migrate_from(dst_vms[i]->fd, dst_vms[i - 1]->fd); /* Migrate the guest back to the original VM. */ - sev_migrate_from(src_vm->fd, dst_vms[NR_MIGRATE_TEST_VMS - 1]->fd); + ret = __sev_migrate_from(src_vm->fd, dst_vms[NR_MIGRATE_TEST_VMS - 1]->fd); + TEST_ASSERT(ret == -1 && errno == EIO, + "VM that was migrated from should be dead. ret %d, errno: %d\n", ret, + errno); kvm_vm_free(src_vm); for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i) @@ -146,6 +152,8 @@ static void test_sev_migrate_locking(void) for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) pthread_join(pt[i], NULL); + for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) + kvm_vm_free(input[i].vm); } static void test_sev_migrate_parameters(void) @@ -157,12 +165,11 @@ static void test_sev_migrate_parameters(void) sev_vm = sev_vm_create(/* es= */ false); sev_es_vm = sev_vm_create(/* es= */ true); vm_no_vcpu = vm_create(VM_MODE_DEFAULT, 0, O_RDWR); - vm_no_sev = __vm_create(); + vm_no_sev = aux_vm_create(true); sev_es_vm_no_vmsa = vm_create(VM_MODE_DEFAULT, 0, O_RDWR); sev_ioctl(sev_es_vm_no_vmsa->fd, KVM_SEV_ES_INIT, NULL); vm_vcpu_add(sev_es_vm_no_vmsa, 1); - ret = __sev_migrate_from(sev_vm->fd, sev_es_vm->fd); TEST_ASSERT( ret == -1 && errno == EINVAL, @@ -191,13 +198,151 @@ static void test_sev_migrate_parameters(void) TEST_ASSERT(ret == -1 && errno == EINVAL, "Migrations require SEV enabled. ret %d, errno: %d\n", ret, errno); + + kvm_vm_free(sev_vm); + kvm_vm_free(sev_es_vm); + kvm_vm_free(sev_es_vm_no_vmsa); + kvm_vm_free(vm_no_vcpu); + kvm_vm_free(vm_no_sev); +} + +static int __sev_mirror_create(int dst_fd, int src_fd) +{ + struct kvm_enable_cap cap = { + .cap = KVM_CAP_VM_COPY_ENC_CONTEXT_FROM, + .args = { src_fd } + }; + + return ioctl(dst_fd, KVM_ENABLE_CAP, &cap); +} + + +static void sev_mirror_create(int dst_fd, int src_fd) +{ + int ret; + + ret = __sev_mirror_create(dst_fd, src_fd); + TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d\n", ret, errno); +} + +static void test_sev_mirror(bool es) +{ + struct kvm_vm *src_vm, *dst_vm; + struct kvm_sev_launch_start start = { + .policy = es ? SEV_POLICY_ES : 0 + }; + int i; + + src_vm = sev_vm_create(es); + dst_vm = aux_vm_create(false); + + sev_mirror_create(dst_vm->fd, src_vm->fd); + + /* Check that we can complete creation of the mirror VM. */ + for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i) + vm_vcpu_add(dst_vm, i); + sev_ioctl(dst_vm->fd, KVM_SEV_LAUNCH_START, &start); + if (es) + sev_ioctl(dst_vm->fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL); + + kvm_vm_free(src_vm); + kvm_vm_free(dst_vm); +} + +static void test_sev_mirror_parameters(void) +{ + struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_with_vcpu; + int ret; + + sev_vm = sev_vm_create(/* es= */ false); + sev_es_vm = sev_vm_create(/* es= */ true); + vm_with_vcpu = aux_vm_create(true); + vm_no_vcpu = aux_vm_create(false); + + ret = __sev_mirror_create(sev_vm->fd, sev_vm->fd); + TEST_ASSERT( + ret == -1 && errno == EINVAL, + "Should not be able copy context to self. ret: %d, errno: %d\n", + ret, errno); + + ret = __sev_mirror_create(sev_vm->fd, sev_es_vm->fd); + TEST_ASSERT( + ret == -1 && errno == EINVAL, + "Should not be able copy context to SEV enabled VM. ret: %d, errno: %d\n", + ret, errno); + + ret = __sev_mirror_create(sev_es_vm->fd, sev_vm->fd); + TEST_ASSERT( + ret == -1 && errno == EINVAL, + "Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d\n", + ret, errno); + + ret = __sev_mirror_create(vm_no_vcpu->fd, vm_with_vcpu->fd); + TEST_ASSERT(ret == -1 && errno == EINVAL, + "Copy context requires SEV enabled. ret %d, errno: %d\n", ret, + errno); + + ret = __sev_mirror_create(vm_with_vcpu->fd, sev_vm->fd); + TEST_ASSERT( + ret == -1 && errno == EINVAL, + "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d\n", + ret, errno); + + kvm_vm_free(sev_vm); + kvm_vm_free(sev_es_vm); + kvm_vm_free(vm_with_vcpu); + kvm_vm_free(vm_no_vcpu); +} + +static void test_sev_move_copy(void) +{ + struct kvm_vm *dst_vm, *sev_vm, *mirror_vm, *dst_mirror_vm; + int ret; + + sev_vm = sev_vm_create(/* es= */ false); + dst_vm = aux_vm_create(true); + mirror_vm = aux_vm_create(false); + dst_mirror_vm = aux_vm_create(false); + + sev_mirror_create(mirror_vm->fd, sev_vm->fd); + ret = __sev_migrate_from(dst_vm->fd, sev_vm->fd); + TEST_ASSERT(ret == -1 && errno == EBUSY, + "Cannot migrate VM that has mirrors. ret %d, errno: %d\n", ret, + errno); + + /* The mirror itself can be migrated. */ + sev_migrate_from(dst_mirror_vm->fd, mirror_vm->fd); + ret = __sev_migrate_from(dst_vm->fd, sev_vm->fd); + TEST_ASSERT(ret == -1 && errno == EBUSY, + "Cannot migrate VM that has mirrors. ret %d, errno: %d\n", ret, + errno); + + /* + * mirror_vm is not a mirror anymore, dst_mirror_vm is. Thus, + * the owner can be copied as soon as dst_mirror_vm is gone. + */ + kvm_vm_free(dst_mirror_vm); + sev_migrate_from(dst_vm->fd, sev_vm->fd); + + kvm_vm_free(mirror_vm); + kvm_vm_free(dst_vm); + kvm_vm_free(sev_vm); } int main(int argc, char *argv[]) { - test_sev_migrate_from(/* es= */ false); - test_sev_migrate_from(/* es= */ true); - test_sev_migrate_locking(); - test_sev_migrate_parameters(); + if (kvm_check_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) { + test_sev_migrate_from(/* es= */ false); + test_sev_migrate_from(/* es= */ true); + test_sev_migrate_locking(); + test_sev_migrate_parameters(); + if (kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) + test_sev_move_copy(); + } + if (kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) { + test_sev_mirror(/* es= */ false); + test_sev_mirror(/* es= */ true); + test_sev_mirror_parameters(); + } return 0; } diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c index df04f56ce859..30a81038df46 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c +++ b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c @@ -75,7 +75,7 @@ static void l1_guest_code(struct svm_test_data *svm) vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK; /* No intercepts for real and virtual interrupts */ - vmcb->control.intercept &= ~(1ULL << INTERCEPT_INTR | INTERCEPT_VINTR); + vmcb->control.intercept &= ~(BIT(INTERCEPT_INTR) | BIT(INTERCEPT_VINTR)); /* Make a virtual interrupt VINTR_IRQ_NUMBER pending */ vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT); diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c new file mode 100644 index 000000000000..e4bef2e05686 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" + +#include "kvm_util.h" +#include "processor.h" + +#define VCPU_ID 1 + +static void guest_ins_port80(uint8_t *buffer, unsigned int count) +{ + unsigned long end; + + if (count == 2) + end = (unsigned long)buffer + 1; + else + end = (unsigned long)buffer + 8192; + + asm volatile("cld; rep; insb" : "+D"(buffer), "+c"(count) : "d"(0x80) : "memory"); + GUEST_ASSERT_1(count == 0, count); + GUEST_ASSERT_2((unsigned long)buffer == end, buffer, end); +} + +static void guest_code(void) +{ + uint8_t buffer[8192]; + int i; + + /* + * Special case tests. main() will adjust RCX 2 => 1 and 3 => 8192 to + * test that KVM doesn't explode when userspace modifies the "count" on + * a userspace I/O exit. KVM isn't required to play nice with the I/O + * itself as KVM doesn't support manipulating the count, it just needs + * to not explode or overflow a buffer. + */ + guest_ins_port80(buffer, 2); + guest_ins_port80(buffer, 3); + + /* Verify KVM fills the buffer correctly when not stuffing RCX. */ + memset(buffer, 0, sizeof(buffer)); + guest_ins_port80(buffer, 8192); + for (i = 0; i < 8192; i++) + GUEST_ASSERT_2(buffer[i] == 0xaa, i, buffer[i]); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_regs regs; + struct kvm_run *run; + struct kvm_vm *vm; + struct ucall uc; + int rc; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code); + run = vcpu_state(vm, VCPU_ID); + + memset(®s, 0, sizeof(regs)); + + while (1) { + rc = _vcpu_run(vm, VCPU_ID); + + TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + if (get_ucall(vm, VCPU_ID, &uc)) + break; + + TEST_ASSERT(run->io.port == 0x80, + "Expected I/O at port 0x80, got port 0x%x\n", run->io.port); + + /* + * Modify the rep string count in RCX: 2 => 1 and 3 => 8192. + * Note, this abuses KVM's batching of rep string I/O to avoid + * getting stuck in an infinite loop. That behavior isn't in + * scope from a testing perspective as it's not ABI in any way, + * i.e. it really is abusing internal KVM knowledge. + */ + vcpu_regs_get(vm, VCPU_ID, ®s); + if (regs.rcx == 2) + regs.rcx = 1; + if (regs.rcx == 3) + regs.rcx = 8192; + memset((void *)run + run->io.data_offset, 0xaa, 4096); + vcpu_regs_set(vm, VCPU_ID, ®s); + } + + switch (uc.cmd) { + case UCALL_DONE: + break; + case UCALL_ABORT: + TEST_FAIL("%s at %s:%ld : argN+1 = 0x%lx, argN+2 = 0x%lx", + (const char *)uc.args[0], __FILE__, uc.args[1], + uc.args[2], uc.args[3]); + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index eda0d2a51224..a0699f00b3d6 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -24,8 +24,12 @@ #define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE) #define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20) +#define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40) #define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20) +#define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40) + +#define EVTCHN_VECTOR 0x10 static struct kvm_vm *vm; @@ -56,15 +60,44 @@ struct vcpu_runstate_info { uint64_t time[4]; }; +struct arch_vcpu_info { + unsigned long cr2; + unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ +}; + +struct vcpu_info { + uint8_t evtchn_upcall_pending; + uint8_t evtchn_upcall_mask; + unsigned long evtchn_pending_sel; + struct arch_vcpu_info arch; + struct pvclock_vcpu_time_info time; +}; /* 64 bytes (x86) */ + #define RUNSTATE_running 0 #define RUNSTATE_runnable 1 #define RUNSTATE_blocked 2 #define RUNSTATE_offline 3 +static void evtchn_handler(struct ex_regs *regs) +{ + struct vcpu_info *vi = (void *)VCPU_INFO_VADDR; + vi->evtchn_upcall_pending = 0; + + GUEST_SYNC(0x20); +} + static void guest_code(void) { struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR; + __asm__ __volatile__( + "sti\n" + "nop\n" + ); + + /* Trigger an interrupt injection */ + GUEST_SYNC(0); + /* Test having the host set runstates manually */ GUEST_SYNC(RUNSTATE_runnable); GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0); @@ -153,7 +186,7 @@ int main(int argc, char *argv[]) struct kvm_xen_vcpu_attr vi = { .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, - .u.gpa = SHINFO_REGION_GPA + 0x40, + .u.gpa = VCPU_INFO_ADDR, }; vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &vi); @@ -163,6 +196,16 @@ int main(int argc, char *argv[]) }; vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock); + struct kvm_xen_hvm_attr vec = { + .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR, + .u.vector = EVTCHN_VECTOR, + }; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler); + if (do_runstate_tests) { struct kvm_xen_vcpu_attr st = { .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, @@ -171,9 +214,14 @@ int main(int argc, char *argv[]) vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st); } + struct vcpu_info *vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR); + vinfo->evtchn_upcall_pending = 0; + struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR); rs->state = 0x5a; + bool evtchn_irq_expected = false; + for (;;) { volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); struct ucall uc; @@ -193,16 +241,21 @@ int main(int argc, char *argv[]) struct kvm_xen_vcpu_attr rst; long rundelay; - /* If no runstate support, bail out early */ - if (!do_runstate_tests) - goto done; - - TEST_ASSERT(rs->state_entry_time == rs->time[0] + - rs->time[1] + rs->time[2] + rs->time[3], - "runstate times don't add up"); + if (do_runstate_tests) + TEST_ASSERT(rs->state_entry_time == rs->time[0] + + rs->time[1] + rs->time[2] + rs->time[3], + "runstate times don't add up"); switch (uc.args[1]) { - case RUNSTATE_running...RUNSTATE_offline: + case 0: + evtchn_irq_expected = true; + vinfo->evtchn_upcall_pending = 1; + break; + + case RUNSTATE_runnable...RUNSTATE_offline: + TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen"); + if (!do_runstate_tests) + goto done; rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT; rst.u.runstate.state = uc.args[1]; vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); @@ -236,6 +289,10 @@ int main(int argc, char *argv[]) sched_yield(); } while (get_run_delay() < rundelay); break; + case 0x20: + TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ"); + evtchn_irq_expected = false; + break; } break; } diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 7615f29831eb..9897fa9ab953 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -34,6 +34,7 @@ TEST_PROGS += srv6_end_dt46_l3vpn_test.sh TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh TEST_PROGS += vrf_strict_mode_test.sh +TEST_PROGS += arp_ndisc_evict_nocarrier.sh TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 3313566ce906..a1da013d847b 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -4002,8 +4002,8 @@ EOF ################################################################################ # main -TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_addr_bind ipv4_runtime ipv4_netfilter" -TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_addr_bind ipv6_runtime ipv6_netfilter" +TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_bind ipv4_runtime ipv4_netfilter" +TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_bind ipv6_runtime ipv6_netfilter" TESTS_OTHER="use_cases" PAUSE_ON_FAIL=no @@ -4077,3 +4077,11 @@ cleanup 2>/dev/null printf "\nTests passed: %3d\n" ${nsuccess} printf "Tests failed: %3d\n" ${nfail} + +if [ $nfail -ne 0 ]; then + exit 1 # KSFT_FAIL +elif [ $nsuccess -eq 0 ]; then + exit $ksft_skip +fi + +exit 0 # KSFT_PASS diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index b5a69ad191b0..d444ee6aa3cb 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -629,6 +629,66 @@ ipv6_fcnal() log_test $? 0 "Nexthops removed on admin down" } +ipv6_grp_refs() +{ + if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test; need mausezahn tool" + return + fi + + run_cmd "$IP link set dev veth1 up" + run_cmd "$IP link add veth1.10 link veth1 up type vlan id 10" + run_cmd "$IP link add veth1.20 link veth1 up type vlan id 20" + run_cmd "$IP -6 addr add 2001:db8:91::1/64 dev veth1.10" + run_cmd "$IP -6 addr add 2001:db8:92::1/64 dev veth1.20" + run_cmd "$IP -6 neigh add 2001:db8:91::2 lladdr 00:11:22:33:44:55 dev veth1.10" + run_cmd "$IP -6 neigh add 2001:db8:92::2 lladdr 00:11:22:33:44:55 dev veth1.20" + run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1.10" + run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth1.20" + run_cmd "$IP nexthop add id 102 group 100" + run_cmd "$IP route add 2001:db8:101::1/128 nhid 102" + + # create per-cpu dsts through nh 100 + run_cmd "ip netns exec me mausezahn -6 veth1.10 -B 2001:db8:101::1 -A 2001:db8:91::1 -c 5 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1" + + # remove nh 100 from the group to delete the route potentially leaving + # a stale per-cpu dst which holds a reference to the nexthop's net + # device and to the IPv6 route + run_cmd "$IP nexthop replace id 102 group 101" + run_cmd "$IP route del 2001:db8:101::1/128" + + # add both nexthops to the group so a reference is taken on them + run_cmd "$IP nexthop replace id 102 group 100/101" + + # if the bug described in commit "net: nexthop: release IPv6 per-cpu + # dsts when replacing a nexthop group" exists at this point we have + # an unlinked IPv6 route (but not freed due to stale dst) with a + # reference over the group so we delete the group which will again + # only unlink it due to the route reference + run_cmd "$IP nexthop del id 102" + + # delete the nexthop with stale dst, since we have an unlinked + # group with a ref to it and an unlinked IPv6 route with ref to the + # group, the nh will only be unlinked and not freed so the stale dst + # remains forever and we get a net device refcount imbalance + run_cmd "$IP nexthop del id 100" + + # if a reference was lost this command will hang because the net device + # cannot be removed + timeout -s KILL 5 ip netns exec me ip link del veth1.10 >/dev/null 2>&1 + + # we can't cleanup if the command is hung trying to delete the netdev + if [ $? -eq 137 ]; then + return 1 + fi + + # cleanup + run_cmd "$IP link del veth1.20" + run_cmd "$IP nexthop flush" + + return 0 +} + ipv6_grp_fcnal() { local rc @@ -734,6 +794,9 @@ ipv6_grp_fcnal() run_cmd "$IP nexthop add id 108 group 31/24" log_test $? 2 "Nexthop group can not have a blackhole and another nexthop" + + ipv6_grp_refs + log_test $? 0 "Nexthop group replace refcounts" } ipv6_res_grp_fcnal() diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 5abe92d55b69..996af1ae3d3d 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -444,24 +444,63 @@ fib_rp_filter_test() setup set -e + ip netns add ns2 + ip netns set ns2 auto + + ip -netns ns2 link set dev lo up + + $IP link add name veth1 type veth peer name veth2 + $IP link set dev veth2 netns ns2 + $IP address add 192.0.2.1/24 dev veth1 + ip -netns ns2 address add 192.0.2.1/24 dev veth2 + $IP link set dev veth1 up + ip -netns ns2 link set dev veth2 up + $IP link set dev lo address 52:54:00:6a:c7:5e - $IP link set dummy0 address 52:54:00:6a:c7:5e - $IP link add dummy1 type dummy - $IP link set dummy1 address 52:54:00:6a:c7:5e - $IP link set dev dummy1 up + $IP link set dev veth1 address 52:54:00:6a:c7:5e + ip -netns ns2 link set dev lo address 52:54:00:6a:c7:5e + ip -netns ns2 link set dev veth2 address 52:54:00:6a:c7:5e + + # 1. (ns2) redirect lo's egress to veth2's egress + ip netns exec ns2 tc qdisc add dev lo parent root handle 1: fq_codel + ip netns exec ns2 tc filter add dev lo parent 1: protocol arp basic \ + action mirred egress redirect dev veth2 + ip netns exec ns2 tc filter add dev lo parent 1: protocol ip basic \ + action mirred egress redirect dev veth2 + + # 2. (ns1) redirect veth1's ingress to lo's ingress + $NS_EXEC tc qdisc add dev veth1 ingress + $NS_EXEC tc filter add dev veth1 ingress protocol arp basic \ + action mirred ingress redirect dev lo + $NS_EXEC tc filter add dev veth1 ingress protocol ip basic \ + action mirred ingress redirect dev lo + + # 3. (ns1) redirect lo's egress to veth1's egress + $NS_EXEC tc qdisc add dev lo parent root handle 1: fq_codel + $NS_EXEC tc filter add dev lo parent 1: protocol arp basic \ + action mirred egress redirect dev veth1 + $NS_EXEC tc filter add dev lo parent 1: protocol ip basic \ + action mirred egress redirect dev veth1 + + # 4. (ns2) redirect veth2's ingress to lo's ingress + ip netns exec ns2 tc qdisc add dev veth2 ingress + ip netns exec ns2 tc filter add dev veth2 ingress protocol arp basic \ + action mirred ingress redirect dev lo + ip netns exec ns2 tc filter add dev veth2 ingress protocol ip basic \ + action mirred ingress redirect dev lo + $NS_EXEC sysctl -qw net.ipv4.conf.all.rp_filter=1 $NS_EXEC sysctl -qw net.ipv4.conf.all.accept_local=1 $NS_EXEC sysctl -qw net.ipv4.conf.all.route_localnet=1 - - $NS_EXEC tc qd add dev dummy1 parent root handle 1: fq_codel - $NS_EXEC tc filter add dev dummy1 parent 1: protocol arp basic action mirred egress redirect dev lo - $NS_EXEC tc filter add dev dummy1 parent 1: protocol ip basic action mirred egress redirect dev lo + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=1 + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.accept_local=1 + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.route_localnet=1 set +e - run_cmd "ip netns exec ns1 ping -I dummy1 -w1 -c1 198.51.100.1" + run_cmd "ip netns exec ns2 ping -w1 -c1 192.0.2.1" log_test $? 0 "rp_filter passes local packets" - run_cmd "ip netns exec ns1 ping -I dummy1 -w1 -c1 127.0.0.1" + run_cmd "ip netns exec ns2 ping -w1 -c1 127.0.0.1" log_test $? 0 "rp_filter passes loopback packets" cleanup diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index a4bd1b087303..697994a9278b 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -6,6 +6,7 @@ CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NET_VRF=m CONFIG_BPF_SYSCALL=y CONFIG_CGROUP_BPF=y +CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_MIRRED=m CONFIG_NET_ACT_MPLS=m CONFIG_NET_ACT_VLAN=m diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh index d9eca227136b..de19eb6c38f0 100755 --- a/tools/testing/selftests/net/forwarding/tc_actions.sh +++ b/tools/testing/selftests/net/forwarding/tc_actions.sh @@ -3,7 +3,7 @@ ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \ mirred_egress_mirror_test matchall_mirred_egress_mirror_test \ - gact_trap_test" + gact_trap_test mirred_egress_to_ingress_test" NUM_NETIFS=4 source tc_common.sh source lib.sh @@ -13,10 +13,12 @@ tcflags="skip_hw" h1_create() { simple_if_init $h1 192.0.2.1/24 + tc qdisc add dev $h1 clsact } h1_destroy() { + tc qdisc del dev $h1 clsact simple_if_fini $h1 192.0.2.1/24 } @@ -153,6 +155,49 @@ gact_trap_test() log_test "trap ($tcflags)" } +mirred_egress_to_ingress_test() +{ + RET=0 + + tc filter add dev $h1 protocol ip pref 100 handle 100 egress flower \ + ip_proto icmp src_ip 192.0.2.1 dst_ip 192.0.2.2 type 8 action \ + ct commit nat src addr 192.0.2.2 pipe \ + ct clear pipe \ + ct commit nat dst addr 192.0.2.1 pipe \ + mirred ingress redirect dev $h1 + + tc filter add dev $swp1 protocol ip pref 11 handle 111 ingress flower \ + ip_proto icmp src_ip 192.0.2.1 dst_ip 192.0.2.2 type 8 action drop + tc filter add dev $swp1 protocol ip pref 12 handle 112 ingress flower \ + ip_proto icmp src_ip 192.0.2.1 dst_ip 192.0.2.2 type 0 action pass + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t icmp "ping,id=42,seq=10" -q + + tc_check_packets "dev $h1 egress" 100 1 + check_err $? "didn't mirror first packet" + + tc_check_packets "dev $swp1 ingress" 111 1 + check_fail $? "didn't redirect first packet" + tc_check_packets "dev $swp1 ingress" 112 1 + check_err $? "didn't receive reply to first packet" + + ping 192.0.2.2 -I$h1 -c1 -w1 -q 1>/dev/null 2>&1 + + tc_check_packets "dev $h1 egress" 100 2 + check_err $? "didn't mirror second packet" + tc_check_packets "dev $swp1 ingress" 111 1 + check_fail $? "didn't redirect second packet" + tc_check_packets "dev $swp1 ingress" 112 2 + check_err $? "didn't receive reply to second packet" + + tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower + tc filter del dev $swp1 ingress protocol ip pref 11 handle 111 flower + tc filter del dev $swp1 ingress protocol ip pref 12 handle 112 flower + + log_test "mirred_egress_to_ingress ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/net/gre_gso.sh b/tools/testing/selftests/net/gre_gso.sh index fdeb44d621eb..3224651db97b 100755 --- a/tools/testing/selftests/net/gre_gso.sh +++ b/tools/testing/selftests/net/gre_gso.sh @@ -118,16 +118,18 @@ gre_gst_test_checks() local addr=$2 local proto=$3 - $NS_EXEC nc $proto -kl $port >/dev/null & + [ "$proto" == 6 ] && addr="[$addr]" + + $NS_EXEC socat - tcp${proto}-listen:$port,reuseaddr,fork >/dev/null & PID=$! while ! $NS_EXEC ss -ltn | grep -q $port; do ((i++)); sleep 0.01; done - cat $TMPFILE | timeout 1 nc $proto -N $addr $port + cat $TMPFILE | timeout 1 socat -u STDIN TCP:$addr:$port log_test $? 0 "$name - copy file w/ TSO" ethtool -K veth0 tso off - cat $TMPFILE | timeout 1 nc $proto -N $addr $port + cat $TMPFILE | timeout 1 socat -u STDIN TCP:$addr:$port log_test $? 0 "$name - copy file w/ GSO" ethtool -K veth0 tso on @@ -155,8 +157,8 @@ gre6_gso_test() sleep 2 - gre_gst_test_checks GREv6/v4 172.16.2.2 - gre_gst_test_checks GREv6/v6 2001:db8:1::2 -6 + gre_gst_test_checks GREv6/v4 172.16.2.2 4 + gre_gst_test_checks GREv6/v6 2001:db8:1::2 6 cleanup } @@ -212,8 +214,8 @@ if [ ! -x "$(command -v ip)" ]; then exit $ksft_skip fi -if [ ! -x "$(command -v nc)" ]; then - echo "SKIP: Could not run test without nc tool" +if [ ! -x "$(command -v socat)" ]; then + echo "SKIP: Could not run test without socat tool" exit $ksft_skip fi diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index e61fc4c32ba2..6e468e0f42f7 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -31,6 +31,8 @@ struct tls_crypto_info_keys { struct tls12_crypto_info_chacha20_poly1305 chacha20; struct tls12_crypto_info_sm4_gcm sm4gcm; struct tls12_crypto_info_sm4_ccm sm4ccm; + struct tls12_crypto_info_aes_ccm_128 aesccm128; + struct tls12_crypto_info_aes_gcm_256 aesgcm256; }; size_t len; }; @@ -61,6 +63,16 @@ static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type, tls12->sm4ccm.info.version = tls_version; tls12->sm4ccm.info.cipher_type = cipher_type; break; + case TLS_CIPHER_AES_CCM_128: + tls12->len = sizeof(struct tls12_crypto_info_aes_ccm_128); + tls12->aesccm128.info.version = tls_version; + tls12->aesccm128.info.cipher_type = cipher_type; + break; + case TLS_CIPHER_AES_GCM_256: + tls12->len = sizeof(struct tls12_crypto_info_aes_gcm_256); + tls12->aesgcm256.info.version = tls_version; + tls12->aesgcm256.info.cipher_type = cipher_type; + break; default: break; } @@ -78,26 +90,21 @@ static void memrnd(void *s, size_t n) *byte++ = rand(); } -FIXTURE(tls_basic) -{ - int fd, cfd; - bool notls; -}; - -FIXTURE_SETUP(tls_basic) +static void ulp_sock_pair(struct __test_metadata *_metadata, + int *fd, int *cfd, bool *notls) { struct sockaddr_in addr; socklen_t len; int sfd, ret; - self->notls = false; + *notls = false; len = sizeof(addr); addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl(INADDR_ANY); addr.sin_port = 0; - self->fd = socket(AF_INET, SOCK_STREAM, 0); + *fd = socket(AF_INET, SOCK_STREAM, 0); sfd = socket(AF_INET, SOCK_STREAM, 0); ret = bind(sfd, &addr, sizeof(addr)); @@ -108,26 +115,96 @@ FIXTURE_SETUP(tls_basic) ret = getsockname(sfd, &addr, &len); ASSERT_EQ(ret, 0); - ret = connect(self->fd, &addr, sizeof(addr)); + ret = connect(*fd, &addr, sizeof(addr)); ASSERT_EQ(ret, 0); - self->cfd = accept(sfd, &addr, &len); - ASSERT_GE(self->cfd, 0); + *cfd = accept(sfd, &addr, &len); + ASSERT_GE(*cfd, 0); close(sfd); - ret = setsockopt(self->fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); + ret = setsockopt(*fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); if (ret != 0) { ASSERT_EQ(errno, ENOENT); - self->notls = true; + *notls = true; printf("Failure setting TCP_ULP, testing without tls\n"); return; } - ret = setsockopt(self->cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); + ret = setsockopt(*cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); ASSERT_EQ(ret, 0); } +/* Produce a basic cmsg */ +static int tls_send_cmsg(int fd, unsigned char record_type, + void *data, size_t len, int flags) +{ + char cbuf[CMSG_SPACE(sizeof(char))]; + int cmsg_len = sizeof(char); + struct cmsghdr *cmsg; + struct msghdr msg; + struct iovec vec; + + vec.iov_base = data; + vec.iov_len = len; + memset(&msg, 0, sizeof(struct msghdr)); + msg.msg_iov = &vec; + msg.msg_iovlen = 1; + msg.msg_control = cbuf; + msg.msg_controllen = sizeof(cbuf); + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_TLS; + /* test sending non-record types. */ + cmsg->cmsg_type = TLS_SET_RECORD_TYPE; + cmsg->cmsg_len = CMSG_LEN(cmsg_len); + *CMSG_DATA(cmsg) = record_type; + msg.msg_controllen = cmsg->cmsg_len; + + return sendmsg(fd, &msg, flags); +} + +static int tls_recv_cmsg(struct __test_metadata *_metadata, + int fd, unsigned char record_type, + void *data, size_t len, int flags) +{ + char cbuf[CMSG_SPACE(sizeof(char))]; + struct cmsghdr *cmsg; + unsigned char ctype; + struct msghdr msg; + struct iovec vec; + int n; + + vec.iov_base = data; + vec.iov_len = len; + memset(&msg, 0, sizeof(struct msghdr)); + msg.msg_iov = &vec; + msg.msg_iovlen = 1; + msg.msg_control = cbuf; + msg.msg_controllen = sizeof(cbuf); + + n = recvmsg(fd, &msg, flags); + + cmsg = CMSG_FIRSTHDR(&msg); + EXPECT_NE(cmsg, NULL); + EXPECT_EQ(cmsg->cmsg_level, SOL_TLS); + EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE); + ctype = *((unsigned char *)CMSG_DATA(cmsg)); + EXPECT_EQ(ctype, record_type); + + return n; +} + +FIXTURE(tls_basic) +{ + int fd, cfd; + bool notls; +}; + +FIXTURE_SETUP(tls_basic) +{ + ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); +} + FIXTURE_TEARDOWN(tls_basic) { close(self->fd); @@ -196,63 +273,48 @@ FIXTURE_VARIANT_ADD(tls, 13_sm4_ccm) .cipher_type = TLS_CIPHER_SM4_CCM, }; +FIXTURE_VARIANT_ADD(tls, 12_aes_ccm) +{ + .tls_version = TLS_1_2_VERSION, + .cipher_type = TLS_CIPHER_AES_CCM_128, +}; + +FIXTURE_VARIANT_ADD(tls, 13_aes_ccm) +{ + .tls_version = TLS_1_3_VERSION, + .cipher_type = TLS_CIPHER_AES_CCM_128, +}; + +FIXTURE_VARIANT_ADD(tls, 12_aes_gcm_256) +{ + .tls_version = TLS_1_2_VERSION, + .cipher_type = TLS_CIPHER_AES_GCM_256, +}; + +FIXTURE_VARIANT_ADD(tls, 13_aes_gcm_256) +{ + .tls_version = TLS_1_3_VERSION, + .cipher_type = TLS_CIPHER_AES_GCM_256, +}; + FIXTURE_SETUP(tls) { struct tls_crypto_info_keys tls12; - struct sockaddr_in addr; - socklen_t len; - int sfd, ret; - - self->notls = false; - len = sizeof(addr); + int ret; tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12); - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(INADDR_ANY); - addr.sin_port = 0; - - self->fd = socket(AF_INET, SOCK_STREAM, 0); - sfd = socket(AF_INET, SOCK_STREAM, 0); + ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); - ret = bind(sfd, &addr, sizeof(addr)); - ASSERT_EQ(ret, 0); - ret = listen(sfd, 10); - ASSERT_EQ(ret, 0); + if (self->notls) + return; - ret = getsockname(sfd, &addr, &len); + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len); ASSERT_EQ(ret, 0); - ret = connect(self->fd, &addr, sizeof(addr)); + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len); ASSERT_EQ(ret, 0); - - ret = setsockopt(self->fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); - if (ret != 0) { - self->notls = true; - printf("Failure setting TCP_ULP, testing without tls\n"); - } - - if (!self->notls) { - ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, - tls12.len); - ASSERT_EQ(ret, 0); - } - - self->cfd = accept(sfd, &addr, &len); - ASSERT_GE(self->cfd, 0); - - if (!self->notls) { - ret = setsockopt(self->cfd, IPPROTO_TCP, TCP_ULP, "tls", - sizeof("tls")); - ASSERT_EQ(ret, 0); - - ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, - tls12.len); - ASSERT_EQ(ret, 0); - } - - close(sfd); } FIXTURE_TEARDOWN(tls) @@ -613,6 +675,95 @@ TEST_F(tls, splice_to_pipe) EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } +TEST_F(tls, splice_cmsg_to_pipe) +{ + char *test_str = "test_read"; + char record_type = 100; + int send_len = 10; + char buf[10]; + int p[2]; + + ASSERT_GE(pipe(p), 0); + EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10); + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), -1); + EXPECT_EQ(errno, EINVAL); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); + EXPECT_EQ(errno, EIO); + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type, + buf, sizeof(buf), MSG_WAITALL), + send_len); + EXPECT_EQ(memcmp(test_str, buf, send_len), 0); +} + +TEST_F(tls, splice_dec_cmsg_to_pipe) +{ + char *test_str = "test_read"; + char record_type = 100; + int send_len = 10; + char buf[10]; + int p[2]; + + ASSERT_GE(pipe(p), 0); + EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); + EXPECT_EQ(errno, EIO); + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), -1); + EXPECT_EQ(errno, EINVAL); + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type, + buf, sizeof(buf), MSG_WAITALL), + send_len); + EXPECT_EQ(memcmp(test_str, buf, send_len), 0); +} + +TEST_F(tls, recv_and_splice) +{ + int send_len = TLS_PAYLOAD_MAX_LEN; + char mem_send[TLS_PAYLOAD_MAX_LEN]; + char mem_recv[TLS_PAYLOAD_MAX_LEN]; + int half = send_len / 2; + int p[2]; + + ASSERT_GE(pipe(p), 0); + EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); + /* Recv hald of the record, splice the other half */ + EXPECT_EQ(recv(self->cfd, mem_recv, half, MSG_WAITALL), half); + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, half, SPLICE_F_NONBLOCK), + half); + EXPECT_EQ(read(p[0], &mem_recv[half], half), half); + EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); +} + +TEST_F(tls, peek_and_splice) +{ + int send_len = TLS_PAYLOAD_MAX_LEN; + char mem_send[TLS_PAYLOAD_MAX_LEN]; + char mem_recv[TLS_PAYLOAD_MAX_LEN]; + int chunk = TLS_PAYLOAD_MAX_LEN / 4; + int n, i, p[2]; + + memrnd(mem_send, sizeof(mem_send)); + + ASSERT_GE(pipe(p), 0); + for (i = 0; i < 4; i++) + EXPECT_EQ(send(self->fd, &mem_send[chunk * i], chunk, 0), + chunk); + + EXPECT_EQ(recv(self->cfd, mem_recv, chunk * 5 / 2, + MSG_WAITALL | MSG_PEEK), + chunk * 5 / 2); + EXPECT_EQ(memcmp(mem_send, mem_recv, chunk * 5 / 2), 0); + + n = 0; + while (n < send_len) { + i = splice(self->cfd, NULL, p[1], NULL, send_len - n, 0); + EXPECT_GT(i, 0); + n += i; + } + EXPECT_EQ(n, send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); + EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); +} + TEST_F(tls, recvmsg_single) { char const *test_str = "test_recvmsg_single"; @@ -1193,60 +1344,30 @@ TEST_F(tls, mutliproc_sendpage_writers) TEST_F(tls, control_msg) { - if (self->notls) - return; - - char cbuf[CMSG_SPACE(sizeof(char))]; - char const *test_str = "test_read"; - int cmsg_len = sizeof(char); + char *test_str = "test_read"; char record_type = 100; - struct cmsghdr *cmsg; - struct msghdr msg; int send_len = 10; - struct iovec vec; char buf[10]; - vec.iov_base = (char *)test_str; - vec.iov_len = 10; - memset(&msg, 0, sizeof(struct msghdr)); - msg.msg_iov = &vec; - msg.msg_iovlen = 1; - msg.msg_control = cbuf; - msg.msg_controllen = sizeof(cbuf); - cmsg = CMSG_FIRSTHDR(&msg); - cmsg->cmsg_level = SOL_TLS; - /* test sending non-record types. */ - cmsg->cmsg_type = TLS_SET_RECORD_TYPE; - cmsg->cmsg_len = CMSG_LEN(cmsg_len); - *CMSG_DATA(cmsg) = record_type; - msg.msg_controllen = cmsg->cmsg_len; + if (self->notls) + SKIP(return, "no TLS support"); - EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len); + EXPECT_EQ(tls_send_cmsg(self->fd, record_type, test_str, send_len, 0), + send_len); /* Should fail because we didn't provide a control message */ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); - vec.iov_base = buf; - EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL | MSG_PEEK), send_len); - - cmsg = CMSG_FIRSTHDR(&msg); - EXPECT_NE(cmsg, NULL); - EXPECT_EQ(cmsg->cmsg_level, SOL_TLS); - EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE); - record_type = *((unsigned char *)CMSG_DATA(cmsg)); - EXPECT_EQ(record_type, 100); + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type, + buf, sizeof(buf), MSG_WAITALL | MSG_PEEK), + send_len); EXPECT_EQ(memcmp(buf, test_str, send_len), 0); /* Recv the message again without MSG_PEEK */ - record_type = 0; memset(buf, 0, sizeof(buf)); - EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL), send_len); - cmsg = CMSG_FIRSTHDR(&msg); - EXPECT_NE(cmsg, NULL); - EXPECT_EQ(cmsg->cmsg_level, SOL_TLS); - EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE); - record_type = *((unsigned char *)CMSG_DATA(cmsg)); - EXPECT_EQ(record_type, 100); + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type, + buf, sizeof(buf), MSG_WAITALL), + send_len); EXPECT_EQ(memcmp(buf, test_str, send_len), 0); } @@ -1301,6 +1422,160 @@ TEST_F(tls, shutdown_reuse) EXPECT_EQ(errno, EISCONN); } +FIXTURE(tls_err) +{ + int fd, cfd; + int fd2, cfd2; + bool notls; +}; + +FIXTURE_VARIANT(tls_err) +{ + uint16_t tls_version; +}; + +FIXTURE_VARIANT_ADD(tls_err, 12_aes_gcm) +{ + .tls_version = TLS_1_2_VERSION, +}; + +FIXTURE_VARIANT_ADD(tls_err, 13_aes_gcm) +{ + .tls_version = TLS_1_3_VERSION, +}; + +FIXTURE_SETUP(tls_err) +{ + struct tls_crypto_info_keys tls12; + int ret; + + tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_128, + &tls12); + + ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); + ulp_sock_pair(_metadata, &self->fd2, &self->cfd2, &self->notls); + if (self->notls) + return; + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->cfd2, SOL_TLS, TLS_RX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); +} + +FIXTURE_TEARDOWN(tls_err) +{ + close(self->fd); + close(self->cfd); + close(self->fd2); + close(self->cfd2); +} + +TEST_F(tls_err, bad_rec) +{ + char buf[64]; + + if (self->notls) + SKIP(return, "no TLS support"); + + memset(buf, 0x55, sizeof(buf)); + EXPECT_EQ(send(self->fd2, buf, sizeof(buf), 0), sizeof(buf)); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EMSGSIZE); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), MSG_DONTWAIT), -1); + EXPECT_EQ(errno, EAGAIN); +} + +TEST_F(tls_err, bad_auth) +{ + char buf[128]; + int n; + + if (self->notls) + SKIP(return, "no TLS support"); + + memrnd(buf, sizeof(buf) / 2); + EXPECT_EQ(send(self->fd, buf, sizeof(buf) / 2, 0), sizeof(buf) / 2); + n = recv(self->cfd, buf, sizeof(buf), 0); + EXPECT_GT(n, sizeof(buf) / 2); + + buf[n - 1]++; + + EXPECT_EQ(send(self->fd2, buf, n, 0), n); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EBADMSG); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EBADMSG); +} + +TEST_F(tls_err, bad_in_large_read) +{ + char txt[3][64]; + char cip[3][128]; + char buf[3 * 128]; + int i, n; + + if (self->notls) + SKIP(return, "no TLS support"); + + /* Put 3 records in the sockets */ + for (i = 0; i < 3; i++) { + memrnd(txt[i], sizeof(txt[i])); + EXPECT_EQ(send(self->fd, txt[i], sizeof(txt[i]), 0), + sizeof(txt[i])); + n = recv(self->cfd, cip[i], sizeof(cip[i]), 0); + EXPECT_GT(n, sizeof(txt[i])); + /* Break the third message */ + if (i == 2) + cip[2][n - 1]++; + EXPECT_EQ(send(self->fd2, cip[i], n, 0), n); + } + + /* We should be able to receive the first two messages */ + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), sizeof(txt[0]) * 2); + EXPECT_EQ(memcmp(buf, txt[0], sizeof(txt[0])), 0); + EXPECT_EQ(memcmp(buf + sizeof(txt[0]), txt[1], sizeof(txt[1])), 0); + /* Third mesasge is bad */ + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EBADMSG); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EBADMSG); +} + +TEST_F(tls_err, bad_cmsg) +{ + char *test_str = "test_read"; + int send_len = 10; + char cip[128]; + char buf[128]; + char txt[64]; + int n; + + if (self->notls) + SKIP(return, "no TLS support"); + + /* Queue up one data record */ + memrnd(txt, sizeof(txt)); + EXPECT_EQ(send(self->fd, txt, sizeof(txt), 0), sizeof(txt)); + n = recv(self->cfd, cip, sizeof(cip), 0); + EXPECT_GT(n, sizeof(txt)); + EXPECT_EQ(send(self->fd2, cip, n, 0), n); + + EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10); + n = recv(self->cfd, cip, sizeof(cip), 0); + cip[n - 1]++; /* Break it */ + EXPECT_GT(n, send_len); + EXPECT_EQ(send(self->fd2, cip, n, 0), n); + + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), sizeof(txt)); + EXPECT_EQ(memcmp(buf, txt, sizeof(txt)), 0); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EBADMSG); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EBADMSG); +} + TEST(non_established) { struct tls12_crypto_info_aes_gcm_256 tls12; struct sockaddr_in addr; @@ -1355,64 +1630,82 @@ TEST(non_established) { TEST(keysizes) { struct tls12_crypto_info_aes_gcm_256 tls12; - struct sockaddr_in addr; - int sfd, ret, fd, cfd; - socklen_t len; + int ret, fd, cfd; bool notls; - notls = false; - len = sizeof(addr); - memset(&tls12, 0, sizeof(tls12)); tls12.info.version = TLS_1_2_VERSION; tls12.info.cipher_type = TLS_CIPHER_AES_GCM_256; - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(INADDR_ANY); - addr.sin_port = 0; + ulp_sock_pair(_metadata, &fd, &cfd, ¬ls); - fd = socket(AF_INET, SOCK_STREAM, 0); - sfd = socket(AF_INET, SOCK_STREAM, 0); + if (!notls) { + ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, + sizeof(tls12)); + EXPECT_EQ(ret, 0); + + ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12, + sizeof(tls12)); + EXPECT_EQ(ret, 0); + } + + close(fd); + close(cfd); +} + +TEST(tls_v6ops) { + struct tls_crypto_info_keys tls12; + struct sockaddr_in6 addr, addr2; + int sfd, ret, fd; + socklen_t len, len2; + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12); + + addr.sin6_family = AF_INET6; + addr.sin6_addr = in6addr_any; + addr.sin6_port = 0; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + sfd = socket(AF_INET6, SOCK_STREAM, 0); ret = bind(sfd, &addr, sizeof(addr)); ASSERT_EQ(ret, 0); ret = listen(sfd, 10); ASSERT_EQ(ret, 0); + len = sizeof(addr); ret = getsockname(sfd, &addr, &len); ASSERT_EQ(ret, 0); ret = connect(fd, &addr, sizeof(addr)); ASSERT_EQ(ret, 0); + len = sizeof(addr); + ret = getsockname(fd, &addr, &len); + ASSERT_EQ(ret, 0); + ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); - if (ret != 0) { - notls = true; - printf("Failure setting TCP_ULP, testing without tls\n"); + if (ret) { + ASSERT_EQ(errno, ENOENT); + SKIP(return, "no TLS support"); } + ASSERT_EQ(ret, 0); - if (!notls) { - ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, - sizeof(tls12)); - EXPECT_EQ(ret, 0); - } + ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); - cfd = accept(sfd, &addr, &len); - ASSERT_GE(cfd, 0); + ret = setsockopt(fd, SOL_TLS, TLS_RX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); - if (!notls) { - ret = setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", - sizeof("tls")); - EXPECT_EQ(ret, 0); + len2 = sizeof(addr2); + ret = getsockname(fd, &addr2, &len2); + ASSERT_EQ(ret, 0); - ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12, - sizeof(tls12)); - EXPECT_EQ(ret, 0); - } + EXPECT_EQ(len2, len); + EXPECT_EQ(memcmp(&addr, &addr2, len), 0); - close(sfd); close(fd); - close(cfd); + close(sfd); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 8748199ac109..ffca314897c4 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -5,7 +5,8 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ - ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh + ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \ + conntrack_vrf.sh LDLIBS = -lmnl TEST_GEN_FILES = nf-queue diff --git a/tools/testing/selftests/netfilter/conntrack_vrf.sh b/tools/testing/selftests/netfilter/conntrack_vrf.sh new file mode 100755 index 000000000000..8b5ea9234588 --- /dev/null +++ b/tools/testing/selftests/netfilter/conntrack_vrf.sh @@ -0,0 +1,241 @@ +#!/bin/sh + +# This script demonstrates interaction of conntrack and vrf. +# The vrf driver calls the netfilter hooks again, with oif/iif +# pointing at the VRF device. +# +# For ingress, this means first iteration has iifname of lower/real +# device. In this script, thats veth0. +# Second iteration is iifname set to vrf device, tvrf in this script. +# +# For egress, this is reversed: first iteration has the vrf device, +# second iteration is done with the lower/real/veth0 device. +# +# test_ct_zone_in demonstrates unexpected change of nftables +# behavior # caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack +# connection on VRF rcv" +# +# It was possible to assign conntrack zone to a packet (or mark it for +# `notracking`) in the prerouting chain before conntrack, based on real iif. +# +# After the change, the zone assignment is lost and the zone is assigned based +# on the VRF master interface (in case such a rule exists). +# assignment is lost. Instead, assignment based on the `iif` matching +# Thus it is impossible to distinguish packets based on the original +# interface. +# +# test_masquerade_vrf and test_masquerade_veth0 demonstrate the problem +# that was supposed to be fixed by the commit mentioned above to make sure +# that any fix to test case 1 won't break masquerade again. + +ksft_skip=4 + +IP0=172.30.30.1 +IP1=172.30.30.2 +PFXL=30 +ret=0 + +sfx=$(mktemp -u "XXXXXXXX") +ns0="ns0-$sfx" +ns1="ns1-$sfx" + +cleanup() +{ + ip netns pids $ns0 | xargs kill 2>/dev/null + ip netns pids $ns1 | xargs kill 2>/dev/null + + ip netns del $ns0 $ns1 +} + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip netns add "$ns0" +if [ $? -ne 0 ];then + echo "SKIP: Could not create net namespace $ns0" + exit $ksft_skip +fi +ip netns add "$ns1" + +trap cleanup EXIT + +ip netns exec $ns0 sysctl -q -w net.ipv4.conf.default.rp_filter=0 +ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0 +ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0 + +ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not add veth device" + exit $ksft_skip +fi + +ip -net $ns0 li add tvrf type vrf table 9876 +if [ $? -ne 0 ];then + echo "SKIP: Could not add vrf device" + exit $ksft_skip +fi + +ip -net $ns0 li set lo up + +ip -net $ns0 li set veth0 master tvrf +ip -net $ns0 li set tvrf up +ip -net $ns0 li set veth0 up +ip -net $ns1 li set veth0 up + +ip -net $ns0 addr add $IP0/$PFXL dev veth0 +ip -net $ns1 addr add $IP1/$PFXL dev veth0 + +ip netns exec $ns1 iperf3 -s > /dev/null 2>&1& +if [ $? -ne 0 ];then + echo "SKIP: Could not start iperf3" + exit $ksft_skip +fi + +# test vrf ingress handling. +# The incoming connection should be placed in conntrack zone 1, +# as decided by the first iteration of the ruleset. +test_ct_zone_in() +{ +ip netns exec $ns0 nft -f - <<EOF +table testct { + chain rawpre { + type filter hook prerouting priority raw; + + iif { veth0, tvrf } counter meta nftrace set 1 + iif veth0 counter ct zone set 1 counter return + iif tvrf counter ct zone set 2 counter return + ip protocol icmp counter + notrack counter + } + + chain rawout { + type filter hook output priority raw; + + oif veth0 counter ct zone set 1 counter return + oif tvrf counter ct zone set 2 counter return + notrack counter + } +} +EOF + ip netns exec $ns1 ping -W 1 -c 1 -I veth0 $IP0 > /dev/null + + # should be in zone 1, not zone 2 + count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l) + if [ $count -eq 1 ]; then + echo "PASS: entry found in conntrack zone 1" + else + echo "FAIL: entry not found in conntrack zone 1" + count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l) + if [ $count -eq 1 ]; then + echo "FAIL: entry found in zone 2 instead" + else + echo "FAIL: entry not in zone 1 or 2, dumping table" + ip netns exec $ns0 conntrack -L + ip netns exec $ns0 nft list ruleset + fi + fi +} + +# add masq rule that gets evaluated w. outif set to vrf device. +# This tests the first iteration of the packet through conntrack, +# oifname is the vrf device. +test_masquerade_vrf() +{ + local qdisc=$1 + + if [ "$qdisc" != "default" ]; then + tc -net $ns0 qdisc add dev tvrf root $qdisc + fi + + ip netns exec $ns0 conntrack -F 2>/dev/null + +ip netns exec $ns0 nft -f - <<EOF +flush ruleset +table ip nat { + chain rawout { + type filter hook output priority raw; + + oif tvrf ct state untracked counter + } + chain postrouting2 { + type filter hook postrouting priority mangle; + + oif tvrf ct state untracked counter + } + chain postrouting { + type nat hook postrouting priority 0; + # NB: masquerade should always be combined with 'oif(name) bla', + # lack of this is intentional here, we want to exercise double-snat. + ip saddr 172.30.30.0/30 counter masquerade random + } +} +EOF + ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 >/dev/null + if [ $? -ne 0 ]; then + echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device" + ret=1 + return + fi + + # must also check that nat table was evaluated on second (lower device) iteration. + ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' && + ip netns exec $ns0 nft list table ip nat |grep -q 'untracked counter packets [1-9]' + if [ $? -eq 0 ]; then + echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)" + else + echo "FAIL: vrf rules have unexpected counter value" + ret=1 + fi + + if [ "$qdisc" != "default" ]; then + tc -net $ns0 qdisc del dev tvrf root + fi +} + +# add masq rule that gets evaluated w. outif set to veth device. +# This tests the 2nd iteration of the packet through conntrack, +# oifname is the lower device (veth0 in this case). +test_masquerade_veth() +{ + ip netns exec $ns0 conntrack -F 2>/dev/null +ip netns exec $ns0 nft -f - <<EOF +flush ruleset +table ip nat { + chain postrouting { + type nat hook postrouting priority 0; + meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random + } +} +EOF + ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 > /dev/null + if [ $? -ne 0 ]; then + echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device" + ret=1 + return + fi + + # must also check that nat table was evaluated on second (lower device) iteration. + ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' + if [ $? -eq 0 ]; then + echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device" + else + echo "FAIL: vrf masq rule has unexpected counter value" + ret=1 + fi +} + +test_ct_zone_in +test_masquerade_vrf "default" +test_masquerade_vrf "pfifo" +test_masquerade_veth + +exit $ret diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh index 5a4938d6dcf2..ed61f6cab60f 100755 --- a/tools/testing/selftests/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/netfilter/nft_concat_range.sh @@ -23,8 +23,8 @@ TESTS="reported_issues correctness concurrency timeout" # Set types, defined by TYPE_ variables below TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto - net_port_net net_mac net_mac_icmp net6_mac_icmp net6_port_net6_port - net_port_mac_proto_net" + net_port_net net_mac mac_net net_mac_icmp net6_mac_icmp + net6_port_net6_port net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below BUGS="flush_remove_add" @@ -277,6 +277,23 @@ perf_entries 1000 perf_proto ipv4 " +TYPE_mac_net=" +display mac,net +type_spec ether_addr . ipv4_addr +chain_spec ether saddr . ip saddr +dst +src mac addr4 +start 1 +count 5 +src_delta 2000 +tools sendip nc bash +proto udp + +race_repeat 0 + +perf_duration 0 +" + TYPE_net_mac_icmp=" display net,mac - ICMP type_spec ipv4_addr . ether_addr @@ -984,7 +1001,8 @@ format() { fi done for f in ${src}; do - __expr="${__expr} . " + [ "${__expr}" != "{ " ] && __expr="${__expr} . " + __start="$(eval format_"${f}" "${srcstart}")" __end="$(eval format_"${f}" "${srcend}")" diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index da1c1e4b6c86..d88867d2fed7 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -759,19 +759,21 @@ test_port_shadow() local result="" local logmsg="" - echo ROUTER | ip netns exec "$ns0" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 & - nc_r=$! + # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405. + echo "fake-entry" | ip netns exec "$ns2" timeout 1 socat -u STDIN UDP:"$daddrc":41404,sourceport=1405 - echo CLIENT | ip netns exec "$ns2" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 & - nc_c=$! + echo ROUTER | ip netns exec "$ns0" timeout 5 socat -u STDIN UDP4-LISTEN:1405 & + sc_r=$! - # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405. - echo "fake-entry" | ip netns exec "$ns2" nc -w 1 -p 1405 -u "$daddrc" 41404 > /dev/null + echo CLIENT | ip netns exec "$ns2" timeout 5 socat -u STDIN UDP4-LISTEN:1405,reuseport & + sc_c=$! + + sleep 0.3 # ns1 tries to connect to ns0:1405. With default settings this should connect # to client, it matches the conntrack entry created above. - result=$(echo "" | ip netns exec "$ns1" nc -w 1 -p 41404 -u "$daddrs" 1405) + result=$(echo "data" | ip netns exec "$ns1" timeout 1 socat - UDP:"$daddrs":1405,sourceport=41404) if [ "$result" = "$expect" ] ;then echo "PASS: portshadow test $test: got reply from ${expect}${logmsg}" @@ -780,7 +782,7 @@ test_port_shadow() ret=1 fi - kill $nc_r $nc_c 2>/dev/null + kill $sc_r $sc_c 2>/dev/null # flush udp entries for next test round, if any ip netns exec "$ns0" conntrack -F >/dev/null 2>&1 @@ -816,11 +818,10 @@ table $family raw { chain prerouting { type filter hook prerouting priority -300; policy accept; meta iif veth0 udp dport 1405 notrack - udp dport 1405 notrack } chain output { type filter hook output priority -300; policy accept; - udp sport 1405 notrack + meta oif veth0 udp sport 1405 notrack } } EOF @@ -851,6 +852,18 @@ test_port_shadowing() { local family="ip" + conntrack -h >/dev/null 2>&1 + if [ $? -ne 0 ];then + echo "SKIP: Could not run nat port shadowing test without conntrack tool" + return + fi + + socat -h > /dev/null 2>&1 + if [ $? -ne 0 ];then + echo "SKIP: Could not run nat port shadowing test without socat tool" + return + fi + ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh index 3d202b90b33d..7d27f1f3bc01 100755 --- a/tools/testing/selftests/netfilter/nft_queue.sh +++ b/tools/testing/selftests/netfilter/nft_queue.sh @@ -16,6 +16,10 @@ timeout=4 cleanup() { + ip netns pids ${ns1} | xargs kill 2>/dev/null + ip netns pids ${ns2} | xargs kill 2>/dev/null + ip netns pids ${nsrouter} | xargs kill 2>/dev/null + ip netns del ${ns1} ip netns del ${ns2} ip netns del ${nsrouter} @@ -332,6 +336,55 @@ EOF echo "PASS: tcp via loopback and re-queueing" } +test_icmp_vrf() { + ip -net $ns1 link add tvrf type vrf table 9876 + if [ $? -ne 0 ];then + echo "SKIP: Could not add vrf device" + return + fi + + ip -net $ns1 li set eth0 master tvrf + ip -net $ns1 li set tvrf up + + ip -net $ns1 route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876 +ip netns exec ${ns1} nft -f /dev/stdin <<EOF +flush ruleset +table inet filter { + chain output { + type filter hook output priority 0; policy accept; + meta oifname "tvrf" icmp type echo-request counter queue num 1 + meta oifname "eth0" icmp type echo-request counter queue num 1 + } + chain post { + type filter hook postrouting priority 0; policy accept; + meta oifname "tvrf" icmp type echo-request counter queue num 1 + meta oifname "eth0" icmp type echo-request counter queue num 1 + } +} +EOF + ip netns exec ${ns1} ./nf-queue -q 1 -t $timeout & + local nfqpid=$! + + sleep 1 + ip netns exec ${ns1} ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null + + for n in output post; do + for d in tvrf eth0; do + ip netns exec ${ns1} nft list chain inet filter $n | grep -q "oifname \"$d\" icmp type echo-request counter packets 1" + if [ $? -ne 0 ] ; then + echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2 + ip netns exec ${ns1} nft list ruleset + ret=1 + return + fi + done + done + + wait $nfqpid + [ $? -eq 0 ] && echo "PASS: icmp+nfqueue via vrf" + wait 2>/dev/null +} + ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null @@ -372,5 +425,6 @@ test_queue 20 test_tcp_forward test_tcp_localhost test_tcp_localhost_requeue +test_icmp_vrf exit $ret diff --git a/tools/testing/selftests/netfilter/nft_zones_many.sh b/tools/testing/selftests/netfilter/nft_zones_many.sh index ac646376eb01..04633119b29a 100755 --- a/tools/testing/selftests/netfilter/nft_zones_many.sh +++ b/tools/testing/selftests/netfilter/nft_zones_many.sh @@ -18,11 +18,17 @@ cleanup() ip netns del $ns } -ip netns add $ns -if [ $? -ne 0 ];then - echo "SKIP: Could not create net namespace $gw" - exit $ksft_skip -fi +checktool (){ + if ! $1 > /dev/null 2>&1; then + echo "SKIP: Could not $2" + exit $ksft_skip + fi +} + +checktool "nft --version" "run test without nft tool" +checktool "ip -Version" "run test without ip tool" +checktool "socat -V" "run test without socat tool" +checktool "ip netns add $ns" "create net namespace" trap cleanup EXIT @@ -71,7 +77,8 @@ EOF local start=$(date +%s%3N) i=$((i + 10000)) j=$((j + 1)) - dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" nc -w 1 -q 1 -u -p 12345 127.0.0.1 12345 > /dev/null + # nft rule in output places each packet in a different zone. + dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345 if [ $? -ne 0 ] ;then ret=1 break diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index b71828df5a6d..a3239d5e40c7 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -60,6 +60,8 @@ CONFIG_NET_IFE_SKBTCINDEX=m CONFIG_NET_SCH_FIFO=y CONFIG_NET_SCH_ETS=m CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_FQ_PIE=m +CONFIG_NETDEVSIM=m # ## Network testing diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json index 503982b8f295..91832400ddbd 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json @@ -68,7 +68,7 @@ "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ok index 667", "expExitCode": "0", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9]* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref", + "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9].* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref", "matchCount": "1", "teardown": [ "$TC action flush action bpf" diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json index 88a20c781e49..c6046096d9db 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json @@ -15,7 +15,7 @@ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq", "expExitCode": "0", "verifyCmd": "$TC qdisc show dev $ETH", - "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]", "matchCount": "4", "teardown": [ "echo \"1\" > /sys/bus/netdevsim/del_device" @@ -37,7 +37,7 @@ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq", "expExitCode": "0", "verifyCmd": "$TC qdisc show dev $ETH", - "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-9,a-f][0-9,a-f]{0,2} bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-9,a-f][0-9,a-f]{0,2}", "matchCount": "256", "teardown": [ "echo \"1\" > /sys/bus/netdevsim/del_device" @@ -60,7 +60,7 @@ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq", "expExitCode": "2", "verifyCmd": "$TC qdisc show dev $ETH", - "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]", "matchCount": "4", "teardown": [ "echo \"1\" > /sys/bus/netdevsim/del_device" @@ -82,7 +82,7 @@ "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq", "expExitCode": "2", "verifyCmd": "$TC qdisc show dev $ETH", - "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]", "matchCount": "0", "teardown": [ "echo \"1\" > /sys/bus/netdevsim/del_device" @@ -106,7 +106,7 @@ "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq", "expExitCode": "2", "verifyCmd": "$TC qdisc show dev $ETH", - "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]", "matchCount": "0", "teardown": [ "echo \"1\" > /sys/bus/netdevsim/del_device" @@ -128,7 +128,7 @@ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq", "expExitCode": "2", "verifyCmd": "$TC qdisc show dev $ETH", - "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]", "matchCount": "0", "teardown": [ "echo \"1\" > /sys/bus/netdevsim/del_device" diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index a3e43189d940..ee22e3447ec7 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -716,6 +716,7 @@ def set_operation_mode(pm, parser, args, remaining): list_test_cases(alltests) exit(0) + exit_code = 0 # KSFT_PASS if len(alltests): req_plugins = pm.get_required_plugins(alltests) try: @@ -724,6 +725,8 @@ def set_operation_mode(pm, parser, args, remaining): print('The following plugins were not found:') print('{}'.format(pde.missing_pg)) catresults = test_runner(pm, args, alltests) + if catresults.count_failures() != 0: + exit_code = 1 # KSFT_FAIL if args.format == 'none': print('Test results output suppression requested\n') else: @@ -748,6 +751,8 @@ def set_operation_mode(pm, parser, args, remaining): gid=int(os.getenv('SUDO_GID'))) else: print('No tests found\n') + exit_code = 4 # KSFT_SKIP + exit(exit_code) def main(): """ @@ -767,8 +772,5 @@ def main(): set_operation_mode(pm, parser, args, remaining) - exit(0) - - if __name__ == "__main__": main() diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh index 7fe38c76db44..afb0cd86fa3d 100755 --- a/tools/testing/selftests/tc-testing/tdc.sh +++ b/tools/testing/selftests/tc-testing/tdc.sh @@ -1,5 +1,6 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 +modprobe netdevsim ./tdc.py -c actions --nobuildebpf ./tdc.py -c qdisc diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index ebc4ee0fe179..8a9461aa0878 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -276,7 +276,11 @@ n0 ping -W 1 -c 1 192.168.241.2 n1 wg set wg0 peer "$pub2" endpoint 192.168.241.2:7 ip2 link del wg0 ip2 link del wg1 -! n0 ping -W 1 -c 10 -f 192.168.241.2 || false # Should not crash kernel +read _ _ tx_bytes_before < <(n0 wg show wg1 transfer) +! n0 ping -W 1 -c 10 -f 192.168.241.2 || false +sleep 1 +read _ _ tx_bytes_after < <(n0 wg show wg1 transfer) +(( tx_bytes_after - tx_bytes_before < 70000 )) ip0 link del wg1 ip1 link del wg0 @@ -609,6 +613,28 @@ ip0 link set wg0 up kill $ncat_pid ip0 link del wg0 +# Ensure that dst_cache references don't outlive netns lifetime +ip1 link add dev wg0 type wireguard +ip2 link add dev wg0 type wireguard +configure_peers +ip1 link add veth1 type veth peer name veth2 +ip1 link set veth2 netns $netns2 +ip1 addr add fd00:aa::1/64 dev veth1 +ip2 addr add fd00:aa::2/64 dev veth2 +ip1 link set veth1 up +ip2 link set veth2 up +waitiface $netns1 veth1 +waitiface $netns2 veth2 +ip1 -6 route add default dev veth1 via fd00:aa::2 +ip2 -6 route add default dev veth2 via fd00:aa::1 +n1 wg set wg0 peer "$pub2" endpoint [fd00:aa::2]:2 +n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::1]:1 +n1 ping6 -c 1 fd00::2 +pp ip netns delete $netns1 +pp ip netns delete $netns2 +pp ip netns add $netns1 +pp ip netns add $netns2 + # Ensure there aren't circular reference loops ip1 link add wg1 type wireguard ip2 link add wg2 type wireguard @@ -627,7 +653,7 @@ while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do done < /dev/kmsg alldeleted=1 for object in "${!objects[@]}"; do - if [[ ${objects["$object"]} != *createddestroyed ]]; then + if [[ ${objects["$object"]} != *createddestroyed && ${objects["$object"]} != *createdcreateddestroyeddestroyed ]]; then echo "Error: $object: merely ${objects["$object"]}" >&3 alldeleted=0 fi diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config index fe07d97df9fa..2b321b8a96cf 100644 --- a/tools/testing/selftests/wireguard/qemu/debug.config +++ b/tools/testing/selftests/wireguard/qemu/debug.config @@ -47,7 +47,7 @@ CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_TRACE_IRQFLAGS=y CONFIG_DEBUG_BUGVERBOSE=y CONFIG_DEBUG_LIST=y -CONFIG_DEBUG_PI_LIST=y +CONFIG_DEBUG_PLIST=y CONFIG_PROVE_RCU=y CONFIG_SPARSE_RCU_POINTER=y CONFIG_RCU_CPU_STALL_TIMEOUT=21 diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index 74db83a0aedd..a9b5a520a1d2 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -66,6 +66,7 @@ CONFIG_PROC_SYSCTL=y CONFIG_SYSFS=y CONFIG_TMPFS=y CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15 +CONFIG_LOG_BUF_SHIFT=18 CONFIG_PRINTK_TIME=y CONFIG_BLK_DEV_INITRD=y CONFIG_LEGACY_VSYSCALL_NONE=y |