diff options
Diffstat (limited to 'tools')
38 files changed, 1299 insertions, 114 deletions
diff --git a/tools/include/linux/kallsyms.h b/tools/include/linux/kallsyms.h index efb6c3f5f2a9..5a37ccbec54f 100644 --- a/tools/include/linux/kallsyms.h +++ b/tools/include/linux/kallsyms.h @@ -6,7 +6,7 @@ #include <stdio.h> #include <unistd.h> -#define KSYM_NAME_LEN 128 +#define KSYM_NAME_LEN 512 struct module; diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h index 95e2b7924925..ba04771cb3a3 100644 --- a/tools/include/nolibc/arch-riscv.h +++ b/tools/include/nolibc/arch-riscv.h @@ -190,7 +190,7 @@ __asm__ (".section .text\n" ".option norelax\n" "lla gp, __global_pointer$\n" ".option pop\n" - "ld a0, 0(sp)\n" // argc (a0) was in the stack + "lw a0, 0(sp)\n" // argc (a0) was in the stack "add a1, sp, "SZREG"\n" // argv (a1) = sp "slli a2, a0, "PTRLOG"\n" // envp (a2) = SZREG*argc ... "add a2, a2, "SZREG"\n" // + SZREG (skip null) diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 08491070387b..ce3ee03aa679 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -692,12 +692,12 @@ void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd, { #ifndef my_syscall6 /* Function not implemented. */ - return -ENOSYS; + return (void *)-ENOSYS; #else int n; -#if defined(__i386__) +#if defined(__NR_mmap2) n = __NR_mmap2; offset >>= 12; #else diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index 93bf93a59c99..d8ae4e944467 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -97,7 +97,7 @@ struct perf_record_throttle { }; #ifndef KSYM_NAME_LEN -#define KSYM_NAME_LEN 256 +#define KSYM_NAME_LEN 512 #endif struct perf_record_ksymbol { diff --git a/tools/lib/symbol/kallsyms.h b/tools/lib/symbol/kallsyms.h index 72ab9870454b..542f9b059c3b 100644 --- a/tools/lib/symbol/kallsyms.h +++ b/tools/lib/symbol/kallsyms.h @@ -7,7 +7,7 @@ #include <linux/types.h> #ifndef KSYM_NAME_LEN -#define KSYM_NAME_LEN 256 +#define KSYM_NAME_LEN 512 #endif static inline u8 kallsyms2elf_binding(char type) diff --git a/tools/memory-model/Documentation/litmus-tests.txt b/tools/memory-model/Documentation/litmus-tests.txt index 8a9d5d2787f9..26554b1c5575 100644 --- a/tools/memory-model/Documentation/litmus-tests.txt +++ b/tools/memory-model/Documentation/litmus-tests.txt @@ -946,22 +946,39 @@ Limitations of the Linux-kernel memory model (LKMM) include: carrying a dependency, then the compiler can break that dependency by substituting a constant of that value. - Conversely, LKMM sometimes doesn't recognize that a particular - optimization is not allowed, and as a result, thinks that a - dependency is not present (because the optimization would break it). - The memory model misses some pretty obvious control dependencies - because of this limitation. A simple example is: + Conversely, LKMM will sometimes overestimate the amount of + reordering compilers and CPUs can carry out, leading it to miss + some pretty obvious cases of ordering. A simple example is: r1 = READ_ONCE(x); if (r1 == 0) smp_mb(); WRITE_ONCE(y, 1); - There is a control dependency from the READ_ONCE to the WRITE_ONCE, - even when r1 is nonzero, but LKMM doesn't realize this and thinks - that the write may execute before the read if r1 != 0. (Yes, that - doesn't make sense if you think about it, but the memory model's - intelligence is limited.) + The WRITE_ONCE() does not depend on the READ_ONCE(), and as a + result, LKMM does not claim ordering. However, even though no + dependency is present, the WRITE_ONCE() will not be executed before + the READ_ONCE(). There are two reasons for this: + + The presence of the smp_mb() in one of the branches + prevents the compiler from moving the WRITE_ONCE() + up before the "if" statement, since the compiler has + to assume that r1 will sometimes be 0 (but see the + comment below); + + CPUs do not execute stores before po-earlier conditional + branches, even in cases where the store occurs after the + two arms of the branch have recombined. + + It is clear that it is not dangerous in the slightest for LKMM to + make weaker guarantees than architectures. In fact, it is + desirable, as it gives compilers room for making optimizations. + For instance, suppose that a 0 value in r1 would trigger undefined + behavior elsewhere. Then a clever compiler might deduce that r1 + can never be 0 in the if condition. As a result, said clever + compiler might deem it safe to optimize away the smp_mb(), + eliminating the branch and any ordering an architecture would + guarantee otherwise. 2. Multiple access sizes for a single variable are not supported, and neither are misaligned or partially overlapping accesses. diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 9216060c3408..a8cf38639fe8 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1073,6 +1073,9 @@ static const char *uaccess_safe_builtin[] = { "copy_mc_fragile_handle_tail", "copy_mc_enhanced_fast_string", "ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */ + "clear_user_erms", + "clear_user_rep_good", + "clear_user_original", NULL }; @@ -3316,6 +3319,10 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, next_insn = next_insn_to_validate(file, insn); if (func && insn->func && func != insn->func->pfunc) { + /* Ignore KCFI type preambles, which always fall through */ + if (!strncmp(func->name, "__cfi_", 6)) + return 0; + WARN("%s() falls through to next function %s()", func->name, insn->func->name); return 1; @@ -4114,6 +4121,7 @@ static int validate_ibt(struct objtool_file *file) !strcmp(sec->name, "__ex_table") || !strcmp(sec->name, "__jump_table") || !strcmp(sec->name, "__mcount_loc") || + !strcmp(sec->name, ".kcfi_traps") || strstr(sec->name, "__patchable_function_entries")) continue; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index c25e957c1e52..7e24b09b1163 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -619,6 +619,11 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab, Elf64_Xword entsize = symtab->sh.sh_entsize; int max_idx, idx = sym->idx; Elf_Scn *s, *t = NULL; + bool is_special_shndx = sym->sym.st_shndx >= SHN_LORESERVE && + sym->sym.st_shndx != SHN_XINDEX; + + if (is_special_shndx) + shndx = sym->sym.st_shndx; s = elf_getscn(elf->elf, symtab->idx); if (!s) { @@ -704,7 +709,7 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab, } /* setup extended section index magic and write the symbol */ - if (shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) { + if ((shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) || is_special_shndx) { sym->sym.st_shndx = shndx; if (!shndx_data) shndx = 0; diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index dfb6173b2a82..9e9a2b67de19 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -114,8 +114,7 @@ static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest for (i = 0; i < nsyscalls; ++i) for (j = 0; j < expected_nr_events[i]; ++j) { - int foo = syscalls[i](); - ++foo; + syscalls[i](); } md = &evlist->mmap[0]; diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 6a001fcfed68..4952abe716f3 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -332,7 +332,7 @@ out_delete_evlist: out: if (err == -EACCES) return TEST_SKIP; - if (err < 0) + if (err < 0 || errs != 0) return TEST_FAIL; return TEST_OK; } diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index 00c7285ce1ac..301f95427159 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -61,7 +61,7 @@ test_register_capture() { echo "Register capture test [Skipped missing registers]" return fi - if ! perf record -o - --intr-regs=di,r8,dx,cx -e cpu/br_inst_retired.near_call/p \ + if ! perf record -o - --intr-regs=di,r8,dx,cx -e br_inst_retired.near_call:p \ -c 1000 --per-thread true 2> /dev/null \ | perf script -F ip,sym,iregs -i - 2> /dev/null \ | egrep -q "DI:" diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh index c644f94a6500..ec801cffae6b 100755 --- a/tools/perf/tests/shell/test_brstack.sh +++ b/tools/perf/tests/shell/test_brstack.sh @@ -12,7 +12,8 @@ if ! [ -x "$(command -v cc)" ]; then fi # skip the test if the hardware doesn't support branch stack sampling -perf record -b -o- -B true > /dev/null 2>&1 || exit 2 +# and if the architecture doesn't support filter types: any,save_type,u +perf record -b -o- -B --branch-filter any,save_type,u true > /dev/null 2>&1 || exit 2 TMPDIR=$(mktemp -d /tmp/__perf_test.program.XXXXX) diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 4fd8d703ff19..8ab035b55875 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -43,10 +43,11 @@ static bool is_ignored_symbol(const char *name, char type) /* Symbol names that begin with the following are ignored.*/ static const char * const ignored_prefixes[] = { "$", /* local symbols for ARM, MIPS, etc. */ - ".LASANPC", /* s390 kasan local symbols */ + ".L", /* local labels, .LBB,.Ltmpxxx,.L__unnamed_xx,.LASANPC, etc. */ "__crc_", /* modversions */ "__efistub_", /* arm64 EFI stub namespace */ - "__kvm_nvhe_", /* arm64 non-VHE KVM namespace */ + "__kvm_nvhe_$", /* arm64 local symbols in non-VHE KVM namespace */ + "__kvm_nvhe_.L", /* arm64 local symbols in non-VHE KVM namespace */ "__AArch64ADRPThunk_", /* arm64 lld */ "__ARMV5PILongThunk_", /* arm lld */ "__ARMV7PILongThunk_", diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 9dfae1bda9cc..485e1a343165 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -269,7 +269,7 @@ CFLAGS_expr-flex.o += $(flex_flags) bison_flags := -DYYENABLE_NLS=0 BISON_GE_35 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 35) ifeq ($(BISON_GE_35),1) - bison_flags += -Wno-unused-parameter -Wno-nested-externs -Wno-implicit-function-declaration -Wno-switch-enum + bison_flags += -Wno-unused-parameter -Wno-nested-externs -Wno-implicit-function-declaration -Wno-switch-enum -Wno-unused-but-set-variable -Wno-unknown-warning-option else bison_flags += -w endif diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 22dcfe07e886..906476a839e1 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -498,7 +498,7 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr) { union perf_mem_data_src data_src = { 0 }; - bool is_neoverse = is_midr_in_range(midr, neoverse_spe); + bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe); if (record->op == ARM_SPE_LD) data_src.mem_op = PERF_MEM_OP_LOAD; diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c index c72f8ad96f75..9aa8cdd93de4 100644 --- a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c @@ -48,6 +48,7 @@ const volatile __u32 num_cpus = 1; int enabled = 0; int use_cgroup_v2 = 0; +int perf_subsys_id = -1; static inline int get_cgroup_v1_idx(__u32 *cgrps, int size) { @@ -58,7 +59,15 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size) int level; int cnt; - cgrp = BPF_CORE_READ(p, cgroups, subsys[perf_event_cgrp_id], cgroup); + if (perf_subsys_id == -1) { +#if __has_builtin(__builtin_preserve_enum_value) + perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id, + perf_event_cgrp_id); +#else + perf_subsys_id = perf_event_cgrp_id; +#endif + } + cgrp = BPF_CORE_READ(p, cgroups, subsys[perf_subsys_id], cgroup); level = BPF_CORE_READ(cgrp, level); for (cnt = 0; i < MAX_LEVELS; i++) { diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c index c4ba2bcf179f..38e3b287dbb2 100644 --- a/tools/perf/util/bpf_skel/off_cpu.bpf.c +++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c @@ -94,6 +94,8 @@ const volatile bool has_prev_state = false; const volatile bool needs_cgroup = false; const volatile bool uses_cgroup_v1 = false; +int perf_subsys_id = -1; + /* * Old kernel used to call it task_struct->state and now it's '__state'. * Use BPF CO-RE "ignored suffix rule" to deal with it like below: @@ -119,11 +121,19 @@ static inline __u64 get_cgroup_id(struct task_struct *t) { struct cgroup *cgrp; - if (uses_cgroup_v1) - cgrp = BPF_CORE_READ(t, cgroups, subsys[perf_event_cgrp_id], cgroup); - else - cgrp = BPF_CORE_READ(t, cgroups, dfl_cgrp); + if (!uses_cgroup_v1) + return BPF_CORE_READ(t, cgroups, dfl_cgrp, kn, id); + + if (perf_subsys_id == -1) { +#if __has_builtin(__builtin_preserve_enum_value) + perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id, + perf_event_cgrp_id); +#else + perf_subsys_id = perf_event_cgrp_id; +#endif + } + cgrp = BPF_CORE_READ(t, cgroups, subsys[perf_subsys_id], cgroup); return BPF_CORE_READ(cgrp, kn, id); } diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c index 284f8eabd3b9..7c9f9150bad5 100644 --- a/tools/perf/util/parse-events-hybrid.c +++ b/tools/perf/util/parse-events-hybrid.c @@ -33,7 +33,8 @@ static void config_hybrid_attr(struct perf_event_attr *attr, * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied. */ attr->type = type; - attr->config = attr->config | ((__u64)pmu_type << PERF_PMU_TYPE_SHIFT); + attr->config = (attr->config & PERF_HW_EVENT_MASK) | + ((__u64)pmu_type << PERF_PMU_TYPE_SHIFT); } static int create_event_hybrid(__u32 config_type, int *idx, @@ -48,13 +49,25 @@ static int create_event_hybrid(__u32 config_type, int *idx, __u64 config = attr->config; config_hybrid_attr(attr, config_type, pmu->type); + + /* + * Some hybrid hardware cache events are only available on one CPU + * PMU. For example, the 'L1-dcache-load-misses' is only available + * on cpu_core, while the 'L1-icache-loads' is only available on + * cpu_atom. We need to remove "not supported" hybrid cache events. + */ + if (attr->type == PERF_TYPE_HW_CACHE + && !is_event_supported(attr->type, attr->config)) + return 0; + evsel = parse_events__add_event_hybrid(list, idx, attr, name, metric_id, pmu, config_terms); - if (evsel) + if (evsel) { evsel->pmu_name = strdup(pmu->name); - else + if (!evsel->pmu_name) + return -ENOMEM; + } else return -ENOMEM; - attr->type = type; attr->config = config; return 0; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index f05e15acd33f..f3b2c2a87456 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -28,6 +28,7 @@ #include "util/parse-events-hybrid.h" #include "util/pmu-hybrid.h" #include "tracepoint.h" +#include "thread_map.h" #define MAX_NAME_LEN 100 @@ -157,6 +158,44 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { #define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE) #define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT) +bool is_event_supported(u8 type, u64 config) +{ + bool ret = true; + int open_return; + struct evsel *evsel; + struct perf_event_attr attr = { + .type = type, + .config = config, + .disabled = 1, + }; + struct perf_thread_map *tmap = thread_map__new_by_tid(0); + + if (tmap == NULL) + return false; + + evsel = evsel__new(&attr); + if (evsel) { + open_return = evsel__open(evsel, NULL, tmap); + ret = open_return >= 0; + + if (open_return == -EACCES) { + /* + * This happens if the paranoid value + * /proc/sys/kernel/perf_event_paranoid is set to 2 + * Re-run with exclude_kernel set; we don't do that + * by default as some ARM machines do not support it. + * + */ + evsel->core.attr.exclude_kernel = 1; + ret = evsel__open(evsel, NULL, tmap) >= 0; + } + evsel__delete(evsel); + } + + perf_thread_map__put(tmap); + return ret; +} + const char *event_type(int type) { switch (type) { diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 7e6a601d9cd0..07df7bb7b042 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -19,6 +19,7 @@ struct option; struct perf_pmu; bool have_tracepoints(struct list_head *evlist); +bool is_event_supported(u8 type, u64 config); const char *event_type(int type); diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index ba1ab5134685..c4d5d87fae2f 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -22,7 +22,6 @@ #include "probe-file.h" #include "string2.h" #include "strlist.h" -#include "thread_map.h" #include "tracepoint.h" #include "pfm.h" #include "pmu-hybrid.h" @@ -239,44 +238,6 @@ void print_sdt_events(const char *subsys_glob, const char *event_glob, strlist__delete(sdtlist); } -static bool is_event_supported(u8 type, unsigned int config) -{ - bool ret = true; - int open_return; - struct evsel *evsel; - struct perf_event_attr attr = { - .type = type, - .config = config, - .disabled = 1, - }; - struct perf_thread_map *tmap = thread_map__new_by_tid(0); - - if (tmap == NULL) - return false; - - evsel = evsel__new(&attr); - if (evsel) { - open_return = evsel__open(evsel, NULL, tmap); - ret = open_return >= 0; - - if (open_return == -EACCES) { - /* - * This happens if the paranoid value - * /proc/sys/kernel/perf_event_paranoid is set to 2 - * Re-run with exclude_kernel set; we don't do that - * by default as some ARM machines do not support it. - * - */ - evsel->core.attr.exclude_kernel = 1; - ret = evsel__open(evsel, NULL, tmap) >= 0; - } - evsel__delete(evsel); - } - - perf_thread_map__put(tmap); - return ret; -} - int print_hwcache_events(const char *event_glob, bool name_only) { unsigned int type, op, i, evt_i = 0, evt_num = 0, npmus = 0; diff --git a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build index c92326c2233a..0f5ba28339cf 100644 --- a/tools/perf/util/scripting-engines/Build +++ b/tools/perf/util/scripting-engines/Build @@ -3,4 +3,4 @@ perf-$(CONFIG_LIBPYTHON) += trace-event-python.o CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default -Wno-bad-function-cast -Wno-declaration-after-statement -Wno-switch-enum -CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-error=deprecated-declarations +CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-deprecated-declarations diff --git a/tools/power/acpi/tools/pfrut/pfrut.c b/tools/power/acpi/tools/pfrut/pfrut.c index d79c335594b2..52aa0351533c 100644 --- a/tools/power/acpi/tools/pfrut/pfrut.c +++ b/tools/power/acpi/tools/pfrut/pfrut.c @@ -190,7 +190,7 @@ int main(int argc, char *argv[]) void *addr_map_capsule; struct stat st; char *log_buf; - int ret = 0; + int ret; if (getuid() != 0) { printf("Please run the tool as root - Exiting.\n"); diff --git a/tools/testing/selftests/bpf/prog_tests/deny_namespace.c b/tools/testing/selftests/bpf/prog_tests/deny_namespace.c new file mode 100644 index 000000000000..1bc6241b755b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/deny_namespace.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <test_progs.h> +#include "test_deny_namespace.skel.h" +#include <sched.h> +#include "cap_helpers.h" +#include <stdio.h> + +static int wait_for_pid(pid_t pid) +{ + int status, ret; + +again: + ret = waitpid(pid, &status, 0); + if (ret == -1) { + if (errno == EINTR) + goto again; + + return -1; + } + + if (!WIFEXITED(status)) + return -1; + + return WEXITSTATUS(status); +} + +/* negative return value -> some internal error + * positive return value -> userns creation failed + * 0 -> userns creation succeeded + */ +static int create_user_ns(void) +{ + pid_t pid; + + pid = fork(); + if (pid < 0) + return -1; + + if (pid == 0) { + if (unshare(CLONE_NEWUSER)) + _exit(EXIT_FAILURE); + _exit(EXIT_SUCCESS); + } + + return wait_for_pid(pid); +} + +static void test_userns_create_bpf(void) +{ + __u32 cap_mask = 1ULL << CAP_SYS_ADMIN; + __u64 old_caps = 0; + + cap_enable_effective(cap_mask, &old_caps); + + ASSERT_OK(create_user_ns(), "priv new user ns"); + + cap_disable_effective(cap_mask, &old_caps); + + ASSERT_EQ(create_user_ns(), EPERM, "unpriv new user ns"); + + if (cap_mask & old_caps) + cap_enable_effective(cap_mask, NULL); +} + +static void test_unpriv_userns_create_no_bpf(void) +{ + __u32 cap_mask = 1ULL << CAP_SYS_ADMIN; + __u64 old_caps = 0; + + cap_disable_effective(cap_mask, &old_caps); + + ASSERT_OK(create_user_ns(), "no-bpf unpriv new user ns"); + + if (cap_mask & old_caps) + cap_enable_effective(cap_mask, NULL); +} + +void test_deny_namespace(void) +{ + struct test_deny_namespace *skel = NULL; + int err; + + if (test__start_subtest("unpriv_userns_create_no_bpf")) + test_unpriv_userns_create_no_bpf(); + + skel = test_deny_namespace__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel load")) + goto close_prog; + + err = test_deny_namespace__attach(skel); + if (!ASSERT_OK(err, "attach")) + goto close_prog; + + if (test__start_subtest("userns_create_bpf")) + test_userns_create_bpf(); + + test_deny_namespace__detach(skel); + +close_prog: + test_deny_namespace__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_deny_namespace.c b/tools/testing/selftests/bpf/progs/test_deny_namespace.c new file mode 100644 index 000000000000..09ad5a4ebd1f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_deny_namespace.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <errno.h> +#include <linux/capability.h> + +struct kernel_cap_struct { + __u32 cap[_LINUX_CAPABILITY_U32S_3]; +} __attribute__((preserve_access_index)); + +struct cred { + struct kernel_cap_struct cap_effective; +} __attribute__((preserve_access_index)); + +char _license[] SEC("license") = "GPL"; + +SEC("lsm.s/userns_create") +int BPF_PROG(test_userns_create, const struct cred *cred, int ret) +{ + struct kernel_cap_struct caps = cred->cap_effective; + int cap_index = CAP_TO_INDEX(CAP_SYS_ADMIN); + __u32 cap_mask = CAP_TO_MASK(CAP_SYS_ADMIN); + + if (ret) + return 0; + + ret = -EPERM; + if (caps.cap[cap_index] & cap_mask) + return 0; + + return -EPERM; +} diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 4c122f1b1737..6448cb9f710f 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -48,6 +48,8 @@ LIBKVM += lib/rbtree.c LIBKVM += lib/sparsebit.c LIBKVM += lib/test_util.c +LIBKVM_STRING += lib/string_override.c + LIBKVM_x86_64 += lib/x86_64/apic.c LIBKVM_x86_64 += lib/x86_64/handlers.S LIBKVM_x86_64 += lib/x86_64/perf_test_util.c @@ -220,7 +222,8 @@ LIBKVM_C := $(filter %.c,$(LIBKVM)) LIBKVM_S := $(filter %.S,$(LIBKVM)) LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C)) LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S)) -LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) +LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING)) +LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ) EXTRA_CLEAN += $(LIBKVM_OBJS) cscope.* @@ -231,6 +234,12 @@ $(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c $(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ +# Compile the string overrides as freestanding to prevent the compiler from +# generating self-referential code, e.g. without "freestanding" the compiler may +# "optimize" memcmp() by invoking memcmp(), thus causing infinite recursion. +$(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@ + x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS)))) $(TEST_GEN_PROGS): $(LIBKVM_OBJS) $(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS) diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index 1c2749b1481a..76c583a07ea2 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -31,8 +31,9 @@ * These limitations are worked around in this test by using a large enough * region of memory for each vCPU such that the number of translations cached in * the TLB and the number of pages held in pagevecs are a small fraction of the - * overall workload. And if either of those conditions are not true this test - * will fail rather than silently passing. + * overall workload. And if either of those conditions are not true (for example + * in nesting, where TLB size is unlimited) this test will print a warning + * rather than silently passing. */ #include <inttypes.h> #include <limits.h> @@ -172,17 +173,23 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, vcpu_idx, no_pfn, pages); /* - * Test that at least 90% of memory has been marked idle (the rest might - * not be marked idle because the pages have not yet made it to an LRU - * list or the translations are still cached in the TLB). 90% is + * Check that at least 90% of memory has been marked idle (the rest + * might not be marked idle because the pages have not yet made it to an + * LRU list or the translations are still cached in the TLB). 90% is * arbitrary; high enough that we ensure most memory access went through * access tracking but low enough as to not make the test too brittle * over time and across architectures. + * + * Note that when run in nested virtualization, this check will trigger + * much more frequently because TLB size is unlimited and since no flush + * happens, much more pages are cached there and guest won't see the + * "idle" bit cleared. */ - TEST_ASSERT(still_idle < pages / 10, - "vCPU%d: Too many pages still idle (%"PRIu64 " out of %" - PRIu64 ").\n", - vcpu_idx, still_idle, pages); + if (still_idle < pages / 10) + printf("WARNING: vCPU%d: Too many pages still idle (%" PRIu64 + "out of %" PRIu64 "), this will affect performance results" + ".\n", + vcpu_idx, still_idle, pages); close(page_idle_fd); close(pagemap_fd); diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index 99fa1410964c..790c6d1ecb34 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -617,6 +617,7 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t memslot); void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, uint64_t addr, uint64_t size); +bool kvm_vm_has_ept(struct kvm_vm *vm); void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t eptp_memslot); void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c index 71ade6100fd3..2bd25b191d15 100644 --- a/tools/testing/selftests/kvm/lib/assert.c +++ b/tools/testing/selftests/kvm/lib/assert.c @@ -22,7 +22,7 @@ static void test_dump_stack(void) * Build and run this command: * * addr2line -s -e /proc/$PPID/exe -fpai {backtrace addresses} | \ - * grep -v test_dump_stack | cat -n 1>&2 + * cat -n 1>&2 * * Note that the spacing is different and there's no newline. */ @@ -36,18 +36,24 @@ static void test_dump_stack(void) n * (((sizeof(void *)) * 2) + 1) + /* Null terminator: */ 1]; - char *c; + char *c = cmd; n = backtrace(stack, n); - c = &cmd[0]; - c += sprintf(c, "%s", addr2line); /* - * Skip the first 3 frames: backtrace, test_dump_stack, and - * test_assert. We hope that backtrace isn't inlined and the other two - * we've declared noinline. + * Skip the first 2 frames, which should be test_dump_stack() and + * test_assert(); both of which are declared noinline. Bail if the + * resulting stack trace would be empty. Otherwise, addr2line will block + * waiting for addresses to be passed in via stdin. */ + if (n <= 2) { + fputs(" (stack trace empty)\n", stderr); + return; + } + + c += sprintf(c, "%s", addr2line); for (i = 2; i < n; i++) c += sprintf(c, " %lx", ((unsigned long) stack[i]) - 1); + c += sprintf(c, "%s", pipeline); #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-result" diff --git a/tools/testing/selftests/kvm/lib/string_override.c b/tools/testing/selftests/kvm/lib/string_override.c new file mode 100644 index 000000000000..632398adc229 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/string_override.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <stddef.h> + +/* + * Override the "basic" built-in string helpers so that they can be used in + * guest code. KVM selftests don't support dynamic loading in guest code and + * will jump into the weeds if the compiler decides to insert an out-of-line + * call via the PLT. + */ +int memcmp(const void *cs, const void *ct, size_t count) +{ + const unsigned char *su1, *su2; + int res = 0; + + for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--) { + if ((res = *su1 - *su2) != 0) + break; + } + return res; +} + +void *memcpy(void *dest, const void *src, size_t count) +{ + char *tmp = dest; + const char *s = src; + + while (count--) + *tmp++ = *s++; + return dest; +} + +void *memset(void *s, int c, size_t count) +{ + char *xs = s; + + while (count--) + *xs++ = c; + return s; +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index 80a568c439b8..d21049c38fc5 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -5,6 +5,8 @@ * Copyright (C) 2018, Google LLC. */ +#include <asm/msr-index.h> + #include "test_util.h" #include "kvm_util.h" #include "processor.h" @@ -542,9 +544,27 @@ void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G); } +bool kvm_vm_has_ept(struct kvm_vm *vm) +{ + struct kvm_vcpu *vcpu; + uint64_t ctrl; + + vcpu = list_first_entry(&vm->vcpus, struct kvm_vcpu, list); + TEST_ASSERT(vcpu, "Cannot determine EPT support without vCPUs.\n"); + + ctrl = vcpu_get_msr(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32; + if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) + return false; + + ctrl = vcpu_get_msr(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2) >> 32; + return ctrl & SECONDARY_EXEC_ENABLE_EPT; +} + void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t eptp_memslot) { + TEST_REQUIRE(kvm_vm_has_ept(vm)); + vmx->eptp = (void *)vm_vaddr_alloc_page(vm); vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp); vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp); diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c index b1905d280ef5..e0004bd26536 100644 --- a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c +++ b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c @@ -14,6 +14,9 @@ #include "kvm_util.h" #include "processor.h" +/* VMCALL and VMMCALL are both 3-byte opcodes. */ +#define HYPERCALL_INSN_SIZE 3 + static bool ud_expected; static void guest_ud_handler(struct ex_regs *regs) @@ -22,7 +25,7 @@ static void guest_ud_handler(struct ex_regs *regs) GUEST_DONE(); } -extern unsigned char svm_hypercall_insn; +extern uint8_t svm_hypercall_insn[HYPERCALL_INSN_SIZE]; static uint64_t svm_do_sched_yield(uint8_t apic_id) { uint64_t ret; @@ -39,7 +42,7 @@ static uint64_t svm_do_sched_yield(uint8_t apic_id) return ret; } -extern unsigned char vmx_hypercall_insn; +extern uint8_t vmx_hypercall_insn[HYPERCALL_INSN_SIZE]; static uint64_t vmx_do_sched_yield(uint8_t apic_id) { uint64_t ret; @@ -56,30 +59,20 @@ static uint64_t vmx_do_sched_yield(uint8_t apic_id) return ret; } -static void assert_hypercall_insn(unsigned char *exp_insn, unsigned char *obs_insn) -{ - uint32_t exp = 0, obs = 0; - - memcpy(&exp, exp_insn, sizeof(exp)); - memcpy(&obs, obs_insn, sizeof(obs)); - - GUEST_ASSERT_EQ(exp, obs); -} - static void guest_main(void) { - unsigned char *native_hypercall_insn, *hypercall_insn; + uint8_t *native_hypercall_insn, *hypercall_insn; uint8_t apic_id; apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID)); if (is_intel_cpu()) { - native_hypercall_insn = &vmx_hypercall_insn; - hypercall_insn = &svm_hypercall_insn; + native_hypercall_insn = vmx_hypercall_insn; + hypercall_insn = svm_hypercall_insn; svm_do_sched_yield(apic_id); } else if (is_amd_cpu()) { - native_hypercall_insn = &svm_hypercall_insn; - hypercall_insn = &vmx_hypercall_insn; + native_hypercall_insn = svm_hypercall_insn; + hypercall_insn = vmx_hypercall_insn; vmx_do_sched_yield(apic_id); } else { GUEST_ASSERT(0); @@ -87,8 +80,13 @@ static void guest_main(void) return; } + /* + * The hypercall didn't #UD (guest_ud_handler() signals "done" if a #UD + * occurs). Verify that a #UD is NOT expected and that KVM patched in + * the native hypercall. + */ GUEST_ASSERT(!ud_expected); - assert_hypercall_insn(native_hypercall_insn, hypercall_insn); + GUEST_ASSERT(!memcmp(native_hypercall_insn, hypercall_insn, HYPERCALL_INSN_SIZE)); GUEST_DONE(); } diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt index 65e53eb0840b..607b8d7e3ea3 100644 --- a/tools/testing/selftests/lkdtm/tests.txt +++ b/tools/testing/selftests/lkdtm/tests.txt @@ -75,7 +75,9 @@ USERCOPY_KERNEL STACKLEAK_ERASING OK: the rest of the thread stack is properly erased CFI_FORWARD_PROTO CFI_BACKWARD call trace:|ok: control flow unchanged -FORTIFIED_STRSCPY -FORTIFIED_OBJECT -FORTIFIED_SUBOBJECT +FORTIFY_STRSCPY detected buffer overflow +FORTIFY_STR_OBJECT detected buffer overflow +FORTIFY_STR_MEMBER detected buffer overflow +FORTIFY_MEM_OBJECT detected buffer overflow +FORTIFY_MEM_MEMBER detected field-spanning write PPC_SLB_MULTIHIT Recovered diff --git a/tools/testing/selftests/nolibc/.gitignore b/tools/testing/selftests/nolibc/.gitignore new file mode 100644 index 000000000000..4696df589d68 --- /dev/null +++ b/tools/testing/selftests/nolibc/.gitignore @@ -0,0 +1,4 @@ +/initramfs/ +/nolibc-test +/run.out +/sysroot/ diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile new file mode 100644 index 000000000000..69ea659caca9 --- /dev/null +++ b/tools/testing/selftests/nolibc/Makefile @@ -0,0 +1,135 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for nolibc tests +include ../../../scripts/Makefile.include + +# we're in ".../tools/testing/selftests/nolibc" +ifeq ($(srctree),) +srctree := $(patsubst %/tools/testing/selftests/,%,$(dir $(CURDIR))) +endif + +ifeq ($(ARCH),) +include $(srctree)/scripts/subarch.include +ARCH = $(SUBARCH) +endif + +# kernel image names by architecture +IMAGE_i386 = arch/x86/boot/bzImage +IMAGE_x86 = arch/x86/boot/bzImage +IMAGE_arm64 = arch/arm64/boot/Image +IMAGE_arm = arch/arm/boot/zImage +IMAGE_mips = vmlinuz +IMAGE_riscv = arch/riscv/boot/Image +IMAGE = $(IMAGE_$(ARCH)) +IMAGE_NAME = $(notdir $(IMAGE)) + +# default kernel configurations that appear to be usable +DEFCONFIG_i386 = defconfig +DEFCONFIG_x86 = defconfig +DEFCONFIG_arm64 = defconfig +DEFCONFIG_arm = multi_v7_defconfig +DEFCONFIG_mips = malta_defconfig +DEFCONFIG_riscv = defconfig +DEFCONFIG = $(DEFCONFIG_$(ARCH)) + +# optional tests to run (default = all) +TEST = + +# QEMU_ARCH: arch names used by qemu +QEMU_ARCH_i386 = i386 +QEMU_ARCH_x86 = x86_64 +QEMU_ARCH_arm64 = aarch64 +QEMU_ARCH_arm = arm +QEMU_ARCH_mips = mipsel # works with malta_defconfig +QEMU_ARCH_riscv = riscv64 +QEMU_ARCH = $(QEMU_ARCH_$(ARCH)) + +# QEMU_ARGS : some arch-specific args to pass to qemu +QEMU_ARGS_i386 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_x86 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_arm64 = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_arm = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mips = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS = $(QEMU_ARGS_$(ARCH)) + +# OUTPUT is only set when run from the main makefile, otherwise +# it defaults to this nolibc directory. +OUTPUT ?= $(CURDIR)/ + +ifeq ($(V),1) +Q= +else +Q=@ +endif + +CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables +LDFLAGS := -s + +help: + @echo "Supported targets under selftests/nolibc:" + @echo " all call the \"run\" target below" + @echo " help this help" + @echo " sysroot create the nolibc sysroot here (uses \$$ARCH)" + @echo " nolibc-test build the executable (uses \$$CC and \$$CROSS_COMPILE)" + @echo " initramfs prepare the initramfs with nolibc-test" + @echo " defconfig create a fresh new default config (uses \$$ARCH)" + @echo " kernel (re)build the kernel with the initramfs (uses \$$ARCH)" + @echo " run runs the kernel in QEMU after building it (uses \$$ARCH, \$$TEST)" + @echo " rerun runs a previously prebuilt kernel in QEMU (uses \$$ARCH, \$$TEST)" + @echo " clean clean the sysroot, initramfs, build and output files" + @echo "" + @echo "The output file is \"run.out\". Test ranges may be passed using \$$TEST." + @echo "" + @echo "Currently using the following variables:" + @echo " ARCH = $(ARCH)" + @echo " CROSS_COMPILE = $(CROSS_COMPILE)" + @echo " CC = $(CC)" + @echo " OUTPUT = $(OUTPUT)" + @echo " TEST = $(TEST)" + @echo " QEMU_ARCH = $(if $(QEMU_ARCH),$(QEMU_ARCH),UNKNOWN_ARCH) [determined from \$$ARCH]" + @echo " IMAGE_NAME = $(if $(IMAGE_NAME),$(IMAGE_NAME),UNKNOWN_ARCH) [determined from \$$ARCH]" + @echo "" + +all: run + +sysroot: sysroot/$(ARCH)/include + +sysroot/$(ARCH)/include: + $(QUIET_MKDIR)mkdir -p sysroot + $(Q)$(MAKE) -C ../../../include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone + $(Q)mv sysroot/sysroot sysroot/$(ARCH) + +nolibc-test: nolibc-test.c sysroot/$(ARCH)/include + $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \ + -nostdlib -static -Isysroot/$(ARCH)/include $< -lgcc + +initramfs: nolibc-test + $(QUIET_MKDIR)mkdir -p initramfs + $(call QUIET_INSTALL, initramfs/init) + $(Q)cp nolibc-test initramfs/init + +defconfig: + $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) mrproper $(DEFCONFIG) prepare + +kernel: initramfs + $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs + +# run the tests after building the kernel +run: kernel + $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" + $(Q)grep -w FAIL "$(CURDIR)/run.out" && echo "See all results in $(CURDIR)/run.out" || echo "$$(grep -c ^[0-9].*OK $(CURDIR)/run.out) test(s) passed." + +# re-run the tests from an existing kernel +rerun: + $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" + $(Q)grep -w FAIL "$(CURDIR)/run.out" && echo "See all results in $(CURDIR)/run.out" || echo "$$(grep -c ^[0-9].*OK $(CURDIR)/run.out) test(s) passed." + +clean: + $(call QUIET_CLEAN, sysroot) + $(Q)rm -rf sysroot + $(call QUIET_CLEAN, nolibc-test) + $(Q)rm -f nolibc-test + $(call QUIET_CLEAN, initramfs) + $(Q)rm -rf initramfs + $(call QUIET_CLEAN, run.out) + $(Q)rm -rf run.out diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c new file mode 100644 index 000000000000..78bced95ac63 --- /dev/null +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -0,0 +1,757 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +/* platform-specific include files coming from the compiler */ +#include <limits.h> + +/* libc-specific include files + * The program may be built in 3 ways: + * $(CC) -nostdlib -include /path/to/nolibc.h => NOLIBC already defined + * $(CC) -nostdlib -I/path/to/nolibc/sysroot => _NOLIBC_* guards are present + * $(CC) with default libc => NOLIBC* never defined + */ +#ifndef NOLIBC +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#ifndef _NOLIBC_STDIO_H +/* standard libcs need more includes */ +#include <linux/reboot.h> +#include <sys/io.h> +#include <sys/ioctl.h> +#include <sys/mount.h> +#include <sys/reboot.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/sysmacros.h> +#include <sys/time.h> +#include <sys/wait.h> +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <poll.h> +#include <sched.h> +#include <signal.h> +#include <stdarg.h> +#include <unistd.h> +#endif +#endif + +/* will be used by nolibc by getenv() */ +char **environ; + +/* definition of a series of tests */ +struct test { + const char *name; // test name + int (*func)(int min, int max); // handler +}; + +#ifndef _NOLIBC_STDLIB_H +char *itoa(int i) +{ + static char buf[12]; + int ret; + + ret = snprintf(buf, sizeof(buf), "%d", i); + return (ret >= 0 && ret < sizeof(buf)) ? buf : "#err"; +} +#endif + +#define CASE_ERR(err) \ + case err: return #err + +/* returns the error name (e.g. "ENOENT") for common errors, "SUCCESS" for 0, + * or the decimal value for less common ones. + */ +const char *errorname(int err) +{ + switch (err) { + case 0: return "SUCCESS"; + CASE_ERR(EPERM); + CASE_ERR(ENOENT); + CASE_ERR(ESRCH); + CASE_ERR(EINTR); + CASE_ERR(EIO); + CASE_ERR(ENXIO); + CASE_ERR(E2BIG); + CASE_ERR(ENOEXEC); + CASE_ERR(EBADF); + CASE_ERR(ECHILD); + CASE_ERR(EAGAIN); + CASE_ERR(ENOMEM); + CASE_ERR(EACCES); + CASE_ERR(EFAULT); + CASE_ERR(ENOTBLK); + CASE_ERR(EBUSY); + CASE_ERR(EEXIST); + CASE_ERR(EXDEV); + CASE_ERR(ENODEV); + CASE_ERR(ENOTDIR); + CASE_ERR(EISDIR); + CASE_ERR(EINVAL); + CASE_ERR(ENFILE); + CASE_ERR(EMFILE); + CASE_ERR(ENOTTY); + CASE_ERR(ETXTBSY); + CASE_ERR(EFBIG); + CASE_ERR(ENOSPC); + CASE_ERR(ESPIPE); + CASE_ERR(EROFS); + CASE_ERR(EMLINK); + CASE_ERR(EPIPE); + CASE_ERR(EDOM); + CASE_ERR(ERANGE); + CASE_ERR(ENOSYS); + default: + return itoa(err); + } +} + +static int pad_spc(int llen, int cnt, const char *fmt, ...) +{ + va_list args; + int len; + int ret; + + for (len = 0; len < cnt - llen; len++) + putchar(' '); + + va_start(args, fmt); + ret = vfprintf(stdout, fmt, args); + va_end(args); + return ret < 0 ? ret : ret + len; +} + +/* The tests below are intended to be used by the macroes, which evaluate + * expression <expr>, print the status to stdout, and update the "ret" + * variable to count failures. The functions themselves return the number + * of failures, thus either 0 or 1. + */ + +#define EXPECT_ZR(cond, expr) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_zr(expr, llen); } while (0) + +static int expect_zr(int expr, int llen) +{ + int ret = !(expr == 0); + + llen += printf(" = %d ", expr); + pad_spc(llen, 40, ret ? "[FAIL]\n" : " [OK]\n"); + return ret; +} + + +#define EXPECT_NZ(cond, expr, val) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_nz(expr, llen; } while (0) + +static int expect_nz(int expr, int llen) +{ + int ret = !(expr != 0); + + llen += printf(" = %d ", expr); + pad_spc(llen, 40, ret ? "[FAIL]\n" : " [OK]\n"); + return ret; +} + + +#define EXPECT_EQ(cond, expr, val) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_eq(expr, llen, val); } while (0) + +static int expect_eq(int expr, int llen, int val) +{ + int ret = !(expr == val); + + llen += printf(" = %d ", expr); + pad_spc(llen, 40, ret ? "[FAIL]\n" : " [OK]\n"); + return ret; +} + + +#define EXPECT_NE(cond, expr, val) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_ne(expr, llen, val); } while (0) + +static int expect_ne(int expr, int llen, int val) +{ + int ret = !(expr != val); + + llen += printf(" = %d ", expr); + pad_spc(llen, 40, ret ? "[FAIL]\n" : " [OK]\n"); + return ret; +} + + +#define EXPECT_GE(cond, expr, val) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_ge(expr, llen, val); } while (0) + +static int expect_ge(int expr, int llen, int val) +{ + int ret = !(expr >= val); + + llen += printf(" = %d ", expr); + pad_spc(llen, 40, ret ? "[FAIL]\n" : " [OK]\n"); + return ret; +} + + +#define EXPECT_GT(cond, expr, val) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_gt(expr, llen, val); } while (0) + +static int expect_gt(int expr, int llen, int val) +{ + int ret = !(expr > val); + + llen += printf(" = %d ", expr); + pad_spc(llen, 40, ret ? "[FAIL]\n" : " [OK]\n"); + return ret; +} + + +#define EXPECT_LE(cond, expr, val) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_le(expr, llen, val); } while (0) + +static int expect_le(int expr, int llen, int val) +{ + int ret = !(expr <= val); + + llen += printf(" = %d ", expr); + pad_spc(llen, 40, ret ? "[FAIL]\n" : " [OK]\n"); + return ret; +} + + +#define EXPECT_LT(cond, expr, val) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_lt(expr, llen, val); } while (0) + +static int expect_lt(int expr, int llen, int val) +{ + int ret = !(expr < val); + + llen += printf(" = %d ", expr); + pad_spc(llen, 40, ret ? "[FAIL]\n" : " [OK]\n"); + return ret; +} + + +#define EXPECT_SYSZR(cond, expr) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_syszr(expr, llen); } while (0) + +static int expect_syszr(int expr, int llen) +{ + int ret = 0; + + if (expr) { + ret = 1; + llen += printf(" = %d %s ", expr, errorname(errno)); + llen += pad_spc(llen, 40, "[FAIL]\n"); + } else { + llen += printf(" = %d ", expr); + llen += pad_spc(llen, 40, " [OK]\n"); + } + return ret; +} + + +#define EXPECT_SYSEQ(cond, expr, val) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_syseq(expr, llen, val); } while (0) + +static int expect_syseq(int expr, int llen, int val) +{ + int ret = 0; + + if (expr != val) { + ret = 1; + llen += printf(" = %d %s ", expr, errorname(errno)); + llen += pad_spc(llen, 40, "[FAIL]\n"); + } else { + llen += printf(" = %d ", expr); + llen += pad_spc(llen, 40, " [OK]\n"); + } + return ret; +} + + +#define EXPECT_SYSNE(cond, expr, val) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_sysne(expr, llen, val); } while (0) + +static int expect_sysne(int expr, int llen, int val) +{ + int ret = 0; + + if (expr == val) { + ret = 1; + llen += printf(" = %d %s ", expr, errorname(errno)); + llen += pad_spc(llen, 40, "[FAIL]\n"); + } else { + llen += printf(" = %d ", expr); + llen += pad_spc(llen, 40, " [OK]\n"); + } + return ret; +} + + +#define EXPECT_SYSER(cond, expr, expret, experr) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_syserr(expr, expret, experr, llen); } while (0) + +static int expect_syserr(int expr, int expret, int experr, int llen) +{ + int ret = 0; + int _errno = errno; + + llen += printf(" = %d %s ", expr, errorname(_errno)); + if (expr != expret || _errno != experr) { + ret = 1; + llen += printf(" != (%d %s) ", expret, errorname(experr)); + llen += pad_spc(llen, 40, "[FAIL]\n"); + } else { + llen += pad_spc(llen, 40, " [OK]\n"); + } + return ret; +} + + +#define EXPECT_PTRZR(cond, expr) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_ptrzr(expr, llen); } while (0) + +static int expect_ptrzr(const void *expr, int llen) +{ + int ret = 0; + + llen += printf(" = <%p> ", expr); + if (expr) { + ret = 1; + llen += pad_spc(llen, 40, "[FAIL]\n"); + } else { + llen += pad_spc(llen, 40, " [OK]\n"); + } + return ret; +} + + +#define EXPECT_PTRNZ(cond, expr) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_ptrnz(expr, llen); } while (0) + +static int expect_ptrnz(const void *expr, int llen) +{ + int ret = 0; + + llen += printf(" = <%p> ", expr); + if (!expr) { + ret = 1; + llen += pad_spc(llen, 40, "[FAIL]\n"); + } else { + llen += pad_spc(llen, 40, " [OK]\n"); + } + return ret; +} + + +#define EXPECT_STRZR(cond, expr) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_strzr(expr, llen); } while (0) + +static int expect_strzr(const char *expr, int llen) +{ + int ret = 0; + + llen += printf(" = <%s> ", expr); + if (expr) { + ret = 1; + llen += pad_spc(llen, 40, "[FAIL]\n"); + } else { + llen += pad_spc(llen, 40, " [OK]\n"); + } + return ret; +} + + +#define EXPECT_STRNZ(cond, expr) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_strnz(expr, llen); } while (0) + +static int expect_strnz(const char *expr, int llen) +{ + int ret = 0; + + llen += printf(" = <%s> ", expr); + if (!expr) { + ret = 1; + llen += pad_spc(llen, 40, "[FAIL]\n"); + } else { + llen += pad_spc(llen, 40, " [OK]\n"); + } + return ret; +} + + +#define EXPECT_STREQ(cond, expr, cmp) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_streq(expr, llen, cmp); } while (0) + +static int expect_streq(const char *expr, int llen, const char *cmp) +{ + int ret = 0; + + llen += printf(" = <%s> ", expr); + if (strcmp(expr, cmp) != 0) { + ret = 1; + llen += pad_spc(llen, 40, "[FAIL]\n"); + } else { + llen += pad_spc(llen, 40, " [OK]\n"); + } + return ret; +} + + +#define EXPECT_STRNE(cond, expr, cmp) \ + do { if (!cond) pad_spc(llen, 40, "[SKIPPED]\n"); else ret += expect_strne(expr, llen, cmp); } while (0) + +static int expect_strne(const char *expr, int llen, const char *cmp) +{ + int ret = 0; + + llen += printf(" = <%s> ", expr); + if (strcmp(expr, cmp) == 0) { + ret = 1; + llen += pad_spc(llen, 40, "[FAIL]\n"); + } else { + llen += pad_spc(llen, 40, " [OK]\n"); + } + return ret; +} + + +/* declare tests based on line numbers. There must be exactly one test per line. */ +#define CASE_TEST(name) \ + case __LINE__: llen += printf("%d %s", test, #name); + + +/* used by some syscall tests below */ +int test_getdents64(const char *dir) +{ + char buffer[4096]; + int fd, ret; + int err; + + ret = fd = open(dir, O_RDONLY | O_DIRECTORY, 0); + if (ret < 0) + return ret; + + ret = getdents64(fd, (void *)buffer, sizeof(buffer)); + err = errno; + close(fd); + + errno = err; + return ret; +} + +/* Run syscall tests between IDs <min> and <max>. + * Return 0 on success, non-zero on failure. + */ +int run_syscall(int min, int max) +{ + struct stat stat_buf; + int proc; + int test; + int tmp; + int ret = 0; + void *p1, *p2; + + /* <proc> indicates whether or not /proc is mounted */ + proc = stat("/proc", &stat_buf) == 0; + + for (test = min; test >= 0 && test <= max; test++) { + int llen = 0; // line length + + /* avoid leaving empty lines below, this will insert holes into + * test numbers. + */ + switch (test + __LINE__ + 1) { + CASE_TEST(getpid); EXPECT_SYSNE(1, getpid(), -1); break; + CASE_TEST(getppid); EXPECT_SYSNE(1, getppid(), -1); break; +#ifdef NOLIBC + CASE_TEST(gettid); EXPECT_SYSNE(1, gettid(), -1); break; +#endif + CASE_TEST(getpgid_self); EXPECT_SYSNE(1, getpgid(0), -1); break; + CASE_TEST(getpgid_bad); EXPECT_SYSER(1, getpgid(-1), -1, ESRCH); break; + CASE_TEST(kill_0); EXPECT_SYSZR(1, kill(getpid(), 0)); break; + CASE_TEST(kill_CONT); EXPECT_SYSZR(1, kill(getpid(), 0)); break; + CASE_TEST(kill_BADPID); EXPECT_SYSER(1, kill(INT_MAX, 0), -1, ESRCH); break; + CASE_TEST(sbrk); if ((p1 = p2 = sbrk(4096)) != (void *)-1) p2 = sbrk(-4096); EXPECT_SYSZR(1, (p2 == (void *)-1) || p2 == p1); break; + CASE_TEST(brk); EXPECT_SYSZR(1, brk(sbrk(0))); break; + CASE_TEST(chdir_root); EXPECT_SYSZR(1, chdir("/")); break; + CASE_TEST(chdir_dot); EXPECT_SYSZR(1, chdir(".")); break; + CASE_TEST(chdir_blah); EXPECT_SYSER(1, chdir("/blah"), -1, ENOENT); break; + CASE_TEST(chmod_net); EXPECT_SYSZR(proc, chmod("/proc/self/net", 0555)); break; + CASE_TEST(chmod_self); EXPECT_SYSER(proc, chmod("/proc/self", 0555), -1, EPERM); break; + CASE_TEST(chown_self); EXPECT_SYSER(proc, chown("/proc/self", 0, 0), -1, EPERM); break; + CASE_TEST(chroot_root); EXPECT_SYSZR(1, chroot("/")); break; + CASE_TEST(chroot_blah); EXPECT_SYSER(1, chroot("/proc/self/blah"), -1, ENOENT); break; + CASE_TEST(chroot_exe); EXPECT_SYSER(proc, chroot("/proc/self/exe"), -1, ENOTDIR); break; + CASE_TEST(close_m1); EXPECT_SYSER(1, close(-1), -1, EBADF); break; + CASE_TEST(close_dup); EXPECT_SYSZR(1, close(dup(0))); break; + CASE_TEST(dup_0); tmp = dup(0); EXPECT_SYSNE(1, tmp, -1); close(tmp); break; + CASE_TEST(dup_m1); tmp = dup(-1); EXPECT_SYSER(1, tmp, -1, EBADF); if (tmp != -1) close(tmp); break; + CASE_TEST(dup2_0); tmp = dup2(0, 100); EXPECT_SYSNE(1, tmp, -1); close(tmp); break; + CASE_TEST(dup2_m1); tmp = dup2(-1, 100); EXPECT_SYSER(1, tmp, -1, EBADF); if (tmp != -1) close(tmp); break; + CASE_TEST(dup3_0); tmp = dup3(0, 100, 0); EXPECT_SYSNE(1, tmp, -1); close(tmp); break; + CASE_TEST(dup3_m1); tmp = dup3(-1, 100, 0); EXPECT_SYSER(1, tmp, -1, EBADF); if (tmp != -1) close(tmp); break; + CASE_TEST(execve_root); EXPECT_SYSER(1, execve("/", (char*[]){ [0] = "/", [1] = NULL }, NULL), -1, EACCES); break; + CASE_TEST(getdents64_root); EXPECT_SYSNE(1, test_getdents64("/"), -1); break; + CASE_TEST(getdents64_null); EXPECT_SYSER(1, test_getdents64("/dev/null"), -1, ENOTDIR); break; + CASE_TEST(gettimeofday_null); EXPECT_SYSZR(1, gettimeofday(NULL, NULL)); break; +#ifdef NOLIBC + CASE_TEST(gettimeofday_bad1); EXPECT_SYSER(1, gettimeofday((void *)1, NULL), -1, EFAULT); break; + CASE_TEST(gettimeofday_bad2); EXPECT_SYSER(1, gettimeofday(NULL, (void *)1), -1, EFAULT); break; + CASE_TEST(gettimeofday_bad2); EXPECT_SYSER(1, gettimeofday(NULL, (void *)1), -1, EFAULT); break; +#endif + CASE_TEST(ioctl_tiocinq); EXPECT_SYSZR(1, ioctl(0, TIOCINQ, &tmp)); break; + CASE_TEST(ioctl_tiocinq); EXPECT_SYSZR(1, ioctl(0, TIOCINQ, &tmp)); break; + CASE_TEST(link_root1); EXPECT_SYSER(1, link("/", "/"), -1, EEXIST); break; + CASE_TEST(link_blah); EXPECT_SYSER(1, link("/proc/self/blah", "/blah"), -1, ENOENT); break; + CASE_TEST(link_dir); EXPECT_SYSER(1, link("/", "/blah"), -1, EPERM); break; + CASE_TEST(link_cross); EXPECT_SYSER(proc, link("/proc/self/net", "/blah"), -1, EXDEV); break; + CASE_TEST(lseek_m1); EXPECT_SYSER(1, lseek(-1, 0, SEEK_SET), -1, EBADF); break; + CASE_TEST(lseek_0); EXPECT_SYSER(1, lseek(0, 0, SEEK_SET), -1, ESPIPE); break; + CASE_TEST(mkdir_root); EXPECT_SYSER(1, mkdir("/", 0755), -1, EEXIST); break; + CASE_TEST(open_tty); EXPECT_SYSNE(1, tmp = open("/dev/null", 0), -1); if (tmp != -1) close(tmp); break; + CASE_TEST(open_blah); EXPECT_SYSER(1, tmp = open("/proc/self/blah", 0), -1, ENOENT); if (tmp != -1) close(tmp); break; + CASE_TEST(poll_null); EXPECT_SYSZR(1, poll(NULL, 0, 0)); break; + CASE_TEST(poll_stdout); EXPECT_SYSNE(1, ({ struct pollfd fds = { 1, POLLOUT, 0}; poll(&fds, 1, 0); }), -1); break; + CASE_TEST(poll_fault); EXPECT_SYSER(1, poll((void *)1, 1, 0), -1, EFAULT); break; + CASE_TEST(read_badf); EXPECT_SYSER(1, read(-1, &tmp, 1), -1, EBADF); break; + CASE_TEST(sched_yield); EXPECT_SYSZR(1, sched_yield()); break; + CASE_TEST(select_null); EXPECT_SYSZR(1, ({ struct timeval tv = { 0 }; select(0, NULL, NULL, NULL, &tv); })); break; + CASE_TEST(select_stdout); EXPECT_SYSNE(1, ({ fd_set fds; FD_ZERO(&fds); FD_SET(1, &fds); select(2, NULL, &fds, NULL, NULL); }), -1); break; + CASE_TEST(select_fault); EXPECT_SYSER(1, select(1, (void *)1, NULL, NULL, 0), -1, EFAULT); break; + CASE_TEST(stat_blah); EXPECT_SYSER(1, stat("/proc/self/blah", &stat_buf), -1, ENOENT); break; + CASE_TEST(stat_fault); EXPECT_SYSER(1, stat(NULL, &stat_buf), -1, EFAULT); break; + CASE_TEST(symlink_root); EXPECT_SYSER(1, symlink("/", "/"), -1, EEXIST); break; + CASE_TEST(unlink_root); EXPECT_SYSER(1, unlink("/"), -1, EISDIR); break; + CASE_TEST(unlink_blah); EXPECT_SYSER(1, unlink("/proc/self/blah"), -1, ENOENT); break; + CASE_TEST(wait_child); EXPECT_SYSER(1, wait(&tmp), -1, ECHILD); break; + CASE_TEST(waitpid_min); EXPECT_SYSER(1, waitpid(INT_MIN, &tmp, WNOHANG), -1, ESRCH); break; + CASE_TEST(waitpid_child); EXPECT_SYSER(1, waitpid(getpid(), &tmp, WNOHANG), -1, ECHILD); break; + CASE_TEST(write_badf); EXPECT_SYSER(1, write(-1, &tmp, 1), -1, EBADF); break; + CASE_TEST(write_zero); EXPECT_SYSZR(1, write(1, &tmp, 0)); break; + case __LINE__: + return ret; /* must be last */ + /* note: do not set any defaults so as to permit holes above */ + } + } + return ret; +} + +int run_stdlib(int min, int max) +{ + int test; + int tmp; + int ret = 0; + void *p1, *p2; + + for (test = min; test >= 0 && test <= max; test++) { + int llen = 0; // line length + + /* avoid leaving empty lines below, this will insert holes into + * test numbers. + */ + switch (test + __LINE__ + 1) { + CASE_TEST(getenv_TERM); EXPECT_STRNZ(1, getenv("TERM")); break; + CASE_TEST(getenv_blah); EXPECT_STRZR(1, getenv("blah")); break; + CASE_TEST(setcmp_blah_blah); EXPECT_EQ(1, strcmp("blah", "blah"), 0); break; + CASE_TEST(setcmp_blah_blah2); EXPECT_NE(1, strcmp("blah", "blah2"), 0); break; + CASE_TEST(setncmp_blah_blah); EXPECT_EQ(1, strncmp("blah", "blah", 10), 0); break; + CASE_TEST(setncmp_blah_blah4); EXPECT_EQ(1, strncmp("blah", "blah4", 4), 0); break; + CASE_TEST(setncmp_blah_blah5); EXPECT_NE(1, strncmp("blah", "blah5", 5), 0); break; + CASE_TEST(setncmp_blah_blah6); EXPECT_NE(1, strncmp("blah", "blah6", 6), 0); break; + CASE_TEST(strchr_foobar_o); EXPECT_STREQ(1, strchr("foobar", 'o'), "oobar"); break; + CASE_TEST(strchr_foobar_z); EXPECT_STRZR(1, strchr("foobar", 'z')); break; + CASE_TEST(strrchr_foobar_o); EXPECT_STREQ(1, strrchr("foobar", 'o'), "obar"); break; + CASE_TEST(strrchr_foobar_z); EXPECT_STRZR(1, strrchr("foobar", 'z')); break; + case __LINE__: + return ret; /* must be last */ + /* note: do not set any defaults so as to permit holes above */ + } + } + return ret; +} + +/* prepare what needs to be prepared for pid 1 (stdio, /dev, /proc, etc) */ +int prepare(void) +{ + struct stat stat_buf; + + /* It's possible that /dev doesn't even exist or was not mounted, so + * we'll try to create it, mount it, or create minimal entries into it. + * We want at least /dev/null and /dev/console. + */ + if (stat("/dev/.", &stat_buf) == 0 || mkdir("/dev", 0755) == 0) { + if (stat("/dev/console", &stat_buf) != 0 || + stat("/dev/null", &stat_buf) != 0) { + /* try devtmpfs first, otherwise fall back to manual creation */ + if (mount("/dev", "/dev", "devtmpfs", 0, 0) != 0) { + mknod("/dev/console", 0600 | S_IFCHR, makedev(5, 1)); + mknod("/dev/null", 0666 | S_IFCHR, makedev(1, 3)); + } + } + } + + /* If no /dev/console was found before calling init, stdio is closed so + * we need to reopen it from /dev/console. If it failed above, it will + * still fail here and we cannot emit a message anyway. + */ + if (close(dup(1)) == -1) { + int fd = open("/dev/console", O_RDWR); + + if (fd >= 0) { + if (fd != 0) + dup2(fd, 0); + if (fd != 1) + dup2(fd, 1); + if (fd != 2) + dup2(fd, 2); + if (fd > 2) + close(fd); + puts("\nSuccessfully reopened /dev/console."); + } + } + + /* try to mount /proc if not mounted. Silently fail otherwise */ + if (stat("/proc/.", &stat_buf) == 0 || mkdir("/proc", 0755) == 0) { + if (stat("/proc/self", &stat_buf) != 0) + mount("/proc", "/proc", "proc", 0, 0); + } + + return 0; +} + +/* This is the definition of known test names, with their functions */ +static struct test test_names[] = { + /* add new tests here */ + { .name = "syscall", .func = run_syscall }, + { .name = "stdlib", .func = run_stdlib }, + { 0 } +}; + +int main(int argc, char **argv, char **envp) +{ + int min = 0; + int max = __INT_MAX__; + int ret = 0; + int err; + int idx; + char *test; + + environ = envp; + + /* when called as init, it's possible that no console was opened, for + * example if no /dev file system was provided. We'll check that fd#1 + * was opened, and if not we'll attempt to create and open /dev/console + * and /dev/null that we'll use for later tests. + */ + if (getpid() == 1) + prepare(); + + /* the definition of a series of tests comes from either argv[1] or the + * "NOLIBC_TEST" environment variable. It's made of a comma-delimited + * series of test names and optional ranges: + * syscall:5-15[:.*],stdlib:8-10 + */ + test = argv[1]; + if (!test) + test = getenv("NOLIBC_TEST"); + + if (test) { + char *comma, *colon, *dash, *value; + + do { + comma = strchr(test, ','); + if (comma) + *(comma++) = '\0'; + + colon = strchr(test, ':'); + if (colon) + *(colon++) = '\0'; + + for (idx = 0; test_names[idx].name; idx++) { + if (strcmp(test, test_names[idx].name) == 0) + break; + } + + if (test_names[idx].name) { + /* The test was named, it will be called at least + * once. We may have an optional range at <colon> + * here, which defaults to the full range. + */ + do { + min = 0; max = __INT_MAX__; + value = colon; + if (value && *value) { + colon = strchr(value, ':'); + if (colon) + *(colon++) = '\0'; + + dash = strchr(value, '-'); + if (dash) + *(dash++) = '\0'; + + /* support :val: :min-max: :min-: :-max: */ + if (*value) + min = atoi(value); + if (!dash) + max = min; + else if (*dash) + max = atoi(dash); + + value = colon; + } + + /* now's time to call the test */ + printf("Running test '%s'\n", test_names[idx].name); + err = test_names[idx].func(min, max); + ret += err; + printf("Errors during this test: %d\n\n", err); + } while (colon && *colon); + } else + printf("Ignoring unknown test name '%s'\n", test); + + test = comma; + } while (test && *test); + } else { + /* no test mentioned, run everything */ + for (idx = 0; test_names[idx].name; idx++) { + printf("Running test '%s'\n", test_names[idx].name); + err = test_names[idx].func(min, max); + ret += err; + printf("Errors during this test: %d\n\n", err); + } + } + + printf("Total number of errors: %d\n", ret); + + if (getpid() == 1) { + /* we're running as init, there's no other process on the + * system, thus likely started from a VM for a quick check. + * Exiting will provoke a kernel panic that may be reported + * as an error by Qemu or the hypervisor, while stopping + * cleanly will often be reported as a success. This allows + * to use the output of this program for bisecting kernels. + */ + printf("Leaving init with final status: %d\n", !!ret); + if (ret == 0) + reboot(LINUX_REBOOT_CMD_POWER_OFF); +#if defined(__x86_64__) + /* QEMU started with "-device isa-debug-exit -no-reboot" will + * exit with status code 2N+1 when N is written to 0x501. We + * hard-code the syscall here as it's arch-dependent. + */ +#if defined(_NOLIBC_SYS_H) + else if (my_syscall3(__NR_ioperm, 0x501, 1, 1) == 0) +#else + else if (ioperm(0x501, 1, 1) == 0) +#endif + asm volatile ("outb %%al, %%dx" :: "d"(0x501), "a"(0)); + /* if it does nothing, fall back to the regular panic */ +#endif + } + + printf("Exiting with status %d\n", !!ret); + return !!ret; +} diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h index 363b98228301..5d3440f474dd 100644 --- a/tools/virtio/linux/virtio.h +++ b/tools/virtio/linux/virtio.h @@ -14,6 +14,7 @@ struct virtio_device { u64 features; struct list_head vqs; spinlock_t vqs_list_lock; + const struct virtio_config_ops *config; }; struct virtqueue { @@ -23,7 +24,9 @@ struct virtqueue { struct virtio_device *vdev; unsigned int index; unsigned int num_free; + unsigned int num_max; void *priv; + bool reset; }; /* Interfaces exported by virtio_ring. */ diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h index f2640e505c4e..2a8a70e2a950 100644 --- a/tools/virtio/linux/virtio_config.h +++ b/tools/virtio/linux/virtio_config.h @@ -3,6 +3,11 @@ #include <linux/virtio.h> #include <uapi/linux/virtio_config.h> +struct virtio_config_ops { + int (*disable_vq_and_reset)(struct virtqueue *vq); + int (*enable_vq_after_reset)(struct virtqueue *vq); +}; + /* * __virtio_test_bit - helper to test feature bits. For use by transports. * Devices should normally use virtio_has_feature, |