Diffstat (limited to 'tools')
59 files changed, 4170 insertions(+), 449 deletions(-)
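The bulk of the additions below are the new tools/gpio utilities (lsgpio, gpio-hammer, gpio-event-mon) plus their hookup into the top-level tools/Makefile. As a rough sketch of how one might build and try them from a kernel source tree — the gpiochip name and line offset mirror the usage strings in the patch, and running the tools typically requires root:

    # Build the GPIO utilities added under tools/gpio/
    make -C tools/gpio
    # List GPIO chips and their lines
    tools/gpio/lsgpio
    # Toggle line 4 of gpiochip0 about once per second (Ctrl-C to stop)
    tools/gpio/gpio-hammer -n gpiochip0 -o 4
    # Report rising and falling edge events on the same line
    tools/gpio/gpio-event-mon -n gpiochip0 -o 4 -r -f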
diff --git a/tools/Makefile b/tools/Makefile index f10b64d8c674..daa8fb3e4363 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -85,7 +85,7 @@ tmon: FORCE freefall: FORCE $(call descend,laptop/$@) -all: acpi cgroup cpupower hv firewire lguest \ +all: acpi cgroup cpupower gpio hv firewire lguest \ perf selftests turbostat usb \ virtio vm net x86_energy_perf_policy \ tmon freefall objtool @@ -96,7 +96,7 @@ acpi_install: cpupower_install: $(call descend,power/$(@:_install=),install) -cgroup_install firewire_install hv_install lguest_install perf_install usb_install virtio_install vm_install net_install objtool_install: +cgroup_install firewire_install gpio_install hv_install lguest_install perf_install usb_install virtio_install vm_install net_install objtool_install: $(call descend,$(@:_install=),install) selftests_install: @@ -114,7 +114,8 @@ freefall_install: kvm_stat_install: $(call descend,kvm/$(@:_install=),install) -install: acpi_install cgroup_install cpupower_install hv_install firewire_install lguest_install \ +install: acpi_install cgroup_install cpupower_install gpio_install \ + hv_install firewire_install lguest_install \ perf_install selftests_install turbostat_install usb_install \ virtio_install vm_install net_install x86_energy_perf_policy_install \ tmon_install freefall_install objtool_install kvm_stat_install diff --git a/tools/gpio/Build b/tools/gpio/Build new file mode 100644 index 000000000000..620c1937d957 --- /dev/null +++ b/tools/gpio/Build @@ -0,0 +1,3 @@ +lsgpio-y += lsgpio.o gpio-utils.o +gpio-hammer-y += gpio-hammer.o gpio-utils.o +gpio-event-mon-y += gpio-event-mon.o gpio-utils.o diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile index c155d6bc47a7..250a891e6ef0 100644 --- a/tools/gpio/Makefile +++ b/tools/gpio/Makefile @@ -1,12 +1,75 @@ +include ../scripts/Makefile.include + +bindir ?= /usr/bin + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +endif + +# Do not use make's built-in rules +# (this improves performance and avoids hard-to-debug behaviour); +MAKEFLAGS += -r + CC = $(CROSS_COMPILE)gcc -CFLAGS += -O2 -Wall -g -D_GNU_SOURCE +LD = $(CROSS_COMPILE)ld +CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include + +ALL_TARGETS := lsgpio gpio-hammer gpio-event-mon +ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS)) + +all: $(ALL_PROGRAMS) -all: lsgpio +export srctree OUTPUT CC LD CFLAGS +include $(srctree)/tools/build/Makefile.include -lsgpio: lsgpio.o gpio-utils.o +# +# We need the following to be outside of kernel tree +# +$(OUTPUT)include/linux/gpio.h: ../../include/uapi/linux/gpio.h + mkdir -p $(OUTPUT)include/linux 2>&1 || true + ln -sf $(CURDIR)/../../include/uapi/linux/gpio.h $@ -%.o: %.c gpio-utils.h +prepare: $(OUTPUT)include/linux/gpio.h + +# +# lsgpio +# +LSGPIO_IN := $(OUTPUT)lsgpio-in.o +$(LSGPIO_IN): prepare FORCE + $(Q)$(MAKE) $(build)=lsgpio +$(OUTPUT)lsgpio: $(LSGPIO_IN) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ + +# +# gpio-hammer +# +GPIO_HAMMER_IN := $(OUTPUT)gpio-hammer-in.o +$(GPIO_HAMMER_IN): prepare FORCE + $(Q)$(MAKE) $(build)=gpio-hammer +$(OUTPUT)gpio-hammer: $(GPIO_HAMMER_IN) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ + +# +# gpio-event-mon +# +GPIO_EVENT_MON_IN := $(OUTPUT)gpio-event-mon-in.o +$(GPIO_EVENT_MON_IN): prepare FORCE + $(Q)$(MAKE) $(build)=gpio-event-mon +$(OUTPUT)gpio-event-mon: $(GPIO_EVENT_MON_IN) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ -.PHONY: clean clean: - rm -f *.o lsgpio + rm -f $(ALL_PROGRAMS) + rm -f 
$(OUTPUT)include/linux/gpio.h + find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete + +install: $(ALL_PROGRAMS) + install -d -m 755 $(DESTDIR)$(bindir); \ + for program in $(ALL_PROGRAMS); do \ + install $$program $(DESTDIR)$(bindir); \ + done + +FORCE: + +.PHONY: all install clean FORCE prepare diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c new file mode 100644 index 000000000000..448ed96b3b4f --- /dev/null +++ b/tools/gpio/gpio-event-mon.c @@ -0,0 +1,192 @@ +/* + * gpio-hammer - example swiss army knife to shake GPIO lines on a system + * + * Copyright (C) 2016 Linus Walleij + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * Usage: + * gpio-event-mon -n <device-name> -o <offset> + */ + +#include <unistd.h> +#include <stdlib.h> +#include <stdbool.h> +#include <stdio.h> +#include <dirent.h> +#include <errno.h> +#include <string.h> +#include <poll.h> +#include <fcntl.h> +#include <getopt.h> +#include <inttypes.h> +#include <sys/ioctl.h> +#include <linux/gpio.h> + +int monitor_device(const char *device_name, + unsigned int line, + u_int32_t handleflags, + u_int32_t eventflags, + unsigned int loops) +{ + struct gpioevent_request req; + struct gpiohandle_data data; + char *chrdev_name; + int fd; + int ret; + int i = 0; + + ret = asprintf(&chrdev_name, "/dev/%s", device_name); + if (ret < 0) + return -ENOMEM; + + fd = open(chrdev_name, 0); + if (fd == -1) { + ret = -errno; + fprintf(stderr, "Failed to open %s\n", chrdev_name); + goto exit_close_error; + } + + req.lineoffset = line; + req.handleflags = handleflags; + req.eventflags = eventflags; + strcpy(req.consumer_label, "gpio-event-mon"); + + ret = ioctl(fd, GPIO_GET_LINEEVENT_IOCTL, &req); + if (ret == -1) { + ret = -errno; + fprintf(stderr, "Failed to issue GET EVENT " + "IOCTL (%d)\n", + ret); + goto exit_close_error; + } + + /* Read initial states */ + ret = ioctl(req.fd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, &data); + if (ret == -1) { + ret = -errno; + fprintf(stderr, "Failed to issue GPIOHANDLE GET LINE " + "VALUES IOCTL (%d)\n", + ret); + goto exit_close_error; + } + + fprintf(stdout, "Monitoring line %d on %s\n", line, device_name); + fprintf(stdout, "Initial line value: %d\n", data.values[0]); + + while (1) { + struct gpioevent_data event; + + ret = read(req.fd, &event, sizeof(event)); + if (ret == -1) { + if (errno == -EAGAIN) { + fprintf(stderr, "nothing available\n"); + continue; + } else { + ret = -errno; + fprintf(stderr, "Failed to read event (%d)\n", + ret); + break; + } + } + + if (ret != sizeof(event)) { + fprintf(stderr, "Reading event failed\n"); + ret = -EIO; + break; + } + fprintf(stdout, "GPIO EVENT %" PRIu64 ": ", event.timestamp); + switch (event.id) { + case GPIOEVENT_EVENT_RISING_EDGE: + fprintf(stdout, "rising edge"); + break; + case GPIOEVENT_EVENT_FALLING_EDGE: + fprintf(stdout, "falling edge"); + break; + default: + fprintf(stdout, "unknown event"); + } + fprintf(stdout, "\n"); + + i++; + if (i == loops) + break; + } + +exit_close_error: + if (close(fd) == -1) + perror("Failed to close GPIO character device file"); + free(chrdev_name); + return ret; +} + +void print_usage(void) +{ + fprintf(stderr, "Usage: gpio-event-mon [options]...\n" + "Listen to events on GPIO lines, 0->1 1->0\n" + " -n <name> Listen on GPIOs on a named device (must be stated)\n" + " -o <n> Offset to monitor\n" + " -d Set line as open drain\n" + " 
-s Set line as open source\n" + " -r Listen for rising edges\n" + " -f Listen for falling edges\n" + " [-c <n>] Do <n> loops (optional, infinite loop if not stated)\n" + " -? This helptext\n" + "\n" + "Example:\n" + "gpio-event-mon -n gpiochip0 -o 4 -r -f\n" + ); +} + +int main(int argc, char **argv) +{ + const char *device_name = NULL; + unsigned int line = -1; + unsigned int loops = 0; + u_int32_t handleflags = GPIOHANDLE_REQUEST_INPUT; + u_int32_t eventflags = 0; + int c; + + while ((c = getopt(argc, argv, "c:n:o:dsrf?")) != -1) { + switch (c) { + case 'c': + loops = strtoul(optarg, NULL, 10); + break; + case 'n': + device_name = optarg; + break; + case 'o': + line = strtoul(optarg, NULL, 10); + break; + case 'd': + handleflags |= GPIOHANDLE_REQUEST_OPEN_DRAIN; + break; + case 's': + handleflags |= GPIOHANDLE_REQUEST_OPEN_SOURCE; + break; + case 'r': + eventflags |= GPIOEVENT_REQUEST_RISING_EDGE; + break; + case 'f': + eventflags |= GPIOEVENT_REQUEST_FALLING_EDGE; + break; + case '?': + print_usage(); + return -1; + } + } + + if (!device_name || line == -1) { + print_usage(); + return -1; + } + if (!eventflags) { + printf("No flags specified, listening on both rising and " + "falling edges\n"); + eventflags = GPIOEVENT_REQUEST_BOTH_EDGES; + } + return monitor_device(device_name, line, handleflags, + eventflags, loops); +} diff --git a/tools/gpio/gpio-hammer.c b/tools/gpio/gpio-hammer.c new file mode 100644 index 000000000000..37b3f141053d --- /dev/null +++ b/tools/gpio/gpio-hammer.c @@ -0,0 +1,189 @@ +/* + * gpio-hammer - example swiss army knife to shake GPIO lines on a system + * + * Copyright (C) 2016 Linus Walleij + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. 
+ * + * Usage: + * gpio-hammer -n <device-name> -o <offset1> -o <offset2> + */ + +#include <unistd.h> +#include <stdlib.h> +#include <stdbool.h> +#include <stdio.h> +#include <dirent.h> +#include <errno.h> +#include <string.h> +#include <poll.h> +#include <fcntl.h> +#include <getopt.h> +#include <sys/ioctl.h> +#include <linux/gpio.h> + +int hammer_device(const char *device_name, unsigned int *lines, int nlines, + unsigned int loops) +{ + struct gpiohandle_request req; + struct gpiohandle_data data; + char *chrdev_name; + char swirr[] = "-\\|/"; + int fd; + int ret; + int i, j; + unsigned int iteration = 0; + + ret = asprintf(&chrdev_name, "/dev/%s", device_name); + if (ret < 0) + return -ENOMEM; + + fd = open(chrdev_name, 0); + if (fd == -1) { + ret = -errno; + fprintf(stderr, "Failed to open %s\n", chrdev_name); + goto exit_close_error; + } + + /* Request lines as output */ + for (i = 0; i < nlines; i++) + req.lineoffsets[i] = lines[i]; + req.flags = GPIOHANDLE_REQUEST_OUTPUT; /* Request as output */ + strcpy(req.consumer_label, "gpio-hammer"); + req.lines = nlines; + ret = ioctl(fd, GPIO_GET_LINEHANDLE_IOCTL, &req); + if (ret == -1) { + ret = -errno; + fprintf(stderr, "Failed to issue GET LINEHANDLE " + "IOCTL (%d)\n", + ret); + goto exit_close_error; + } + + /* Read initial states */ + ret = ioctl(req.fd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, &data); + if (ret == -1) { + ret = -errno; + fprintf(stderr, "Failed to issue GPIOHANDLE GET LINE " + "VALUES IOCTL (%d)\n", + ret); + goto exit_close_error; + } + fprintf(stdout, "Hammer lines ["); + for (i = 0; i < nlines; i++) { + fprintf(stdout, "%d", lines[i]); + if (i != (nlines - 1)) + fprintf(stdout, ", "); + } + fprintf(stdout, "] on %s, initial states: [", device_name); + for (i = 0; i < nlines; i++) { + fprintf(stdout, "%d", data.values[i]); + if (i != (nlines - 1)) + fprintf(stdout, ", "); + } + fprintf(stdout, "]\n"); + + /* Hammertime! */ + j = 0; + while (1) { + /* Invert all lines so we blink */ + for (i = 0; i < nlines; i++) + data.values[i] = !data.values[i]; + + ret = ioctl(req.fd, GPIOHANDLE_SET_LINE_VALUES_IOCTL, &data); + if (ret == -1) { + ret = -errno; + fprintf(stderr, "Failed to issue GPIOHANDLE SET LINE " + "VALUES IOCTL (%d)\n", + ret); + goto exit_close_error; + } + /* Re-read values to get status */ + ret = ioctl(req.fd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, &data); + if (ret == -1) { + ret = -errno; + fprintf(stderr, "Failed to issue GPIOHANDLE GET LINE " + "VALUES IOCTL (%d)\n", + ret); + goto exit_close_error; + } + + fprintf(stdout, "[%c] ", swirr[j]); + j++; + if (j == sizeof(swirr)-1) + j = 0; + + fprintf(stdout, "["); + for (i = 0; i < nlines; i++) { + fprintf(stdout, "%d: %d", lines[i], data.values[i]); + if (i != (nlines - 1)) + fprintf(stdout, ", "); + } + fprintf(stdout, "]\r"); + fflush(stdout); + sleep(1); + iteration++; + if (loops && iteration == loops) + break; + } + fprintf(stdout, "\n"); + ret = 0; + +exit_close_error: + if (close(fd) == -1) + perror("Failed to close GPIO character device file"); + free(chrdev_name); + return ret; +} + +void print_usage(void) +{ + fprintf(stderr, "Usage: gpio-hammer [options]...\n" + "Hammer GPIO lines, 0->1->0->1...\n" + " -n <name> Hammer GPIOs on a named device (must be stated)\n" + " -o <n> Offset[s] to hammer, at least one, several can be stated\n" + " [-c <n>] Do <n> loops (optional, infinite loop if not stated)\n" + " -? 
This helptext\n" + "\n" + "Example:\n" + "gpio-hammer -n gpiochip0 -o 4\n" + ); +} + +int main(int argc, char **argv) +{ + const char *device_name = NULL; + unsigned int lines[GPIOHANDLES_MAX]; + unsigned int loops = 0; + int nlines; + int c; + int i; + + i = 0; + while ((c = getopt(argc, argv, "c:n:o:?")) != -1) { + switch (c) { + case 'c': + loops = strtoul(optarg, NULL, 10); + break; + case 'n': + device_name = optarg; + break; + case 'o': + lines[i] = strtoul(optarg, NULL, 10); + i++; + break; + case '?': + print_usage(); + return -1; + } + } + nlines = i; + + if (!device_name || !nlines) { + print_usage(); + return -1; + } + return hammer_device(device_name, lines, nlines, loops); +} diff --git a/tools/hv/bondvf.sh b/tools/hv/bondvf.sh new file mode 100755 index 000000000000..8e960234013d --- /dev/null +++ b/tools/hv/bondvf.sh @@ -0,0 +1,193 @@ +#!/bin/bash + +# This example script creates bonding network devices based on synthetic NIC +# (the virtual network adapter usually provided by Hyper-V) and the matching +# VF NIC (SRIOV virtual function). So the synthetic NIC and VF NIC can +# function as one network device, and fail over to the synthetic NIC if VF is +# down. +# +# Usage: +# - After configured vSwitch and vNIC with SRIOV, start Linux virtual +# machine (VM) +# - Run this scripts on the VM. It will create configuration files in +# distro specific directory. +# - Reboot the VM, so that the bonding config are enabled. +# +# The config files are DHCP by default. You may edit them if you need to change +# to Static IP or change other settings. +# + +sysdir=/sys/class/net +netvsc_cls={f8615163-df3e-46c5-913f-f2d2f965ed0e} +bondcnt=0 + +# Detect Distro +if [ -f /etc/redhat-release ]; +then + cfgdir=/etc/sysconfig/network-scripts + distro=redhat +elif grep -q 'Ubuntu' /etc/issue +then + cfgdir=/etc/network + distro=ubuntu +elif grep -q 'SUSE' /etc/issue +then + cfgdir=/etc/sysconfig/network + distro=suse +else + echo "Unsupported Distro" + exit 1 +fi + +echo Detected Distro: $distro, or compatible + +# Get a list of ethernet names +list_eth=(`cd $sysdir && ls -d */ | cut -d/ -f1 | grep -v bond`) +eth_cnt=${#list_eth[@]} + +echo List of net devices: + +# Get the MAC addresses +for (( i=0; i < $eth_cnt; i++ )) +do + list_mac[$i]=`cat $sysdir/${list_eth[$i]}/address` + echo ${list_eth[$i]}, ${list_mac[$i]} +done + +# Find NIC with matching MAC +for (( i=0; i < $eth_cnt-1; i++ )) +do + for (( j=i+1; j < $eth_cnt; j++ )) + do + if [ "${list_mac[$i]}" = "${list_mac[$j]}" ] + then + list_match[$i]=${list_eth[$j]} + break + fi + done +done + +function create_eth_cfg_redhat { + local fn=$cfgdir/ifcfg-$1 + + rm -f $fn + echo DEVICE=$1 >>$fn + echo TYPE=Ethernet >>$fn + echo BOOTPROTO=none >>$fn + echo ONBOOT=yes >>$fn + echo NM_CONTROLLED=no >>$fn + echo PEERDNS=yes >>$fn + echo IPV6INIT=yes >>$fn + echo MASTER=$2 >>$fn + echo SLAVE=yes >>$fn +} + +function create_eth_cfg_pri_redhat { + create_eth_cfg_redhat $1 $2 +} + +function create_bond_cfg_redhat { + local fn=$cfgdir/ifcfg-$1 + + rm -f $fn + echo DEVICE=$1 >>$fn + echo TYPE=Bond >>$fn + echo BOOTPROTO=dhcp >>$fn + echo ONBOOT=yes >>$fn + echo NM_CONTROLLED=no >>$fn + echo PEERDNS=yes >>$fn + echo IPV6INIT=yes >>$fn + echo BONDING_MASTER=yes >>$fn + echo BONDING_OPTS=\"mode=active-backup miimon=100 primary=$2\" >>$fn +} + +function create_eth_cfg_ubuntu { + local fn=$cfgdir/interfaces + + echo $'\n'auto $1 >>$fn + echo iface $1 inet manual >>$fn + echo bond-master $2 >>$fn +} + +function create_eth_cfg_pri_ubuntu { + local 
fn=$cfgdir/interfaces + + create_eth_cfg_ubuntu $1 $2 + echo bond-primary $1 >>$fn +} + +function create_bond_cfg_ubuntu { + local fn=$cfgdir/interfaces + + echo $'\n'auto $1 >>$fn + echo iface $1 inet dhcp >>$fn + echo bond-mode active-backup >>$fn + echo bond-miimon 100 >>$fn + echo bond-slaves none >>$fn +} + +function create_eth_cfg_suse { + local fn=$cfgdir/ifcfg-$1 + + rm -f $fn + echo BOOTPROTO=none >>$fn + echo STARTMODE=auto >>$fn +} + +function create_eth_cfg_pri_suse { + create_eth_cfg_suse $1 +} + +function create_bond_cfg_suse { + local fn=$cfgdir/ifcfg-$1 + + rm -f $fn + echo BOOTPROTO=dhcp >>$fn + echo STARTMODE=auto >>$fn + echo BONDING_MASTER=yes >>$fn + echo BONDING_SLAVE_0=$2 >>$fn + echo BONDING_SLAVE_1=$3 >>$fn + echo BONDING_MODULE_OPTS=\'mode=active-backup miimon=100 primary=$2\' >>$fn +} + +function create_bond { + local bondname=bond$bondcnt + local primary + local secondary + + local class_id1=`cat $sysdir/$1/device/class_id 2>/dev/null` + local class_id2=`cat $sysdir/$2/device/class_id 2>/dev/null` + + if [ "$class_id1" = "$netvsc_cls" ] + then + primary=$2 + secondary=$1 + elif [ "$class_id2" = "$netvsc_cls" ] + then + primary=$1 + secondary=$2 + else + return 0 + fi + + echo $'\nBond name:' $bondname + + echo configuring $primary + create_eth_cfg_pri_$distro $primary $bondname + + echo configuring $secondary + create_eth_cfg_$distro $secondary $bondname + + echo creating: $bondname with primary slave: $primary + create_bond_cfg_$distro $bondname $primary $secondary + + let bondcnt=bondcnt+1 +} + +for (( i=0; i < $eth_cnt-1; i++ )) +do + if [ -n "${list_match[$i]}" ] + then + create_bond ${list_eth[$i]} ${list_match[$i]} + fi +done diff --git a/tools/iio/Makefile b/tools/iio/Makefile index 3a7a54f59713..5446d625e17d 100644 --- a/tools/iio/Makefile +++ b/tools/iio/Makefile @@ -1,16 +1,31 @@ CC = $(CROSS_COMPILE)gcc CFLAGS += -Wall -g -D_GNU_SOURCE -all: iio_event_monitor lsiio generic_buffer +BINDIR=usr/bin +INSTALL_PROGRAM=install -m 755 -p +DEL_FILE=rm -f + +all: iio_event_monitor lsiio iio_generic_buffer iio_event_monitor: iio_event_monitor.o iio_utils.o lsiio: lsiio.o iio_utils.o -generic_buffer: generic_buffer.o iio_utils.o +iio_generic_buffer: iio_generic_buffer.o iio_utils.o %.o: %.c iio_utils.h +install: + - mkdir -p $(INSTALL_ROOT)/$(BINDIR) + - $(INSTALL_PROGRAM) "iio_event_monitor" "$(INSTALL_ROOT)/$(BINDIR)/iio_event_monitor" + - $(INSTALL_PROGRAM) "lsiio" "$(INSTALL_ROOT)/$(BINDIR)/lsiio" + - $(INSTALL_PROGRAM) "iio_generic_buffer" "$(INSTALL_ROOT)/$(BINDIR)/iio_generic_buffer" + +uninstall: + $(DEL_FILE) "$(INSTALL_ROOT)/$(BINDIR)/iio_event_monitor" + $(DEL_FILE) "$(INSTALL_ROOT)/$(BINDIR)/lsiio" + $(DEL_FILE) "$(INSTALL_ROOT)/$(BINDIR)/iio_generic_buffer" + .PHONY: clean clean: - rm -f *.o iio_event_monitor lsiio generic_buffer + rm -f *.o iio_event_monitor lsiio iio_generic_buffer diff --git a/tools/iio/generic_buffer.c b/tools/iio/iio_generic_buffer.c index 2429c78de940..0e8a1f7a292d 100644 --- a/tools/iio/generic_buffer.c +++ b/tools/iio/iio_generic_buffer.c @@ -32,6 +32,8 @@ #include <endian.h> #include <getopt.h> #include <inttypes.h> +#include <stdbool.h> +#include <signal.h> #include "iio_utils.h" /** @@ -249,11 +251,82 @@ void print_usage(void) " -e Disable wait for event (new data)\n" " -g Use trigger-less mode\n" " -l <n> Set buffer length to n samples\n" - " -n <name> Set device name (mandatory)\n" - " -t <name> Set trigger name\n" + " --device-name -n <name>\n" + " --device-num -N <num>\n" + " Set device by name or number (mandatory)\n" 
+ " --trigger-name -t <name>\n" + " --trigger-num -T <num>\n" + " Set trigger by name or number\n" " -w <n> Set delay between reads in us (event-less mode)\n"); } +enum autochan autochannels = AUTOCHANNELS_DISABLED; +char *dev_dir_name = NULL; +char *buf_dir_name = NULL; +bool current_trigger_set = false; + +void cleanup(void) +{ + int ret; + + /* Disable trigger */ + if (dev_dir_name && current_trigger_set) { + /* Disconnect the trigger - just write a dummy name. */ + ret = write_sysfs_string("trigger/current_trigger", + dev_dir_name, "NULL"); + if (ret < 0) + fprintf(stderr, "Failed to disable trigger: %s\n", + strerror(-ret)); + current_trigger_set = false; + } + + /* Disable buffer */ + if (buf_dir_name) { + ret = write_sysfs_int("enable", buf_dir_name, 0); + if (ret < 0) + fprintf(stderr, "Failed to disable buffer: %s\n", + strerror(-ret)); + } + + /* Disable channels if auto-enabled */ + if (dev_dir_name && autochannels == AUTOCHANNELS_ACTIVE) { + ret = enable_disable_all_channels(dev_dir_name, 0); + if (ret) + fprintf(stderr, "Failed to disable all channels\n"); + autochannels = AUTOCHANNELS_DISABLED; + } +} + +void sig_handler(int signum) +{ + fprintf(stderr, "Caught signal %d\n", signum); + cleanup(); + exit(-signum); +} + +void register_cleanup(void) +{ + struct sigaction sa = { .sa_handler = sig_handler }; + const int signums[] = { SIGINT, SIGTERM, SIGABRT }; + int ret, i; + + for (i = 0; i < ARRAY_SIZE(signums); ++i) { + ret = sigaction(signums[i], &sa, NULL); + if (ret) { + perror("Failed to register signal handler"); + exit(-1); + } + } +} + +static const struct option longopts[] = { + { "device-name", 1, 0, 'n' }, + { "device-num", 1, 0, 'N' }, + { "trigger-name", 1, 0, 't' }, + { "trigger-num", 1, 0, 'T' }, + { }, +}; + int main(int argc, char **argv) { unsigned long num_loops = 2; @@ -261,26 +334,25 @@ int main(int argc, char **argv) unsigned long buf_len = 128; int ret, c, i, j, toread; - int fp; + int fp = -1; - int num_channels; + int num_channels = 0; char *trigger_name = NULL, *device_name = NULL; - char *dev_dir_name, *buf_dir_name; - int datardytrigger = 1; - char *data; + char *data = NULL; ssize_t read_size; - int dev_num, trig_num; - char *buffer_access; + int dev_num = -1, trig_num = -1; + char *buffer_access = NULL; int scan_size; int noevents = 0; int notrigger = 0; - enum autochan autochannels = AUTOCHANNELS_DISABLED; char *dummy; struct iio_channel_info *channels; - while ((c = getopt(argc, argv, "ac:egl:n:t:w:")) != -1) { + register_cleanup(); + + while ((c = getopt_long(argc, argv, "ac:egl:n:N:t:T:w:", longopts, NULL)) != -1) { switch (c) { case 'a': autochannels = AUTOCHANNELS_ENABLED; @@ -288,8 +360,10 @@ int main(int argc, char **argv) case 'c': errno = 0; num_loops = strtoul(optarg, &dummy, 10); - if (errno) - return -errno; + if (errno) { + ret = -errno; + goto error; + } break; case 'e': @@ -301,49 +375,102 @@ int main(int argc, char **argv) case 'l': errno = 0; buf_len = strtoul(optarg, &dummy, 10); - if (errno) - return -errno; + if (errno) { + ret = -errno; + goto error; + } break; case 'n': - device_name = optarg; + device_name = strdup(optarg); + break; + case 'N': + errno = 0; + dev_num = strtoul(optarg, &dummy, 10); + if (errno) { + ret = -errno; + goto error; + } break; case 't': - trigger_name = optarg; - datardytrigger = 0; + trigger_name = strdup(optarg); break; - case 'w': + case 'T': errno = 0; - timedelay = strtoul(optarg, &dummy, 10); + trig_num = strtoul(optarg, &dummy, 10); if (errno) return -errno; break; + case 'w': + errno = 0; + 
timedelay = strtoul(optarg, &dummy, 10); + if (errno) { + ret = -errno; + goto error; + } + break; case '?': print_usage(); - return -1; + ret = -1; + goto error; } } - if (!device_name) { - fprintf(stderr, "Device name not set\n"); - print_usage(); - return -1; - } - /* Find the device requested */ - dev_num = find_type_by_name(device_name, "iio:device"); - if (dev_num < 0) { - fprintf(stderr, "Failed to find the %s\n", device_name); - return dev_num; + if (dev_num < 0 && !device_name) { + fprintf(stderr, "Device not set\n"); + print_usage(); + ret = -1; + goto error; + } else if (dev_num >= 0 && device_name) { + fprintf(stderr, "Only one of --device-num or --device-name needs to be set\n"); + print_usage(); + ret = -1; + goto error; + } else if (dev_num < 0) { + dev_num = find_type_by_name(device_name, "iio:device"); + if (dev_num < 0) { + fprintf(stderr, "Failed to find the %s\n", device_name); + ret = dev_num; + goto error; + } } - printf("iio device number being used is %d\n", dev_num); ret = asprintf(&dev_dir_name, "%siio:device%d", iio_dir, dev_num); if (ret < 0) return -ENOMEM; + /* Fetch device_name if specified by number */ + if (!device_name) { + device_name = malloc(IIO_MAX_NAME_LENGTH); + if (!device_name) { + ret = -ENOMEM; + goto error; + } + ret = read_sysfs_string("name", dev_dir_name, device_name); + if (ret < 0) { + fprintf(stderr, "Failed to read name of device %d\n", dev_num); + goto error; + } + } - if (!notrigger) { + if (notrigger) { + printf("trigger-less mode selected\n"); + } if (trig_num >= 0) { + char *trig_dev_name; + ret = asprintf(&trig_dev_name, "%strigger%d", iio_dir, trig_num); + if (ret < 0) { + return -ENOMEM; + } + trigger_name = malloc(IIO_MAX_NAME_LENGTH); + ret = read_sysfs_string("name", trig_dev_name, trigger_name); + free(trig_dev_name); + if (ret < 0) { + fprintf(stderr, "Failed to read trigger%d name from\n", trig_num); + return ret; + } + printf("iio trigger number being used is %d\n", trig_num); + } else { if (!trigger_name) { /* * Build the trigger name. 
If it is device associated @@ -354,7 +481,7 @@ int main(int argc, char **argv) "%s-dev%d", device_name, dev_num); if (ret < 0) { ret = -ENOMEM; - goto error_free_dev_dir_name; + goto error; } } @@ -367,7 +494,7 @@ int main(int argc, char **argv) "%s-trigger", device_name); if (ret < 0) { ret = -ENOMEM; - goto error_free_dev_dir_name; + goto error; } } @@ -376,12 +503,10 @@ int main(int argc, char **argv) fprintf(stderr, "Failed to find the trigger %s\n", trigger_name); ret = trig_num; - goto error_free_triggername; + goto error; } printf("iio trigger number being used is %d\n", trig_num); - } else { - printf("trigger-less mode selected\n"); } /* @@ -392,7 +517,7 @@ int main(int argc, char **argv) if (ret) { fprintf(stderr, "Problem reading scan element information\n" "diag %s\n", dev_dir_name); - goto error_free_triggername; + goto error; } if (num_channels && autochannels == AUTOCHANNELS_ENABLED) { fprintf(stderr, "Auto-channels selected but some channels " @@ -407,7 +532,7 @@ int main(int argc, char **argv) ret = enable_disable_all_channels(dev_dir_name, 1); if (ret) { fprintf(stderr, "Failed to enable all channels\n"); - goto error_free_triggername; + goto error; } /* This flags that we need to disable the channels again */ @@ -419,12 +544,12 @@ int main(int argc, char **argv) fprintf(stderr, "Problem reading scan element " "information\n" "diag %s\n", dev_dir_name); - goto error_disable_channels; + goto error; } if (!num_channels) { fprintf(stderr, "Still no channels after " "auto-enabling, giving up\n"); - goto error_disable_channels; + goto error; } } @@ -436,7 +561,7 @@ int main(int argc, char **argv) "/*_en or pass -a to autoenable channels and " "try again.\n", dev_dir_name); ret = -ENOENT; - goto error_free_triggername; + goto error; } /* @@ -448,7 +573,7 @@ int main(int argc, char **argv) "%siio:device%d/buffer", iio_dir, dev_num); if (ret < 0) { ret = -ENOMEM; - goto error_free_channels; + goto error; } if (!notrigger) { @@ -463,34 +588,34 @@ int main(int argc, char **argv) if (ret < 0) { fprintf(stderr, "Failed to write current_trigger file\n"); - goto error_free_buf_dir_name; + goto error; } } /* Setup ring buffer parameters */ ret = write_sysfs_int("length", buf_dir_name, buf_len); if (ret < 0) - goto error_free_buf_dir_name; + goto error; /* Enable the buffer */ ret = write_sysfs_int("enable", buf_dir_name, 1); if (ret < 0) { fprintf(stderr, "Failed to enable buffer: %s\n", strerror(-ret)); - goto error_free_buf_dir_name; + goto error; } scan_size = size_from_channelarray(channels, num_channels); data = malloc(scan_size * buf_len); if (!data) { ret = -ENOMEM; - goto error_free_buf_dir_name; + goto error; } ret = asprintf(&buffer_access, "/dev/iio:device%d", dev_num); if (ret < 0) { ret = -ENOMEM; - goto error_free_data; + goto error; } /* Attempt to open non blocking the access dev */ @@ -498,7 +623,7 @@ int main(int argc, char **argv) if (fp == -1) { /* TODO: If it isn't there make the node */ ret = -errno; fprintf(stderr, "Failed to open %s\n", buffer_access); - goto error_free_buffer_access; + goto error; } for (j = 0; j < num_loops; j++) { @@ -511,7 +636,7 @@ int main(int argc, char **argv) ret = poll(&pfd, 1, -1); if (ret < 0) { ret = -errno; - goto error_close_buffer_access; + goto error; } else if (ret == 0) { continue; } @@ -536,45 +661,21 @@ int main(int argc, char **argv) num_channels); } - /* Stop the buffer */ - ret = write_sysfs_int("enable", buf_dir_name, 0); - if (ret < 0) - goto error_close_buffer_access; +error: + cleanup(); - if (!notrigger) - /* 
Disconnect the trigger - just write a dummy name. */ - ret = write_sysfs_string("trigger/current_trigger", - dev_dir_name, "NULL"); - if (ret < 0) - fprintf(stderr, "Failed to write to %s\n", - dev_dir_name); - -error_close_buffer_access: - if (close(fp) == -1) + if (fp >= 0 && close(fp) == -1) perror("Failed to close buffer"); - -error_free_buffer_access: free(buffer_access); -error_free_data: free(data); -error_free_buf_dir_name: free(buf_dir_name); -error_free_channels: for (i = num_channels - 1; i >= 0; i--) { free(channels[i].name); free(channels[i].generic_name); } free(channels); -error_free_triggername: - if (datardytrigger) - free(trigger_name); -error_disable_channels: - if (autochannels == AUTOCHANNELS_ACTIVE) { - ret = enable_disable_all_channels(dev_dir_name, 0); - if (ret) - fprintf(stderr, "Failed to disable all channels\n"); - } -error_free_dev_dir_name: + free(trigger_name); + free(device_name); free(dev_dir_name); return ret; diff --git a/tools/lib/api/Build b/tools/lib/api/Build index 954c644f7ad9..6e2373db5598 100644 --- a/tools/lib/api/Build +++ b/tools/lib/api/Build @@ -2,3 +2,8 @@ libapi-y += fd/ libapi-y += fs/ libapi-y += cpu.o libapi-y += debug.o +libapi-y += str_error_r.o + +$(OUTPUT)str_error_r.o: ../str_error_r.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 32e6b6bc6f7d..b699aea9a025 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -37,6 +37,10 @@ #include "libbpf.h" #include "bpf.h" +#ifndef EM_BPF +#define EM_BPF 247 +#endif + #define __printf(a, b) __attribute__((format(printf, a, b))) __printf(1, 2) @@ -439,7 +443,8 @@ static int bpf_object__elf_init(struct bpf_object *obj) } ep = &obj->efile.ehdr; - if ((ep->e_type != ET_REL) || (ep->e_machine != 0)) { + /* Old LLVM set e_machine to EM_NONE */ + if ((ep->e_type != ET_REL) || (ep->e_machine && (ep->e_machine != EM_BPF))) { pr_warning("%s is not an eBPF object file\n", obj->path); err = -LIBBPF_ERRNO__FORMAT; diff --git a/tools/objtool/.gitignore b/tools/objtool/.gitignore index a0b3128bb31f..d3102c865a95 100644 --- a/tools/objtool/.gitignore +++ b/tools/objtool/.gitignore @@ -1,2 +1,3 @@ arch/x86/insn/inat-tables.c objtool +fixdep diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index 0b437700f688..041b493ad3ab 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -51,7 +51,7 @@ $(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN) diff -I'^#include' arch/x86/insn/insn.h ../../arch/x86/include/asm/insn.h >/dev/null && \ diff -I'^#include' arch/x86/insn/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \ diff -I'^#include' arch/x86/insn/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \ - || echo "Warning: objtool: x86 instruction decoder differs from kernel" >&2 )) || true + || echo "warning: objtool: x86 instruction decoder differs from kernel" >&2 )) || true $(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@ diff --git a/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk b/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk index 093a892026f9..a3d2c62fd805 100644 --- a/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk +++ b/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk @@ -72,12 +72,14 @@ BEGIN { lprefix_expr = "\\((66|F2|F3)\\)" max_lprefix = 4 - # All opcodes starting with lower-case 'v' or with (v1) superscript + # All opcodes starting with lower-case 'v', 'k' or with (v1) superscript # accepts VEX prefix - vexok_opcode_expr = "^v.*" + vexok_opcode_expr = "^[vk].*" 
vexok_expr = "\\(v1\\)" # All opcodes with (v) superscript supports *only* VEX prefix vexonly_expr = "\\(v\\)" + # All opcodes with (ev) superscript supports *only* EVEX prefix + evexonly_expr = "\\(ev\\)" prefix_expr = "\\(Prefix\\)" prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" @@ -95,6 +97,7 @@ BEGIN { prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" prefix_num["VEX+1byte"] = "INAT_PFX_VEX2" prefix_num["VEX+2byte"] = "INAT_PFX_VEX3" + prefix_num["EVEX"] = "INAT_PFX_EVEX" clear_vars() } @@ -319,7 +322,9 @@ function convert_operands(count,opnd, i,j,imm,mod) flags = add_flags(flags, "INAT_MODRM") # check VEX codes - if (match(ext, vexonly_expr)) + if (match(ext, evexonly_expr)) + flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY") + else if (match(ext, vexonly_expr)) flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr)) flags = add_flags(flags, "INAT_VEXOK") diff --git a/tools/objtool/arch/x86/insn/inat.h b/tools/objtool/arch/x86/insn/inat.h index 611645e903a8..125ecd2a300d 100644 --- a/tools/objtool/arch/x86/insn/inat.h +++ b/tools/objtool/arch/x86/insn/inat.h @@ -48,6 +48,7 @@ /* AVX VEX prefixes */ #define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ #define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ +#define INAT_PFX_EVEX 15 /* EVEX prefix */ #define INAT_LSTPFX_MAX 3 #define INAT_LGCPFX_MAX 11 @@ -89,6 +90,7 @@ #define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) #define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) #define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) +#define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7)) /* Attribute making macros for attribute tables */ #define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) #define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) @@ -141,7 +143,13 @@ static inline int inat_last_prefix_id(insn_attr_t attr) static inline int inat_is_vex_prefix(insn_attr_t attr) { attr &= INAT_PFX_MASK; - return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3; + return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3 || + attr == INAT_PFX_EVEX; +} + +static inline int inat_is_evex_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_EVEX; } static inline int inat_is_vex3_prefix(insn_attr_t attr) @@ -216,6 +224,11 @@ static inline int inat_accept_vex(insn_attr_t attr) static inline int inat_must_vex(insn_attr_t attr) { - return attr & INAT_VEXONLY; + return attr & (INAT_VEXONLY | INAT_EVEXONLY); +} + +static inline int inat_must_evex(insn_attr_t attr) +{ + return attr & INAT_EVEXONLY; } #endif diff --git a/tools/objtool/arch/x86/insn/insn.c b/tools/objtool/arch/x86/insn/insn.c index 9f26eae6c9f0..ca983e2bea8b 100644 --- a/tools/objtool/arch/x86/insn/insn.c +++ b/tools/objtool/arch/x86/insn/insn.c @@ -155,14 +155,24 @@ found: /* * In 32-bits mode, if the [7:6] bits (mod bits of * ModRM) on the second byte are not 11b, it is - * LDS or LES. + * LDS or LES or BOUND. 
*/ if (X86_MODRM_MOD(b2) != 3) goto vex_end; } insn->vex_prefix.bytes[0] = b; insn->vex_prefix.bytes[1] = b2; - if (inat_is_vex3_prefix(attr)) { + if (inat_is_evex_prefix(attr)) { + b2 = peek_nbyte_next(insn_byte_t, insn, 2); + insn->vex_prefix.bytes[2] = b2; + b2 = peek_nbyte_next(insn_byte_t, insn, 3); + insn->vex_prefix.bytes[3] = b2; + insn->vex_prefix.nbytes = 4; + insn->next_byte += 4; + if (insn->x86_64 && X86_VEX_W(b2)) + /* VEX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } else if (inat_is_vex3_prefix(attr)) { b2 = peek_nbyte_next(insn_byte_t, insn, 2); insn->vex_prefix.bytes[2] = b2; insn->vex_prefix.nbytes = 3; @@ -221,7 +231,9 @@ void insn_get_opcode(struct insn *insn) m = insn_vex_m_bits(insn); p = insn_vex_p_bits(insn); insn->attr = inat_get_avx_attribute(op, m, p); - if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr)) + if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) || + (!inat_accept_vex(insn->attr) && + !inat_is_group(insn->attr))) insn->attr = 0; /* This instruction is bad */ goto end; /* VEX has only 1 byte for opcode */ } diff --git a/tools/objtool/arch/x86/insn/insn.h b/tools/objtool/arch/x86/insn/insn.h index dd12da0f4593..e23578c7b1be 100644 --- a/tools/objtool/arch/x86/insn/insn.h +++ b/tools/objtool/arch/x86/insn/insn.h @@ -91,6 +91,7 @@ struct insn { #define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ #define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ /* VEX bit fields */ +#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */ #define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ #define X86_VEX2_M 1 /* VEX2.M always 1 */ #define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ @@ -133,6 +134,13 @@ static inline int insn_is_avx(struct insn *insn) return (insn->vex_prefix.value != 0); } +static inline int insn_is_evex(struct insn *insn) +{ + if (!insn->prefixes.got) + insn_get_prefixes(insn); + return (insn->vex_prefix.nbytes == 4); +} + /* Ensure this instruction is decoded completely */ static inline int insn_complete(struct insn *insn) { @@ -144,8 +152,10 @@ static inline insn_byte_t insn_vex_m_bits(struct insn *insn) { if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ return X86_VEX2_M; - else + else if (insn->vex_prefix.nbytes == 3) /* 3 bytes VEX */ return X86_VEX3_M(insn->vex_prefix.bytes[1]); + else /* EVEX */ + return X86_EVEX_M(insn->vex_prefix.bytes[1]); } static inline insn_byte_t insn_vex_p_bits(struct insn *insn) diff --git a/tools/objtool/arch/x86/insn/x86-opcode-map.txt b/tools/objtool/arch/x86/insn/x86-opcode-map.txt index d388de72eaca..767be7c76034 100644 --- a/tools/objtool/arch/x86/insn/x86-opcode-map.txt +++ b/tools/objtool/arch/x86/insn/x86-opcode-map.txt @@ -13,12 +13,17 @@ # opcode: escape # escaped-name # EndTable # +# mnemonics that begin with lowercase 'v' accept a VEX or EVEX prefix +# mnemonics that begin with lowercase 'k' accept a VEX prefix +# #<group maps> # GrpTable: GrpXXX # reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] # EndTable # # AVX Superscripts +# (ev): this opcode requires EVEX prefix. +# (evo): this opcode is changed by EVEX prefix (EVEX opcode) # (v): this opcode requires VEX prefix. # (v1): this opcode only supports 128bit VEX. 
# @@ -137,7 +142,7 @@ AVXcode: # 0x60 - 0x6f 60: PUSHA/PUSHAD (i64) 61: POPA/POPAD (i64) -62: BOUND Gv,Ma (i64) +62: BOUND Gv,Ma (i64) | EVEX (Prefix) 63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) 64: SEG=FS (Prefix) 65: SEG=GS (Prefix) @@ -399,17 +404,17 @@ AVXcode: 1 3f: # 0x0f 0x40-0x4f 40: CMOVO Gv,Ev -41: CMOVNO Gv,Ev -42: CMOVB/C/NAE Gv,Ev +41: CMOVNO Gv,Ev | kandw/q Vk,Hk,Uk | kandb/d Vk,Hk,Uk (66) +42: CMOVB/C/NAE Gv,Ev | kandnw/q Vk,Hk,Uk | kandnb/d Vk,Hk,Uk (66) 43: CMOVAE/NB/NC Gv,Ev -44: CMOVE/Z Gv,Ev -45: CMOVNE/NZ Gv,Ev -46: CMOVBE/NA Gv,Ev -47: CMOVA/NBE Gv,Ev +44: CMOVE/Z Gv,Ev | knotw/q Vk,Uk | knotb/d Vk,Uk (66) +45: CMOVNE/NZ Gv,Ev | korw/q Vk,Hk,Uk | korb/d Vk,Hk,Uk (66) +46: CMOVBE/NA Gv,Ev | kxnorw/q Vk,Hk,Uk | kxnorb/d Vk,Hk,Uk (66) +47: CMOVA/NBE Gv,Ev | kxorw/q Vk,Hk,Uk | kxorb/d Vk,Hk,Uk (66) 48: CMOVS Gv,Ev 49: CMOVNS Gv,Ev -4a: CMOVP/PE Gv,Ev -4b: CMOVNP/PO Gv,Ev +4a: CMOVP/PE Gv,Ev | kaddw/q Vk,Hk,Uk | kaddb/d Vk,Hk,Uk (66) +4b: CMOVNP/PO Gv,Ev | kunpckbw Vk,Hk,Uk (66) | kunpckwd/dq Vk,Hk,Uk 4c: CMOVL/NGE Gv,Ev 4d: CMOVNL/GE Gv,Ev 4e: CMOVLE/NG Gv,Ev @@ -426,7 +431,7 @@ AVXcode: 1 58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1) 59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1) 5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1) -5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) +5b: vcvtdq2ps Vps,Wdq | vcvtqq2ps Vps,Wqq (evo) | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) 5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1) 5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1) 5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1) @@ -447,7 +452,7 @@ AVXcode: 1 6c: vpunpcklqdq Vx,Hx,Wx (66),(v1) 6d: vpunpckhqdq Vx,Hx,Wx (66),(v1) 6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1) -6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3) +6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqa32/64 Vx,Wx (66),(evo) | vmovdqu Vx,Wx (F3) | vmovdqu32/64 Vx,Wx (F3),(evo) | vmovdqu8/16 Vx,Wx (F2),(ev) # 0x0f 0x70-0x7f 70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1) 71: Grp12 (1A) @@ -458,14 +463,14 @@ AVXcode: 1 76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1) # Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX. 
77: emms | vzeroupper | vzeroall -78: VMREAD Ey,Gy -79: VMWRITE Gy,Ey -7a: -7b: +78: VMREAD Ey,Gy | vcvttps2udq/pd2udq Vx,Wpd (evo) | vcvttsd2usi Gv,Wx (F2),(ev) | vcvttss2usi Gv,Wx (F3),(ev) | vcvttps2uqq/pd2uqq Vx,Wx (66),(ev) +79: VMWRITE Gy,Ey | vcvtps2udq/pd2udq Vx,Wpd (evo) | vcvtsd2usi Gv,Wx (F2),(ev) | vcvtss2usi Gv,Wx (F3),(ev) | vcvtps2uqq/pd2uqq Vx,Wx (66),(ev) +7a: vcvtudq2pd/uqq2pd Vpd,Wx (F3),(ev) | vcvtudq2ps/uqq2ps Vpd,Wx (F2),(ev) | vcvttps2qq/pd2qq Vx,Wx (66),(ev) +7b: vcvtusi2sd Vpd,Hpd,Ev (F2),(ev) | vcvtusi2ss Vps,Hps,Ev (F3),(ev) | vcvtps2qq/pd2qq Vx,Wx (66),(ev) 7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2) 7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2) 7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) -7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3) +7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev) # 0x0f 0x80-0x8f # Note: "forced64" is Intel CPU behavior (see comment about CALL insn). 80: JO Jz (f64) @@ -485,16 +490,16 @@ AVXcode: 1 8e: JLE/JNG Jz (f64) 8f: JNLE/JG Jz (f64) # 0x0f 0x90-0x9f -90: SETO Eb -91: SETNO Eb -92: SETB/C/NAE Eb -93: SETAE/NB/NC Eb +90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66) +91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66) +92: SETB/C/NAE Eb | kmovw Vk,Rv | kmovb Vk,Rv (66) | kmovq/d Vk,Rv (F2) +93: SETAE/NB/NC Eb | kmovw Gv,Uk | kmovb Gv,Uk (66) | kmovq/d Gv,Uk (F2) 94: SETE/Z Eb 95: SETNE/NZ Eb 96: SETBE/NA Eb 97: SETA/NBE Eb -98: SETS Eb -99: SETNS Eb +98: SETS Eb | kortestw/q Vk,Uk | kortestb/d Vk,Uk (66) +99: SETNS Eb | ktestw/q Vk,Uk | ktestb/d Vk,Uk (66) 9a: SETP/PE Eb 9b: SETNP/PO Eb 9c: SETL/NGE Eb @@ -564,11 +569,11 @@ d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1) d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1) d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1) da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1) -db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) +db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) | vpandd/q Vx,Hx,Wx (66),(evo) dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1) dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1) de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1) -df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) +df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) | vpandnd/q Vx,Hx,Wx (66),(evo) # 0x0f 0xe0-0xef e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1) e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1) @@ -576,16 +581,16 @@ e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1) e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1) e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1) e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1) -e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2) +e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtdq2pd/qq2pd Vx,Wdq (F3),(evo) | vcvtpd2dq Vx,Wpd (F2) e7: movntq Mq,Pq | vmovntdq Mx,Vx (66) e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1) e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1) ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1) -eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) +eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) | vpord/q Vx,Hx,Wx (66),(evo) ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1) ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1) ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1) -ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) +ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) | vpxord/q Vx,Hx,Wx (66),(evo) # 0x0f 0xf0-0xff f0: vlddqu Vx,Mx (F2) f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1) @@ -626,81 +631,105 @@ AVXcode: 2 0e: vtestps Vx,Wx (66),(v) 0f: vtestpd Vx,Wx (66),(v) # 0x0f 0x38 
0x10-0x1f -10: pblendvb Vdq,Wdq (66) -11: -12: -13: vcvtph2ps Vx,Wx,Ib (66),(v) -14: blendvps Vdq,Wdq (66) -15: blendvpd Vdq,Wdq (66) -16: vpermps Vqq,Hqq,Wqq (66),(v) +10: pblendvb Vdq,Wdq (66) | vpsrlvw Vx,Hx,Wx (66),(evo) | vpmovuswb Wx,Vx (F3),(ev) +11: vpmovusdb Wx,Vd (F3),(ev) | vpsravw Vx,Hx,Wx (66),(ev) +12: vpmovusqb Wx,Vq (F3),(ev) | vpsllvw Vx,Hx,Wx (66),(ev) +13: vcvtph2ps Vx,Wx (66),(v) | vpmovusdw Wx,Vd (F3),(ev) +14: blendvps Vdq,Wdq (66) | vpmovusqw Wx,Vq (F3),(ev) | vprorvd/q Vx,Hx,Wx (66),(evo) +15: blendvpd Vdq,Wdq (66) | vpmovusqd Wx,Vq (F3),(ev) | vprolvd/q Vx,Hx,Wx (66),(evo) +16: vpermps Vqq,Hqq,Wqq (66),(v) | vpermps/d Vqq,Hqq,Wqq (66),(evo) 17: vptest Vx,Wx (66) 18: vbroadcastss Vx,Wd (66),(v) -19: vbroadcastsd Vqq,Wq (66),(v) -1a: vbroadcastf128 Vqq,Mdq (66),(v) -1b: +19: vbroadcastsd Vqq,Wq (66),(v) | vbroadcastf32x2 Vqq,Wq (66),(evo) +1a: vbroadcastf128 Vqq,Mdq (66),(v) | vbroadcastf32x4/64x2 Vqq,Wq (66),(evo) +1b: vbroadcastf32x8/64x4 Vqq,Mdq (66),(ev) 1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1) 1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1) 1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1) -1f: +1f: vpabsq Vx,Wx (66),(ev) # 0x0f 0x38 0x20-0x2f -20: vpmovsxbw Vx,Ux/Mq (66),(v1) -21: vpmovsxbd Vx,Ux/Md (66),(v1) -22: vpmovsxbq Vx,Ux/Mw (66),(v1) -23: vpmovsxwd Vx,Ux/Mq (66),(v1) -24: vpmovsxwq Vx,Ux/Md (66),(v1) -25: vpmovsxdq Vx,Ux/Mq (66),(v1) -26: -27: -28: vpmuldq Vx,Hx,Wx (66),(v1) -29: vpcmpeqq Vx,Hx,Wx (66),(v1) -2a: vmovntdqa Vx,Mx (66),(v1) +20: vpmovsxbw Vx,Ux/Mq (66),(v1) | vpmovswb Wx,Vx (F3),(ev) +21: vpmovsxbd Vx,Ux/Md (66),(v1) | vpmovsdb Wx,Vd (F3),(ev) +22: vpmovsxbq Vx,Ux/Mw (66),(v1) | vpmovsqb Wx,Vq (F3),(ev) +23: vpmovsxwd Vx,Ux/Mq (66),(v1) | vpmovsdw Wx,Vd (F3),(ev) +24: vpmovsxwq Vx,Ux/Md (66),(v1) | vpmovsqw Wx,Vq (F3),(ev) +25: vpmovsxdq Vx,Ux/Mq (66),(v1) | vpmovsqd Wx,Vq (F3),(ev) +26: vptestmb/w Vk,Hx,Wx (66),(ev) | vptestnmb/w Vk,Hx,Wx (F3),(ev) +27: vptestmd/q Vk,Hx,Wx (66),(ev) | vptestnmd/q Vk,Hx,Wx (F3),(ev) +28: vpmuldq Vx,Hx,Wx (66),(v1) | vpmovm2b/w Vx,Uk (F3),(ev) +29: vpcmpeqq Vx,Hx,Wx (66),(v1) | vpmovb2m/w2m Vk,Ux (F3),(ev) +2a: vmovntdqa Vx,Mx (66),(v1) | vpbroadcastmb2q Vx,Uk (F3),(ev) 2b: vpackusdw Vx,Hx,Wx (66),(v1) -2c: vmaskmovps Vx,Hx,Mx (66),(v) -2d: vmaskmovpd Vx,Hx,Mx (66),(v) +2c: vmaskmovps Vx,Hx,Mx (66),(v) | vscalefps/d Vx,Hx,Wx (66),(evo) +2d: vmaskmovpd Vx,Hx,Mx (66),(v) | vscalefss/d Vx,Hx,Wx (66),(evo) 2e: vmaskmovps Mx,Hx,Vx (66),(v) 2f: vmaskmovpd Mx,Hx,Vx (66),(v) # 0x0f 0x38 0x30-0x3f -30: vpmovzxbw Vx,Ux/Mq (66),(v1) -31: vpmovzxbd Vx,Ux/Md (66),(v1) -32: vpmovzxbq Vx,Ux/Mw (66),(v1) -33: vpmovzxwd Vx,Ux/Mq (66),(v1) -34: vpmovzxwq Vx,Ux/Md (66),(v1) -35: vpmovzxdq Vx,Ux/Mq (66),(v1) -36: vpermd Vqq,Hqq,Wqq (66),(v) +30: vpmovzxbw Vx,Ux/Mq (66),(v1) | vpmovwb Wx,Vx (F3),(ev) +31: vpmovzxbd Vx,Ux/Md (66),(v1) | vpmovdb Wx,Vd (F3),(ev) +32: vpmovzxbq Vx,Ux/Mw (66),(v1) | vpmovqb Wx,Vq (F3),(ev) +33: vpmovzxwd Vx,Ux/Mq (66),(v1) | vpmovdw Wx,Vd (F3),(ev) +34: vpmovzxwq Vx,Ux/Md (66),(v1) | vpmovqw Wx,Vq (F3),(ev) +35: vpmovzxdq Vx,Ux/Mq (66),(v1) | vpmovqd Wx,Vq (F3),(ev) +36: vpermd Vqq,Hqq,Wqq (66),(v) | vpermd/q Vqq,Hqq,Wqq (66),(evo) 37: vpcmpgtq Vx,Hx,Wx (66),(v1) -38: vpminsb Vx,Hx,Wx (66),(v1) -39: vpminsd Vx,Hx,Wx (66),(v1) -3a: vpminuw Vx,Hx,Wx (66),(v1) -3b: vpminud Vx,Hx,Wx (66),(v1) +38: vpminsb Vx,Hx,Wx (66),(v1) | vpmovm2d/q Vx,Uk (F3),(ev) +39: vpminsd Vx,Hx,Wx (66),(v1) | vpminsd/q Vx,Hx,Wx (66),(evo) | vpmovd2m/q2m Vk,Ux (F3),(ev) +3a: vpminuw Vx,Hx,Wx (66),(v1) | vpbroadcastmw2d Vx,Uk (F3),(ev) +3b: 
vpminud Vx,Hx,Wx (66),(v1) | vpminud/q Vx,Hx,Wx (66),(evo) 3c: vpmaxsb Vx,Hx,Wx (66),(v1) -3d: vpmaxsd Vx,Hx,Wx (66),(v1) +3d: vpmaxsd Vx,Hx,Wx (66),(v1) | vpmaxsd/q Vx,Hx,Wx (66),(evo) 3e: vpmaxuw Vx,Hx,Wx (66),(v1) -3f: vpmaxud Vx,Hx,Wx (66),(v1) +3f: vpmaxud Vx,Hx,Wx (66),(v1) | vpmaxud/q Vx,Hx,Wx (66),(evo) # 0x0f 0x38 0x40-0x8f -40: vpmulld Vx,Hx,Wx (66),(v1) +40: vpmulld Vx,Hx,Wx (66),(v1) | vpmulld/q Vx,Hx,Wx (66),(evo) 41: vphminposuw Vdq,Wdq (66),(v1) -42: -43: -44: +42: vgetexpps/d Vx,Wx (66),(ev) +43: vgetexpss/d Vx,Hx,Wx (66),(ev) +44: vplzcntd/q Vx,Wx (66),(ev) 45: vpsrlvd/q Vx,Hx,Wx (66),(v) -46: vpsravd Vx,Hx,Wx (66),(v) +46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo) 47: vpsllvd/q Vx,Hx,Wx (66),(v) -# Skip 0x48-0x57 +# Skip 0x48-0x4b +4c: vrcp14ps/d Vpd,Wpd (66),(ev) +4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev) +4e: vrsqrt14ps/d Vpd,Wpd (66),(ev) +4f: vrsqrt14ss/d Vsd,Hsd,Wsd (66),(ev) +# Skip 0x50-0x57 58: vpbroadcastd Vx,Wx (66),(v) -59: vpbroadcastq Vx,Wx (66),(v) -5a: vbroadcasti128 Vqq,Mdq (66),(v) -# Skip 0x5b-0x77 +59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo) +5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo) +5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev) +# Skip 0x5c-0x63 +64: vpblendmd/q Vx,Hx,Wx (66),(ev) +65: vblendmps/d Vx,Hx,Wx (66),(ev) +66: vpblendmb/w Vx,Hx,Wx (66),(ev) +# Skip 0x67-0x74 +75: vpermi2b/w Vx,Hx,Wx (66),(ev) +76: vpermi2d/q Vx,Hx,Wx (66),(ev) +77: vpermi2ps/d Vx,Hx,Wx (66),(ev) 78: vpbroadcastb Vx,Wx (66),(v) 79: vpbroadcastw Vx,Wx (66),(v) -# Skip 0x7a-0x7f +7a: vpbroadcastb Vx,Rv (66),(ev) +7b: vpbroadcastw Vx,Rv (66),(ev) +7c: vpbroadcastd/q Vx,Rv (66),(ev) +7d: vpermt2b/w Vx,Hx,Wx (66),(ev) +7e: vpermt2d/q Vx,Hx,Wx (66),(ev) +7f: vpermt2ps/d Vx,Hx,Wx (66),(ev) 80: INVEPT Gy,Mdq (66) 81: INVPID Gy,Mdq (66) 82: INVPCID Gy,Mdq (66) +83: vpmultishiftqb Vx,Hx,Wx (66),(ev) +88: vexpandps/d Vpd,Wpd (66),(ev) +89: vpexpandd/q Vx,Wx (66),(ev) +8a: vcompressps/d Wx,Vx (66),(ev) +8b: vpcompressd/q Wx,Vx (66),(ev) 8c: vpmaskmovd/q Vx,Hx,Mx (66),(v) +8d: vpermb/w Vx,Hx,Wx (66),(ev) 8e: vpmaskmovd/q Mx,Vx,Hx (66),(v) # 0x0f 0x38 0x90-0xbf (FMA) -90: vgatherdd/q Vx,Hx,Wx (66),(v) -91: vgatherqd/q Vx,Hx,Wx (66),(v) +90: vgatherdd/q Vx,Hx,Wx (66),(v) | vpgatherdd/q Vx,Wx (66),(evo) +91: vgatherqd/q Vx,Hx,Wx (66),(v) | vpgatherqd/q Vx,Wx (66),(evo) 92: vgatherdps/d Vx,Hx,Wx (66),(v) 93: vgatherqps/d Vx,Hx,Wx (66),(v) 94: @@ -715,6 +744,10 @@ AVXcode: 2 9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1) 9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v) 9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1) +a0: vpscatterdd/q Wx,Vx (66),(ev) +a1: vpscatterqd/q Wx,Vx (66),(ev) +a2: vscatterdps/d Wx,Vx (66),(ev) +a3: vscatterqps/d Wx,Vx (66),(ev) a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v) a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v) a8: vfmadd213ps/d Vx,Hx,Wx (66),(v) @@ -725,6 +758,8 @@ ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v) ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1) ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v) af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1) +b4: vpmadd52luq Vx,Hx,Wx (66),(ev) +b5: vpmadd52huq Vx,Hx,Wx (66),(ev) b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v) b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v) b8: vfmadd231ps/d Vx,Hx,Wx (66),(v) @@ -736,12 +771,15 @@ bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1) be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) # 0x0f 0x38 0xc0-0xff -c8: sha1nexte Vdq,Wdq +c4: vpconflictd/q Vx,Wx (66),(ev) +c6: Grp18 (1A) +c7: Grp19 (1A) +c8: sha1nexte Vdq,Wdq | vexp2ps/d Vx,Wx (66),(ev) c9: sha1msg1 Vdq,Wdq -ca: 
sha1msg2 Vdq,Wdq -cb: sha256rnds2 Vdq,Wdq -cc: sha256msg1 Vdq,Wdq -cd: sha256msg2 Vdq,Wdq +ca: sha1msg2 Vdq,Wdq | vrcp28ps/d Vx,Wx (66),(ev) +cb: sha256rnds2 Vdq,Wdq | vrcp28ss/d Vx,Hx,Wx (66),(ev) +cc: sha256msg1 Vdq,Wdq | vrsqrt28ps/d Vx,Wx (66),(ev) +cd: sha256msg2 Vdq,Wdq | vrsqrt28ss/d Vx,Hx,Wx (66),(ev) db: VAESIMC Vdq,Wdq (66),(v1) dc: VAESENC Vdq,Hdq,Wdq (66),(v1) dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) @@ -763,15 +801,15 @@ AVXcode: 3 00: vpermq Vqq,Wqq,Ib (66),(v) 01: vpermpd Vqq,Wqq,Ib (66),(v) 02: vpblendd Vx,Hx,Wx,Ib (66),(v) -03: +03: valignd/q Vx,Hx,Wx,Ib (66),(ev) 04: vpermilps Vx,Wx,Ib (66),(v) 05: vpermilpd Vx,Wx,Ib (66),(v) 06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) 07: -08: vroundps Vx,Wx,Ib (66) -09: vroundpd Vx,Wx,Ib (66) -0a: vroundss Vss,Wss,Ib (66),(v1) -0b: vroundsd Vsd,Wsd,Ib (66),(v1) +08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) +09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo) +0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) +0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo) 0c: vblendps Vx,Hx,Wx,Ib (66) 0d: vblendpd Vx,Hx,Wx,Ib (66) 0e: vpblendw Vx,Hx,Wx,Ib (66),(v1) @@ -780,26 +818,51 @@ AVXcode: 3 15: vpextrw Rd/Mw,Vdq,Ib (66),(v1) 16: vpextrd/q Ey,Vdq,Ib (66),(v1) 17: vextractps Ed,Vdq,Ib (66),(v1) -18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) -19: vextractf128 Wdq,Vqq,Ib (66),(v) +18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) | vinsertf32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo) +19: vextractf128 Wdq,Vqq,Ib (66),(v) | vextractf32x4/64x2 Wdq,Vqq,Ib (66),(evo) +1a: vinsertf32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev) +1b: vextractf32x8/64x4 Wdq,Vqq,Ib (66),(ev) 1d: vcvtps2ph Wx,Vx,Ib (66),(v) +1e: vpcmpud/q Vk,Hd,Wd,Ib (66),(ev) +1f: vpcmpd/q Vk,Hd,Wd,Ib (66),(ev) 20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1) 21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1) 22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) -38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) -39: vextracti128 Wdq,Vqq,Ib (66),(v) +23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) +25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev) +26: vgetmantps/d Vx,Wx,Ib (66),(ev) +27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) +30: kshiftrb/w Vk,Uk,Ib (66),(v) +31: kshiftrd/q Vk,Uk,Ib (66),(v) +32: kshiftlb/w Vk,Uk,Ib (66),(v) +33: kshiftld/q Vk,Uk,Ib (66),(v) +38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) | vinserti32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo) +39: vextracti128 Wdq,Vqq,Ib (66),(v) | vextracti32x4/64x2 Wdq,Vqq,Ib (66),(evo) +3a: vinserti32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev) +3b: vextracti32x8/64x4 Wdq,Vqq,Ib (66),(ev) +3e: vpcmpub/w Vk,Hk,Wx,Ib (66),(ev) +3f: vpcmpb/w Vk,Hk,Wx,Ib (66),(ev) 40: vdpps Vx,Hx,Wx,Ib (66) 41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1) -42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) +42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) | vdbpsadbw Vx,Hx,Wx,Ib (66),(evo) +43: vshufi32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) 44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1) 46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v) 4a: vblendvps Vx,Hx,Wx,Lx (66),(v) 4b: vblendvpd Vx,Hx,Wx,Lx (66),(v) 4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1) +50: vrangeps/d Vx,Hx,Wx,Ib (66),(ev) +51: vrangess/d Vx,Hx,Wx,Ib (66),(ev) +54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev) +55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev) +56: vreduceps/d Vx,Wx,Ib (66),(ev) +57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) 60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) 61: vpcmpestri Vdq,Wdq,Ib (66),(v1) 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) 63: vpcmpistri Vdq,Wdq,Ib (66),(v1) +66: vfpclassps/d Vk,Wx,Ib (66),(ev) +67: vfpclassss/d Vk,Wx,Ib (66),(ev) cc: sha1rnds4 Vdq,Wdq,Ib df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) f0: RORX Gy,Ey,Ib (F2),(v) @@ -927,8 +990,10 @@ 
GrpTable: Grp12 EndTable GrpTable: Grp13 +0: vprord/q Hx,Wx,Ib (66),(ev) +1: vprold/q Hx,Wx,Ib (66),(ev) 2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1) -4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) +4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) | vpsrad/q Hx,Ux,Ib (66),(evo) 6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1) EndTable @@ -947,7 +1012,7 @@ GrpTable: Grp15 4: XSAVE 5: XRSTOR | lfence (11B) 6: XSAVEOPT | clwb (66) | mfence (11B) -7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B) +7: clflush | clflushopt (66) | sfence (11B) EndTable GrpTable: Grp16 @@ -963,6 +1028,20 @@ GrpTable: Grp17 3: BLSI By,Ey (v) EndTable +GrpTable: Grp18 +1: vgatherpf0dps/d Wx (66),(ev) +2: vgatherpf1dps/d Wx (66),(ev) +5: vscatterpf0dps/d Wx (66),(ev) +6: vscatterpf1dps/d Wx (66),(ev) +EndTable + +GrpTable: Grp19 +1: vgatherpf0qps/d Wx (66),(ev) +2: vgatherpf1qps/d Wx (66),(ev) +5: vscatterpf0qps/d Wx (66),(ev) +6: vscatterpf1qps/d Wx (66),(ev) +EndTable + # AMD's Prefetch Group GrpTable: GrpP 0: PREFETCH diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 4ed30f45c6da..bd09d0effef8 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -107,6 +107,12 @@ static struct instruction *next_insn_same_sec(struct objtool_file *file, insn->offset < func->offset + func->len; \ insn = list_next_entry(insn, list)) +#define func_for_each_insn_continue_reverse(file, func, insn) \ + for (insn = list_prev_entry(insn, list); \ + &insn->list != &file->insn_list && \ + insn->sec == func->sec && insn->offset >= func->offset; \ + insn = list_prev_entry(insn, list)) + #define sec_for_each_insn_from(file, insn) \ for (; insn; insn = next_insn_same_sec(file, insn)) @@ -123,10 +129,14 @@ static bool ignore_func(struct objtool_file *file, struct symbol *func) /* check for STACK_FRAME_NON_STANDARD */ if (file->whitelist && file->whitelist->rela) - list_for_each_entry(rela, &file->whitelist->rela->rela_list, list) - if (rela->sym->sec == func->sec && + list_for_each_entry(rela, &file->whitelist->rela->rela_list, list) { + if (rela->sym->type == STT_SECTION && + rela->sym->sec == func->sec && rela->addend == func->offset) return true; + if (rela->sym->type == STT_FUNC && rela->sym == func) + return true; + } /* check if it has a context switching instruction */ func_for_each_insn(file, func, insn) @@ -660,65 +670,95 @@ static int add_switch_table(struct objtool_file *file, struct symbol *func, return 0; } -static int add_func_switch_tables(struct objtool_file *file, - struct symbol *func) +/* + * find_switch_table() - Given a dynamic jump, find the switch jump table in + * .rodata associated with it. + * + * There are 3 basic patterns: + * + * 1. jmpq *[rodata addr](,%reg,8) + * + * This is the most common case by far. It jumps to an address in a simple + * jump table which is stored in .rodata. + * + * 2. jmpq *[rodata addr](%rip) + * + * This is caused by a rare GCC quirk, currently only seen in three driver + * functions in the kernel, only with certain obscure non-distro configs. + * + * As part of an optimization, GCC makes a copy of an existing switch jump + * table, modifies it, and then hard-codes the jump (albeit with an indirect + * jump) to use a single entry in the table. The rest of the jump table and + * some of its jump targets remain as dead code. + * + * In such a case we can just crudely ignore all unreachable instruction + * warnings for the entire object file. 
Ideally we would just ignore them + * for the function, but that would require redesigning the code quite a + * bit. And honestly that's just not worth doing: unreachable instruction + * warnings are of questionable value anyway, and this is such a rare issue. + * + * 3. mov [rodata addr],%reg1 + * ... some instructions ... + * jmpq *(%reg1,%reg2,8) + * + * This is a fairly uncommon pattern which is new for GCC 6. As of this + * writing, there are 11 occurrences of it in the allmodconfig kernel. + * + * TODO: Once we have DWARF CFI and smarter instruction decoding logic, + * ensure the same register is used in the mov and jump instructions. + */ +static struct rela *find_switch_table(struct objtool_file *file, + struct symbol *func, + struct instruction *insn) { - struct instruction *insn, *prev_jump; - struct rela *text_rela, *rodata_rela, *prev_rela = NULL; - int ret; + struct rela *text_rela, *rodata_rela; - prev_jump = NULL; + text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len); + if (text_rela && text_rela->sym == file->rodata->sym) { + /* case 1 */ + rodata_rela = find_rela_by_dest(file->rodata, + text_rela->addend); + if (rodata_rela) + return rodata_rela; - func_for_each_insn(file, func, insn) { - if (insn->type != INSN_JUMP_DYNAMIC) - continue; + /* case 2 */ + rodata_rela = find_rela_by_dest(file->rodata, + text_rela->addend + 4); + if (!rodata_rela) + return NULL; + file->ignore_unreachables = true; + return rodata_rela; + } + + /* case 3 */ + func_for_each_insn_continue_reverse(file, func, insn) { + if (insn->type == INSN_JUMP_UNCONDITIONAL || + insn->type == INSN_JUMP_DYNAMIC) + break; text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len); - if (!text_rela || text_rela->sym != file->rodata->sym) - continue; + if (text_rela && text_rela->sym == file->rodata->sym) + return find_rela_by_dest(file->rodata, + text_rela->addend); + } - /* common case: jmpq *[addr](,%rax,8) */ - rodata_rela = find_rela_by_dest(file->rodata, - text_rela->addend); + return NULL; +} - /* - * rare case: jmpq *[addr](%rip) - * - * This check is for a rare gcc quirk, currently only seen in - * three driver functions in the kernel, only with certain - * obscure non-distro configs. - * - * As part of an optimization, gcc makes a copy of an existing - * switch jump table, modifies it, and then hard-codes the jump - * (albeit with an indirect jump) to use a single entry in the - * table. The rest of the jump table and some of its jump - * targets remain as dead code. - * - * In such a case we can just crudely ignore all unreachable - * instruction warnings for the entire object file. Ideally we - * would just ignore them for the function, but that would - * require redesigning the code quite a bit. And honestly - * that's just not worth doing: unreachable instruction - * warnings are of questionable value anyway, and this is such - * a rare issue. 
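
For readers tracing what objtool is actually looking for here: a switch jump table is nothing more than an array of code addresses that the compiler parks in .rodata and indexes with the (bounds-checked) switch value. Below is a minimal user-space analogue of pattern 1, purely illustrative: the table and function names are invented, and a real GCC table holds addresses of labels inside one function rather than function pointers.

#include <stdio.h>

/*
 * Rough user-space analogue of "jmpq *[rodata addr](,%reg,8)":
 * an indirect jump through a read-only table of code pointers,
 * indexed by the switch value scaled by sizeof(void *).
 */
static void op_add(void) { puts("add"); }
static void op_sub(void) { puts("sub"); }
static void op_mul(void) { puts("mul"); }

static void (*const jump_table[])(void) = { op_add, op_sub, op_mul };

int main(void)
{
	unsigned int nr = 1;	/* the "switch" value */

	if (nr < sizeof(jump_table) / sizeof(jump_table[0]))
		jump_table[nr]();	/* indirect jump/call through .rodata */
	return 0;
}
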
- * - * kbuild reports: - * - https://lkml.kernel.org/r/201603231906.LWcVUpxm%25fengguang.wu@intel.com - * - https://lkml.kernel.org/r/201603271114.K9i45biy%25fengguang.wu@intel.com - * - https://lkml.kernel.org/r/201603291058.zuJ6ben1%25fengguang.wu@intel.com - * - * gcc bug: - * - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70604 - */ - if (!rodata_rela) { - rodata_rela = find_rela_by_dest(file->rodata, - text_rela->addend + 4); - if (rodata_rela) - file->ignore_unreachables = true; - } +static int add_func_switch_tables(struct objtool_file *file, + struct symbol *func) +{ + struct instruction *insn, *prev_jump = NULL; + struct rela *rela, *prev_rela = NULL; + int ret; - if (!rodata_rela) + func_for_each_insn(file, func, insn) { + if (insn->type != INSN_JUMP_DYNAMIC) + continue; + + rela = find_switch_table(file, func, insn); + if (!rela) continue; /* @@ -728,13 +768,13 @@ static int add_func_switch_tables(struct objtool_file *file, */ if (prev_jump) { ret = add_switch_table(file, func, prev_jump, prev_rela, - rodata_rela); + rela); if (ret) return ret; } prev_jump = insn; - prev_rela = rodata_rela; + prev_rela = rela; } if (prev_jump) { diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build index 8a61372bb47a..5bd7b9260cc0 100644 --- a/tools/perf/arch/s390/util/Build +++ b/tools/perf/arch/s390/util/Build @@ -2,3 +2,5 @@ libperf-y += header.o libperf-y += kvm-stat.o libperf-$(CONFIG_DWARF) += dwarf-regs.o + +libperf-y += machine.o diff --git a/tools/perf/arch/s390/util/machine.c b/tools/perf/arch/s390/util/machine.c new file mode 100644 index 000000000000..b9a95a1a8e69 --- /dev/null +++ b/tools/perf/arch/s390/util/machine.c @@ -0,0 +1,19 @@ +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include "util.h" +#include "machine.h" +#include "api/fs/fs.h" + +int arch__fix_module_text_start(u64 *start, const char *name) +{ + char path[PATH_MAX]; + + snprintf(path, PATH_MAX, "module/%.*s/sections/.text", + (int)strlen(name) - 2, name + 1); + + if (sysfs__read_ull(path, (unsigned long long *)start) < 0) + return -1; + + return 0; +} diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c index 3918dd52e903..0f196eec9f48 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c @@ -1664,5 +1664,3 @@ "0f c7 1d 78 56 34 12 \txrstors 0x12345678",}, {{0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", "0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%eax,%ecx,8)",}, -{{0x66, 0x0f, 0xae, 0xf8, }, 4, 0, "", "", -"66 0f ae f8 \tpcommit ",}, diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c index 9c8c61e06d5a..af25bc8240d0 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c @@ -1696,5 +1696,3 @@ "0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%rax,%rcx,8)",}, {{0x41, 0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", "41 0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%r8,%rcx,8)",}, -{{0x66, 0x0f, 0xae, 0xf8, }, 4, 0, "", "", -"66 0f ae f8 \tpcommit ",}, diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c index 76e0ec379c8b..979487dae8d4 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c @@ -2655,10 +2655,6 @@ int main(void) #endif /* #ifndef __x86_64__ */ - /* pcommit */ - - asm volatile("pcommit"); - /* Following line is a marker for 
the awk script - do not change */ asm volatile("rdtsc"); /* Stop here */ diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index b1d491c2e704..fdde1bd3e306 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -608,6 +608,7 @@ static const struct { const char *compact; } gfp_compact_table[] = { { "GFP_TRANSHUGE", "THP" }, + { "GFP_TRANSHUGE_LIGHT", "THL" }, { "GFP_HIGHUSER_MOVABLE", "HUM" }, { "GFP_HIGHUSER", "HU" }, { "GFP_USER", "U" }, diff --git a/tools/perf/scripts/python/netdev-times.py b/tools/perf/scripts/python/netdev-times.py index 4d21ef2d601d..4c6f09ac7d12 100644 --- a/tools/perf/scripts/python/netdev-times.py +++ b/tools/perf/scripts/python/netdev-times.py @@ -252,9 +252,10 @@ def irq__irq_handler_exit(name, context, cpu, sec, nsec, pid, comm, callchain, i event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, irq, ret) all_event_list.append(event_info) -def napi__napi_poll(name, context, cpu, sec, nsec, pid, comm, callchain, napi, dev_name): +def napi__napi_poll(name, context, cpu, sec, nsec, pid, comm, callchain, napi, + dev_name, work=None, budget=None): event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, - napi, dev_name) + napi, dev_name, work, budget) all_event_list.append(event_info) def net__netif_receive_skb(name, context, cpu, sec, nsec, pid, comm, callchain, skbaddr, @@ -354,11 +355,13 @@ def handle_irq_softirq_exit(event_info): receive_hunk_list.append(rec_data) def handle_napi_poll(event_info): - (name, context, cpu, time, pid, comm, napi, dev_name) = event_info + (name, context, cpu, time, pid, comm, napi, dev_name, + work, budget) = event_info if cpu in net_rx_dic.keys(): event_list = net_rx_dic[cpu]['event_list'] rec_data = {'event_name':'napi_poll', - 'dev':dev_name, 'event_t':time} + 'dev':dev_name, 'event_t':time, + 'work':work, 'budget':budget} event_list.append(rec_data) def handle_netif_rx(event_info): diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 2fa7d8b69873..91c5f6e1af59 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -70,7 +70,6 @@ libperf-y += stat.o libperf-y += stat-shadow.o libperf-y += record.o libperf-y += srcline.o -libperf-y += str_error_r.o libperf-y += data.o libperf-y += tsc.o libperf-y += cloexec.o @@ -176,10 +175,6 @@ $(OUTPUT)util/libstring.o: ../lib/string.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) -$(OUTPUT)util/str_error_r.o: ../lib/str_error_r.c FORCE - $(call rule_mkdir) - $(call if_changed_dep,cc_o_c) - $(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt index ec378cd7b71e..767be7c76034 100644 --- a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt +++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt @@ -1012,7 +1012,7 @@ GrpTable: Grp15 4: XSAVE 5: XRSTOR | lfence (11B) 6: XSAVEOPT | clwb (66) | mfence (11B) -7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B) +7: clflush | clflushopt (66) | sfence (11B) EndTable GrpTable: Grp16 diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index bc2cdbd09a25..cb6388dbdd98 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1093,12 +1093,20 @@ static int machine__set_modules_path(struct machine *machine) return map_groups__set_modules_path_dir(&machine->kmaps, modules_path, 0); } +int __weak arch__fix_module_text_start(u64 *start __maybe_unused, + const char *name 
__maybe_unused) +{ + return 0; +} static int machine__create_module(void *arg, const char *name, u64 start) { struct machine *machine = arg; struct map *map; + if (arch__fix_module_text_start(&start, name) < 0) + return -1; + map = machine__findnew_module_map(machine, start, name); if (map == NULL) return -1; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 41ac9cfd416b..20739f746bc4 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -216,6 +216,7 @@ struct symbol *machine__find_kernel_function_by_name(struct machine *machine, struct map *machine__findnew_module_map(struct machine *machine, u64 start, const char *filename); +int arch__fix_module_text_start(u64 *start, const char *name); int __machine__load_kallsyms(struct machine *machine, const char *filename, enum map_type type, bool no_kcore, symbol_filter_t filter); diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 5065ec98049c..b7d4f4aeee61 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -13,7 +13,6 @@ util/cpumap.c ../lib/bitmap.c ../lib/find_bit.c ../lib/hweight.c -../lib/str_error_r.c ../lib/vsprintf.c util/thread_map.c util/util.c diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config index 552af68d5414..a538ff44b108 100644 --- a/tools/power/acpi/Makefile.config +++ b/tools/power/acpi/Makefile.config @@ -54,9 +54,10 @@ INSTALL_SCRIPT = ${INSTALL_PROGRAM} # to something more interesting, like "arm-linux-". If you want # to compile vs uClibc, that can be done here as well. CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc- -CC = $(CROSS)gcc -LD = $(CROSS)gcc -STRIP = $(CROSS)strip +CROSS_COMPILE ?= $(CROSS) +CC = $(CROSS_COMPILE)gcc +LD = $(CROSS_COMPILE)gcc +STRIP = $(CROSS_COMPILE)strip HOSTCC = gcc # check if compiler option is supported diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index e367b1a85d70..8561e7ddca59 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -1,7 +1,7 @@ CC = $(CROSS_COMPILE)gcc BUILD_OUTPUT := $(CURDIR) -PREFIX := /usr -DESTDIR := +PREFIX ?= /usr +DESTDIR ?= ifeq ("$(origin O)", "command line") BUILD_OUTPUT := $(O) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 89a55d5e32f3..492e84fbebfa 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -123,7 +123,7 @@ cpu0: MSR_NHM_PLATFORM_INFO: 0x80838f3012300 35 * 100 = 3500 MHz TSC frequency cpu0: MSR_IA32_POWER_CTL: 0x0004005d (C1E auto-promotion: DISabled) cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e000400 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, UNlocked: pkg-cstate-limit=0: pc0) -cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x25262727 +cpu0: MSR_TURBO_RATIO_LIMIT: 0x25262727 37 * 100 = 3700 MHz max turbo 4 active cores 38 * 100 = 3800 MHz max turbo 3 active cores 39 * 100 = 3900 MHz max turbo 2 active cores diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index acbf7ff2ee6e..3e199b508a96 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1480,7 +1480,7 @@ dump_knl_turbo_ratio_limits(void) unsigned int cores[buckets_no]; unsigned int ratio[buckets_no]; - get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr); + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); diff --git 
a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 785985677159..ad6dd0543019 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -11,12 +11,14 @@ ldflags-y += --wrap=__devm_release_region ldflags-y += --wrap=__request_region ldflags-y += --wrap=__release_region ldflags-y += --wrap=devm_memremap_pages -ldflags-y += --wrap=phys_to_pfn_t +ldflags-y += --wrap=insert_resource +ldflags-y += --wrap=remove_resource DRIVERS := ../../../drivers NVDIMM_SRC := $(DRIVERS)/nvdimm -ACPI_SRC := $(DRIVERS)/acpi +ACPI_SRC := $(DRIVERS)/acpi/nfit DAX_SRC := $(DRIVERS)/dax +ccflags-y := -I$(src)/$(NVDIMM_SRC)/ obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o @@ -27,10 +29,12 @@ obj-$(CONFIG_ACPI_NFIT) += nfit.o obj-$(CONFIG_DEV_DAX) += dax.o obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o -nfit-y := $(ACPI_SRC)/nfit.o +nfit-y := $(ACPI_SRC)/core.o +nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o nfit-y += config_check.o nd_pmem-y := $(NVDIMM_SRC)/pmem.o +nd_pmem-y += pmem-dax.o nd_pmem-y += config_check.o nd_btt-y := $(NVDIMM_SRC)/btt.o diff --git a/tools/testing/nvdimm/config_check.c b/tools/testing/nvdimm/config_check.c index adf18bfeca00..878daf3429e8 100644 --- a/tools/testing/nvdimm/config_check.c +++ b/tools/testing/nvdimm/config_check.c @@ -10,6 +10,7 @@ void check(void) BUILD_BUG_ON(!IS_MODULE(CONFIG_LIBNVDIMM)); BUILD_BUG_ON(!IS_MODULE(CONFIG_BLK_DEV_PMEM)); BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT)); + BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_PFN)); BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK)); BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT)); BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX)); diff --git a/tools/testing/nvdimm/pmem-dax.c b/tools/testing/nvdimm/pmem-dax.c new file mode 100644 index 000000000000..c9b8c48f85fc --- /dev/null +++ b/tools/testing/nvdimm/pmem-dax.c @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2014-2016, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#include "test/nfit_test.h" +#include <linux/blkdev.h> +#include <pmem.h> +#include <nd.h> + +long pmem_direct_access(struct block_device *bdev, sector_t sector, + void **kaddr, pfn_t *pfn, long size) +{ + struct pmem_device *pmem = bdev->bd_queue->queuedata; + resource_size_t offset = sector * 512 + pmem->data_offset; + + if (unlikely(is_bad_pmem(&pmem->bb, sector, size))) + return -EIO; + + /* + * Limit dax to a single page at a time given vmalloc()-backed + * in the nfit_test case. + */ + if (get_nfit_res(pmem->phys_addr + offset)) { + struct page *page; + + *kaddr = pmem->virt_addr + offset; + page = vmalloc_to_page(pmem->virt_addr + offset); + *pfn = page_to_pfn_t(page); + dev_dbg_ratelimited(disk_to_dev(bdev->bd_disk)->parent, + "%s: sector: %#llx pfn: %#lx\n", __func__, + (unsigned long long) sector, page_to_pfn(page)); + + return PAGE_SIZE; + } + + *kaddr = pmem->virt_addr + offset; + *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags); + + /* + * If badblocks are present, limit known good range to the + * requested range. 
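
The size accounting in pmem_direct_access() above is easier to follow outside the driver. Here is a minimal sketch of the two return-length cases, using invented values for the data offset and device size; only the 512-byte sector scaling, the one-page limit for the vmalloc()-backed nfit_test buffer, and the end-of-device arithmetic are taken from the function.

#include <stdio.h>

#define SECTOR_SHIFT	9		/* 512-byte sectors */
#define PAGE_SZ		4096UL

int main(void)
{
	unsigned long data_offset = 16 * PAGE_SZ;	/* hypothetical metadata area */
	unsigned long pfn_pad     = 0;			/* hypothetical */
	unsigned long dev_size    = 32UL << 20;		/* 32M, matching DIMM_SIZE */
	unsigned long sector      = 1024;

	unsigned long offset = (sector << SECTOR_SHIFT) + data_offset;

	/* nfit_test (vmalloc-backed) case: never promise more than a page */
	printf("test case: %lu bytes usable at offset %#lx\n", PAGE_SZ, offset);

	/* physically contiguous case: everything up to the end of pmem */
	printf("real case: %lu bytes usable at offset %#lx\n",
	       dev_size - pfn_pad - offset, offset);
	return 0;
}
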
+ */ + if (unlikely(pmem->bb.count)) + return size; + return pmem->size - pmem->pfn_pad - offset; +} diff --git a/tools/testing/nvdimm/test/Kbuild b/tools/testing/nvdimm/test/Kbuild index 9241064970fe..d32f25bba42a 100644 --- a/tools/testing/nvdimm/test/Kbuild +++ b/tools/testing/nvdimm/test/Kbuild @@ -1,5 +1,5 @@ ccflags-y := -I$(src)/../../../../drivers/nvdimm/ -ccflags-y += -I$(src)/../../../../drivers/acpi/ +ccflags-y += -I$(src)/../../../../drivers/acpi/nfit/ obj-m += nfit_test.o obj-m += nfit_test_iomap.o diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index c842095f2801..c29f8dca9e67 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -10,11 +10,13 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. */ +#include <linux/memremap.h> #include <linux/rculist.h> #include <linux/export.h> #include <linux/ioport.h> #include <linux/module.h> #include <linux/types.h> +#include <linux/pfn_t.h> #include <linux/io.h> #include <linux/mm.h> #include "nfit_test.h" @@ -52,7 +54,7 @@ static struct nfit_test_resource *__get_nfit_res(resource_size_t resource) return NULL; } -static struct nfit_test_resource *get_nfit_res(resource_size_t resource) +struct nfit_test_resource *get_nfit_res(resource_size_t resource) { struct nfit_test_resource *res; @@ -62,6 +64,7 @@ static struct nfit_test_resource *get_nfit_res(resource_size_t resource) return res; } +EXPORT_SYMBOL(get_nfit_res); void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size, void __iomem *(*fallback_fn)(resource_size_t, unsigned long)) @@ -97,10 +100,6 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset, } EXPORT_SYMBOL(__wrap_devm_memremap); -#ifdef __HAVE_ARCH_PTE_DEVMAP -#include <linux/memremap.h> -#include <linux/pfn_t.h> - void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res, struct percpu_ref *ref, struct vmem_altmap *altmap) { @@ -122,19 +121,6 @@ pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags) return phys_to_pfn_t(addr, flags); } EXPORT_SYMBOL(__wrap_phys_to_pfn_t); -#else -/* to be removed post 4.5-rc1 */ -void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res) -{ - resource_size_t offset = res->start; - struct nfit_test_resource *nfit_res = get_nfit_res(offset); - - if (nfit_res) - return nfit_res->buf + offset - nfit_res->res->start; - return devm_memremap_pages(dev, res); -} -EXPORT_SYMBOL(__wrap_devm_memremap_pages); -#endif void *__wrap_memremap(resource_size_t offset, size_t size, unsigned long flags) @@ -229,6 +215,22 @@ struct resource *__wrap___request_region(struct resource *parent, } EXPORT_SYMBOL(__wrap___request_region); +int __wrap_insert_resource(struct resource *parent, struct resource *res) +{ + if (get_nfit_res(res->start)) + return 0; + return insert_resource(parent, res); +} +EXPORT_SYMBOL(__wrap_insert_resource); + +int __wrap_remove_resource(struct resource *res) +{ + if (get_nfit_res(res->start)) + return 0; + return remove_resource(res); +} +EXPORT_SYMBOL(__wrap_remove_resource); + struct resource *__wrap___devm_request_region(struct device *dev, struct resource *parent, resource_size_t start, resource_size_t n, const char *name) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index c919866853a0..5404efa578a3 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -98,11 +98,13 @@ enum { NUM_PM = 3, NUM_DCR = 5, + 
NUM_HINTS = 8, NUM_BDW = NUM_DCR, NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW, NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */, DIMM_SIZE = SZ_32M, LABEL_SIZE = SZ_128K, + SPA_VCD_SIZE = SZ_4M, SPA0_SIZE = DIMM_SIZE, SPA1_SIZE = DIMM_SIZE*2, SPA2_SIZE = DIMM_SIZE, @@ -470,11 +472,7 @@ static void release_nfit_res(void *data) list_del(&nfit_res->list); spin_unlock(&nfit_test_lock); - if (is_vmalloc_addr(nfit_res->buf)) - vfree(nfit_res->buf); - else - dma_free_coherent(nfit_res->dev, resource_size(res), - nfit_res->buf, res->start); + vfree(nfit_res->buf); kfree(res); kfree(nfit_res); } @@ -507,9 +505,7 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma, return nfit_res->buf; err: - if (buf && !is_vmalloc_addr(buf)) - dma_free_coherent(dev, size, buf, *dma); - else if (buf) + if (buf) vfree(buf); kfree(res); kfree(nfit_res); @@ -524,15 +520,6 @@ static void *test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma) return __test_alloc(t, size, dma, buf); } -static void *test_alloc_coherent(struct nfit_test *t, size_t size, - dma_addr_t *dma) -{ - struct device *dev = &t->pdev.dev; - void *buf = dma_alloc_coherent(dev, size, dma, GFP_KERNEL); - - return __test_alloc(t, size, dma, buf); -} - static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr) { int i; @@ -584,7 +571,8 @@ static int nfit_test0_alloc(struct nfit_test *t) + offsetof(struct acpi_nfit_control_region, window_size) * NUM_DCR + sizeof(struct acpi_nfit_data_region) * NUM_BDW - + sizeof(struct acpi_nfit_flush_address) * NUM_DCR; + + (sizeof(struct acpi_nfit_flush_address) + + sizeof(u64) * NUM_HINTS) * NUM_DCR; int i; t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); @@ -592,15 +580,15 @@ static int nfit_test0_alloc(struct nfit_test *t) return -ENOMEM; t->nfit_size = nfit_size; - t->spa_set[0] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[0]); + t->spa_set[0] = test_alloc(t, SPA0_SIZE, &t->spa_set_dma[0]); if (!t->spa_set[0]) return -ENOMEM; - t->spa_set[1] = test_alloc_coherent(t, SPA1_SIZE, &t->spa_set_dma[1]); + t->spa_set[1] = test_alloc(t, SPA1_SIZE, &t->spa_set_dma[1]); if (!t->spa_set[1]) return -ENOMEM; - t->spa_set[2] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[2]); + t->spa_set[2] = test_alloc(t, SPA0_SIZE, &t->spa_set_dma[2]); if (!t->spa_set[2]) return -ENOMEM; @@ -614,7 +602,8 @@ static int nfit_test0_alloc(struct nfit_test *t) return -ENOMEM; sprintf(t->label[i], "label%d", i); - t->flush[i] = test_alloc(t, 8, &t->flush_dma[i]); + t->flush[i] = test_alloc(t, sizeof(u64) * NUM_HINTS, + &t->flush_dma[i]); if (!t->flush[i]) return -ENOMEM; } @@ -630,7 +619,7 @@ static int nfit_test0_alloc(struct nfit_test *t) static int nfit_test1_alloc(struct nfit_test *t) { - size_t nfit_size = sizeof(struct acpi_nfit_system_address) + size_t nfit_size = sizeof(struct acpi_nfit_system_address) * 2 + sizeof(struct acpi_nfit_memory_map) + offsetof(struct acpi_nfit_control_region, window_size); @@ -639,15 +628,31 @@ static int nfit_test1_alloc(struct nfit_test *t) return -ENOMEM; t->nfit_size = nfit_size; - t->spa_set[0] = test_alloc_coherent(t, SPA2_SIZE, &t->spa_set_dma[0]); + t->spa_set[0] = test_alloc(t, SPA2_SIZE, &t->spa_set_dma[0]); if (!t->spa_set[0]) return -ENOMEM; + t->spa_set[1] = test_alloc(t, SPA_VCD_SIZE, &t->spa_set_dma[1]); + if (!t->spa_set[1]) + return -ENOMEM; + return ars_state_init(&t->pdev.dev, &t->ars_state); } +static void dcr_common_init(struct acpi_nfit_control_region *dcr) +{ + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + 
dcr->revision_id = 1; + dcr->valid_fields = 1; + dcr->manufacturing_location = 0xa; + dcr->manufacturing_date = cpu_to_be16(2016); +} + static void nfit_test0_setup(struct nfit_test *t) { + const int flush_hint_size = sizeof(struct acpi_nfit_flush_address) + + (sizeof(u64) * NUM_HINTS); struct acpi_nfit_desc *acpi_desc; struct acpi_nfit_memory_map *memdev; void *nfit_buf = t->nfit_buf; @@ -655,7 +660,7 @@ static void nfit_test0_setup(struct nfit_test *t) struct acpi_nfit_control_region *dcr; struct acpi_nfit_data_region *bdw; struct acpi_nfit_flush_address *flush; - unsigned int offset; + unsigned int offset, i; /* * spa0 (interleave first half of dimm0 and dimm1, note storage @@ -972,9 +977,7 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = sizeof(struct acpi_nfit_control_region); dcr->region_index = 0+1; - dcr->vendor_id = 0xabcd; - dcr->device_id = 0; - dcr->revision_id = 1; + dcr_common_init(dcr); dcr->serial_number = ~handle[0]; dcr->code = NFIT_FIC_BLK; dcr->windows = 1; @@ -989,9 +992,7 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = sizeof(struct acpi_nfit_control_region); dcr->region_index = 1+1; - dcr->vendor_id = 0xabcd; - dcr->device_id = 0; - dcr->revision_id = 1; + dcr_common_init(dcr); dcr->serial_number = ~handle[1]; dcr->code = NFIT_FIC_BLK; dcr->windows = 1; @@ -1006,9 +1007,7 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = sizeof(struct acpi_nfit_control_region); dcr->region_index = 2+1; - dcr->vendor_id = 0xabcd; - dcr->device_id = 0; - dcr->revision_id = 1; + dcr_common_init(dcr); dcr->serial_number = ~handle[2]; dcr->code = NFIT_FIC_BLK; dcr->windows = 1; @@ -1023,9 +1022,7 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = sizeof(struct acpi_nfit_control_region); dcr->region_index = 3+1; - dcr->vendor_id = 0xabcd; - dcr->device_id = 0; - dcr->revision_id = 1; + dcr_common_init(dcr); dcr->serial_number = ~handle[3]; dcr->code = NFIT_FIC_BLK; dcr->windows = 1; @@ -1042,9 +1039,7 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->header.length = offsetof(struct acpi_nfit_control_region, window_size); dcr->region_index = 4+1; - dcr->vendor_id = 0xabcd; - dcr->device_id = 0; - dcr->revision_id = 1; + dcr_common_init(dcr); dcr->serial_number = ~handle[0]; dcr->code = NFIT_FIC_BYTEN; dcr->windows = 0; @@ -1056,9 +1051,7 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->header.length = offsetof(struct acpi_nfit_control_region, window_size); dcr->region_index = 5+1; - dcr->vendor_id = 0xabcd; - dcr->device_id = 0; - dcr->revision_id = 1; + dcr_common_init(dcr); dcr->serial_number = ~handle[1]; dcr->code = NFIT_FIC_BYTEN; dcr->windows = 0; @@ -1070,9 +1063,7 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->header.length = offsetof(struct acpi_nfit_control_region, window_size); dcr->region_index = 6+1; - dcr->vendor_id = 0xabcd; - dcr->device_id = 0; - dcr->revision_id = 1; + dcr_common_init(dcr); dcr->serial_number = ~handle[2]; dcr->code = NFIT_FIC_BYTEN; dcr->windows = 0; @@ -1084,9 +1075,7 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->header.length = offsetof(struct acpi_nfit_control_region, window_size); dcr->region_index = 7+1; - dcr->vendor_id = 0xabcd; - dcr->device_id = 0; - dcr->revision_id = 1; + dcr_common_init(dcr); dcr->serial_number 
= ~handle[3]; dcr->code = NFIT_FIC_BYTEN; dcr->windows = 0; @@ -1141,45 +1130,47 @@ static void nfit_test0_setup(struct nfit_test *t) /* flush0 (dimm0) */ flush = nfit_buf + offset; flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; - flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->header.length = flush_hint_size; flush->device_handle = handle[0]; - flush->hint_count = 1; - flush->hint_address[0] = t->flush_dma[0]; + flush->hint_count = NUM_HINTS; + for (i = 0; i < NUM_HINTS; i++) + flush->hint_address[i] = t->flush_dma[0] + i * sizeof(u64); /* flush1 (dimm1) */ - flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 1; + flush = nfit_buf + offset + flush_hint_size * 1; flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; - flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->header.length = flush_hint_size; flush->device_handle = handle[1]; - flush->hint_count = 1; - flush->hint_address[0] = t->flush_dma[1]; + flush->hint_count = NUM_HINTS; + for (i = 0; i < NUM_HINTS; i++) + flush->hint_address[i] = t->flush_dma[1] + i * sizeof(u64); /* flush2 (dimm2) */ - flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 2; + flush = nfit_buf + offset + flush_hint_size * 2; flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; - flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->header.length = flush_hint_size; flush->device_handle = handle[2]; - flush->hint_count = 1; - flush->hint_address[0] = t->flush_dma[2]; + flush->hint_count = NUM_HINTS; + for (i = 0; i < NUM_HINTS; i++) + flush->hint_address[i] = t->flush_dma[2] + i * sizeof(u64); /* flush3 (dimm3) */ - flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 3; + flush = nfit_buf + offset + flush_hint_size * 3; flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; - flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->header.length = flush_hint_size; flush->device_handle = handle[3]; - flush->hint_count = 1; - flush->hint_address[0] = t->flush_dma[3]; + flush->hint_count = NUM_HINTS; + for (i = 0; i < NUM_HINTS; i++) + flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64); if (t->setup_hotplug) { - offset = offset + sizeof(struct acpi_nfit_flush_address) * 4; + offset = offset + flush_hint_size * 4; /* dcr-descriptor4: blk */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = sizeof(struct acpi_nfit_control_region); dcr->region_index = 8+1; - dcr->vendor_id = 0xabcd; - dcr->device_id = 0; - dcr->revision_id = 1; + dcr_common_init(dcr); dcr->serial_number = ~handle[4]; dcr->code = NFIT_FIC_BLK; dcr->windows = 1; @@ -1196,9 +1187,7 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->header.length = offsetof(struct acpi_nfit_control_region, window_size); dcr->region_index = 9+1; - dcr->vendor_id = 0xabcd; - dcr->device_id = 0; - dcr->revision_id = 1; + dcr_common_init(dcr); dcr->serial_number = ~handle[4]; dcr->code = NFIT_FIC_BYTEN; dcr->windows = 0; @@ -1300,10 +1289,12 @@ static void nfit_test0_setup(struct nfit_test *t) /* flush3 (dimm4) */ flush = nfit_buf + offset; flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; - flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->header.length = flush_hint_size; flush->device_handle = handle[4]; - flush->hint_count = 1; - flush->hint_address[0] = t->flush_dma[4]; + flush->hint_count = NUM_HINTS; + for (i = 0; i < NUM_HINTS; i++) + flush->hint_address[i] = t->flush_dma[4] + + i * sizeof(u64); } 
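
The flush-hint rework above turns each ACPI flush address structure into a variable-length entry: a fixed header followed by NUM_HINTS 64-bit hint addresses, so both header.length and the stride to the next entry become flush_hint_size. A small sketch of that sizing with a simplified layout (field widths are illustrative, not the exact ACPICA definition):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define NUM_HINTS 8

struct flush_address {
	uint16_t type;
	uint16_t length;
	uint32_t device_handle;
	uint16_t hint_count;
	uint16_t reserved[3];
	uint64_t hint_address[];	/* NUM_HINTS entries follow the header */
};

int main(void)
{
	size_t flush_hint_size = sizeof(struct flush_address) +
				 sizeof(uint64_t) * NUM_HINTS;

	/* the entry for dimm N starts at table_offset + N * flush_hint_size */
	printf("header bytes:        %zu\n",
	       offsetof(struct flush_address, hint_address));
	printf("per-dimm entry size: %zu\n", flush_hint_size);
	return 0;
}
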
post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA0_SIZE); @@ -1339,7 +1330,16 @@ static void nfit_test1_setup(struct nfit_test *t) spa->address = t->spa_set_dma[0]; spa->length = SPA2_SIZE; - offset += sizeof(*spa); + /* virtual cd region */ + spa = nfit_buf + sizeof(*spa); + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16); + spa->range_index = 0; + spa->address = t->spa_set_dma[1]; + spa->length = SPA_VCD_SIZE; + + offset += sizeof(*spa) * 2; /* mem-region0 (spa0, dimm0) */ memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; @@ -1365,9 +1365,7 @@ static void nfit_test1_setup(struct nfit_test *t) dcr->header.length = offsetof(struct acpi_nfit_control_region, window_size); dcr->region_index = 0+1; - dcr->vendor_id = 0xabcd; - dcr->device_id = 0; - dcr->revision_id = 1; + dcr_common_init(dcr); dcr->serial_number = ~0; dcr->code = NFIT_FIC_BYTE; dcr->windows = 0; @@ -1462,20 +1460,16 @@ static int nfit_test_probe(struct platform_device *pdev) nfit_test->setup(nfit_test); acpi_desc = &nfit_test->acpi_desc; acpi_nfit_desc_init(acpi_desc, &pdev->dev); - acpi_desc->nfit = nfit_test->nfit_buf; acpi_desc->blk_do_io = nfit_test_blk_do_io; nd_desc = &acpi_desc->nd_desc; nd_desc->provider_name = NULL; + nd_desc->module = THIS_MODULE; nd_desc->ndctl = nfit_test_ctl; - acpi_desc->nvdimm_bus = nvdimm_bus_register(&pdev->dev, nd_desc); - if (!acpi_desc->nvdimm_bus) - return -ENXIO; - rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size); - if (rc) { - nvdimm_bus_unregister(acpi_desc->nvdimm_bus); + rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf, + nfit_test->nfit_size); + if (rc) return rc; - } if (nfit_test->setup != nfit_test0_setup) return 0; @@ -1483,22 +1477,16 @@ static int nfit_test_probe(struct platform_device *pdev) nfit_test->setup_hotplug = 1; nfit_test->setup(nfit_test); - rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size); - if (rc) { - nvdimm_bus_unregister(acpi_desc->nvdimm_bus); + rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf, + nfit_test->nfit_size); + if (rc) return rc; - } return 0; } static int nfit_test_remove(struct platform_device *pdev) { - struct nfit_test *nfit_test = to_nfit_test(&pdev->dev); - struct acpi_nfit_desc *acpi_desc = &nfit_test->acpi_desc; - - nvdimm_bus_unregister(acpi_desc->nvdimm_bus); - return 0; } @@ -1523,12 +1511,6 @@ static struct platform_driver nfit_test_driver = { .id_table = nfit_test_id, }; -#ifdef CONFIG_CMA_SIZE_MBYTES -#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES -#else -#define CMA_SIZE_MBYTES 0 -#endif - static __init int nfit_test_init(void) { int rc, i; @@ -1538,7 +1520,6 @@ static __init int nfit_test_init(void) for (i = 0; i < NUM_NFITS; i++) { struct nfit_test *nfit_test; struct platform_device *pdev; - static int once; nfit_test = kzalloc(sizeof(*nfit_test), GFP_KERNEL); if (!nfit_test) { @@ -1577,20 +1558,6 @@ static __init int nfit_test_init(void) goto err_register; instances[i] = nfit_test; - - if (!once++) { - dma_addr_t dma; - void *buf; - - buf = dma_alloc_coherent(&pdev->dev, SZ_128M, &dma, - GFP_KERNEL); - if (!buf) { - rc = -ENOMEM; - dev_warn(&pdev->dev, "need 128M of free cma\n"); - goto err_register; - } - dma_free_coherent(&pdev->dev, SZ_128M, buf, dma); - } } rc = platform_driver_register(&nfit_test_driver); diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h index 96c5e16d7db9..9f18e2a4a862 100644 --- a/tools/testing/nvdimm/test/nfit_test.h +++ 
b/tools/testing/nvdimm/test/nfit_test.h @@ -12,6 +12,7 @@ */ #ifndef __NFIT_TEST_H__ #define __NFIT_TEST_H__ +#include <linux/list.h> struct nfit_test_resource { struct list_head list; @@ -26,4 +27,5 @@ void __iomem *__wrap_ioremap_nocache(resource_size_t offset, void __wrap_iounmap(volatile void __iomem *addr); void nfit_test_setup(nfit_test_lookup_fn lookup); void nfit_test_teardown(void); +struct nfit_test_resource *get_nfit_res(resource_size_t resource); #endif diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c index b7447ceb75e9..b0ac05741750 100644 --- a/tools/testing/radix-tree/tag_check.c +++ b/tools/testing/radix-tree/tag_check.c @@ -122,7 +122,7 @@ enum { NODE_TAGGED = 2, }; -#define THRASH_SIZE 1000 * 1000 +#define THRASH_SIZE (1000 * 1000) #define N 127 #define BATCH 33 diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh index b325470c01b3..1426a9b97494 100644 --- a/tools/testing/selftests/rcutorture/bin/functions.sh +++ b/tools/testing/selftests/rcutorture/bin/functions.sh @@ -99,8 +99,9 @@ configfrag_hotplug_cpu () { # identify_boot_image qemu-cmd # # Returns the relative path to the kernel build image. This will be -# arch/<arch>/boot/bzImage unless overridden with the TORTURE_BOOT_IMAGE -# environment variable. +# arch/<arch>/boot/bzImage or vmlinux if bzImage is not a target for the +# architecture, unless overridden with the TORTURE_BOOT_IMAGE environment +# variable. identify_boot_image () { if test -n "$TORTURE_BOOT_IMAGE" then @@ -110,11 +111,8 @@ identify_boot_image () { qemu-system-x86_64|qemu-system-i386) echo arch/x86/boot/bzImage ;; - qemu-system-ppc64) - echo arch/powerpc/boot/bzImage - ;; *) - echo "" + echo vmlinux ;; esac fi @@ -175,7 +173,7 @@ identify_qemu_args () { qemu-system-x86_64|qemu-system-i386) ;; qemu-system-ppc64) - echo -enable-kvm -M pseries -cpu POWER7 -nodefaults + echo -enable-kvm -M pseries -nodefaults echo -device spapr-vscsi if test -n "$TORTURE_QEMU_INTERACTIVE" -a -n "$TORTURE_QEMU_MAC" then diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index 4109f306d855..ea6e373edc27 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -8,9 +8,9 @@ # # Usage: kvm-test-1-run.sh config builddir resdir seconds qemu-args boot_args # -# qemu-args defaults to "-enable-kvm -soundhw pcspk -nographic", along with -# arguments specifying the number of CPUs and other -# options generated from the underlying CPU architecture. +# qemu-args defaults to "-enable-kvm -nographic", along with arguments +# specifying the number of CPUs and other options +# generated from the underlying CPU architecture. # boot_args defaults to value returned by the per_version_boot_params # shell function. # @@ -96,7 +96,8 @@ if test "$base_resdir" != "$resdir" -a -f $base_resdir/bzImage -a -f $base_resdi then # Rerunning previous test, so use that test's kernel. 
QEMU="`identify_qemu $base_resdir/vmlinux`" - KERNEL=$base_resdir/bzImage + BOOT_IMAGE="`identify_boot_image $QEMU`" + KERNEL=$base_resdir/${BOOT_IMAGE##*/} # use the last component of ${BOOT_IMAGE} ln -s $base_resdir/Make*.out $resdir # for kvm-recheck.sh ln -s $base_resdir/.config $resdir # for kvm-recheck.sh elif kvm-build.sh $config_template $builddir $T @@ -110,7 +111,7 @@ then if test -n "$BOOT_IMAGE" then cp $builddir/$BOOT_IMAGE $resdir - KERNEL=$resdir/bzImage + KERNEL=$resdir/${BOOT_IMAGE##*/} else echo No identifiable boot image, not running KVM, see $resdir. echo Do the torture scripts know about your architecture? @@ -147,7 +148,7 @@ then fi # Generate -smp qemu argument. -qemu_args="-enable-kvm -soundhw pcspk -nographic $qemu_args" +qemu_args="-enable-kvm -nographic $qemu_args" cpu_count=`configNR_CPUS.sh $config_template` cpu_count=`configfrag_boot_cpus "$boot_args" "$config_template" "$cpu_count"` vcpus=`identify_qemu_vcpus` @@ -229,6 +230,7 @@ fi if test $commandcompleted -eq 0 -a -n "$qemu_pid" then echo Grace period for qemu job at pid $qemu_pid + oldline="`tail $resdir/console.log`" while : do kruntime=`awk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null` @@ -238,13 +240,29 @@ then else break fi - if test $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE)) + must_continue=no + newline="`tail $resdir/console.log`" + if test "$newline" != "$oldline" && echo $newline | grep -q ' [0-9]\+us : ' + then + must_continue=yes + fi + last_ts="`tail $resdir/console.log | grep '^\[ *[0-9]\+\.[0-9]\+]' | tail -1 | sed -e 's/^\[ *//' -e 's/\..*$//'`" + if test -z "last_ts" + then + last_ts=0 + fi + if test "$newline" != "$oldline" -a "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE)) + then + must_continue=yes + fi + if test $must_continue = no -a $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE)) then echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1 kill -KILL $qemu_pid break fi - sleep 1 + oldline=$newline + sleep 10 done elif test -z "$qemu_pid" then diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 0d598145873e..0aed965f0062 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -48,7 +48,7 @@ resdir="" configs="" cpus=0 ds=`date +%Y.%m.%d-%H:%M:%S` -jitter=0 +jitter="-1" . functions.sh diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index 5eb49b7f864c..08aa7d50ae0e 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -33,7 +33,7 @@ if grep -Pq '\x00' < $file then print_warning Console output contains nul bytes, old qemu still running? fi -egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $1.diags +egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? 
Writer stall state|rcu_.*kthread starved for' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $1.diags if test -s $1.diags then print_warning Assertion failure in $file $title @@ -69,6 +69,11 @@ then then summary="$summary Stalls: $n_stalls" fi + n_starves=`grep -c 'rcu_.*kthread starved for' $1` + if test "$n_starves" -ne 0 + then + summary="$summary Starves: $n_starves" + fi print_warning Summary: $summary else rm $1.diags diff --git a/tools/testing/selftests/rcutorture/doc/initrd.txt b/tools/testing/selftests/rcutorture/doc/initrd.txt index 4170e714f044..833f826d6ec2 100644 --- a/tools/testing/selftests/rcutorture/doc/initrd.txt +++ b/tools/testing/selftests/rcutorture/doc/initrd.txt @@ -13,6 +13,22 @@ cd initrd cpio -id < /tmp/initrd.img.zcat ------------------------------------------------------------------------ +Another way to create an initramfs image is using "dracut"[1], which is +available on many distros, however the initramfs dracut generates is a cpio +archive with another cpio archive in it, so an extra step is needed to create +the initrd directory hierarchy. + +Here are the commands to create a initrd directory for rcutorture using +dracut: + +------------------------------------------------------------------------ +dracut --no-hostonly --no-hostonly-cmdline --module "base bash shutdown" /tmp/initramfs.img +cd tools/testing/selftests/rcutorture +mkdir initrd +cd initrd +/usr/lib/dracut/skipcpio /tmp/initramfs.img | zcat | cpio -id < /tmp/initramfs.img +------------------------------------------------------------------------ + Interestingly enough, if you are running rcutorture, you don't really need userspace in many cases. Running without userspace has the advantage of allowing you to test your kernel independently of the @@ -89,3 +105,9 @@ while : do sleep 10 done +------------------------------------------------------------------------ + +References: +[1]: https://dracut.wiki.kernel.org/index.php/Main_Page +[2]: http://blog.elastocloud.org/2015/06/rapid-linux-kernel-devtest-with-qemu.html +[3]: https://www.centos.org/forums/viewtopic.php?t=51621 diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 2e58549b2f02..03f1fa495d74 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1021,8 +1021,8 @@ void tracer_stop(int sig) typedef void tracer_func_t(struct __test_metadata *_metadata, pid_t tracee, int status, void *args); -void tracer(struct __test_metadata *_metadata, int fd, pid_t tracee, - tracer_func_t tracer_func, void *args) +void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee, + tracer_func_t tracer_func, void *args, bool ptrace_syscall) { int ret = -1; struct sigaction action = { @@ -1042,12 +1042,16 @@ void tracer(struct __test_metadata *_metadata, int fd, pid_t tracee, /* Wait for attach stop */ wait(NULL); - ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, PTRACE_O_TRACESECCOMP); + ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ? + PTRACE_O_TRACESYSGOOD : + PTRACE_O_TRACESECCOMP); ASSERT_EQ(0, ret) { TH_LOG("Failed to set PTRACE_O_TRACESECCOMP"); kill(tracee, SIGKILL); } - ptrace(PTRACE_CONT, tracee, NULL, 0); + ret = ptrace(ptrace_syscall ? 
PTRACE_SYSCALL : PTRACE_CONT, + tracee, NULL, 0); + ASSERT_EQ(0, ret); /* Unblock the tracee */ ASSERT_EQ(1, write(fd, "A", 1)); @@ -1063,12 +1067,13 @@ void tracer(struct __test_metadata *_metadata, int fd, pid_t tracee, /* Child is dead. Time to go. */ return; - /* Make sure this is a seccomp event. */ - ASSERT_EQ(true, IS_SECCOMP_EVENT(status)); + /* Check if this is a seccomp event. */ + ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status)); tracer_func(_metadata, tracee, status, args); - ret = ptrace(PTRACE_CONT, tracee, NULL, NULL); + ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT, + tracee, NULL, 0); ASSERT_EQ(0, ret); } /* Directly report the status of our test harness results. */ @@ -1079,7 +1084,7 @@ void tracer(struct __test_metadata *_metadata, int fd, pid_t tracee, void cont_handler(int num) { } pid_t setup_trace_fixture(struct __test_metadata *_metadata, - tracer_func_t func, void *args) + tracer_func_t func, void *args, bool ptrace_syscall) { char sync; int pipefd[2]; @@ -1095,7 +1100,8 @@ pid_t setup_trace_fixture(struct __test_metadata *_metadata, signal(SIGALRM, cont_handler); if (tracer_pid == 0) { close(pipefd[0]); - tracer(_metadata, pipefd[1], tracee, func, args); + start_tracer(_metadata, pipefd[1], tracee, func, args, + ptrace_syscall); syscall(__NR_exit, 0); } close(pipefd[1]); @@ -1177,7 +1183,7 @@ FIXTURE_SETUP(TRACE_poke) /* Launch tracer. */ self->tracer = setup_trace_fixture(_metadata, tracer_poke, - &self->tracer_args); + &self->tracer_args, false); } FIXTURE_TEARDOWN(TRACE_poke) @@ -1399,6 +1405,29 @@ void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee, } +void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, + int status, void *args) +{ + int ret, nr; + unsigned long msg; + static bool entry; + + /* Make sure we got an empty message. */ + ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, msg); + + /* The only way to tell PTRACE_SYSCALL entry/exit is by counting. */ + entry = !entry; + if (!entry) + return; + + nr = get_syscall(_metadata, tracee); + + if (nr == __NR_getpid) + change_syscall(_metadata, tracee, __NR_getppid); +} + FIXTURE_DATA(TRACE_syscall) { struct sock_fprog prog; pid_t tracer, mytid, mypid, parent; @@ -1440,7 +1469,8 @@ FIXTURE_SETUP(TRACE_syscall) ASSERT_NE(self->parent, self->mypid); /* Launch tracer. */ - self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL); + self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL, + false); } FIXTURE_TEARDOWN(TRACE_syscall) @@ -1500,6 +1530,130 @@ TEST_F(TRACE_syscall, syscall_dropped) EXPECT_NE(self->mytid, syscall(__NR_gettid)); } +TEST_F(TRACE_syscall, skip_after_RET_TRACE) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + /* Install fixture filter. */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + /* Install "errno on getppid" filter. */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_EQ(0, ret); + + /* Tracer will redirect getpid to getppid, and we should see EPERM. 
*/ + EXPECT_EQ(-1, syscall(__NR_getpid)); + EXPECT_EQ(EPERM, errno); +} + +TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + /* Install fixture filter. */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + /* Install "death on getppid" filter. */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_EQ(0, ret); + + /* Tracer will redirect getpid to getppid, and we should die. */ + EXPECT_NE(self->mypid, syscall(__NR_getpid)); +} + +TEST_F(TRACE_syscall, skip_after_ptrace) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ + teardown_trace_fixture(_metadata, self->tracer); + self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, + true); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + /* Install "errno on getppid" filter. */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_EQ(0, ret); + + /* Tracer will redirect getpid to getppid, and we should see EPERM. */ + EXPECT_EQ(-1, syscall(__NR_getpid)); + EXPECT_EQ(EPERM, errno); +} + +TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ + teardown_trace_fixture(_metadata, self->tracer); + self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, + true); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + /* Install "death on getppid" filter. */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_EQ(0, ret); + + /* Tracer will redirect getpid to getppid, and we should die. 
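
The "errno on getppid" program these tests install is small enough to try on its own. Below is a standalone sketch that loads the same four-instruction filter and observes the EPERM result; it is Linux-only, error handling is trimmed, and the main() wrapper is not part of the selftest itself.

#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <linux/filter.h>
#include <linux/seccomp.h>

int main(void)
{
	/* fail getppid() with EPERM, allow everything else */
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
			 offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_getppid, 0, 1),
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | EPERM),
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)(sizeof(filter) / sizeof(filter[0])),
		.filter = filter,
	};

	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
	    prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0))
		return 1;

	errno = 0;
	printf("getppid() = %ld, errno = %d (EPERM = %d)\n",
	       syscall(__NR_getppid), errno, EPERM);
	return 0;
}
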
*/ + EXPECT_NE(self->mypid, syscall(__NR_getpid)); +} + #ifndef __NR_seccomp # if defined(__i386__) # define __NR_seccomp 354 diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index c73425de3cfe..4f747ee07f10 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -4,8 +4,8 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean -TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall \ - check_initial_reg_state sigreturn ldt_gdt iopl +TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \ + check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer diff --git a/tools/testing/selftests/x86/mpx-debug.h b/tools/testing/selftests/x86/mpx-debug.h new file mode 100644 index 000000000000..9230981f2e12 --- /dev/null +++ b/tools/testing/selftests/x86/mpx-debug.h @@ -0,0 +1,14 @@ +#ifndef _MPX_DEBUG_H +#define _MPX_DEBUG_H + +#ifndef DEBUG_LEVEL +#define DEBUG_LEVEL 0 +#endif +#define dprintf_level(level, args...) do { if(level <= DEBUG_LEVEL) printf(args); } while(0) +#define dprintf1(args...) dprintf_level(1, args) +#define dprintf2(args...) dprintf_level(2, args) +#define dprintf3(args...) dprintf_level(3, args) +#define dprintf4(args...) dprintf_level(4, args) +#define dprintf5(args...) dprintf_level(5, args) + +#endif /* _MPX_DEBUG_H */ diff --git a/tools/testing/selftests/x86/mpx-dig.c b/tools/testing/selftests/x86/mpx-dig.c new file mode 100644 index 000000000000..ce85356d7e2e --- /dev/null +++ b/tools/testing/selftests/x86/mpx-dig.c @@ -0,0 +1,498 @@ +/* + * Written by Dave Hansen <dave.hansen@intel.com> + */ + +#include <stdlib.h> +#include <sys/types.h> +#include <unistd.h> +#include <stdio.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <sys/mman.h> +#include <string.h> +#include <fcntl.h> +#include "mpx-debug.h" +#include "mpx-mm.h" +#include "mpx-hw.h" + +unsigned long bounds_dir_global; + +#define mpx_dig_abort() __mpx_dig_abort(__FILE__, __func__, __LINE__) +static void inline __mpx_dig_abort(const char *file, const char *func, int line) +{ + fprintf(stderr, "MPX dig abort @ %s::%d in %s()\n", file, line, func); + printf("MPX dig abort @ %s::%d in %s()\n", file, line, func); + abort(); +} + +/* + * run like this (BDIR finds the probably bounds directory): + * + * BDIR="$(cat /proc/$pid/smaps | grep -B1 2097152 \ + * | head -1 | awk -F- '{print $1}')"; + * ./mpx-dig $pid 0x$BDIR + * + * NOTE: + * assumes that the only 2097152-kb VMA is the bounds dir + */ + +long nr_incore(void *ptr, unsigned long size_bytes) +{ + int i; + long ret = 0; + long vec_len = size_bytes / PAGE_SIZE; + unsigned char *vec = malloc(vec_len); + int incore_ret; + + if (!vec) + mpx_dig_abort(); + + incore_ret = mincore(ptr, size_bytes, vec); + if (incore_ret) { + printf("mincore ret: %d\n", incore_ret); + perror("mincore"); + mpx_dig_abort(); + } + for (i = 0; i < vec_len; i++) + ret += vec[i]; + free(vec); + return ret; +} + +int open_proc(int pid, char *file) +{ + static char buf[100]; + int fd; + + snprintf(&buf[0], sizeof(buf), "/proc/%d/%s", pid, file); + fd = open(&buf[0], O_RDONLY); + if (fd < 0) + perror(buf); + + return fd; +} + +struct vaddr_range { + unsigned long start; + unsigned long end; +}; +struct vaddr_range *ranges; +int 
nr_ranges_allocated; +int nr_ranges_populated; +int last_range = -1; + +int __pid_load_vaddrs(int pid) +{ + int ret = 0; + int proc_maps_fd = open_proc(pid, "maps"); + char linebuf[10000]; + unsigned long start; + unsigned long end; + char rest[1000]; + FILE *f = fdopen(proc_maps_fd, "r"); + + if (!f) + mpx_dig_abort(); + nr_ranges_populated = 0; + while (!feof(f)) { + char *readret = fgets(linebuf, sizeof(linebuf), f); + int parsed; + + if (readret == NULL) { + if (feof(f)) + break; + mpx_dig_abort(); + } + + parsed = sscanf(linebuf, "%lx-%lx%s", &start, &end, rest); + if (parsed != 3) + mpx_dig_abort(); + + dprintf4("result[%d]: %lx-%lx<->%s\n", parsed, start, end, rest); + if (nr_ranges_populated >= nr_ranges_allocated) { + ret = -E2BIG; + break; + } + ranges[nr_ranges_populated].start = start; + ranges[nr_ranges_populated].end = end; + nr_ranges_populated++; + } + last_range = -1; + fclose(f); + close(proc_maps_fd); + return ret; +} + +int pid_load_vaddrs(int pid) +{ + int ret; + + dprintf2("%s(%d)\n", __func__, pid); + if (!ranges) { + nr_ranges_allocated = 4; + ranges = malloc(nr_ranges_allocated * sizeof(ranges[0])); + dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__, pid, + nr_ranges_allocated, ranges); + assert(ranges != NULL); + } + do { + ret = __pid_load_vaddrs(pid); + if (!ret) + break; + if (ret == -E2BIG) { + dprintf2("%s(%d) need to realloc\n", __func__, pid); + nr_ranges_allocated *= 2; + ranges = realloc(ranges, + nr_ranges_allocated * sizeof(ranges[0])); + dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__, + pid, nr_ranges_allocated, ranges); + assert(ranges != NULL); + dprintf1("reallocating to hold %d ranges\n", nr_ranges_allocated); + } + } while (1); + + dprintf2("%s(%d) done\n", __func__, pid); + + return ret; +} + +static inline int vaddr_in_range(unsigned long vaddr, struct vaddr_range *r) +{ + if (vaddr < r->start) + return 0; + if (vaddr >= r->end) + return 0; + return 1; +} + +static inline int vaddr_mapped_by_range(unsigned long vaddr) +{ + int i; + + if (last_range > 0 && vaddr_in_range(vaddr, &ranges[last_range])) + return 1; + + for (i = 0; i < nr_ranges_populated; i++) { + struct vaddr_range *r = &ranges[i]; + + if (vaddr_in_range(vaddr, r)) + continue; + last_range = i; + return 1; + } + return 0; +} + +const int bt_entry_size_bytes = sizeof(unsigned long) * 4; + +void *read_bounds_table_into_buf(unsigned long table_vaddr) +{ +#ifdef MPX_DIG_STANDALONE + static char bt_buf[MPX_BOUNDS_TABLE_SIZE_BYTES]; + off_t seek_ret = lseek(fd, table_vaddr, SEEK_SET); + if (seek_ret != table_vaddr) + mpx_dig_abort(); + + int read_ret = read(fd, &bt_buf, sizeof(bt_buf)); + if (read_ret != sizeof(bt_buf)) + mpx_dig_abort(); + return &bt_buf; +#else + return (void *)table_vaddr; +#endif +} + +int dump_table(unsigned long table_vaddr, unsigned long base_controlled_vaddr, + unsigned long bde_vaddr) +{ + unsigned long offset_inside_bt; + int nr_entries = 0; + int do_abort = 0; + char *bt_buf; + + dprintf3("%s() base_controlled_vaddr: 0x%012lx bde_vaddr: 0x%012lx\n", + __func__, base_controlled_vaddr, bde_vaddr); + + bt_buf = read_bounds_table_into_buf(table_vaddr); + + dprintf4("%s() read done\n", __func__); + + for (offset_inside_bt = 0; + offset_inside_bt < MPX_BOUNDS_TABLE_SIZE_BYTES; + offset_inside_bt += bt_entry_size_bytes) { + unsigned long bt_entry_index; + unsigned long bt_entry_controls; + unsigned long this_bt_entry_for_vaddr; + unsigned long *bt_entry_buf; + int i; + + dprintf4("%s() offset_inside_bt: 0x%lx of 0x%llx\n", __func__, + offset_inside_bt, 
MPX_BOUNDS_TABLE_SIZE_BYTES); + bt_entry_buf = (void *)&bt_buf[offset_inside_bt]; + if (!bt_buf) { + printf("null bt_buf\n"); + mpx_dig_abort(); + } + if (!bt_entry_buf) { + printf("null bt_entry_buf\n"); + mpx_dig_abort(); + } + dprintf4("%s() reading *bt_entry_buf @ %p\n", __func__, + bt_entry_buf); + if (!bt_entry_buf[0] && + !bt_entry_buf[1] && + !bt_entry_buf[2] && + !bt_entry_buf[3]) + continue; + + nr_entries++; + + bt_entry_index = offset_inside_bt/bt_entry_size_bytes; + bt_entry_controls = sizeof(void *); + this_bt_entry_for_vaddr = + base_controlled_vaddr + bt_entry_index*bt_entry_controls; + /* + * We sign extend vaddr bits 48->63 which effectively + * creates a hole in the virtual address space. + * This calculation corrects for the hole. + */ + if (this_bt_entry_for_vaddr > 0x00007fffffffffffUL) + this_bt_entry_for_vaddr |= 0xffff800000000000; + + if (!vaddr_mapped_by_range(this_bt_entry_for_vaddr)) { + printf("bt_entry_buf: %p\n", bt_entry_buf); + printf("there is a bte for %lx but no mapping\n", + this_bt_entry_for_vaddr); + printf(" bde vaddr: %016lx\n", bde_vaddr); + printf("base_controlled_vaddr: %016lx\n", base_controlled_vaddr); + printf(" table_vaddr: %016lx\n", table_vaddr); + printf(" entry vaddr: %016lx @ offset %lx\n", + table_vaddr + offset_inside_bt, offset_inside_bt); + do_abort = 1; + mpx_dig_abort(); + } + if (DEBUG_LEVEL < 4) + continue; + + printf("table entry[%lx]: ", offset_inside_bt); + for (i = 0; i < bt_entry_size_bytes; i += sizeof(unsigned long)) + printf("0x%016lx ", bt_entry_buf[i]); + printf("\n"); + } + if (do_abort) + mpx_dig_abort(); + dprintf4("%s() done\n", __func__); + return nr_entries; +} + +int search_bd_buf(char *buf, int len_bytes, unsigned long bd_offset_bytes, + int *nr_populated_bdes) +{ + unsigned long i; + int total_entries = 0; + + dprintf3("%s(%p, %x, %lx, ...) buf end: %p\n", __func__, buf, + len_bytes, bd_offset_bytes, buf + len_bytes); + + for (i = 0; i < len_bytes; i += sizeof(unsigned long)) { + unsigned long bd_index = (bd_offset_bytes + i) / sizeof(unsigned long); + unsigned long *bounds_dir_entry_ptr = (unsigned long *)&buf[i]; + unsigned long bounds_dir_entry; + unsigned long bd_for_vaddr; + unsigned long bt_start; + unsigned long bt_tail; + int nr_entries; + + dprintf4("%s() loop i: %ld bounds_dir_entry_ptr: %p\n", __func__, i, + bounds_dir_entry_ptr); + + bounds_dir_entry = *bounds_dir_entry_ptr; + if (!bounds_dir_entry) { + dprintf4("no bounds dir at index 0x%lx / 0x%lx " + "start at offset:%lx %lx\n", bd_index, bd_index, + bd_offset_bytes, i); + continue; + } + dprintf3("found bounds_dir_entry: 0x%lx @ " + "index 0x%lx buf ptr: %p\n", bounds_dir_entry, i, + &buf[i]); + /* mask off the enable bit: */ + bounds_dir_entry &= ~0x1; + (*nr_populated_bdes)++; + dprintf4("nr_populated_bdes: %p\n", nr_populated_bdes); + dprintf4("*nr_populated_bdes: %d\n", *nr_populated_bdes); + + bt_start = bounds_dir_entry; + bt_tail = bounds_dir_entry + MPX_BOUNDS_TABLE_SIZE_BYTES - 1; + if (!vaddr_mapped_by_range(bt_start)) { + printf("bounds directory 0x%lx points to nowhere\n", + bounds_dir_entry); + mpx_dig_abort(); + } + if (!vaddr_mapped_by_range(bt_tail)) { + printf("bounds directory end 0x%lx points to nowhere\n", + bt_tail); + mpx_dig_abort(); + } + /* + * Each bounds directory entry controls 1MB of virtual address + * space. This variable is the virtual address in the process + * of the beginning of the area controlled by this bounds_dir. 
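+ * For example, bd_index 3 gives bd_for_vaddr 0x300000, i.e. that + * entry covers virtual addresses 0x300000 through 0x3fffff.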
+ */ + bd_for_vaddr = bd_index * (1UL<<20); + + nr_entries = dump_table(bounds_dir_entry, bd_for_vaddr, + bounds_dir_global+bd_offset_bytes+i); + total_entries += nr_entries; + dprintf5("dir entry[%4ld @ %p]: 0x%lx %6d entries " + "total this buf: %7d bd_for_vaddrs: 0x%lx -> 0x%lx\n", + bd_index, buf+i, + bounds_dir_entry, nr_entries, total_entries, + bd_for_vaddr, bd_for_vaddr + (1UL<<20)); + } + dprintf3("%s(%p, %x, %lx, ...) done\n", __func__, buf, len_bytes, + bd_offset_bytes); + return total_entries; +} + +int proc_pid_mem_fd = -1; + +void *fill_bounds_dir_buf_other(long byte_offset_inside_bounds_dir, + long buffer_size_bytes, void *buffer) +{ + unsigned long seekto = bounds_dir_global + byte_offset_inside_bounds_dir; + int read_ret; + off_t seek_ret = lseek(proc_pid_mem_fd, seekto, SEEK_SET); + + if (seek_ret != seekto) + mpx_dig_abort(); + + read_ret = read(proc_pid_mem_fd, buffer, buffer_size_bytes); + /* there shouldn't practically be short reads of /proc/$pid/mem */ + if (read_ret != buffer_size_bytes) + mpx_dig_abort(); + + return buffer; +} +void *fill_bounds_dir_buf_self(long byte_offset_inside_bounds_dir, + long buffer_size_bytes, void *buffer) + +{ + unsigned char vec[buffer_size_bytes / PAGE_SIZE]; + char *dig_bounds_dir_ptr = + (void *)(bounds_dir_global + byte_offset_inside_bounds_dir); + /* + * use mincore() to quickly find the areas of the bounds directory + * that have memory and thus will be worth scanning. + */ + int incore_ret; + + int incore = 0; + int i; + + dprintf4("%s() dig_bounds_dir_ptr: %p\n", __func__, dig_bounds_dir_ptr); + + incore_ret = mincore(dig_bounds_dir_ptr, buffer_size_bytes, &vec[0]); + if (incore_ret) { + printf("mincore ret: %d\n", incore_ret); + perror("mincore"); + mpx_dig_abort(); + } + for (i = 0; i < sizeof(vec); i++) + incore += vec[i]; + dprintf4("%s() total incore: %d\n", __func__, incore); + if (!incore) + return NULL; + dprintf3("%s() total incore: %d\n", __func__, incore); + return dig_bounds_dir_ptr; +} + +int inspect_pid(int pid) +{ + static int dig_nr; + long offset_inside_bounds_dir; + char bounds_dir_buf[sizeof(unsigned long) * (1UL << 15)]; + char *dig_bounds_dir_ptr; + int total_entries = 0; + int nr_populated_bdes = 0; + int inspect_self; + + if (getpid() == pid) { + dprintf4("inspecting self\n"); + inspect_self = 1; + } else { + dprintf4("inspecting pid %d\n", pid); + mpx_dig_abort(); + } + + for (offset_inside_bounds_dir = 0; + offset_inside_bounds_dir < MPX_BOUNDS_TABLE_SIZE_BYTES; + offset_inside_bounds_dir += sizeof(bounds_dir_buf)) { + static int bufs_skipped; + int this_entries; + + if (inspect_self) { + dig_bounds_dir_ptr = + fill_bounds_dir_buf_self(offset_inside_bounds_dir, + sizeof(bounds_dir_buf), + &bounds_dir_buf[0]); + } else { + dig_bounds_dir_ptr = + fill_bounds_dir_buf_other(offset_inside_bounds_dir, + sizeof(bounds_dir_buf), + &bounds_dir_buf[0]); + } + if (!dig_bounds_dir_ptr) { + bufs_skipped++; + continue; + } + this_entries = search_bd_buf(dig_bounds_dir_ptr, + sizeof(bounds_dir_buf), + offset_inside_bounds_dir, + &nr_populated_bdes); + total_entries += this_entries; + } + printf("mpx dig (%3d) complete, SUCCESS (%8d / %4d)\n", ++dig_nr, + total_entries, nr_populated_bdes); + return total_entries + nr_populated_bdes; +} + +#ifdef MPX_DIG_REMOTE +int main(int argc, char **argv) +{ + int err; + char *c; + unsigned long bounds_dir_entry; + int pid; + + printf("mpx-dig starting...\n"); + err = sscanf(argv[1], "%d", &pid); + printf("parsing: '%s', err: %d\n", argv[1], err); + if (err != 1) + mpx_dig_abort(); 
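+ /* argv[2] is the bounds directory address, i.e. the 0x$BDIR value produced by the usage recipe in the comment at the top of this file */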
+ + err = sscanf(argv[2], "%lx", &bounds_dir_global); + printf("parsing: '%s': %d\n", argv[2], err); + if (err != 1) + mpx_dig_abort(); + + proc_pid_mem_fd = open_proc(pid, "mem"); + if (proc_pid_mem_fd < 0) + mpx_dig_abort(); + + inspect_pid(pid); + return 0; +} +#endif + +long inspect_me(struct mpx_bounds_dir *bounds_dir) +{ + int pid = getpid(); + + pid_load_vaddrs(pid); + bounds_dir_global = (unsigned long)bounds_dir; + dprintf4("enter %s() bounds dir: %p\n", __func__, bounds_dir); + return inspect_pid(pid); +} diff --git a/tools/testing/selftests/x86/mpx-hw.h b/tools/testing/selftests/x86/mpx-hw.h new file mode 100644 index 000000000000..093c190178a9 --- /dev/null +++ b/tools/testing/selftests/x86/mpx-hw.h @@ -0,0 +1,123 @@ +#ifndef _MPX_HW_H +#define _MPX_HW_H + +#include <assert.h> + +/* Describe the MPX Hardware Layout in here */ + +#define NR_MPX_BOUNDS_REGISTERS 4 + +#ifdef __i386__ + +#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES 16 /* 4 * 32-bits */ +#define MPX_BOUNDS_TABLE_SIZE_BYTES (1ULL << 14) /* 16k */ +#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES 4 +#define MPX_BOUNDS_DIR_SIZE_BYTES (1ULL << 22) /* 4MB */ + +#define MPX_BOUNDS_TABLE_BOTTOM_BIT 2 +#define MPX_BOUNDS_TABLE_TOP_BIT 11 +#define MPX_BOUNDS_DIR_BOTTOM_BIT 12 +#define MPX_BOUNDS_DIR_TOP_BIT 31 + +#else + +/* + * Linear Address of "pointer" (LAp) + * 0 -> 2: ignored + * 3 -> 19: index in to bounds table + * 20 -> 47: index in to bounds directory + * 48 -> 63: ignored + */ + +#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES 32 +#define MPX_BOUNDS_TABLE_SIZE_BYTES (1ULL << 22) /* 4MB */ +#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES 8 +#define MPX_BOUNDS_DIR_SIZE_BYTES (1ULL << 31) /* 2GB */ + +#define MPX_BOUNDS_TABLE_BOTTOM_BIT 3 +#define MPX_BOUNDS_TABLE_TOP_BIT 19 +#define MPX_BOUNDS_DIR_BOTTOM_BIT 20 +#define MPX_BOUNDS_DIR_TOP_BIT 47 + +#endif + +#define MPX_BOUNDS_DIR_NR_ENTRIES \ + (MPX_BOUNDS_DIR_SIZE_BYTES/MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES) +#define MPX_BOUNDS_TABLE_NR_ENTRIES \ + (MPX_BOUNDS_TABLE_SIZE_BYTES/MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES) + +#define MPX_BOUNDS_TABLE_ENTRY_VALID_BIT 0x1 + +struct mpx_bd_entry { + union { + char x[MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES]; + void *contents[1]; + }; +} __attribute__((packed)); + +struct mpx_bt_entry { + union { + char x[MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES]; + unsigned long contents[1]; + }; +} __attribute__((packed)); + +struct mpx_bounds_dir { + struct mpx_bd_entry entries[MPX_BOUNDS_DIR_NR_ENTRIES]; +} __attribute__((packed)); + +struct mpx_bounds_table { + struct mpx_bt_entry entries[MPX_BOUNDS_TABLE_NR_ENTRIES]; +} __attribute__((packed)); + +static inline unsigned long GET_BITS(unsigned long val, int bottombit, int topbit) +{ + int total_nr_bits = topbit - bottombit; + unsigned long mask = (1UL << total_nr_bits)-1; + return (val >> bottombit) & mask; +} + +static inline unsigned long __vaddr_bounds_table_index(void *vaddr) +{ + return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_TABLE_BOTTOM_BIT, + MPX_BOUNDS_TABLE_TOP_BIT); +} + +static inline unsigned long __vaddr_bounds_directory_index(void *vaddr) +{ + return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_DIR_BOTTOM_BIT, + MPX_BOUNDS_DIR_TOP_BIT); +} + +static inline struct mpx_bd_entry *mpx_vaddr_to_bd_entry(void *vaddr, + struct mpx_bounds_dir *bounds_dir) +{ + unsigned long index = __vaddr_bounds_directory_index(vaddr); + return &bounds_dir->entries[index]; +} + +static inline int bd_entry_valid(struct mpx_bd_entry *bounds_dir_entry) +{ + unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents; + return (__bd_entry & 
MPX_BOUNDS_TABLE_ENTRY_VALID_BIT); +} + +static inline struct mpx_bounds_table * +__bd_entry_to_bounds_table(struct mpx_bd_entry *bounds_dir_entry) +{ + unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents; + assert(__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT); + __bd_entry &= ~MPX_BOUNDS_TABLE_ENTRY_VALID_BIT; + return (struct mpx_bounds_table *)__bd_entry; +} + +static inline struct mpx_bt_entry * +mpx_vaddr_to_bt_entry(void *vaddr, struct mpx_bounds_dir *bounds_dir) +{ + struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(vaddr, bounds_dir); + struct mpx_bounds_table *bt = __bd_entry_to_bounds_table(bde); + unsigned long index = __vaddr_bounds_table_index(vaddr); + return &bt->entries[index]; +} + +#endif /* _MPX_HW_H */ diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c new file mode 100644 index 000000000000..616ee9673339 --- /dev/null +++ b/tools/testing/selftests/x86/mpx-mini-test.c @@ -0,0 +1,1585 @@ +/* + * mpx-mini-test.c: routines to test Intel MPX (Memory Protection eXtentions) + * + * Written by: + * "Ren, Qiaowei" <qiaowei.ren@intel.com> + * "Wei, Gang" <gang.wei@intel.com> + * "Hansen, Dave" <dave.hansen@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2. + */ + +/* + * 2014-12-05: Dave Hansen: fixed all of the compiler warnings, and made sure + * it works on 32-bit. + */ + +int inspect_every_this_many_mallocs = 100; +int zap_all_every_this_many_mallocs = 1000; + +#define _GNU_SOURCE +#define _LARGEFILE64_SOURCE + +#include <string.h> +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> +#include <signal.h> +#include <assert.h> +#include <stdlib.h> +#include <ucontext.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +#include "mpx-hw.h" +#include "mpx-debug.h" +#include "mpx-mm.h" + +#ifndef __always_inline +#define __always_inline inline __attribute__((always_inline) +#endif + +#ifndef TEST_DURATION_SECS +#define TEST_DURATION_SECS 3 +#endif + +void write_int_to(char *prefix, char *file, int int_to_write) +{ + char buf[100]; + int fd = open(file, O_RDWR); + int len; + int ret; + + assert(fd >= 0); + len = snprintf(buf, sizeof(buf), "%s%d", prefix, int_to_write); + assert(len >= 0); + assert(len < sizeof(buf)); + ret = write(fd, buf, len); + assert(ret == len); + ret = close(fd); + assert(!ret); +} + +void write_pid_to(char *prefix, char *file) +{ + write_int_to(prefix, file, getpid()); +} + +void trace_me(void) +{ +/* tracing events dir */ +#define TED "/sys/kernel/debug/tracing/events/" +/* + write_pid_to("common_pid=", TED "signal/filter"); + write_pid_to("common_pid=", TED "exceptions/filter"); + write_int_to("", TED "signal/enable", 1); + write_int_to("", TED "exceptions/enable", 1); +*/ + write_pid_to("", "/sys/kernel/debug/tracing/set_ftrace_pid"); + write_int_to("", "/sys/kernel/debug/tracing/trace", 0); +} + +#define test_failed() __test_failed(__FILE__, __LINE__) +static void __test_failed(char *f, int l) +{ + fprintf(stderr, "abort @ %s::%d\n", f, l); + abort(); +} + +/* Error Printf */ +#define eprintf(args...) 
fprintf(stderr, args) + +#ifdef __i386__ + +/* i386 directory size is 4MB */ +#define REG_IP_IDX REG_EIP +#define REX_PREFIX + +#define XSAVE_OFFSET_IN_FPMEM sizeof(struct _libc_fpstate) + +/* + * __cpuid() is from the Linux Kernel: + */ +static inline void __cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + /* ecx is often an input as well as an output. */ + asm volatile( + "push %%ebx;" + "cpuid;" + "mov %%ebx, %1;" + "pop %%ebx" + : "=a" (*eax), + "=g" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx)); +} + +#else /* __i386__ */ + +#define REG_IP_IDX REG_RIP +#define REX_PREFIX "0x48, " + +#define XSAVE_OFFSET_IN_FPMEM 0 + +/* + * __cpuid() is from the Linux Kernel: + */ +static inline void __cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + /* ecx is often an input as well as an output. */ + asm volatile( + "cpuid;" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx)); +} + +#endif /* !__i386__ */ + +struct xsave_hdr_struct { + uint64_t xstate_bv; + uint64_t reserved1[2]; + uint64_t reserved2[5]; +} __attribute__((packed)); + +struct bndregs_struct { + uint64_t bndregs[8]; +} __attribute__((packed)); + +struct bndcsr_struct { + uint64_t cfg_reg_u; + uint64_t status_reg; +} __attribute__((packed)); + +struct xsave_struct { + uint8_t fpu_sse[512]; + struct xsave_hdr_struct xsave_hdr; + uint8_t ymm[256]; + uint8_t lwp[128]; + struct bndregs_struct bndregs; + struct bndcsr_struct bndcsr; +} __attribute__((packed)); + +uint8_t __attribute__((__aligned__(64))) buffer[4096]; +struct xsave_struct *xsave_buf = (struct xsave_struct *)buffer; + +uint8_t __attribute__((__aligned__(64))) test_buffer[4096]; +struct xsave_struct *xsave_test_buf = (struct xsave_struct *)test_buffer; + +uint64_t num_bnd_chk; + +static __always_inline void xrstor_state(struct xsave_struct *fx, uint64_t mask) +{ + uint32_t lmask = mask; + uint32_t hmask = mask >> 32; + + asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" + : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); +} + +static __always_inline void xsave_state_1(void *_fx, uint64_t mask) +{ + uint32_t lmask = mask; + uint32_t hmask = mask >> 32; + unsigned char *fx = _fx; + + asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t" + : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); +} + +static inline uint64_t xgetbv(uint32_t index) +{ + uint32_t eax, edx; + + asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */ + : "=a" (eax), "=d" (edx) + : "c" (index)); + return eax + ((uint64_t)edx << 32); +} + +static uint64_t read_mpx_status_sig(ucontext_t *uctxt) +{ + memset(buffer, 0, sizeof(buffer)); + memcpy(buffer, + (uint8_t *)uctxt->uc_mcontext.fpregs + XSAVE_OFFSET_IN_FPMEM, + sizeof(struct xsave_struct)); + + return xsave_buf->bndcsr.status_reg; +} + +#include <pthread.h> + +static uint8_t *get_next_inst_ip(uint8_t *addr) +{ + uint8_t *ip = addr; + uint8_t sib; + uint8_t rm; + uint8_t mod; + uint8_t base; + uint8_t modrm; + + /* determine the prefix. */ + switch(*ip) { + case 0xf2: + case 0xf3: + case 0x66: + ip++; + break; + } + + /* look for rex prefix */ + if ((*ip & 0x40) == 0x40) + ip++; + + /* Make sure we have a MPX instruction. */ + if (*ip++ != 0x0f) + return addr; + + /* Skip the op code byte. */ + ip++; + + /* Get the modrm byte. */ + modrm = *ip++; + + /* Break it down into parts. */ + rm = modrm & 7; + mod = (modrm >> 6); + + /* Init the parts of the address mode. 
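The decoder only needs the length of the faulting MPX instruction here, so that the #BR handler can advance the saved IP past it. 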
*/ + base = 8; + + /* Is it a mem mode? */ + if (mod != 3) { + /* look for scaled indexed addressing */ + if (rm == 4) { + /* SIB addressing */ + sib = *ip++; + base = sib & 7; + switch (mod) { + case 0: + if (base == 5) + ip += 4; + break; + + case 1: + ip++; + break; + + case 2: + ip += 4; + break; + } + + } else { + /* MODRM addressing */ + switch (mod) { + case 0: + /* DISP32 addressing, no base */ + if (rm == 5) + ip += 4; + break; + + case 1: + ip++; + break; + + case 2: + ip += 4; + break; + } + } + } + return ip; +} + +#ifdef si_lower +static inline void *__si_bounds_lower(siginfo_t *si) +{ + return si->si_lower; +} + +static inline void *__si_bounds_upper(siginfo_t *si) +{ + return si->si_upper; +} +#else +static inline void **__si_bounds_hack(siginfo_t *si) +{ + void *sigfault = &si->_sifields._sigfault; + void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault); + void **__si_lower = end_sigfault; + + return __si_lower; +} + +static inline void *__si_bounds_lower(siginfo_t *si) +{ + return *__si_bounds_hack(si); +} + +static inline void *__si_bounds_upper(siginfo_t *si) +{ + return (*__si_bounds_hack(si)) + sizeof(void *); +} +#endif + +static int br_count; +static int expected_bnd_index = -1; +uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */ +unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS]; + +/* + * The kernel is supposed to provide some information about the bounds + * exception in the siginfo. It should match what we have in the bounds + * registers that we are checking against. Just check against the shadow copy + * since it is easily available, and we also check that *it* matches the real + * registers. + */ +void check_siginfo_vs_shadow(siginfo_t* si) +{ + int siginfo_ok = 1; + void *shadow_lower = (void *)(unsigned long)shadow_plb[expected_bnd_index][0]; + void *shadow_upper = (void *)(unsigned long)shadow_plb[expected_bnd_index][1]; + + if ((expected_bnd_index < 0) || + (expected_bnd_index >= NR_MPX_BOUNDS_REGISTERS)) { + fprintf(stderr, "ERROR: invalid expected_bnd_index: %d\n", + expected_bnd_index); + exit(6); + } + if (__si_bounds_lower(si) != shadow_lower) + siginfo_ok = 0; + if (__si_bounds_upper(si) != shadow_upper) + siginfo_ok = 0; + + if (!siginfo_ok) { + fprintf(stderr, "ERROR: siginfo bounds do not match " + "shadow bounds for register %d\n", expected_bnd_index); + exit(7); + } +} + +void handler(int signum, siginfo_t *si, void *vucontext) +{ + int i; + ucontext_t *uctxt = vucontext; + int trapno; + unsigned long ip; + + dprintf1("entered signal handler\n"); + + trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; + ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; + + if (trapno == 5) { + typeof(si->si_addr) *si_addr_ptr = &si->si_addr; + uint64_t status = read_mpx_status_sig(uctxt); + uint64_t br_reason = status & 0x3; + + br_count++; + dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count); + +#define __SI_FAULT (3 << 16) +#define SEGV_BNDERR (__SI_FAULT|3) /* failed address bound checks */ + + dprintf2("Saw a #BR! 
status 0x%jx at %016lx br_reason: %jx\n", + status, ip, br_reason); + dprintf2("si_signo: %d\n", si->si_signo); + dprintf2(" signum: %d\n", signum); + dprintf2("info->si_code == SEGV_BNDERR: %d\n", + (si->si_code == SEGV_BNDERR)); + dprintf2("info->si_code: %d\n", si->si_code); + dprintf2("info->si_lower: %p\n", __si_bounds_lower(si)); + dprintf2("info->si_upper: %p\n", __si_bounds_upper(si)); + + check_siginfo_vs_shadow(si); + + for (i = 0; i < 8; i++) + dprintf3("[%d]: %p\n", i, si_addr_ptr[i]); + switch (br_reason) { + case 0: /* traditional BR */ + fprintf(stderr, + "Undefined status with bound exception:%jx\n", + status); + exit(5); + case 1: /* #BR MPX bounds exception */ + /* these are normal and we expect to see them */ + dprintf1("bounds exception (normal): status 0x%jx at %p si_addr: %p\n", + status, (void *)ip, si->si_addr); + num_bnd_chk++; + uctxt->uc_mcontext.gregs[REG_IP_IDX] = + (greg_t)get_next_inst_ip((uint8_t *)ip); + break; + case 2: + fprintf(stderr, "#BR status == 2, missing bounds table," + "kernel should have handled!!\n"); + exit(4); + break; + default: + fprintf(stderr, "bound check error: status 0x%jx at %p\n", + status, (void *)ip); + num_bnd_chk++; + uctxt->uc_mcontext.gregs[REG_IP_IDX] = + (greg_t)get_next_inst_ip((uint8_t *)ip); + fprintf(stderr, "bound check error: si_addr %p\n", si->si_addr); + exit(3); + } + } else if (trapno == 14) { + eprintf("ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n", + trapno, ip); + eprintf("si_addr %p\n", si->si_addr); + eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); + test_failed(); + } else { + eprintf("unexpected trap %d! at 0x%lx\n", trapno, ip); + eprintf("si_addr %p\n", si->si_addr); + eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); + test_failed(); + } +} + +static inline void cpuid_count(unsigned int op, int count, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + *eax = op; + *ecx = count; + __cpuid(eax, ebx, ecx, edx); +} + +#define XSTATE_CPUID 0x0000000d + +/* + * List of XSAVE features Linux knows about: + */ +enum xfeature_bit { + XSTATE_BIT_FP, + XSTATE_BIT_SSE, + XSTATE_BIT_YMM, + XSTATE_BIT_BNDREGS, + XSTATE_BIT_BNDCSR, + XSTATE_BIT_OPMASK, + XSTATE_BIT_ZMM_Hi256, + XSTATE_BIT_Hi16_ZMM, + + XFEATURES_NR_MAX, +}; + +#define XSTATE_FP (1 << XSTATE_BIT_FP) +#define XSTATE_SSE (1 << XSTATE_BIT_SSE) +#define XSTATE_YMM (1 << XSTATE_BIT_YMM) +#define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS) +#define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR) +#define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK) +#define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256) +#define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM) + +#define MPX_XSTATES (XSTATE_BNDREGS | XSTATE_BNDCSR) /* 0x18 */ + +bool one_bit(unsigned int x, int bit) +{ + return !!(x & (1<<bit)); +} + +void print_state_component(int state_bit_nr, char *name) +{ + unsigned int eax, ebx, ecx, edx; + unsigned int state_component_size; + unsigned int state_component_supervisor; + unsigned int state_component_user; + unsigned int state_component_aligned; + + /* See SDM Section 13.2 */ + cpuid_count(XSTATE_CPUID, state_bit_nr, &eax, &ebx, &ecx, &edx); + assert(eax || ebx || ecx); + state_component_size = eax; + state_component_supervisor = ((!ebx) && one_bit(ecx, 0)); + state_component_user = !one_bit(ecx, 0); + state_component_aligned = one_bit(ecx, 1); + printf("%8s: size: %d user: %d supervisor: %d aligned: %d\n", + name, + state_component_size, state_component_user, + 
state_component_supervisor, state_component_aligned); + +} + +/* Intel-defined CPU features, CPUID level 0x00000001 (ecx) */ +#define XSAVE_FEATURE_BIT (26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ +#define OSXSAVE_FEATURE_BIT (27) /* XSAVE enabled in the OS */ + +bool check_mpx_support(void) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid_count(1, 0, &eax, &ebx, &ecx, &edx); + + /* We can't do much without XSAVE, so just make these assert()'s */ + if (!one_bit(ecx, XSAVE_FEATURE_BIT)) { + fprintf(stderr, "processor lacks XSAVE, can not run MPX tests\n"); + exit(0); + } + + if (!one_bit(ecx, OSXSAVE_FEATURE_BIT)) { + fprintf(stderr, "processor lacks OSXSAVE, can not run MPX tests\n"); + exit(0); + } + + /* CPUs not supporting the XSTATE CPUID leaf do not support MPX */ + /* Is this redundant with the feature bit checks? */ + cpuid_count(0, 0, &eax, &ebx, &ecx, &edx); + if (eax < XSTATE_CPUID) { + fprintf(stderr, "processor lacks XSTATE CPUID leaf," + " can not run MPX tests\n"); + exit(0); + } + + printf("XSAVE is supported by HW & OS\n"); + + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); + + printf("XSAVE processor supported state mask: 0x%x\n", eax); + printf("XSAVE OS supported state mask: 0x%jx\n", xgetbv(0)); + + /* Make sure that the MPX states are enabled in in XCR0 */ + if ((eax & MPX_XSTATES) != MPX_XSTATES) { + fprintf(stderr, "processor lacks MPX XSTATE(s), can not run MPX tests\n"); + exit(0); + } + + /* Make sure the MPX states are supported by XSAVE* */ + if ((xgetbv(0) & MPX_XSTATES) != MPX_XSTATES) { + fprintf(stderr, "MPX XSTATE(s) no enabled in XCR0, " + "can not run MPX tests\n"); + exit(0); + } + + print_state_component(XSTATE_BIT_BNDREGS, "BNDREGS"); + print_state_component(XSTATE_BIT_BNDCSR, "BNDCSR"); + + return true; +} + +void enable_mpx(void *l1base) +{ + /* enable point lookup */ + memset(buffer, 0, sizeof(buffer)); + xrstor_state(xsave_buf, 0x18); + + xsave_buf->xsave_hdr.xstate_bv = 0x10; + xsave_buf->bndcsr.cfg_reg_u = (unsigned long)l1base | 1; + xsave_buf->bndcsr.status_reg = 0; + + dprintf2("bf xrstor\n"); + dprintf2("xsave cndcsr: status %jx, configu %jx\n", + xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u); + xrstor_state(xsave_buf, 0x18); + dprintf2("after xrstor\n"); + + xsave_state_1(xsave_buf, 0x18); + + dprintf1("xsave bndcsr: status %jx, configu %jx\n", + xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u); +} + +#include <sys/prctl.h> + +struct mpx_bounds_dir *bounds_dir_ptr; + +unsigned long __bd_incore(const char *func, int line) +{ + unsigned long ret = nr_incore(bounds_dir_ptr, MPX_BOUNDS_DIR_SIZE_BYTES); + return ret; +} +#define bd_incore() __bd_incore(__func__, __LINE__) + +void check_clear(void *ptr, unsigned long sz) +{ + unsigned long *i; + + for (i = ptr; (void *)i < ptr + sz; i++) { + if (*i) { + dprintf1("%p is NOT clear at %p\n", ptr, i); + assert(0); + } + } + dprintf1("%p is clear for %lx\n", ptr, sz); +} + +void check_clear_bd(void) +{ + check_clear(bounds_dir_ptr, 2UL << 30); +} + +#define USE_MALLOC_FOR_BOUNDS_DIR 1 +bool process_specific_init(void) +{ + unsigned long size; + unsigned long *dir; + /* Guarantee we have the space to align it, add padding: */ + unsigned long pad = getpagesize(); + + size = 2UL << 30; /* 2GB */ + if (sizeof(unsigned long) == 4) + size = 4UL << 20; /* 4MB */ + dprintf1("trying to allocate %ld MB bounds directory\n", (size >> 20)); + + if (USE_MALLOC_FOR_BOUNDS_DIR) { + unsigned long _dir; + + dir = malloc(size + pad); + assert(dir); + _dir = (unsigned long)dir; + _dir += 0xfffUL; + 
_dir &= ~0xfffUL; + dir = (void *)_dir; + } else { + /* + * This makes debugging easier because the address + * calculations are simpler: + */ + dir = mmap((void *)0x200000000000, size + pad, + PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + if (dir == (void *)-1) { + perror("unable to allocate bounds directory"); + abort(); + } + check_clear(dir, size); + } + bounds_dir_ptr = (void *)dir; + madvise(bounds_dir_ptr, size, MADV_NOHUGEPAGE); + bd_incore(); + dprintf1("bounds directory: 0x%p -> 0x%p\n", bounds_dir_ptr, + (char *)bounds_dir_ptr + size); + check_clear(dir, size); + enable_mpx(dir); + check_clear(dir, size); + if (prctl(43, 0, 0, 0, 0)) { + printf("no MPX support\n"); + abort(); + return false; + } + return true; +} + +bool process_specific_finish(void) +{ + if (prctl(44)) { + printf("no MPX support\n"); + return false; + } + return true; +} + +void setup_handler() +{ + int r, rs; + struct sigaction newact; + struct sigaction oldact; + + /* #BR is mapped to sigsegv */ + int signum = SIGSEGV; + + newact.sa_handler = 0; /* void(*)(int)*/ + newact.sa_sigaction = handler; /* void (*)(int, siginfo_t*, void *) */ + + /*sigset_t - signals to block while in the handler */ + /* get the old signal mask. */ + rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask); + assert(rs == 0); + + /* call sa_sigaction, not sa_handler*/ + newact.sa_flags = SA_SIGINFO; + + newact.sa_restorer = 0; /* void(*)(), obsolete */ + r = sigaction(signum, &newact, &oldact); + assert(r == 0); +} + +void mpx_prepare(void) +{ + dprintf2("%s()\n", __func__); + setup_handler(); + process_specific_init(); +} + +void mpx_cleanup(void) +{ + printf("%s(): %jd BRs. bye...\n", __func__, num_bnd_chk); + process_specific_finish(); +} + +/*-------------- the following is test case ---------------*/ +#include <stdint.h> +#include <stdbool.h> +#include <stdlib.h> +#include <stdio.h> +#include <time.h> + +uint64_t num_lower_brs; +uint64_t num_upper_brs; + +#define MPX_CONFIG_OFFSET 1024 +#define MPX_BOUNDS_OFFSET 960 +#define MPX_HEADER_OFFSET 512 +#define MAX_ADDR_TESTED (1<<28) +#define TEST_ROUNDS 100 + +/* + 0F 1A /r BNDLDX-Load + 0F 1B /r BNDSTX-Store Extended Bounds Using Address Translation + 66 0F 1A /r BNDMOV bnd1, bnd2/m128 + 66 0F 1B /r BNDMOV bnd1/m128, bnd2 + F2 0F 1A /r BNDCU bnd, r/m64 + F2 0F 1B /r BNDCN bnd, r/m64 + F3 0F 1A /r BNDCL bnd, r/m64 + F3 0F 1B /r BNDMK bnd, m64 +*/ + +static __always_inline void xsave_state(void *_fx, uint64_t mask) +{ + uint32_t lmask = mask; + uint32_t hmask = mask >> 32; + unsigned char *fx = _fx; + + asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t" + : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); +} + +static __always_inline void mpx_clear_bnd0(void) +{ + long size = 0; + void *ptr = NULL; + /* F3 0F 1B /r BNDMK bnd, m64 */ + /* f3 0f 1b 04 11 bndmk (%rcx,%rdx,1),%bnd0 */ + asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t" + : : "c" (ptr), "d" (size-1) + : "memory"); +} + +static __always_inline void mpx_make_bound_helper(unsigned long ptr, + unsigned long size) +{ + /* F3 0F 1B /r BNDMK bnd, m64 */ + /* f3 0f 1b 04 11 bndmk (%rcx,%rdx,1),%bnd0 */ + asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t" + : : "c" (ptr), "d" (size-1) + : "memory"); +} + +static __always_inline void mpx_check_lowerbound_helper(unsigned long ptr) +{ + /* F3 0F 1A /r NDCL bnd, r/m64 */ + /* f3 0f 1a 01 bndcl (%rcx),%bnd0 */ + asm volatile(".byte 0xf3,0x0f,0x1a,0x01\n\t" + : : "c" (ptr) + : "memory"); +} + +static __always_inline void mpx_check_upperbound_helper(unsigned long 
ptr) +{ + /* F2 0F 1A /r BNDCU bnd, r/m64 */ + /* f2 0f 1a 01 bndcu (%rcx),%bnd0 */ + asm volatile(".byte 0xf2,0x0f,0x1a,0x01\n\t" + : : "c" (ptr) + : "memory"); +} + +static __always_inline void mpx_movbndreg_helper() +{ + /* 66 0F 1B /r BNDMOV bnd1/m128, bnd2 */ + /* 66 0f 1b c2 bndmov %bnd0,%bnd2 */ + + asm volatile(".byte 0x66,0x0f,0x1b,0xc2\n\t"); +} + +static __always_inline void mpx_movbnd2mem_helper(uint8_t *mem) +{ + /* 66 0F 1B /r BNDMOV bnd1/m128, bnd2 */ + /* 66 0f 1b 01 bndmov %bnd0,(%rcx) */ + asm volatile(".byte 0x66,0x0f,0x1b,0x01\n\t" + : : "c" (mem) + : "memory"); +} + +static __always_inline void mpx_movbnd_from_mem_helper(uint8_t *mem) +{ + /* 66 0F 1A /r BNDMOV bnd1, bnd2/m128 */ + /* 66 0f 1a 01 bndmov (%rcx),%bnd0 */ + asm volatile(".byte 0x66,0x0f,0x1a,0x01\n\t" + : : "c" (mem) + : "memory"); +} + +static __always_inline void mpx_store_dsc_helper(unsigned long ptr_addr, + unsigned long ptr_val) +{ + /* 0F 1B /r BNDSTX-Store Extended Bounds Using Address Translation */ + /* 0f 1b 04 11 bndstx %bnd0,(%rcx,%rdx,1) */ + asm volatile(".byte 0x0f,0x1b,0x04,0x11\n\t" + : : "c" (ptr_addr), "d" (ptr_val) + : "memory"); +} + +static __always_inline void mpx_load_dsc_helper(unsigned long ptr_addr, + unsigned long ptr_val) +{ + /* 0F 1A /r BNDLDX-Load */ + /*/ 0f 1a 04 11 bndldx (%rcx,%rdx,1),%bnd0 */ + asm volatile(".byte 0x0f,0x1a,0x04,0x11\n\t" + : : "c" (ptr_addr), "d" (ptr_val) + : "memory"); +} + +void __print_context(void *__print_xsave_buffer, int line) +{ + uint64_t *bounds = (uint64_t *)(__print_xsave_buffer + MPX_BOUNDS_OFFSET); + uint64_t *cfg = (uint64_t *)(__print_xsave_buffer + MPX_CONFIG_OFFSET); + + int i; + eprintf("%s()::%d\n", "print_context", line); + for (i = 0; i < 4; i++) { + eprintf("bound[%d]: 0x%016lx 0x%016lx(0x%016lx)\n", i, + (unsigned long)bounds[i*2], + ~(unsigned long)bounds[i*2+1], + (unsigned long)bounds[i*2+1]); + } + + eprintf("cpcfg: %jx cpstatus: %jx\n", cfg[0], cfg[1]); +} +#define print_context(x) __print_context(x, __LINE__) +#ifdef DEBUG +#define dprint_context(x) print_context(x) +#else +#define dprint_context(x) do{}while(0) +#endif + +void init() +{ + int i; + + srand((unsigned int)time(NULL)); + + for (i = 0; i < 4; i++) { + shadow_plb[i][0] = 0; + shadow_plb[i][1] = ~(unsigned long)0; + } +} + +long int __mpx_random(int line) +{ +#ifdef NOT_SO_RANDOM + static long fake = 722122311; + fake += 563792075; + return fakse; +#else + return random(); +#endif +} +#define mpx_random() __mpx_random(__LINE__) + +uint8_t *get_random_addr() +{ + uint8_t*addr = (uint8_t *)(unsigned long)(rand() % MAX_ADDR_TESTED); + return (addr - (unsigned long)addr % sizeof(uint8_t *)); +} + +static inline bool compare_context(void *__xsave_buffer) +{ + uint64_t *bounds = (uint64_t *)(__xsave_buffer + MPX_BOUNDS_OFFSET); + + int i; + for (i = 0; i < 4; i++) { + dprintf3("shadow[%d]{%016lx/%016lx}\nbounds[%d]{%016lx/%016lx}\n", + i, (unsigned long)shadow_plb[i][0], (unsigned long)shadow_plb[i][1], + i, (unsigned long)bounds[i*2], ~(unsigned long)bounds[i*2+1]); + if ((shadow_plb[i][0] != bounds[i*2]) || + (shadow_plb[i][1] != ~(unsigned long)bounds[i*2+1])) { + eprintf("ERROR comparing shadow to real bound register %d\n", i); + eprintf("shadow{0x%016lx/0x%016lx}\nbounds{0x%016lx/0x%016lx}\n", + (unsigned long)shadow_plb[i][0], (unsigned long)shadow_plb[i][1], + (unsigned long)bounds[i*2], (unsigned long)bounds[i*2+1]); + return false; + } + } + + return true; +} + +void mkbnd_shadow(uint8_t *ptr, int index, long offset) +{ + uint64_t *lower = (uint64_t 
*)&(shadow_plb[index][0]); + uint64_t *upper = (uint64_t *)&(shadow_plb[index][1]); + *lower = (unsigned long)ptr; + *upper = (unsigned long)ptr + offset - 1; +} + +void check_lowerbound_shadow(uint8_t *ptr, int index) +{ + uint64_t *lower = (uint64_t *)&(shadow_plb[index][0]); + if (*lower > (uint64_t)(unsigned long)ptr) + num_lower_brs++; + else + dprintf1("LowerBoundChk passed:%p\n", ptr); +} + +void check_upperbound_shadow(uint8_t *ptr, int index) +{ + uint64_t upper = *(uint64_t *)&(shadow_plb[index][1]); + if (upper < (uint64_t)(unsigned long)ptr) + num_upper_brs++; + else + dprintf1("UpperBoundChk passed:%p\n", ptr); +} + +__always_inline void movbndreg_shadow(int src, int dest) +{ + shadow_plb[dest][0] = shadow_plb[src][0]; + shadow_plb[dest][1] = shadow_plb[src][1]; +} + +__always_inline void movbnd2mem_shadow(int src, unsigned long *dest) +{ + unsigned long *lower = (unsigned long *)&(shadow_plb[src][0]); + unsigned long *upper = (unsigned long *)&(shadow_plb[src][1]); + *dest = *lower; + *(dest+1) = *upper; +} + +__always_inline void movbnd_from_mem_shadow(unsigned long *src, int dest) +{ + unsigned long *lower = (unsigned long *)&(shadow_plb[dest][0]); + unsigned long *upper = (unsigned long *)&(shadow_plb[dest][1]); + *lower = *src; + *upper = *(src+1); +} + +__always_inline void stdsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val) +{ + shadow_map[0] = (unsigned long)shadow_plb[index][0]; + shadow_map[1] = (unsigned long)shadow_plb[index][1]; + shadow_map[2] = (unsigned long)ptr_val; + dprintf3("%s(%d, %p, %p) set shadow map[2]: %p\n", __func__, + index, ptr, ptr_val, ptr_val); + /*ptr ignored */ +} + +void lddsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val) +{ + uint64_t lower = shadow_map[0]; + uint64_t upper = shadow_map[1]; + uint8_t *value = (uint8_t *)shadow_map[2]; + + if (value != ptr_val) { + dprintf2("%s(%d, %p, %p) init shadow bounds[%d] " + "because %p != %p\n", __func__, index, ptr, + ptr_val, index, value, ptr_val); + shadow_plb[index][0] = 0; + shadow_plb[index][1] = ~(unsigned long)0; + } else { + shadow_plb[index][0] = lower; + shadow_plb[index][1] = upper; + } + /* ptr ignored */ +} + +static __always_inline void mpx_test_helper0(uint8_t *buf, uint8_t *ptr) +{ + mpx_make_bound_helper((unsigned long)ptr, 0x1800); +} + +static __always_inline void mpx_test_helper0_shadow(uint8_t *buf, uint8_t *ptr) +{ + mkbnd_shadow(ptr, 0, 0x1800); +} + +static __always_inline void mpx_test_helper1(uint8_t *buf, uint8_t *ptr) +{ + /* these are hard-coded to check bnd0 */ + expected_bnd_index = 0; + mpx_check_lowerbound_helper((unsigned long)(ptr-1)); + mpx_check_upperbound_helper((unsigned long)(ptr+0x1800)); + /* reset this since we do not expect any more bounds exceptions */ + expected_bnd_index = -1; +} + +static __always_inline void mpx_test_helper1_shadow(uint8_t *buf, uint8_t *ptr) +{ + check_lowerbound_shadow(ptr-1, 0); + check_upperbound_shadow(ptr+0x1800, 0); +} + +static __always_inline void mpx_test_helper2(uint8_t *buf, uint8_t *ptr) +{ + mpx_make_bound_helper((unsigned long)ptr, 0x1800); + mpx_movbndreg_helper(); + mpx_movbnd2mem_helper(buf); + mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800); +} + +static __always_inline void mpx_test_helper2_shadow(uint8_t *buf, uint8_t *ptr) +{ + mkbnd_shadow(ptr, 0, 0x1800); + movbndreg_shadow(0, 2); + movbnd2mem_shadow(0, (unsigned long *)buf); + mkbnd_shadow(ptr+0x12, 0, 0x1800); +} + +static __always_inline void mpx_test_helper3(uint8_t *buf, uint8_t *ptr) +{ + mpx_movbnd_from_mem_helper(buf); +} + +static 
__always_inline void mpx_test_helper3_shadow(uint8_t *buf, uint8_t *ptr) +{ + movbnd_from_mem_shadow((unsigned long *)buf, 0); +} + +static __always_inline void mpx_test_helper4(uint8_t *buf, uint8_t *ptr) +{ + mpx_store_dsc_helper((unsigned long)buf, (unsigned long)ptr); + mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800); +} + +static __always_inline void mpx_test_helper4_shadow(uint8_t *buf, uint8_t *ptr) +{ + stdsc_shadow(0, buf, ptr); + mkbnd_shadow(ptr+0x12, 0, 0x1800); +} + +static __always_inline void mpx_test_helper5(uint8_t *buf, uint8_t *ptr) +{ + mpx_load_dsc_helper((unsigned long)buf, (unsigned long)ptr); +} + +static __always_inline void mpx_test_helper5_shadow(uint8_t *buf, uint8_t *ptr) +{ + lddsc_shadow(0, buf, ptr); +} + +#define NR_MPX_TEST_FUNCTIONS 6 + +/* + * For compatibility reasons, MPX will clear the bounds registers + * when you make function calls (among other things). We have to + * preserve the registers in between calls to the "helpers" since + * they build on each other. + * + * Be very careful not to make any function calls inside the + * helpers, or anywhere else beween the xrstor and xsave. + */ +#define run_helper(helper_nr, buf, buf_shadow, ptr) do { \ + xrstor_state(xsave_test_buf, flags); \ + mpx_test_helper##helper_nr(buf, ptr); \ + xsave_state(xsave_test_buf, flags); \ + mpx_test_helper##helper_nr##_shadow(buf_shadow, ptr); \ +} while (0) + +static void run_helpers(int nr, uint8_t *buf, uint8_t *buf_shadow, uint8_t *ptr) +{ + uint64_t flags = 0x18; + + dprint_context(xsave_test_buf); + switch (nr) { + case 0: + run_helper(0, buf, buf_shadow, ptr); + break; + case 1: + run_helper(1, buf, buf_shadow, ptr); + break; + case 2: + run_helper(2, buf, buf_shadow, ptr); + break; + case 3: + run_helper(3, buf, buf_shadow, ptr); + break; + case 4: + run_helper(4, buf, buf_shadow, ptr); + break; + case 5: + run_helper(5, buf, buf_shadow, ptr); + break; + default: + test_failed(); + break; + } + dprint_context(xsave_test_buf); +} + +unsigned long buf_shadow[1024]; /* used to check load / store descriptors */ +extern long inspect_me(struct mpx_bounds_dir *bounds_dir); + +long cover_buf_with_bt_entries(void *buf, long buf_len) +{ + int i; + long nr_to_fill; + int ratio = 1000; + unsigned long buf_len_in_ptrs; + + /* Fill about 1/100 of the space with bt entries */ + nr_to_fill = buf_len / (sizeof(unsigned long) * ratio); + + if (!nr_to_fill) + dprintf3("%s() nr_to_fill: %ld\n", __func__, nr_to_fill); + + /* Align the buffer to pointer size */ + while (((unsigned long)buf) % sizeof(void *)) { + buf++; + buf_len--; + } + /* We are storing pointers, so make */ + buf_len_in_ptrs = buf_len / sizeof(void *); + + for (i = 0; i < nr_to_fill; i++) { + long index = (mpx_random() % buf_len_in_ptrs); + void *ptr = buf + index * sizeof(unsigned long); + unsigned long ptr_addr = (unsigned long)ptr; + + /* ptr and size can be anything */ + mpx_make_bound_helper((unsigned long)ptr, 8); + + /* + * take bnd0 and put it in to bounds tables "buf + index" is an + * address inside the buffer where we are pretending that we + * are going to put a pointer We do not, though because we will + * never load entries from the table, so it doesn't matter. 
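+ * The bndstx below is still enough to make the kernel allocate a + * bounds table covering this chunk of the buffer, which is what the + * unmap test later relies on being freed.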
+ */ + mpx_store_dsc_helper(ptr_addr, (unsigned long)ptr); + dprintf4("storing bound table entry for %lx (buf start @ %p)\n", + ptr_addr, buf); + } + return nr_to_fill; +} + +unsigned long align_down(unsigned long alignme, unsigned long align_to) +{ + return alignme & ~(align_to-1); +} + +unsigned long align_up(unsigned long alignme, unsigned long align_to) +{ + return (alignme + align_to - 1) & ~(align_to-1); +} + +/* + * Using 1MB alignment guarantees that each no allocation + * will overlap with another's bounds tables. + * + * We have to cook our own allocator here. malloc() can + * mix other allocation with ours which means that even + * if we free all of our allocations, there might still + * be bounds tables for the *areas* since there is other + * valid memory there. + * + * We also can't use malloc() because a free() of an area + * might not free it back to the kernel. We want it + * completely unmapped an malloc() does not guarantee + * that. + */ +#ifdef __i386__ +long alignment = 4096; +long sz_alignment = 4096; +#else +long alignment = 1 * MB; +long sz_alignment = 1 * MB; +#endif +void *mpx_mini_alloc(unsigned long sz) +{ + unsigned long long tries = 0; + static void *last; + void *ptr; + void *try_at; + + sz = align_up(sz, sz_alignment); + + try_at = last + alignment; + while (1) { + ptr = mmap(try_at, sz, PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + if (ptr == (void *)-1) + return NULL; + if (ptr == try_at) + break; + + munmap(ptr, sz); + try_at += alignment; +#ifdef __i386__ + /* + * This isn't quite correct for 32-bit binaries + * on 64-bit kernels since they can use the + * entire 32-bit address space, but it's close + * enough. + */ + if (try_at > (void *)0xC0000000) +#else + if (try_at > (void *)0x0000800000000000) +#endif + try_at = (void *)0x0; + if (!(++tries % 10000)) + dprintf1("stuck in %s(), tries: %lld\n", __func__, tries); + continue; + } + last = ptr; + dprintf3("mpx_mini_alloc(0x%lx) returning: %p\n", sz, ptr); + return ptr; +} +void mpx_mini_free(void *ptr, long sz) +{ + dprintf2("%s() ptr: %p\n", __func__, ptr); + if ((unsigned long)ptr > 0x100000000000) { + dprintf1("uh oh !!!!!!!!!!!!!!! 
pointer too high: %p\n", ptr); + test_failed(); + } + sz = align_up(sz, sz_alignment); + dprintf3("%s() ptr: %p before munmap\n", __func__, ptr); + munmap(ptr, sz); + dprintf3("%s() ptr: %p DONE\n", __func__, ptr); +} + +#define NR_MALLOCS 100 +struct one_malloc { + char *ptr; + int nr_filled_btes; + unsigned long size; +}; +struct one_malloc mallocs[NR_MALLOCS]; + +void free_one_malloc(int index) +{ + unsigned long free_ptr; + unsigned long mask; + + if (!mallocs[index].ptr) + return; + + mpx_mini_free(mallocs[index].ptr, mallocs[index].size); + dprintf4("freed[%d]: %p\n", index, mallocs[index].ptr); + + free_ptr = (unsigned long)mallocs[index].ptr; + mask = alignment-1; + dprintf4("lowerbits: %lx / %lx mask: %lx\n", free_ptr, + (free_ptr & mask), mask); + assert((free_ptr & mask) == 0); + + mallocs[index].ptr = NULL; +} + +#ifdef __i386__ +#define MPX_BOUNDS_TABLE_COVERS 4096 +#else +#define MPX_BOUNDS_TABLE_COVERS (1 * MB) +#endif +void zap_everything(void) +{ + long after_zap; + long before_zap; + int i; + + before_zap = inspect_me(bounds_dir_ptr); + dprintf1("zapping everything start: %ld\n", before_zap); + for (i = 0; i < NR_MALLOCS; i++) + free_one_malloc(i); + + after_zap = inspect_me(bounds_dir_ptr); + dprintf1("zapping everything done: %ld\n", after_zap); + /* + * We only guarantee to empty the thing out if our allocations are + * exactly aligned on the boundaries of a boudns table. + */ + if ((alignment >= MPX_BOUNDS_TABLE_COVERS) && + (sz_alignment >= MPX_BOUNDS_TABLE_COVERS)) { + if (after_zap != 0) + test_failed(); + + assert(after_zap == 0); + } +} + +void do_one_malloc(void) +{ + static int malloc_counter; + long sz; + int rand_index = (mpx_random() % NR_MALLOCS); + void *ptr = mallocs[rand_index].ptr; + + dprintf3("%s() enter\n", __func__); + + if (ptr) { + dprintf3("freeing one malloc at index: %d\n", rand_index); + free_one_malloc(rand_index); + if (mpx_random() % (NR_MALLOCS*3) == 3) { + int i; + dprintf3("zapping some more\n"); + for (i = rand_index; i < NR_MALLOCS; i++) + free_one_malloc(i); + } + if ((mpx_random() % zap_all_every_this_many_mallocs) == 4) + zap_everything(); + } + + /* 1->~1M */ + sz = (1 + mpx_random() % 1000) * 1000; + ptr = mpx_mini_alloc(sz); + if (!ptr) { + /* + * If we are failing allocations, just assume we + * are out of memory and zap everything. 
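+ * zap_everything() unmaps every tracked allocation, after which the + * kernel is expected to free the bounds tables that covered them.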
+ */ + dprintf3("zapping everything because out of memory\n"); + zap_everything(); + goto out; + } + + dprintf3("malloc: %p size: 0x%lx\n", ptr, sz); + mallocs[rand_index].nr_filled_btes = cover_buf_with_bt_entries(ptr, sz); + mallocs[rand_index].ptr = ptr; + mallocs[rand_index].size = sz; +out: + if ((++malloc_counter) % inspect_every_this_many_mallocs == 0) + inspect_me(bounds_dir_ptr); +} + +void run_timed_test(void (*test_func)(void)) +{ + int done = 0; + long iteration = 0; + static time_t last_print; + time_t now; + time_t start; + + time(&start); + while (!done) { + time(&now); + if ((now - start) > TEST_DURATION_SECS) + done = 1; + + test_func(); + iteration++; + + if ((now - last_print > 1) || done) { + printf("iteration %ld complete, OK so far\n", iteration); + last_print = now; + } + } +} + +void check_bounds_table_frees(void) +{ + printf("executing unmaptest\n"); + inspect_me(bounds_dir_ptr); + run_timed_test(&do_one_malloc); + printf("done with malloc() fun\n"); +} + +void insn_test_failed(int test_nr, int test_round, void *buf, + void *buf_shadow, void *ptr) +{ + print_context(xsave_test_buf); + eprintf("ERROR: test %d round %d failed\n", test_nr, test_round); + while (test_nr == 5) { + struct mpx_bt_entry *bte; + struct mpx_bounds_dir *bd = (void *)bounds_dir_ptr; + struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(buf, bd); + + printf(" bd: %p\n", bd); + printf("&bde: %p\n", bde); + printf("*bde: %lx\n", *(unsigned long *)bde); + if (!bd_entry_valid(bde)) + break; + + bte = mpx_vaddr_to_bt_entry(buf, bd); + printf(" te: %p\n", bte); + printf("bte[0]: %lx\n", bte->contents[0]); + printf("bte[1]: %lx\n", bte->contents[1]); + printf("bte[2]: %lx\n", bte->contents[2]); + printf("bte[3]: %lx\n", bte->contents[3]); + break; + } + test_failed(); +} + +void check_mpx_insns_and_tables(void) +{ + int successes = 0; + int failures = 0; + int buf_size = (1024*1024); + unsigned long *buf = malloc(buf_size); + const int total_nr_tests = NR_MPX_TEST_FUNCTIONS * TEST_ROUNDS; + int i, j; + + memset(buf, 0, buf_size); + memset(buf_shadow, 0, sizeof(buf_shadow)); + + for (i = 0; i < TEST_ROUNDS; i++) { + uint8_t *ptr = get_random_addr() + 8; + + for (j = 0; j < NR_MPX_TEST_FUNCTIONS; j++) { + if (0 && j != 5) { + successes++; + continue; + } + dprintf2("starting test %d round %d\n", j, i); + dprint_context(xsave_test_buf); + /* + * test5 loads an address from the bounds tables. + * The load will only complete if 'ptr' matches + * the load and the store, so with random addrs, + * the odds of this are very small. Make it + * higher by only moving 'ptr' 1/10 times. 
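+ * (test5 is the bndldx case: lddsc_shadow() only keeps the shadow + * bounds when the stored pointer value matches, so reusing 'ptr' + * across rounds raises the odds of a match.)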
+ */ + if (random() % 10 <= 0) + ptr = get_random_addr() + 8; + dprintf3("random ptr{%p}\n", ptr); + dprint_context(xsave_test_buf); + run_helpers(j, (void *)buf, (void *)buf_shadow, ptr); + dprint_context(xsave_test_buf); + if (!compare_context(xsave_test_buf)) { + insn_test_failed(j, i, buf, buf_shadow, ptr); + failures++; + goto exit; + } + successes++; + dprint_context(xsave_test_buf); + dprintf2("finished test %d round %d\n", j, i); + dprintf3("\n"); + dprint_context(xsave_test_buf); + } + } + +exit: + dprintf2("\nabout to free:\n"); + free(buf); + dprintf1("successes: %d\n", successes); + dprintf1(" failures: %d\n", failures); + dprintf1(" tests: %d\n", total_nr_tests); + dprintf1(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs); + dprintf1(" saw: %d #BRs\n", br_count); + if (failures) { + eprintf("ERROR: non-zero number of failures\n"); + exit(20); + } + if (successes != total_nr_tests) { + eprintf("ERROR: succeded fewer than number of tries (%d != %d)\n", + successes, total_nr_tests); + exit(21); + } + if (num_upper_brs + num_lower_brs != br_count) { + eprintf("ERROR: unexpected number of #BRs: %jd %jd %d\n", + num_upper_brs, num_lower_brs, br_count); + eprintf("successes: %d\n", successes); + eprintf(" failures: %d\n", failures); + eprintf(" tests: %d\n", total_nr_tests); + eprintf(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs); + eprintf(" saw: %d #BRs\n", br_count); + exit(22); + } +} + +/* + * This is supposed to SIGSEGV nicely once the kernel + * can no longer allocate vaddr space. + */ +void exhaust_vaddr_space(void) +{ + unsigned long ptr; + /* Try to make sure there is no room for a bounds table anywhere */ + unsigned long skip = MPX_BOUNDS_TABLE_SIZE_BYTES - PAGE_SIZE; +#ifdef __i386__ + unsigned long max_vaddr = 0xf7788000UL; +#else + unsigned long max_vaddr = 0x800000000000UL; +#endif + + dprintf1("%s() start\n", __func__); + /* do not start at 0, we aren't allowed to map there */ + for (ptr = PAGE_SIZE; ptr < max_vaddr; ptr += skip) { + void *ptr_ret; + int ret = madvise((void *)ptr, PAGE_SIZE, MADV_NORMAL); + + if (!ret) { + dprintf1("madvise() %lx ret: %d\n", ptr, ret); + continue; + } + ptr_ret = mmap((void *)ptr, PAGE_SIZE, PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + if (ptr_ret != (void *)ptr) { + perror("mmap"); + dprintf1("mmap(%lx) ret: %p\n", ptr, ptr_ret); + break; + } + if (!(ptr & 0xffffff)) + dprintf1("mmap(%lx) ret: %p\n", ptr, ptr_ret); + } + for (ptr = PAGE_SIZE; ptr < max_vaddr; ptr += skip) { + dprintf2("covering 0x%lx with bounds table entries\n", ptr); + cover_buf_with_bt_entries((void *)ptr, PAGE_SIZE); + } + dprintf1("%s() end\n", __func__); + printf("done with vaddr space fun\n"); +} + +void mpx_table_test(void) +{ + printf("starting mpx bounds table test\n"); + run_timed_test(check_mpx_insns_and_tables); + printf("done with mpx bounds table test\n"); +} + +int main(int argc, char **argv) +{ + int unmaptest = 0; + int vaddrexhaust = 0; + int tabletest = 0; + int i; + + check_mpx_support(); + mpx_prepare(); + srandom(11179); + + bd_incore(); + init(); + bd_incore(); + + trace_me(); + + xsave_state((void *)xsave_test_buf, 0x1f); + if (!compare_context(xsave_test_buf)) + printf("Init failed\n"); + + for (i = 1; i < argc; i++) { + if (!strcmp(argv[i], "unmaptest")) + unmaptest = 1; + if (!strcmp(argv[i], "vaddrexhaust")) + vaddrexhaust = 1; + if (!strcmp(argv[i], "tabletest")) + tabletest = 1; + } + if (!(unmaptest || vaddrexhaust || tabletest)) { + unmaptest = 1; + /* vaddrexhaust = 1; */ + tabletest = 1; + } + if 
(unmaptest) + check_bounds_table_frees(); + if (tabletest) + mpx_table_test(); + if (vaddrexhaust) + exhaust_vaddr_space(); + printf("%s completed successfully\n", argv[0]); + exit(0); +} + +#include "mpx-dig.c" diff --git a/tools/testing/selftests/x86/mpx-mm.h b/tools/testing/selftests/x86/mpx-mm.h new file mode 100644 index 000000000000..af706a5398f7 --- /dev/null +++ b/tools/testing/selftests/x86/mpx-mm.h @@ -0,0 +1,9 @@ +#ifndef _MPX_MM_H +#define _MPX_MM_H + +#define PAGE_SIZE 4096 +#define MB (1UL<<20) + +extern long nr_incore(void *ptr, unsigned long size_bytes); + +#endif /* _MPX_MM_H */ diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c new file mode 100644 index 000000000000..bf0d687c7db7 --- /dev/null +++ b/tools/testing/selftests/x86/test_mremap_vdso.c @@ -0,0 +1,111 @@ +/* + * 32-bit test to check vDSO mremap. + * + * Copyright (c) 2016 Dmitry Safonov + * Suggested-by: Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Can be built statically: + * gcc -Os -Wall -static -m32 test_mremap_vdso.c + */ +#define _GNU_SOURCE +#include <stdio.h> +#include <errno.h> +#include <unistd.h> +#include <string.h> + +#include <sys/mman.h> +#include <sys/auxv.h> +#include <sys/syscall.h> +#include <sys/wait.h> + +#define PAGE_SIZE 4096 + +static int try_to_remap(void *vdso_addr, unsigned long size) +{ + void *dest_addr, *new_addr; + + /* Searching for memory location where to remap */ + dest_addr = mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (dest_addr == MAP_FAILED) { + printf("[WARN]\tmmap failed (%d): %m\n", errno); + return 0; + } + + printf("[NOTE]\tMoving vDSO: [%p, %#lx] -> [%p, %#lx]\n", + vdso_addr, (unsigned long)vdso_addr + size, + dest_addr, (unsigned long)dest_addr + size); + fflush(stdout); + + new_addr = mremap(vdso_addr, size, size, + MREMAP_FIXED|MREMAP_MAYMOVE, dest_addr); + if ((unsigned long)new_addr == (unsigned long)-1) { + munmap(dest_addr, size); + if (errno == EINVAL) { + printf("[NOTE]\tvDSO partial move failed, will try with bigger size\n"); + return -1; /* Retry with larger */ + } + printf("[FAIL]\tmremap failed (%d): %m\n", errno); + return 1; + } + + return 0; + +} + +int main(int argc, char **argv, char **envp) +{ + pid_t child; + + child = fork(); + if (child == -1) { + printf("[WARN]\tfailed to fork (%d): %m\n", errno); + return 1; + } + + if (child == 0) { + unsigned long vdso_size = PAGE_SIZE; + unsigned long auxval; + int ret = -1; + + auxval = getauxval(AT_SYSINFO_EHDR); + printf("\tAT_SYSINFO_EHDR is %#lx\n", auxval); + if (!auxval || auxval == -ENOENT) { + printf("[WARN]\tgetauxval failed\n"); + return 0; + } + + /* Simpler than parsing ELF header */ + while (ret < 0) { + ret = try_to_remap((void *)auxval, vdso_size); + vdso_size += PAGE_SIZE; + } + + /* Glibc is likely to explode now - exit with raw syscall */ + asm volatile ("int $0x80" : : "a" (__NR_exit), "b" (!!ret)); + } else { + int status; + + if (waitpid(child, &status, 0) != child || + !WIFEXITED(status)) { + printf("[FAIL]\tmremap() of the vDSO does not work on this kernel!\n"); + 
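/* waitpid() failed or the child did not exit normally: the vDSO move killed it */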
return 1; + } else if (WEXITSTATUS(status) != 0) { + printf("[FAIL]\tChild failed with %d\n", + WEXITSTATUS(status)); + return 1; + } + printf("[OK]\n"); + } + + return 0; +} diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile index 6173adae9f08..877a8a4721b6 100644 --- a/tools/virtio/ringtest/Makefile +++ b/tools/virtio/ringtest/Makefile @@ -1,6 +1,6 @@ all: -all: ring virtio_ring_0_9 virtio_ring_poll virtio_ring_inorder noring +all: ring virtio_ring_0_9 virtio_ring_poll virtio_ring_inorder ptr_ring noring CFLAGS += -Wall CFLAGS += -pthread -O2 -ggdb @@ -8,6 +8,7 @@ LDFLAGS += -pthread -O2 -ggdb main.o: main.c main.h ring.o: ring.c main.h +ptr_ring.o: ptr_ring.c main.h ../../../include/linux/ptr_ring.h virtio_ring_0_9.o: virtio_ring_0_9.c main.h virtio_ring_poll.o: virtio_ring_poll.c virtio_ring_0_9.c main.h virtio_ring_inorder.o: virtio_ring_inorder.c virtio_ring_0_9.c main.h @@ -15,6 +16,7 @@ ring: ring.o main.o virtio_ring_0_9: virtio_ring_0_9.o main.o virtio_ring_poll: virtio_ring_poll.o main.o virtio_ring_inorder: virtio_ring_inorder.o main.o +ptr_ring: ptr_ring.o main.o noring: noring.o main.o clean: -rm main.o @@ -22,6 +24,7 @@ clean: -rm virtio_ring_0_9.o virtio_ring_0_9 -rm virtio_ring_poll.o virtio_ring_poll -rm virtio_ring_inorder.o virtio_ring_inorder + -rm ptr_ring.o ptr_ring -rm noring.o noring .PHONY: all clean diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c new file mode 100644 index 000000000000..68e4f9f0da3a --- /dev/null +++ b/tools/virtio/ringtest/ptr_ring.c @@ -0,0 +1,197 @@ +#define _GNU_SOURCE +#include "main.h" +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <malloc.h> +#include <assert.h> +#include <errno.h> +#include <limits.h> + +#define SMP_CACHE_BYTES 64 +#define cache_line_size() SMP_CACHE_BYTES +#define ____cacheline_aligned_in_smp __attribute__ ((aligned (SMP_CACHE_BYTES))) +#define unlikely(x) (__builtin_expect(!!(x), 0)) +#define ALIGN(x, a) (((x) + (a) - 1) / (a) * (a)) +typedef pthread_spinlock_t spinlock_t; + +typedef int gfp_t; +static void *kmalloc(unsigned size, gfp_t gfp) +{ + return memalign(64, size); +} + +static void *kzalloc(unsigned size, gfp_t gfp) +{ + void *p = memalign(64, size); + if (!p) + return p; + memset(p, 0, size); + + return p; +} + +static void kfree(void *p) +{ + if (p) + free(p); +} + +static void spin_lock_init(spinlock_t *lock) +{ + int r = pthread_spin_init(lock, 0); + assert(!r); +} + +static void spin_lock(spinlock_t *lock) +{ + int ret = pthread_spin_lock(lock); + assert(!ret); +} + +static void spin_unlock(spinlock_t *lock) +{ + int ret = pthread_spin_unlock(lock); + assert(!ret); +} + +static void spin_lock_bh(spinlock_t *lock) +{ + spin_lock(lock); +} + +static void spin_unlock_bh(spinlock_t *lock) +{ + spin_unlock(lock); +} + +static void spin_lock_irq(spinlock_t *lock) +{ + spin_lock(lock); +} + +static void spin_unlock_irq(spinlock_t *lock) +{ + spin_unlock(lock); +} + +static void spin_lock_irqsave(spinlock_t *lock, unsigned long f) +{ + spin_lock(lock); +} + +static void spin_unlock_irqrestore(spinlock_t *lock, unsigned long f) +{ + spin_unlock(lock); +} + +#include "../../../include/linux/ptr_ring.h" + +static unsigned long long headcnt, tailcnt; +static struct ptr_ring array ____cacheline_aligned_in_smp; + +/* implemented by ring */ +void alloc_ring(void) +{ + int ret = ptr_ring_init(&array, ring_size, 0); + assert(!ret); +} + +/* guest side */ +int add_inbuf(unsigned len, void *buf, void *datap) +{ + int 
ret; + + ret = __ptr_ring_produce(&array, buf); + if (ret >= 0) { + ret = 0; + headcnt++; + } + + return ret; +} + +/* + * ptr_ring API provides no way for producer to find out whether a given + * buffer was consumed. Our tests merely require that a successful get_buf + * implies that add_inbuf succeed in the past, and that add_inbuf will succeed, + * fake it accordingly. + */ +void *get_buf(unsigned *lenp, void **bufp) +{ + void *datap; + + if (tailcnt == headcnt || __ptr_ring_full(&array)) + datap = NULL; + else { + datap = "Buffer\n"; + ++tailcnt; + } + + return datap; +} + +void poll_used(void) +{ + void *b; + + do { + if (tailcnt == headcnt || __ptr_ring_full(&array)) { + b = NULL; + barrier(); + } else { + b = "Buffer\n"; + } + } while (!b); +} + +void disable_call() +{ + assert(0); +} + +bool enable_call() +{ + assert(0); +} + +void kick_available(void) +{ + assert(0); +} + +/* host side */ +void disable_kick() +{ + assert(0); +} + +bool enable_kick() +{ + assert(0); +} + +void poll_avail(void) +{ + void *b; + + do { + barrier(); + b = __ptr_ring_peek(&array); + } while (!b); +} + +bool use_buf(unsigned *lenp, void **bufp) +{ + void *ptr; + + ptr = __ptr_ring_consume(&array); + + return ptr; +} + +void call_used(void) +{ + assert(0); +} diff --git a/tools/vm/page_owner_sort.c b/tools/vm/page_owner_sort.c index 77147b42d598..f1c055f3c243 100644 --- a/tools/vm/page_owner_sort.c +++ b/tools/vm/page_owner_sort.c @@ -79,12 +79,12 @@ static void add_list(char *buf, int len) } } -#define BUF_SIZE 1024 +#define BUF_SIZE (128 * 1024) int main(int argc, char **argv) { FILE *fin, *fout; - char buf[BUF_SIZE]; + char *buf; int ret, i, count; struct block_list *list2; struct stat st; @@ -107,6 +107,11 @@ int main(int argc, char **argv) max_size = st.st_size / 100; /* hack ... */ list = malloc(max_size * sizeof(*list)); + buf = malloc(BUF_SIZE); + if (!list || !buf) { + printf("Out of memory\n"); + exit(1); + } for ( ; ; ) { ret = read_block(buf, BUF_SIZE, fin); diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c index 7cf6e1769903..b9d34b37c017 100644 --- a/tools/vm/slabinfo.c +++ b/tools/vm/slabinfo.c @@ -510,10 +510,11 @@ static void slab_stats(struct slabinfo *s) s->alloc_node_mismatch, (s->alloc_node_mismatch * 100) / total); } - if (s->cmpxchg_double_fail || s->cmpxchg_double_cpu_fail) + if (s->cmpxchg_double_fail || s->cmpxchg_double_cpu_fail) { printf("\nCmpxchg_double Looping\n------------------------\n"); printf("Locked Cmpxchg Double redos %lu\nUnlocked Cmpxchg Double redos %lu\n", s->cmpxchg_double_fail, s->cmpxchg_double_cpu_fail); + } } static void report(struct slabinfo *s) |
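As a companion to the 64-bit linear-address layout documented in mpx-hw.h above (bits 3-19 index a bounds table, bits 20-47 the bounds directory), the following is a minimal standalone sketch, not part of the patch, of how a pointer decomposes into those two indices. It mirrors GET_BITS() and the MPX_BOUNDS_*_BIT constants for x86-64; main() and the sample address are illustrative assumptions only.

#include <stdio.h>

/* 64-bit MPX pointer layout, per the comment in mpx-hw.h:
 *   bits  3-19 -> index into a bounds table
 *   bits 20-47 -> index into the bounds directory
 */
#define BT_BOTTOM_BIT	 3
#define BT_TOP_BIT	19
#define BD_BOTTOM_BIT	20
#define BD_TOP_BIT	47

/* same computation as GET_BITS() in mpx-hw.h */
static unsigned long get_bits(unsigned long val, int bottom, int top)
{
	unsigned long mask = (1UL << (top - bottom)) - 1;

	return (val >> bottom) & mask;
}

int main(void)
{
	unsigned long vaddr = 0x00007f1234567890UL;	/* arbitrary sample address */

	printf("vaddr %016lx -> bounds dir index %lu, bounds table index %lu\n",
	       vaddr,
	       get_bits(vaddr, BD_BOTTOM_BIT, BD_TOP_BIT),
	       get_bits(vaddr, BT_BOTTOM_BIT, BT_TOP_BIT));
	return 0;
}

Compiled with a plain "gcc -Wall", this prints the directory and table slots that mpx_vaddr_to_bd_entry()/mpx_vaddr_to_bt_entry() would select for that address.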