diff options
184 files changed, 10378 insertions, 1896 deletions
diff --git a/Documentation/admin-guide/perf/arm-cmn.rst b/Documentation/admin-guide/perf/arm-cmn.rst new file mode 100644 index 000000000000..0e4809346014 --- /dev/null +++ b/Documentation/admin-guide/perf/arm-cmn.rst @@ -0,0 +1,65 @@ +============================= +Arm Coherent Mesh Network PMU +============================= + +CMN-600 is a configurable mesh interconnect consisting of a rectangular +grid of crosspoints (XPs), with each crosspoint supporting up to two +device ports to which various AMBA CHI agents are attached. + +CMN implements a distributed PMU design as part of its debug and trace +functionality. This consists of a local monitor (DTM) at every XP, which +counts up to 4 event signals from the connected device nodes and/or the +XP itself. Overflow from these local counters is accumulated in up to 8 +global counters implemented by the main controller (DTC), which provides +overall PMU control and interrupts for global counter overflow. + +PMU events +---------- + +The PMU driver registers a single PMU device for the whole interconnect, +see /sys/bus/event_source/devices/arm_cmn. Multi-chip systems may link +more than one CMN together via external CCIX links - in this situation, +each mesh counts its own events entirely independently, and additional +PMU devices will be named arm_cmn_{1..n}. + +Most events are specified in a format based directly on the TRM +definitions - "type" selects the respective node type, and "eventid" the +event number. Some events require an additional occupancy ID, which is +specified by "occupid". + +* Since RN-D nodes do not have any distinct events from RN-I nodes, they + are treated as the same type (0xa), and the common event templates are + named "rnid_*". + +* The cycle counter is treated as a synthetic event belonging to the DTC + node ("type" == 0x3, "eventid" is ignored). + +* XP events also encode the port and channel in the "eventid" field, to + match the underlying pmu_event0_id encoding for the pmu_event_sel + register. The event templates are named with prefixes to cover all + permutations. + +By default each event provides an aggregate count over all nodes of the +given type. To target a specific node, "bynodeid" must be set to 1 and +"nodeid" to the appropriate value derived from the CMN configuration +(as defined in the "Node ID Mapping" section of the TRM). + +Watchpoints +----------- + +The PMU can also count watchpoint events to monitor specific flit +traffic. Watchpoints are treated as a synthetic event type, and like PMU +events can be global or targeted with a particular XP's "nodeid" value. +Since the watchpoint direction is otherwise implicit in the underlying +register selection, separate events are provided for flit uploads and +downloads. + +The flit match value and mask are passed in config1 and config2 ("val" +and "mask" respectively). "wp_dev_sel", "wp_chn_sel", "wp_grp" and +"wp_exclusive" are specified per the TRM definitions for dtm_wp_config0. +Where a watchpoint needs to match fields from both match groups on the +REQ or SNP channel, it can be specified as two events - one for each +group - with the same nonzero "combine" value. The count for such a +pair of combined events will be attributed to the primary match. +Watchpoint events with a "combine" value of 0 are considered independent +and will count individually. diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst index 47c99f40cc16..5a8f2529a033 100644 --- a/Documentation/admin-guide/perf/index.rst +++ b/Documentation/admin-guide/perf/index.rst @@ -12,6 +12,7 @@ Performance monitor support qcom_l2_pmu qcom_l3_pmu arm-ccn + arm-cmn xgene-pmu arm_dsu_pmu thunderx2-pmu diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arm64/cpu-feature-registers.rst index f28853f80089..328e0c454fbd 100644 --- a/Documentation/arm64/cpu-feature-registers.rst +++ b/Documentation/arm64/cpu-feature-registers.rst @@ -175,6 +175,8 @@ infrastructure: +------------------------------+---------+---------+ | Name | bits | visible | +------------------------------+---------+---------+ + | MTE | [11-8] | y | + +------------------------------+---------+---------+ | SSBS | [7-4] | y | +------------------------------+---------+---------+ | BT | [3-0] | y | diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst index 84a9fd2d41b4..bbd9cf54db6c 100644 --- a/Documentation/arm64/elf_hwcaps.rst +++ b/Documentation/arm64/elf_hwcaps.rst @@ -240,6 +240,10 @@ HWCAP2_BTI Functionality implied by ID_AA64PFR0_EL1.BT == 0b0001. +HWCAP2_MTE + + Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0010, as described + by Documentation/arm64/memory-tagging-extension.rst. 4. Unused AT_HWCAP bits ----------------------- diff --git a/Documentation/arm64/index.rst b/Documentation/arm64/index.rst index d9665d83c53a..43b0939d384e 100644 --- a/Documentation/arm64/index.rst +++ b/Documentation/arm64/index.rst @@ -14,6 +14,7 @@ ARM64 Architecture hugetlbpage legacy_instructions memory + memory-tagging-extension perf pointer-authentication silicon-errata diff --git a/Documentation/arm64/memory-tagging-extension.rst b/Documentation/arm64/memory-tagging-extension.rst new file mode 100644 index 000000000000..034d37c605e8 --- /dev/null +++ b/Documentation/arm64/memory-tagging-extension.rst @@ -0,0 +1,305 @@ +=============================================== +Memory Tagging Extension (MTE) in AArch64 Linux +=============================================== + +Authors: Vincenzo Frascino <vincenzo.frascino@arm.com> + Catalin Marinas <catalin.marinas@arm.com> + +Date: 2020-02-25 + +This document describes the provision of the Memory Tagging Extension +functionality in AArch64 Linux. + +Introduction +============ + +ARMv8.5 based processors introduce the Memory Tagging Extension (MTE) +feature. MTE is built on top of the ARMv8.0 virtual address tagging TBI +(Top Byte Ignore) feature and allows software to access a 4-bit +allocation tag for each 16-byte granule in the physical address space. +Such memory range must be mapped with the Normal-Tagged memory +attribute. A logical tag is derived from bits 59-56 of the virtual +address used for the memory access. A CPU with MTE enabled will compare +the logical tag against the allocation tag and potentially raise an +exception on mismatch, subject to system registers configuration. + +Userspace Support +================= + +When ``CONFIG_ARM64_MTE`` is selected and Memory Tagging Extension is +supported by the hardware, the kernel advertises the feature to +userspace via ``HWCAP2_MTE``. + +PROT_MTE +-------- + +To access the allocation tags, a user process must enable the Tagged +memory attribute on an address range using a new ``prot`` flag for +``mmap()`` and ``mprotect()``: + +``PROT_MTE`` - Pages allow access to the MTE allocation tags. + +The allocation tag is set to 0 when such pages are first mapped in the +user address space and preserved on copy-on-write. ``MAP_SHARED`` is +supported and the allocation tags can be shared between processes. + +**Note**: ``PROT_MTE`` is only supported on ``MAP_ANONYMOUS`` and +RAM-based file mappings (``tmpfs``, ``memfd``). Passing it to other +types of mapping will result in ``-EINVAL`` returned by these system +calls. + +**Note**: The ``PROT_MTE`` flag (and corresponding memory type) cannot +be cleared by ``mprotect()``. + +**Note**: ``madvise()`` memory ranges with ``MADV_DONTNEED`` and +``MADV_FREE`` may have the allocation tags cleared (set to 0) at any +point after the system call. + +Tag Check Faults +---------------- + +When ``PROT_MTE`` is enabled on an address range and a mismatch between +the logical and allocation tags occurs on access, there are three +configurable behaviours: + +- *Ignore* - This is the default mode. The CPU (and kernel) ignores the + tag check fault. + +- *Synchronous* - The kernel raises a ``SIGSEGV`` synchronously, with + ``.si_code = SEGV_MTESERR`` and ``.si_addr = <fault-address>``. The + memory access is not performed. If ``SIGSEGV`` is ignored or blocked + by the offending thread, the containing process is terminated with a + ``coredump``. + +- *Asynchronous* - The kernel raises a ``SIGSEGV``, in the offending + thread, asynchronously following one or multiple tag check faults, + with ``.si_code = SEGV_MTEAERR`` and ``.si_addr = 0`` (the faulting + address is unknown). + +The user can select the above modes, per thread, using the +``prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)`` system call where +``flags`` contain one of the following values in the ``PR_MTE_TCF_MASK`` +bit-field: + +- ``PR_MTE_TCF_NONE`` - *Ignore* tag check faults +- ``PR_MTE_TCF_SYNC`` - *Synchronous* tag check fault mode +- ``PR_MTE_TCF_ASYNC`` - *Asynchronous* tag check fault mode + +The current tag check fault mode can be read using the +``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call. + +Tag checking can also be disabled for a user thread by setting the +``PSTATE.TCO`` bit with ``MSR TCO, #1``. + +**Note**: Signal handlers are always invoked with ``PSTATE.TCO = 0``, +irrespective of the interrupted context. ``PSTATE.TCO`` is restored on +``sigreturn()``. + +**Note**: There are no *match-all* logical tags available for user +applications. + +**Note**: Kernel accesses to the user address space (e.g. ``read()`` +system call) are not checked if the user thread tag checking mode is +``PR_MTE_TCF_NONE`` or ``PR_MTE_TCF_ASYNC``. If the tag checking mode is +``PR_MTE_TCF_SYNC``, the kernel makes a best effort to check its user +address accesses, however it cannot always guarantee it. + +Excluding Tags in the ``IRG``, ``ADDG`` and ``SUBG`` instructions +----------------------------------------------------------------- + +The architecture allows excluding certain tags to be randomly generated +via the ``GCR_EL1.Exclude`` register bit-field. By default, Linux +excludes all tags other than 0. A user thread can enable specific tags +in the randomly generated set using the ``prctl(PR_SET_TAGGED_ADDR_CTRL, +flags, 0, 0, 0)`` system call where ``flags`` contains the tags bitmap +in the ``PR_MTE_TAG_MASK`` bit-field. + +**Note**: The hardware uses an exclude mask but the ``prctl()`` +interface provides an include mask. An include mask of ``0`` (exclusion +mask ``0xffff``) results in the CPU always generating tag ``0``. + +Initial process state +--------------------- + +On ``execve()``, the new process has the following configuration: + +- ``PR_TAGGED_ADDR_ENABLE`` set to 0 (disabled) +- Tag checking mode set to ``PR_MTE_TCF_NONE`` +- ``PR_MTE_TAG_MASK`` set to 0 (all tags excluded) +- ``PSTATE.TCO`` set to 0 +- ``PROT_MTE`` not set on any of the initial memory maps + +On ``fork()``, the new process inherits the parent's configuration and +memory map attributes with the exception of the ``madvise()`` ranges +with ``MADV_WIPEONFORK`` which will have the data and tags cleared (set +to 0). + +The ``ptrace()`` interface +-------------------------- + +``PTRACE_PEEKMTETAGS`` and ``PTRACE_POKEMTETAGS`` allow a tracer to read +the tags from or set the tags to a tracee's address space. The +``ptrace()`` system call is invoked as ``ptrace(request, pid, addr, +data)`` where: + +- ``request`` - one of ``PTRACE_PEEKMTETAGS`` or ``PTRACE_POKEMTETAGS``. +- ``pid`` - the tracee's PID. +- ``addr`` - address in the tracee's address space. +- ``data`` - pointer to a ``struct iovec`` where ``iov_base`` points to + a buffer of ``iov_len`` length in the tracer's address space. + +The tags in the tracer's ``iov_base`` buffer are represented as one +4-bit tag per byte and correspond to a 16-byte MTE tag granule in the +tracee's address space. + +**Note**: If ``addr`` is not aligned to a 16-byte granule, the kernel +will use the corresponding aligned address. + +``ptrace()`` return value: + +- 0 - tags were copied, the tracer's ``iov_len`` was updated to the + number of tags transferred. This may be smaller than the requested + ``iov_len`` if the requested address range in the tracee's or the + tracer's space cannot be accessed or does not have valid tags. +- ``-EPERM`` - the specified process cannot be traced. +- ``-EIO`` - the tracee's address range cannot be accessed (e.g. invalid + address) and no tags copied. ``iov_len`` not updated. +- ``-EFAULT`` - fault on accessing the tracer's memory (``struct iovec`` + or ``iov_base`` buffer) and no tags copied. ``iov_len`` not updated. +- ``-EOPNOTSUPP`` - the tracee's address does not have valid tags (never + mapped with the ``PROT_MTE`` flag). ``iov_len`` not updated. + +**Note**: There are no transient errors for the requests above, so user +programs should not retry in case of a non-zero system call return. + +``PTRACE_GETREGSET`` and ``PTRACE_SETREGSET`` with ``addr == +``NT_ARM_TAGGED_ADDR_CTRL`` allow ``ptrace()`` access to the tagged +address ABI control and MTE configuration of a process as per the +``prctl()`` options described in +Documentation/arm64/tagged-address-abi.rst and above. The corresponding +``regset`` is 1 element of 8 bytes (``sizeof(long))``). + +Example of correct usage +======================== + +*MTE Example code* + +.. code-block:: c + + /* + * To be compiled with -march=armv8.5-a+memtag + */ + #include <errno.h> + #include <stdint.h> + #include <stdio.h> + #include <stdlib.h> + #include <unistd.h> + #include <sys/auxv.h> + #include <sys/mman.h> + #include <sys/prctl.h> + + /* + * From arch/arm64/include/uapi/asm/hwcap.h + */ + #define HWCAP2_MTE (1 << 18) + + /* + * From arch/arm64/include/uapi/asm/mman.h + */ + #define PROT_MTE 0x20 + + /* + * From include/uapi/linux/prctl.h + */ + #define PR_SET_TAGGED_ADDR_CTRL 55 + #define PR_GET_TAGGED_ADDR_CTRL 56 + # define PR_TAGGED_ADDR_ENABLE (1UL << 0) + # define PR_MTE_TCF_SHIFT 1 + # define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) + # define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) + # define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) + # define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) + # define PR_MTE_TAG_SHIFT 3 + # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) + + /* + * Insert a random logical tag into the given pointer. + */ + #define insert_random_tag(ptr) ({ \ + uint64_t __val; \ + asm("irg %0, %1" : "=r" (__val) : "r" (ptr)); \ + __val; \ + }) + + /* + * Set the allocation tag on the destination address. + */ + #define set_tag(tagged_addr) do { \ + asm volatile("stg %0, [%0]" : : "r" (tagged_addr) : "memory"); \ + } while (0) + + int main() + { + unsigned char *a; + unsigned long page_sz = sysconf(_SC_PAGESIZE); + unsigned long hwcap2 = getauxval(AT_HWCAP2); + + /* check if MTE is present */ + if (!(hwcap2 & HWCAP2_MTE)) + return EXIT_FAILURE; + + /* + * Enable the tagged address ABI, synchronous MTE tag check faults and + * allow all non-zero tags in the randomly generated set. + */ + if (prctl(PR_SET_TAGGED_ADDR_CTRL, + PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_SYNC | (0xfffe << PR_MTE_TAG_SHIFT), + 0, 0, 0)) { + perror("prctl() failed"); + return EXIT_FAILURE; + } + + a = mmap(0, page_sz, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (a == MAP_FAILED) { + perror("mmap() failed"); + return EXIT_FAILURE; + } + + /* + * Enable MTE on the above anonymous mmap. The flag could be passed + * directly to mmap() and skip this step. + */ + if (mprotect(a, page_sz, PROT_READ | PROT_WRITE | PROT_MTE)) { + perror("mprotect() failed"); + return EXIT_FAILURE; + } + + /* access with the default tag (0) */ + a[0] = 1; + a[1] = 2; + + printf("a[0] = %hhu a[1] = %hhu\n", a[0], a[1]); + + /* set the logical and allocation tags */ + a = (unsigned char *)insert_random_tag(a); + set_tag(a); + + printf("%p\n", a); + + /* non-zero tag access */ + a[0] = 3; + printf("a[0] = %hhu a[1] = %hhu\n", a[0], a[1]); + + /* + * If MTE is enabled correctly the next instruction will generate an + * exception. + */ + printf("Expecting SIGSEGV...\n"); + a[16] = 0xdd; + + /* this should not be printed in the PR_MTE_TCF_SYNC mode */ + printf("...haven't got one\n"); + + return EXIT_FAILURE; + } diff --git a/Documentation/devicetree/bindings/perf/arm,cmn.yaml b/Documentation/devicetree/bindings/perf/arm,cmn.yaml new file mode 100644 index 000000000000..e4fcc0de25e2 --- /dev/null +++ b/Documentation/devicetree/bindings/perf/arm,cmn.yaml @@ -0,0 +1,57 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright 2020 Arm Ltd. +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/perf/arm,cmn.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Arm CMN (Coherent Mesh Network) Performance Monitors + +maintainers: + - Robin Murphy <robin.murphy@arm.com> + +properties: + compatible: + const: arm,cmn-600 + + reg: + items: + - description: Physical address of the base (PERIPHBASE) and + size (up to 64MB) of the configuration address space. + + interrupts: + minItems: 1 + maxItems: 4 + items: + - description: Overflow interrupt for DTC0 + - description: Overflow interrupt for DTC1 + - description: Overflow interrupt for DTC2 + - description: Overflow interrupt for DTC3 + description: One interrupt for each DTC domain implemented must + be specified, in order. DTC0 is always present. + + arm,root-node: + $ref: /schemas/types.yaml#/definitions/uint32 + description: Offset from PERIPHBASE of the configuration + discovery node (see TRM definition of ROOTNODEBASE). + +required: + - compatible + - reg + - interrupts + - arm,root-node + +additionalProperties: false + +examples: + - | + #include <dt-bindings/interrupt-controller/arm-gic.h> + #include <dt-bindings/interrupt-controller/irq.h> + pmu@50000000 { + compatible = "arm,cmn-600"; + reg = <0x50000000 0x4000000>; + /* 4x2 mesh with one DTC, and CFG node at 0,1,1,0 */ + interrupts = <GIC_SPI 46 IRQ_TYPE_LEVEL_HIGH>; + arm,root-node = <0x104000>; + }; +... diff --git a/Documentation/virt/kvm/arm/hyp-abi.rst b/Documentation/virt/kvm/arm/hyp-abi.rst index d9eba93aa364..83cadd8186fa 100644 --- a/Documentation/virt/kvm/arm/hyp-abi.rst +++ b/Documentation/virt/kvm/arm/hyp-abi.rst @@ -54,9 +54,9 @@ these functions (see arch/arm{,64}/include/asm/virt.h): x3 = x1's value when entering the next payload (arm64) x4 = x2's value when entering the next payload (arm64) - Mask all exceptions, disable the MMU, move the arguments into place - (arm64 only), and jump to the restart address while at HYP/EL2. This - hypercall is not expected to return to its caller. + Mask all exceptions, disable the MMU, clear I+D bits, move the arguments + into place (arm64 only), and jump to the restart address while at HYP/EL2. + This hypercall is not expected to return to its caller. Any other value of r0/x0 triggers a hypervisor-specific handling, which is not documented here. diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6d232837cbee..43091f439e4e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -29,6 +29,7 @@ config ARM64 select ARCH_HAS_SETUP_DMA_OPS select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_SET_MEMORY + select ARCH_STACKWALK select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_SYNC_DMA_FOR_DEVICE @@ -211,12 +212,18 @@ config ARM64_PAGE_SHIFT default 14 if ARM64_16K_PAGES default 12 -config ARM64_CONT_SHIFT +config ARM64_CONT_PTE_SHIFT int default 5 if ARM64_64K_PAGES default 7 if ARM64_16K_PAGES default 4 +config ARM64_CONT_PMD_SHIFT + int + default 5 if ARM64_64K_PAGES + default 5 if ARM64_16K_PAGES + default 4 + config ARCH_MMAP_RND_BITS_MIN default 14 if ARM64_64K_PAGES default 16 if ARM64_16K_PAGES @@ -1165,32 +1172,6 @@ config UNMAP_KERNEL_AT_EL0 If unsure, say Y. -config HARDEN_BRANCH_PREDICTOR - bool "Harden the branch predictor against aliasing attacks" if EXPERT - default y - help - Speculation attacks against some high-performance processors rely on - being able to manipulate the branch predictor for a victim context by - executing aliasing branches in the attacker context. Such attacks - can be partially mitigated against by clearing internal branch - predictor state and limiting the prediction logic in some situations. - - This config option will take CPU-specific actions to harden the - branch predictor against aliasing attacks and may rely on specific - instruction sequences or control bits being set by the system - firmware. - - If unsure, say Y. - -config ARM64_SSBD - bool "Speculative Store Bypass Disable" if EXPERT - default y - help - This enables mitigation of the bypassing of previous stores - by speculative loads. - - If unsure, say Y. - config RODATA_FULL_DEFAULT_ENABLED bool "Apply r/o permissions of VM areas also to their linear aliases" default y @@ -1664,6 +1645,39 @@ config ARCH_RANDOM provides a high bandwidth, cryptographically secure hardware random number generator. +config ARM64_AS_HAS_MTE + # Initial support for MTE went in binutils 2.32.0, checked with + # ".arch armv8.5-a+memtag" below. However, this was incomplete + # as a late addition to the final architecture spec (LDGM/STGM) + # is only supported in the newer 2.32.x and 2.33 binutils + # versions, hence the extra "stgm" instruction check below. + def_bool $(as-instr,.arch armv8.5-a+memtag\nstgm xzr$(comma)[x0]) + +config ARM64_MTE + bool "Memory Tagging Extension support" + default y + depends on ARM64_AS_HAS_MTE && ARM64_TAGGED_ADDR_ABI + select ARCH_USES_HIGH_VMA_FLAGS + help + Memory Tagging (part of the ARMv8.5 Extensions) provides + architectural support for run-time, always-on detection of + various classes of memory error to aid with software debugging + to eliminate vulnerabilities arising from memory-unsafe + languages. + + This option enables the support for the Memory Tagging + Extension at EL0 (i.e. for userspace). + + Selecting this option allows the feature to be detected at + runtime. Any secondary CPU not implementing this feature will + not be allowed a late bring-up. + + Userspace binaries that want to use this feature must + explicitly opt in. The mechanism for the userspace is + described in: + + Documentation/arm64/memory-tagging-extension.rst. + endmenu config ARM64_SVE @@ -1876,6 +1890,10 @@ config ARCH_ENABLE_HUGEPAGE_MIGRATION def_bool y depends on HUGETLB_PAGE && MIGRATION +config ARCH_ENABLE_THP_MIGRATION + def_bool y + depends on TRANSPARENT_HUGEPAGE + menu "Power management options" source "kernel/power/Kconfig" diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 130569f90c54..0fd4c1be4f64 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -11,7 +11,6 @@ # Copyright (C) 1995-2001 by Russell King LDFLAGS_vmlinux :=--no-undefined -X -CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) ifeq ($(CONFIG_RELOCATABLE), y) # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour @@ -132,9 +131,6 @@ endif # Default value head-y := arch/arm64/kernel/head.o -# The byte offset of the kernel image in RAM from the start of RAM. -TEXT_OFFSET := 0x0 - ifeq ($(CONFIG_KASAN_SW_TAGS), y) KASAN_SHADOW_SCALE_SHIFT := 4 else @@ -145,8 +141,6 @@ KBUILD_CFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT) KBUILD_CPPFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT) KBUILD_AFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT) -export TEXT_OFFSET - core-y += arch/arm64/ libs-y := arch/arm64/lib/ $(libs-y) libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h index 44209f6146aa..ffb1a40d5475 100644 --- a/arch/arm64/include/asm/archrandom.h +++ b/arch/arm64/include/asm/archrandom.h @@ -79,10 +79,5 @@ arch_get_random_seed_long_early(unsigned long *v) } #define arch_get_random_seed_long_early arch_get_random_seed_long_early -#else - -static inline bool __arm64_rndr(unsigned long *v) { return false; } -static inline bool __init __early_cpu_has_rndr(void) { return false; } - #endif /* CONFIG_ARCH_RANDOM */ #endif /* _ASM_ARCHRANDOM_H */ diff --git a/arch/arm64/include/asm/boot.h b/arch/arm64/include/asm/boot.h index c7f67da13cd9..3e7943fd17a4 100644 --- a/arch/arm64/include/asm/boot.h +++ b/arch/arm64/include/asm/boot.h @@ -13,8 +13,7 @@ #define MAX_FDT_SIZE SZ_2M /* - * arm64 requires the kernel image to placed - * TEXT_OFFSET bytes beyond a 2 MB aligned base + * arm64 requires the kernel image to placed at a 2 MB aligned base address */ #define MIN_KIMG_ALIGN SZ_2M diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index d28e8f37d3b4..e95c4df83911 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -21,7 +21,7 @@ * mechanism for doing so, tests whether it is possible to boot * the given CPU. * @cpu_boot: Boots a cpu into the kernel. - * @cpu_postboot: Optionally, perform any post-boot cleanup or necesary + * @cpu_postboot: Optionally, perform any post-boot cleanup or necessary * synchronisation. Called from the cpu being booted. * @cpu_can_disable: Determines whether a CPU can be disabled based on * mechanism-specific information. diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 07b643a70710..42868dbd29fd 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -31,13 +31,13 @@ #define ARM64_HAS_DCPOP 21 #define ARM64_SVE 22 #define ARM64_UNMAP_KERNEL_AT_EL0 23 -#define ARM64_HARDEN_BRANCH_PREDICTOR 24 +#define ARM64_SPECTRE_V2 24 #define ARM64_HAS_RAS_EXTN 25 #define ARM64_WORKAROUND_843419 26 #define ARM64_HAS_CACHE_IDC 27 #define ARM64_HAS_CACHE_DIC 28 #define ARM64_HW_DBM 29 -#define ARM64_SSBD 30 +#define ARM64_SPECTRE_V4 30 #define ARM64_MISMATCHED_CACHE_TYPE 31 #define ARM64_HAS_STAGE2_FWB 32 #define ARM64_HAS_CRC32 33 @@ -64,7 +64,8 @@ #define ARM64_BTI 54 #define ARM64_HAS_ARMv8_4_TTL 55 #define ARM64_HAS_TLB_RANGE 56 +#define ARM64_MTE 57 -#define ARM64_NCAPS 57 +#define ARM64_NCAPS 58 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 89b4f0142c28..f7e7144af174 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -358,7 +358,7 @@ static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap) } /* - * Generic helper for handling capabilties with multiple (match,enable) pairs + * Generic helper for handling capabilities with multiple (match,enable) pairs * of call backs, sharing the same capability bit. * Iterate over each entry to see if at least one matches. */ @@ -681,6 +681,12 @@ static __always_inline bool system_uses_irq_prio_masking(void) cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING); } +static inline bool system_supports_mte(void) +{ + return IS_ENABLED(CONFIG_ARM64_MTE) && + cpus_have_const_cap(ARM64_MTE); +} + static inline bool system_has_prio_mask_debugging(void) { return IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING) && @@ -698,30 +704,6 @@ static inline bool system_supports_tlb_range(void) cpus_have_const_cap(ARM64_HAS_TLB_RANGE); } -#define ARM64_BP_HARDEN_UNKNOWN -1 -#define ARM64_BP_HARDEN_WA_NEEDED 0 -#define ARM64_BP_HARDEN_NOT_REQUIRED 1 - -int get_spectre_v2_workaround_state(void); - -#define ARM64_SSBD_UNKNOWN -1 -#define ARM64_SSBD_FORCE_DISABLE 0 -#define ARM64_SSBD_KERNEL 1 -#define ARM64_SSBD_FORCE_ENABLE 2 -#define ARM64_SSBD_MITIGATED 3 - -static inline int arm64_get_ssbd_state(void) -{ -#ifdef CONFIG_ARM64_SSBD - extern int ssbd_state; - return ssbd_state; -#else - return ARM64_SSBD_UNKNOWN; -#endif -} - -void arm64_set_ssbd_mitigation(bool state); - extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 035003acfa87..22c81f1edda2 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -35,7 +35,9 @@ #define ESR_ELx_EC_SYS64 (0x18) #define ESR_ELx_EC_SVE (0x19) #define ESR_ELx_EC_ERET (0x1a) /* EL2 only */ -/* Unallocated EC: 0x1b - 0x1E */ +/* Unallocated EC: 0x1B */ +#define ESR_ELx_EC_FPAC (0x1C) /* EL1 and above */ +/* Unallocated EC: 0x1D - 0x1E */ #define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */ #define ESR_ELx_EC_IABT_LOW (0x20) #define ESR_ELx_EC_IABT_CUR (0x21) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 7577a754d443..99b9383cd036 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -47,4 +47,5 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr); void do_cp15instr(unsigned int esr, struct pt_regs *regs); void do_el0_svc(struct pt_regs *regs); void do_el0_svc_compat(struct pt_regs *regs); +void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr); #endif /* __ASM_EXCEPTION_H */ diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h index 840a35ed92ec..b15eb4a3e6b2 100644 --- a/arch/arm64/include/asm/extable.h +++ b/arch/arm64/include/asm/extable.h @@ -22,6 +22,15 @@ struct exception_table_entry #define ARCH_HAS_RELATIVE_EXTABLE +static inline bool in_bpf_jit(struct pt_regs *regs) +{ + if (!IS_ENABLED(CONFIG_BPF_JIT)) + return false; + + return regs->pc >= BPF_JIT_REGION_START && + regs->pc < BPF_JIT_REGION_END; +} + #ifdef CONFIG_BPF_JIT int arm64_bpf_fixup_exception(const struct exception_table_entry *ex, struct pt_regs *regs); diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 59f10dd13f12..bec5f14b622a 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -69,6 +69,9 @@ static inline void *sve_pffr(struct thread_struct *thread) extern void sve_save_state(void *state, u32 *pfpsr); extern void sve_load_state(void const *state, u32 const *pfpsr, unsigned long vq_minus_1); +extern void sve_flush_live(void); +extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state, + unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); struct arm64_cpu_capabilities; diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 636e9d9c7929..af43367534c7 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -164,25 +164,59 @@ | ((\np) << 5) .endm +/* PFALSE P\np.B */ +.macro _sve_pfalse np + _sve_check_preg \np + .inst 0x2518e400 \ + | (\np) +.endm + .macro __for from:req, to:req .if (\from) == (\to) - _for__body \from + _for__body %\from .else - __for \from, (\from) + ((\to) - (\from)) / 2 - __for (\from) + ((\to) - (\from)) / 2 + 1, \to + __for %\from, %((\from) + ((\to) - (\from)) / 2) + __for %((\from) + ((\to) - (\from)) / 2 + 1), %\to .endif .endm .macro _for var:req, from:req, to:req, insn:vararg .macro _for__body \var:req + .noaltmacro \insn + .altmacro .endm + .altmacro __for \from, \to + .noaltmacro .purgem _for__body .endm +/* Update ZCR_EL1.LEN with the new VQ */ +.macro sve_load_vq xvqminus1, xtmp, xtmp2 + mrs_s \xtmp, SYS_ZCR_EL1 + bic \xtmp2, \xtmp, ZCR_ELx_LEN_MASK + orr \xtmp2, \xtmp2, \xvqminus1 + cmp \xtmp2, \xtmp + b.eq 921f + msr_s SYS_ZCR_EL1, \xtmp2 //self-synchronising +921: +.endm + +/* Preserve the first 128-bits of Znz and zero the rest. */ +.macro _sve_flush_z nz + _sve_check_zreg \nz + mov v\nz\().16b, v\nz\().16b +.endm + +.macro sve_flush + _for n, 0, 31, _sve_flush_z \n + _for n, 0, 15, _sve_pfalse \n + _sve_wrffr 0 +.endm + .macro sve_save nxbase, xpfpsr, nxtmp _for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34 _for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16 @@ -197,13 +231,7 @@ .endm .macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2 - mrs_s x\nxtmp, SYS_ZCR_EL1 - bic \xtmp2, x\nxtmp, ZCR_ELx_LEN_MASK - orr \xtmp2, \xtmp2, \xvqminus1 - cmp \xtmp2, x\nxtmp - b.eq 921f - msr_s SYS_ZCR_EL1, \xtmp2 // self-synchronising -921: + sve_load_vq \xvqminus1, x\nxtmp, \xtmp2 _for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34 _sve_ldr_p 0, \nxbase _sve_wrffr 0 diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 22f73fe09030..9a5498c2c8ee 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -8,18 +8,27 @@ #include <uapi/asm/hwcap.h> #include <asm/cpufeature.h> +#define COMPAT_HWCAP_SWP (1 << 0) #define COMPAT_HWCAP_HALF (1 << 1) #define COMPAT_HWCAP_THUMB (1 << 2) +#define COMPAT_HWCAP_26BIT (1 << 3) #define COMPAT_HWCAP_FAST_MULT (1 << 4) +#define COMPAT_HWCAP_FPA (1 << 5) #define COMPAT_HWCAP_VFP (1 << 6) #define COMPAT_HWCAP_EDSP (1 << 7) +#define COMPAT_HWCAP_JAVA (1 << 8) +#define COMPAT_HWCAP_IWMMXT (1 << 9) +#define COMPAT_HWCAP_CRUNCH (1 << 10) +#define COMPAT_HWCAP_THUMBEE (1 << 11) #define COMPAT_HWCAP_NEON (1 << 12) #define COMPAT_HWCAP_VFPv3 (1 << 13) +#define COMPAT_HWCAP_VFPV3D16 (1 << 14) #define COMPAT_HWCAP_TLS (1 << 15) #define COMPAT_HWCAP_VFPv4 (1 << 16) #define COMPAT_HWCAP_IDIVA (1 << 17) #define COMPAT_HWCAP_IDIVT (1 << 18) #define COMPAT_HWCAP_IDIV (COMPAT_HWCAP_IDIVA|COMPAT_HWCAP_IDIVT) +#define COMPAT_HWCAP_VFPD32 (1 << 19) #define COMPAT_HWCAP_LPAE (1 << 20) #define COMPAT_HWCAP_EVTSTRM (1 << 21) @@ -95,7 +104,7 @@ #define KERNEL_HWCAP_DGH __khwcap2_feature(DGH) #define KERNEL_HWCAP_RNG __khwcap2_feature(RNG) #define KERNEL_HWCAP_BTI __khwcap2_feature(BTI) -/* reserved for KERNEL_HWCAP_MTE __khwcap2_feature(MTE) */ +#define KERNEL_HWCAP_MTE __khwcap2_feature(MTE) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 0bc46149e491..4b39293d0f72 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -359,9 +359,13 @@ __AARCH64_INSN_FUNCS(brk, 0xFFE0001F, 0xD4200000) __AARCH64_INSN_FUNCS(exception, 0xFF000000, 0xD4000000) __AARCH64_INSN_FUNCS(hint, 0xFFFFF01F, 0xD503201F) __AARCH64_INSN_FUNCS(br, 0xFFFFFC1F, 0xD61F0000) +__AARCH64_INSN_FUNCS(br_auth, 0xFEFFF800, 0xD61F0800) __AARCH64_INSN_FUNCS(blr, 0xFFFFFC1F, 0xD63F0000) +__AARCH64_INSN_FUNCS(blr_auth, 0xFEFFF800, 0xD63F0800) __AARCH64_INSN_FUNCS(ret, 0xFFFFFC1F, 0xD65F0000) +__AARCH64_INSN_FUNCS(ret_auth, 0xFFFFFBFF, 0xD65F0BFF) __AARCH64_INSN_FUNCS(eret, 0xFFFFFFFF, 0xD69F03E0) +__AARCH64_INSN_FUNCS(eret_auth, 0xFFFFFBFF, 0xD69F0BFF) __AARCH64_INSN_FUNCS(mrs, 0xFFF00000, 0xD5300000) __AARCH64_INSN_FUNCS(msr_imm, 0xFFF8F01F, 0xD500401F) __AARCH64_INSN_FUNCS(msr_reg, 0xFFF00000, 0xD5100000) diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 329fb15f6bac..19ca76ea60d9 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -86,7 +86,7 @@ + EARLY_PGDS((vstart), (vend)) /* each PGDIR needs a next level page table */ \ + EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \ + EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */ -#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end)) +#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end)) #define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE) #ifdef CONFIG_ARM64_SW_TTBR0_PAN diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 1da8e3dc4455..64ce29378467 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -12,6 +12,7 @@ #include <asm/types.h> /* Hyp Configuration Register (HCR) bits */ +#define HCR_ATA (UL(1) << 56) #define HCR_FWB (UL(1) << 46) #define HCR_API (UL(1) << 41) #define HCR_APK (UL(1) << 40) @@ -66,7 +67,7 @@ * TWI: Trap WFI * TIDCP: Trap L2CTLR/L2ECTLR * BSU_IS: Upgrade barriers to the inner shareable domain - * FB: Force broadcast of all maintainance operations + * FB: Force broadcast of all maintenance operations * AMO: Override CPSR.A and enable signaling with VA * IMO: Override CPSR.I and enable signaling with VI * FMO: Override CPSR.F and enable signaling with VF @@ -78,7 +79,7 @@ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \ HCR_FMO | HCR_IMO | HCR_PTW ) #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) -#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK) +#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) /* TCR_EL2 Registers bits */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 6f98fbd0ac81..7f7072f6cb45 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -9,9 +9,6 @@ #include <asm/virt.h> -#define VCPU_WORKAROUND_2_FLAG_SHIFT 0 -#define VCPU_WORKAROUND_2_FLAG (_AC(1, UL) << VCPU_WORKAROUND_2_FLAG_SHIFT) - #define ARM_EXIT_WITH_SERROR_BIT 31 #define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT)) #define ARM_EXCEPTION_IS_TRAP(x) (ARM_EXCEPTION_CODE((x)) == ARM_EXCEPTION_TRAP) @@ -102,11 +99,9 @@ DECLARE_KVM_HYP_SYM(__kvm_hyp_vector); #define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init) #define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector) -#ifdef CONFIG_KVM_INDIRECT_VECTORS extern atomic_t arm64_el2_vector_last_slot; DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs); #define __bp_harden_hyp_vecs CHOOSE_HYP_SYM(__bp_harden_hyp_vecs) -#endif extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa, diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 1cc5f5f72d0b..5ef2669ccd6c 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -391,20 +391,6 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; } -static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG; -} - -static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu, - bool flag) -{ - if (flag) - vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG; - else - vcpu->arch.workaround_flags &= ~VCPU_WORKAROUND_2_FLAG; -} - static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) { diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 905c2b87e05a..bb5e5b88d439 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -631,46 +631,6 @@ static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {} static inline void kvm_clr_pmu_events(u32 clr) {} #endif -#define KVM_BP_HARDEN_UNKNOWN -1 -#define KVM_BP_HARDEN_WA_NEEDED 0 -#define KVM_BP_HARDEN_NOT_REQUIRED 1 - -static inline int kvm_arm_harden_branch_predictor(void) -{ - switch (get_spectre_v2_workaround_state()) { - case ARM64_BP_HARDEN_WA_NEEDED: - return KVM_BP_HARDEN_WA_NEEDED; - case ARM64_BP_HARDEN_NOT_REQUIRED: - return KVM_BP_HARDEN_NOT_REQUIRED; - case ARM64_BP_HARDEN_UNKNOWN: - default: - return KVM_BP_HARDEN_UNKNOWN; - } -} - -#define KVM_SSBD_UNKNOWN -1 -#define KVM_SSBD_FORCE_DISABLE 0 -#define KVM_SSBD_KERNEL 1 -#define KVM_SSBD_FORCE_ENABLE 2 -#define KVM_SSBD_MITIGATED 3 - -static inline int kvm_arm_have_ssbd(void) -{ - switch (arm64_get_ssbd_state()) { - case ARM64_SSBD_FORCE_DISABLE: - return KVM_SSBD_FORCE_DISABLE; - case ARM64_SSBD_KERNEL: - return KVM_SSBD_KERNEL; - case ARM64_SSBD_FORCE_ENABLE: - return KVM_SSBD_FORCE_ENABLE; - case ARM64_SSBD_MITIGATED: - return KVM_SSBD_MITIGATED; - case ARM64_SSBD_UNKNOWN: - default: - return KVM_SSBD_UNKNOWN; - } -} - void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu); void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 189839c3706a..cff1cebc7590 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -9,6 +9,7 @@ #include <asm/page.h> #include <asm/memory.h> +#include <asm/mmu.h> #include <asm/cpufeature.h> /* @@ -430,19 +431,17 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, return ret; } -#ifdef CONFIG_KVM_INDIRECT_VECTORS /* * EL2 vectors can be mapped and rerouted in a number of ways, * depending on the kernel configuration and CPU present: * - * - If the CPU has the ARM64_HARDEN_BRANCH_PREDICTOR cap, the - * hardening sequence is placed in one of the vector slots, which is - * executed before jumping to the real vectors. + * - If the CPU is affected by Spectre-v2, the hardening sequence is + * placed in one of the vector slots, which is executed before jumping + * to the real vectors. * - * - If the CPU has both the ARM64_HARDEN_EL2_VECTORS cap and the - * ARM64_HARDEN_BRANCH_PREDICTOR cap, the slot containing the - * hardening sequence is mapped next to the idmap page, and executed - * before jumping to the real vectors. + * - If the CPU also has the ARM64_HARDEN_EL2_VECTORS cap, the slot + * containing the hardening sequence is mapped next to the idmap page, + * and executed before jumping to the real vectors. * * - If the CPU only has the ARM64_HARDEN_EL2_VECTORS cap, then an * empty slot is selected, mapped next to the idmap page, and @@ -452,19 +451,16 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, * VHE, as we don't have hypervisor-specific mappings. If the system * is VHE and yet selects this capability, it will be ignored. */ -#include <asm/mmu.h> - extern void *__kvm_bp_vect_base; extern int __kvm_harden_el2_vector_slot; -/* This is called on both VHE and !VHE systems */ static inline void *kvm_get_hyp_vector(void) { struct bp_hardening_data *data = arm64_get_bp_hardening_data(); void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); int slot = -1; - if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR) && data->fn) { + if (cpus_have_const_cap(ARM64_SPECTRE_V2) && data->fn) { vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); slot = data->hyp_vectors_slot; } @@ -481,76 +477,6 @@ static inline void *kvm_get_hyp_vector(void) return vect; } -/* This is only called on a !VHE system */ -static inline int kvm_map_vectors(void) -{ - /* - * HBP = ARM64_HARDEN_BRANCH_PREDICTOR - * HEL2 = ARM64_HARDEN_EL2_VECTORS - * - * !HBP + !HEL2 -> use direct vectors - * HBP + !HEL2 -> use hardened vectors in place - * !HBP + HEL2 -> allocate one vector slot and use exec mapping - * HBP + HEL2 -> use hardened vertors and use exec mapping - */ - if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR)) { - __kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs); - __kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base); - } - - if (cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) { - phys_addr_t vect_pa = __pa_symbol(__bp_harden_hyp_vecs); - unsigned long size = __BP_HARDEN_HYP_VECS_SZ; - - /* - * Always allocate a spare vector slot, as we don't - * know yet which CPUs have a BP hardening slot that - * we can reuse. - */ - __kvm_harden_el2_vector_slot = atomic_inc_return(&arm64_el2_vector_last_slot); - BUG_ON(__kvm_harden_el2_vector_slot >= BP_HARDEN_EL2_SLOTS); - return create_hyp_exec_mappings(vect_pa, size, - &__kvm_bp_vect_base); - } - - return 0; -} -#else -static inline void *kvm_get_hyp_vector(void) -{ - return kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); -} - -static inline int kvm_map_vectors(void) -{ - return 0; -} -#endif - -#ifdef CONFIG_ARM64_SSBD -DECLARE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); - -static inline int hyp_map_aux_data(void) -{ - int cpu, err; - - for_each_possible_cpu(cpu) { - u64 *ptr; - - ptr = per_cpu_ptr(&arm64_ssbd_callback_required, cpu); - err = create_hyp_mappings(ptr, ptr + 1, PAGE_HYP); - if (err) - return err; - } - return 0; -} -#else -static inline int hyp_map_aux_data(void) -{ - return 0; -} -#endif - #define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr) /* diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index afa722504bfd..43640d797455 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -126,13 +126,18 @@ /* * Memory types available. + * + * IMPORTANT: MT_NORMAL must be index 0 since vm_get_page_prot() may 'or' in + * the MT_NORMAL_TAGGED memory type for PROT_MTE mappings. Note + * that protection_map[] only contains MT_NORMAL attributes. */ -#define MT_DEVICE_nGnRnE 0 -#define MT_DEVICE_nGnRE 1 -#define MT_DEVICE_GRE 2 -#define MT_NORMAL_NC 3 -#define MT_NORMAL 4 -#define MT_NORMAL_WT 5 +#define MT_NORMAL 0 +#define MT_NORMAL_TAGGED 1 +#define MT_NORMAL_NC 2 +#define MT_NORMAL_WT 3 +#define MT_DEVICE_nGnRnE 4 +#define MT_DEVICE_nGnRE 5 +#define MT_DEVICE_GRE 6 /* * Memory types for Stage-2 translation @@ -169,7 +174,7 @@ extern s64 memstart_addr; /* PHYS_OFFSET - the physical address of the start of memory. */ #define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; }) -/* the virtual base of the kernel image (minus TEXT_OFFSET) */ +/* the virtual base of the kernel image */ extern u64 kimage_vaddr; /* the offset between the kernel virtual and physical mappings */ diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h index 081ec8de9ea6..e3e28f7daf62 100644 --- a/arch/arm64/include/asm/mman.h +++ b/arch/arm64/include/asm/mman.h @@ -9,16 +9,53 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, unsigned long pkey __always_unused) { + unsigned long ret = 0; + if (system_supports_bti() && (prot & PROT_BTI)) - return VM_ARM64_BTI; + ret |= VM_ARM64_BTI; - return 0; + if (system_supports_mte() && (prot & PROT_MTE)) + ret |= VM_MTE; + + return ret; } #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) +static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags) +{ + /* + * Only allow MTE on anonymous mappings as these are guaranteed to be + * backed by tags-capable memory. The vm_flags may be overridden by a + * filesystem supporting MTE (RAM-based). + */ + if (system_supports_mte() && (flags & MAP_ANONYMOUS)) + return VM_MTE_ALLOWED; + + return 0; +} +#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags) + static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags) { - return (vm_flags & VM_ARM64_BTI) ? __pgprot(PTE_GP) : __pgprot(0); + pteval_t prot = 0; + + if (vm_flags & VM_ARM64_BTI) + prot |= PTE_GP; + + /* + * There are two conditions required for returning a Normal Tagged + * memory type: (1) the user requested it via PROT_MTE passed to + * mmap() or mprotect() and (2) the corresponding vma supports MTE. We + * register (1) as VM_MTE in the vma->vm_flags and (2) as + * VM_MTE_ALLOWED. Note that the latter can only be set during the + * mmap() call since mprotect() does not accept MAP_* flags. + * Checking for VM_MTE only is sufficient since arch_validate_flags() + * does not permit (VM_MTE & !VM_MTE_ALLOWED). + */ + if (vm_flags & VM_MTE) + prot |= PTE_ATTRINDX(MT_NORMAL_TAGGED); + + return __pgprot(prot); } #define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags) @@ -30,8 +67,21 @@ static inline bool arch_validate_prot(unsigned long prot, if (system_supports_bti()) supported |= PROT_BTI; + if (system_supports_mte()) + supported |= PROT_MTE; + return (prot & ~supported) == 0; } #define arch_validate_prot(prot, addr) arch_validate_prot(prot, addr) +static inline bool arch_validate_flags(unsigned long vm_flags) +{ + if (!system_supports_mte()) + return true; + + /* only allow VM_MTE if VM_MTE_ALLOWED has been set previously */ + return !(vm_flags & VM_MTE) || (vm_flags & VM_MTE_ALLOWED); +} +#define arch_validate_flags(vm_flags) arch_validate_flags(vm_flags) + #endif /* ! __ASM_MMAN_H__ */ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index a7a5ecaa2e83..b2e91c187e2a 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -17,11 +17,14 @@ #ifndef __ASSEMBLY__ +#include <linux/refcount.h> + typedef struct { atomic64_t id; #ifdef CONFIG_COMPAT void *sigpage; #endif + refcount_t pinned; void *vdso; unsigned long flags; } mm_context_t; @@ -45,7 +48,6 @@ struct bp_hardening_data { bp_hardening_cb_t fn; }; -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void) @@ -57,21 +59,13 @@ static inline void arm64_apply_bp_hardening(void) { struct bp_hardening_data *d; - if (!cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR)) + if (!cpus_have_const_cap(ARM64_SPECTRE_V2)) return; d = arm64_get_bp_hardening_data(); if (d->fn) d->fn(); } -#else -static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void) -{ - return NULL; -} - -static inline void arm64_apply_bp_hardening(void) { } -#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ extern void arm64_memblock_init(void); extern void paging_init(void); diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index f2d7537d6f83..0672236e1aea 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -177,7 +177,13 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp) #define destroy_context(mm) do { } while(0) void check_and_switch_context(struct mm_struct *mm); -#define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.id, 0); 0; }) +static inline int +init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + atomic64_set(&mm->context.id, 0); + refcount_set(&mm->context.pinned, 0); + return 0; +} #ifdef CONFIG_ARM64_SW_TTBR0_PAN static inline void update_saved_ttbr0(struct task_struct *tsk, @@ -248,6 +254,9 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, void verify_cpu_asid_bits(void); void post_ttbr_update_workaround(void); +unsigned long arm64_mm_context_get(struct mm_struct *mm); +void arm64_mm_context_put(struct mm_struct *mm); + #endif /* !__ASSEMBLY__ */ #endif /* !__ASM_MMU_CONTEXT_H */ diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h new file mode 100644 index 000000000000..1c99fcadb58c --- /dev/null +++ b/arch/arm64/include/asm/mte.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 ARM Ltd. + */ +#ifndef __ASM_MTE_H +#define __ASM_MTE_H + +#define MTE_GRANULE_SIZE UL(16) +#define MTE_GRANULE_MASK (~(MTE_GRANULE_SIZE - 1)) +#define MTE_TAG_SHIFT 56 +#define MTE_TAG_SIZE 4 + +#ifndef __ASSEMBLY__ + +#include <linux/page-flags.h> + +#include <asm/pgtable-types.h> + +void mte_clear_page_tags(void *addr); +unsigned long mte_copy_tags_from_user(void *to, const void __user *from, + unsigned long n); +unsigned long mte_copy_tags_to_user(void __user *to, void *from, + unsigned long n); +int mte_save_tags(struct page *page); +void mte_save_page_tags(const void *page_addr, void *tag_storage); +bool mte_restore_tags(swp_entry_t entry, struct page *page); +void mte_restore_page_tags(void *page_addr, const void *tag_storage); +void mte_invalidate_tags(int type, pgoff_t offset); +void mte_invalidate_tags_area(int type); +void *mte_allocate_tag_storage(void); +void mte_free_tag_storage(char *storage); + +#ifdef CONFIG_ARM64_MTE + +/* track which pages have valid allocation tags */ +#define PG_mte_tagged PG_arch_2 + +void mte_sync_tags(pte_t *ptep, pte_t pte); +void mte_copy_page_tags(void *kto, const void *kfrom); +void flush_mte_state(void); +void mte_thread_switch(struct task_struct *next); +void mte_suspend_exit(void); +long set_mte_ctrl(struct task_struct *task, unsigned long arg); +long get_mte_ctrl(struct task_struct *task); +int mte_ptrace_copy_tags(struct task_struct *child, long request, + unsigned long addr, unsigned long data); + +#else + +/* unused if !CONFIG_ARM64_MTE, silence the compiler */ +#define PG_mte_tagged 0 + +static inline void mte_sync_tags(pte_t *ptep, pte_t pte) +{ +} +static inline void mte_copy_page_tags(void *kto, const void *kfrom) +{ +} +static inline void flush_mte_state(void) +{ +} +static inline void mte_thread_switch(struct task_struct *next) +{ +} +static inline void mte_suspend_exit(void) +{ +} +static inline long set_mte_ctrl(struct task_struct *task, unsigned long arg) +{ + return 0; +} +static inline long get_mte_ctrl(struct task_struct *task) +{ + return 0; +} +static inline int mte_ptrace_copy_tags(struct task_struct *child, + long request, unsigned long addr, + unsigned long data) +{ + return -EIO; +} + +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_MTE_H */ diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h index 626ad01e83bf..dd870390d639 100644 --- a/arch/arm64/include/asm/numa.h +++ b/arch/arm64/include/asm/numa.h @@ -25,6 +25,9 @@ const struct cpumask *cpumask_of_node(int node); /* Returns a pointer to the cpumask of CPUs on Node 'node'. */ static inline const struct cpumask *cpumask_of_node(int node) { + if (node == NUMA_NO_NODE) + return cpu_all_mask; + return node_to_cpumask_map[node]; } #endif diff --git a/arch/arm64/include/asm/page-def.h b/arch/arm64/include/asm/page-def.h index f99d48ecbeef..2403f7b4cdbf 100644 --- a/arch/arm64/include/asm/page-def.h +++ b/arch/arm64/include/asm/page-def.h @@ -11,13 +11,8 @@ #include <linux/const.h> /* PAGE_SHIFT determines the page size */ -/* CONT_SHIFT determines the number of pages which can be tracked together */ #define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT -#define CONT_SHIFT CONFIG_ARM64_CONT_SHIFT #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) -#define CONT_SIZE (_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT)) -#define CONT_MASK (~(CONT_SIZE-1)) - #endif /* __ASM_PAGE_DEF_H */ diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index c01b52add377..012cffc574e8 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -15,18 +15,25 @@ #include <linux/personality.h> /* for READ_IMPLIES_EXEC */ #include <asm/pgtable-types.h> -extern void __cpu_clear_user_page(void *p, unsigned long user); -extern void __cpu_copy_user_page(void *to, const void *from, - unsigned long user); +struct page; +struct vm_area_struct; + extern void copy_page(void *to, const void *from); extern void clear_page(void *to); +void copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma); +#define __HAVE_ARCH_COPY_USER_HIGHPAGE + +void copy_highpage(struct page *to, struct page *from); +#define __HAVE_ARCH_COPY_HIGHPAGE + #define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE -#define clear_user_page(addr,vaddr,pg) __cpu_clear_user_page(addr, vaddr) -#define copy_user_page(to,from,vaddr,pg) __cpu_copy_user_page(to, from, vaddr) +#define clear_user_page(page, vaddr, pg) clear_page(page) +#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) typedef struct page *pgtable_t; @@ -36,7 +43,7 @@ extern int pfn_valid(unsigned long); #endif /* !__ASSEMBLY__ */ -#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC +#define VM_DATA_DEFAULT_FLAGS (VM_DATA_FLAGS_TSK_EXEC | VM_MTE_ALLOWED) #include <asm-generic/getorder.h> diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h index 70b323cf8300..b33ca260e3c9 100644 --- a/arch/arm64/include/asm/pci.h +++ b/arch/arm64/include/asm/pci.h @@ -17,6 +17,7 @@ #define pcibios_assign_all_busses() \ (pci_has_flag(PCI_REASSIGN_ALL_BUS)) +#define arch_can_pci_mmap_wc() 1 #define ARCH_GENERIC_PCI_MMAP_RESOURCE 1 extern int isa_dma_bridge_buggy; diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 2c2d7dbe8a02..60731f602d3e 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -236,6 +236,9 @@ #define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */ #define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */ +/* PMMIR_EL1.SLOTS mask */ +#define ARMV8_PMU_SLOTS_MASK 0xff + #ifdef CONFIG_PERF_EVENTS struct pt_regs; extern unsigned long perf_instruction_pointer(struct pt_regs *regs); diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index d400a4d9aee2..94b3f2ac2e9d 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -81,25 +81,15 @@ /* * Contiguous page definitions. */ -#ifdef CONFIG_ARM64_64K_PAGES -#define CONT_PTE_SHIFT (5 + PAGE_SHIFT) -#define CONT_PMD_SHIFT (5 + PMD_SHIFT) -#elif defined(CONFIG_ARM64_16K_PAGES) -#define CONT_PTE_SHIFT (7 + PAGE_SHIFT) -#define CONT_PMD_SHIFT (5 + PMD_SHIFT) -#else -#define CONT_PTE_SHIFT (4 + PAGE_SHIFT) -#define CONT_PMD_SHIFT (4 + PMD_SHIFT) -#endif - +#define CONT_PTE_SHIFT (CONFIG_ARM64_CONT_PTE_SHIFT + PAGE_SHIFT) #define CONT_PTES (1 << (CONT_PTE_SHIFT - PAGE_SHIFT)) #define CONT_PTE_SIZE (CONT_PTES * PAGE_SIZE) #define CONT_PTE_MASK (~(CONT_PTE_SIZE - 1)) + +#define CONT_PMD_SHIFT (CONFIG_ARM64_CONT_PMD_SHIFT + PMD_SHIFT) #define CONT_PMDS (1 << (CONT_PMD_SHIFT - PMD_SHIFT)) #define CONT_PMD_SIZE (CONT_PMDS * PMD_SIZE) #define CONT_PMD_MASK (~(CONT_PMD_SIZE - 1)) -/* the numerical offset of the PTE within a range of CONT_PTES */ -#define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1)) /* * Hardware page table definitions. diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 4d867c6446c4..4cd0d6ca8aa1 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -19,6 +19,13 @@ #define PTE_DEVMAP (_AT(pteval_t, 1) << 57) #define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ +/* + * This bit indicates that the entry is present i.e. pmd_page() + * still points to a valid huge page in memory even if the pmd + * has been invalidated. + */ +#define PMD_PRESENT_INVALID (_AT(pteval_t, 1) << 59) /* only when !PMD_SECT_VALID */ + #ifndef __ASSEMBLY__ #include <asm/cpufeature.h> @@ -50,6 +57,7 @@ extern bool arm64_use_ng_mappings; #define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) #define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT)) #define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) +#define PROT_NORMAL_TAGGED (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_TAGGED)) #define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) #define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) @@ -59,6 +67,7 @@ extern bool arm64_use_ng_mappings; #define _HYP_PAGE_DEFAULT _PAGE_DEFAULT #define PAGE_KERNEL __pgprot(PROT_NORMAL) +#define PAGE_KERNEL_TAGGED __pgprot(PROT_NORMAL_TAGGED) #define PAGE_KERNEL_RO __pgprot((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY) #define PAGE_KERNEL_ROX __pgprot((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY) #define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index d5d3fbe73953..a11bf52e0c38 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -9,6 +9,7 @@ #include <asm/proc-fns.h> #include <asm/memory.h> +#include <asm/mte.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable-prot.h> #include <asm/tlbflush.h> @@ -35,11 +36,6 @@ extern struct page *vmemmap; -extern void __pte_error(const char *file, int line, unsigned long val); -extern void __pmd_error(const char *file, int line, unsigned long val); -extern void __pud_error(const char *file, int line, unsigned long val); -extern void __pgd_error(const char *file, int line, unsigned long val); - #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE @@ -51,13 +47,22 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* + * Outside of a few very special situations (e.g. hibernation), we always + * use broadcast TLB invalidation instructions, therefore a spurious page + * fault on one CPU which has been handled concurrently by another CPU + * does not need to perform additional invalidation. + */ +#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0) + +/* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. */ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define ZERO_PAGE(vaddr) phys_to_page(__pa_symbol(empty_zero_page)) -#define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) +#define pte_ERROR(e) \ + pr_err("%s:%d: bad pte %016llx.\n", __FILE__, __LINE__, pte_val(e)) /* * Macros to convert between a physical address and its placement in a @@ -90,6 +95,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT)) #define pte_devmap(pte) (!!(pte_val(pte) & PTE_DEVMAP)) +#define pte_tagged(pte) ((pte_val(pte) & PTE_ATTRINDX_MASK) == \ + PTE_ATTRINDX(MT_NORMAL_TAGGED)) #define pte_cont_addr_end(addr, end) \ ({ unsigned long __boundary = ((addr) + CONT_PTE_SIZE) & CONT_PTE_MASK; \ @@ -145,6 +152,18 @@ static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot) return pte; } +static inline pmd_t clear_pmd_bit(pmd_t pmd, pgprot_t prot) +{ + pmd_val(pmd) &= ~pgprot_val(prot); + return pmd; +} + +static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot) +{ + pmd_val(pmd) |= pgprot_val(prot); + return pmd; +} + static inline pte_t pte_wrprotect(pte_t pte) { pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); @@ -284,6 +303,10 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) __sync_icache_dcache(pte); + if (system_supports_mte() && + pte_present(pte) && pte_tagged(pte) && !pte_special(pte)) + mte_sync_tags(ptep, pte); + __check_racy_pte_update(mm, ptep, pte); set_pte(ptep, pte); @@ -363,15 +386,24 @@ static inline int pmd_protnone(pmd_t pmd) } #endif +#define pmd_present_invalid(pmd) (!!(pmd_val(pmd) & PMD_PRESENT_INVALID)) + +static inline int pmd_present(pmd_t pmd) +{ + return pte_present(pmd_pte(pmd)) || pmd_present_invalid(pmd); +} + /* * THP definitions. */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT)) +static inline int pmd_trans_huge(pmd_t pmd) +{ + return pmd_val(pmd) && pmd_present(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -#define pmd_present(pmd) pte_present(pmd_pte(pmd)) #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd)) #define pmd_valid(pmd) pte_valid(pmd_pte(pmd)) @@ -381,7 +413,14 @@ static inline int pmd_protnone(pmd_t pmd) #define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) -#define pmd_mkinvalid(pmd) (__pmd(pmd_val(pmd) & ~PMD_SECT_VALID)) + +static inline pmd_t pmd_mkinvalid(pmd_t pmd) +{ + pmd = set_pmd_bit(pmd, __pgprot(PMD_PRESENT_INVALID)); + pmd = clear_pmd_bit(pmd, __pgprot(PMD_SECT_VALID)); + + return pmd; +} #define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) @@ -541,7 +580,8 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) #if CONFIG_PGTABLE_LEVELS > 2 -#define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd)) +#define pmd_ERROR(e) \ + pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e)) #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) @@ -608,7 +648,8 @@ static inline unsigned long pud_page_vaddr(pud_t pud) #if CONFIG_PGTABLE_LEVELS > 3 -#define pud_ERROR(pud) __pud_error(__FILE__, __LINE__, pud_val(pud)) +#define pud_ERROR(e) \ + pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e)) #define p4d_none(p4d) (!p4d_val(p4d)) #define p4d_bad(p4d) (!(p4d_val(p4d) & 2)) @@ -667,15 +708,21 @@ static inline unsigned long p4d_page_vaddr(p4d_t p4d) #endif /* CONFIG_PGTABLE_LEVELS > 3 */ -#define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) +#define pgd_ERROR(e) \ + pr_err("%s:%d: bad pgd %016llx.\n", __FILE__, __LINE__, pgd_val(e)) #define pgd_set_fixmap(addr) ((pgd_t *)set_fixmap_offset(FIX_PGD, addr)) #define pgd_clear_fixmap() clear_fixmap(FIX_PGD) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { + /* + * Normal and Normal-Tagged are two different memory types and indices + * in MAIR_EL1. The mask below has to include PTE_ATTRINDX_MASK. + */ const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | - PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP; + PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP | + PTE_ATTRINDX_MASK; /* preserve the hardware dirty information */ if (pte_hw_dirty(pte)) pte = pte_mkdirty(pte); @@ -847,6 +894,11 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(swp) ((pte_t) { (swp).val }) +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION +#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val(pmd) }) +#define __swp_entry_to_pmd(swp) __pmd((swp).val) +#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ + /* * Ensure that there are not more swap files than can be encoded in the kernel * PTEs. @@ -855,6 +907,38 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, extern int kern_addr_valid(unsigned long addr); +#ifdef CONFIG_ARM64_MTE + +#define __HAVE_ARCH_PREPARE_TO_SWAP +static inline int arch_prepare_to_swap(struct page *page) +{ + if (system_supports_mte()) + return mte_save_tags(page); + return 0; +} + +#define __HAVE_ARCH_SWAP_INVALIDATE +static inline void arch_swap_invalidate_page(int type, pgoff_t offset) +{ + if (system_supports_mte()) + mte_invalidate_tags(type, offset); +} + +static inline void arch_swap_invalidate_area(int type) +{ + if (system_supports_mte()) + mte_invalidate_tags_area(type); +} + +#define __HAVE_ARCH_SWAP_RESTORE +static inline void arch_swap_restore(swp_entry_t entry, struct page *page) +{ + if (system_supports_mte() && mte_restore_tags(entry, page)) + set_bit(PG_mte_tagged, &page->flags); +} + +#endif /* CONFIG_ARM64_MTE */ + /* * On AArch64, the cache coherency is handled via the set_pte_at() function. */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 240fe5e5b720..fce8cbecd6bc 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -38,6 +38,7 @@ #include <asm/pgtable-hwdef.h> #include <asm/pointer_auth.h> #include <asm/ptrace.h> +#include <asm/spectre.h> #include <asm/types.h> /* @@ -151,6 +152,10 @@ struct thread_struct { struct ptrauth_keys_user keys_user; struct ptrauth_keys_kernel keys_kernel; #endif +#ifdef CONFIG_ARM64_MTE + u64 sctlr_tcf0; + u64 gcr_user_incl; +#endif }; static inline void arch_thread_struct_whitelist(unsigned long *offset, @@ -197,40 +202,15 @@ static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) regs->pmr_save = GIC_PRIO_IRQON; } -static inline void set_ssbs_bit(struct pt_regs *regs) -{ - regs->pstate |= PSR_SSBS_BIT; -} - -static inline void set_compat_ssbs_bit(struct pt_regs *regs) -{ - regs->pstate |= PSR_AA32_SSBS_BIT; -} - static inline void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) { start_thread_common(regs, pc); regs->pstate = PSR_MODE_EL0t; - - if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE) - set_ssbs_bit(regs); - + spectre_v4_enable_task_mitigation(current); regs->sp = sp; } -static inline bool is_ttbr0_addr(unsigned long addr) -{ - /* entry assembly clears tags for TTBR0 addrs */ - return addr < TASK_SIZE; -} - -static inline bool is_ttbr1_addr(unsigned long addr) -{ - /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */ - return arch_kasan_reset_tag(addr) >= PAGE_OFFSET; -} - #ifdef CONFIG_COMPAT static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) @@ -244,13 +224,23 @@ static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, regs->pstate |= PSR_AA32_E_BIT; #endif - if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE) - set_compat_ssbs_bit(regs); - + spectre_v4_enable_task_mitigation(current); regs->compat_sp = sp; } #endif +static inline bool is_ttbr0_addr(unsigned long addr) +{ + /* entry assembly clears tags for TTBR0 addrs */ + return addr < TASK_SIZE; +} + +static inline bool is_ttbr1_addr(unsigned long addr) +{ + /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */ + return arch_kasan_reset_tag(addr) >= PAGE_OFFSET; +} + /* Forward declaration, a strange C thing */ struct task_struct; @@ -315,10 +305,10 @@ extern void __init minsigstksz_setup(void); #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI /* PR_{SET,GET}_TAGGED_ADDR_CTRL prctl */ -long set_tagged_addr_ctrl(unsigned long arg); -long get_tagged_addr_ctrl(void); -#define SET_TAGGED_ADDR_CTRL(arg) set_tagged_addr_ctrl(arg) -#define GET_TAGGED_ADDR_CTRL() get_tagged_addr_ctrl() +long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg); +long get_tagged_addr_ctrl(struct task_struct *task); +#define SET_TAGGED_ADDR_CTRL(arg) set_tagged_addr_ctrl(current, arg) +#define GET_TAGGED_ADDR_CTRL() get_tagged_addr_ctrl(current) #endif /* diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h new file mode 100644 index 000000000000..fcdfbce302bd --- /dev/null +++ b/arch/arm64/include/asm/spectre.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Interface for managing mitigations for Spectre vulnerabilities. + * + * Copyright (C) 2020 Google LLC + * Author: Will Deacon <will@kernel.org> + */ + +#ifndef __ASM_SPECTRE_H +#define __ASM_SPECTRE_H + +#include <asm/cpufeature.h> + +/* Watch out, ordering is important here. */ +enum mitigation_state { + SPECTRE_UNAFFECTED, + SPECTRE_MITIGATED, + SPECTRE_VULNERABLE, +}; + +struct task_struct; + +enum mitigation_state arm64_get_spectre_v2_state(void); +bool has_spectre_v2(const struct arm64_cpu_capabilities *cap, int scope); +void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused); + +enum mitigation_state arm64_get_spectre_v4_state(void); +bool has_spectre_v4(const struct arm64_cpu_capabilities *cap, int scope); +void spectre_v4_enable_mitigation(const struct arm64_cpu_capabilities *__unused); +void spectre_v4_enable_task_mitigation(struct task_struct *tsk); + +#endif /* __ASM_SPECTRE_H */ diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index fc7613023c19..eb29b1fe8255 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -63,7 +63,7 @@ struct stackframe { extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame); extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame, - int (*fn)(struct stackframe *, void *), void *data); + bool (*fn)(void *, unsigned long), void *data); extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, const char *loglvl); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 554a7e8ecb07..d52c1b3ce589 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -91,10 +91,12 @@ #define PSTATE_PAN pstate_field(0, 4) #define PSTATE_UAO pstate_field(0, 3) #define PSTATE_SSBS pstate_field(3, 1) +#define PSTATE_TCO pstate_field(3, 4) #define SET_PSTATE_PAN(x) __emit_inst(0xd500401f | PSTATE_PAN | ((!!x) << PSTATE_Imm_shift)) #define SET_PSTATE_UAO(x) __emit_inst(0xd500401f | PSTATE_UAO | ((!!x) << PSTATE_Imm_shift)) #define SET_PSTATE_SSBS(x) __emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_TCO(x) __emit_inst(0xd500401f | PSTATE_TCO | ((!!x) << PSTATE_Imm_shift)) #define __SYS_BARRIER_INSN(CRm, op2, Rt) \ __emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f)) @@ -181,6 +183,8 @@ #define SYS_SCTLR_EL1 sys_reg(3, 0, 1, 0, 0) #define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1) #define SYS_CPACR_EL1 sys_reg(3, 0, 1, 0, 2) +#define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5) +#define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6) #define SYS_ZCR_EL1 sys_reg(3, 0, 1, 2, 0) @@ -218,6 +222,8 @@ #define SYS_ERXADDR_EL1 sys_reg(3, 0, 5, 4, 3) #define SYS_ERXMISC0_EL1 sys_reg(3, 0, 5, 5, 0) #define SYS_ERXMISC1_EL1 sys_reg(3, 0, 5, 5, 1) +#define SYS_TFSR_EL1 sys_reg(3, 0, 5, 6, 0) +#define SYS_TFSRE0_EL1 sys_reg(3, 0, 5, 6, 1) #define SYS_FAR_EL1 sys_reg(3, 0, 6, 0, 0) #define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0) @@ -321,6 +327,8 @@ #define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1) #define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2) +#define SYS_PMMIR_EL1 sys_reg(3, 0, 9, 14, 6) + #define SYS_MAIR_EL1 sys_reg(3, 0, 10, 2, 0) #define SYS_AMAIR_EL1 sys_reg(3, 0, 10, 3, 0) @@ -368,6 +376,7 @@ #define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0) #define SYS_CLIDR_EL1 sys_reg(3, 1, 0, 0, 1) +#define SYS_GMID_EL1 sys_reg(3, 1, 0, 0, 4) #define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) #define SYS_CSSELR_EL1 sys_reg(3, 2, 0, 0, 0) @@ -460,6 +469,7 @@ #define SYS_ESR_EL2 sys_reg(3, 4, 5, 2, 0) #define SYS_VSESR_EL2 sys_reg(3, 4, 5, 2, 3) #define SYS_FPEXC32_EL2 sys_reg(3, 4, 5, 3, 0) +#define SYS_TFSR_EL2 sys_reg(3, 4, 5, 6, 0) #define SYS_FAR_EL2 sys_reg(3, 4, 6, 0, 0) #define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1) @@ -516,6 +526,7 @@ #define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0) #define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1) #define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0) +#define SYS_TFSR_EL12 sys_reg(3, 5, 5, 6, 0) #define SYS_FAR_EL12 sys_reg(3, 5, 6, 0, 0) #define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0) #define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0) @@ -531,6 +542,15 @@ /* Common SCTLR_ELx flags. */ #define SCTLR_ELx_DSSBS (BIT(44)) +#define SCTLR_ELx_ATA (BIT(43)) + +#define SCTLR_ELx_TCF_SHIFT 40 +#define SCTLR_ELx_TCF_NONE (UL(0x0) << SCTLR_ELx_TCF_SHIFT) +#define SCTLR_ELx_TCF_SYNC (UL(0x1) << SCTLR_ELx_TCF_SHIFT) +#define SCTLR_ELx_TCF_ASYNC (UL(0x2) << SCTLR_ELx_TCF_SHIFT) +#define SCTLR_ELx_TCF_MASK (UL(0x3) << SCTLR_ELx_TCF_SHIFT) + +#define SCTLR_ELx_ITFSB (BIT(37)) #define SCTLR_ELx_ENIA (BIT(31)) #define SCTLR_ELx_ENIB (BIT(30)) #define SCTLR_ELx_ENDA (BIT(27)) @@ -559,6 +579,14 @@ #endif /* SCTLR_EL1 specific flags. */ +#define SCTLR_EL1_ATA0 (BIT(42)) + +#define SCTLR_EL1_TCF0_SHIFT 38 +#define SCTLR_EL1_TCF0_NONE (UL(0x0) << SCTLR_EL1_TCF0_SHIFT) +#define SCTLR_EL1_TCF0_SYNC (UL(0x1) << SCTLR_EL1_TCF0_SHIFT) +#define SCTLR_EL1_TCF0_ASYNC (UL(0x2) << SCTLR_EL1_TCF0_SHIFT) +#define SCTLR_EL1_TCF0_MASK (UL(0x3) << SCTLR_EL1_TCF0_SHIFT) + #define SCTLR_EL1_BT1 (BIT(36)) #define SCTLR_EL1_BT0 (BIT(35)) #define SCTLR_EL1_UCI (BIT(26)) @@ -587,6 +615,7 @@ SCTLR_EL1_SA0 | SCTLR_EL1_SED | SCTLR_ELx_I |\ SCTLR_EL1_DZE | SCTLR_EL1_UCT |\ SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN |\ + SCTLR_ELx_ITFSB| SCTLR_ELx_ATA | SCTLR_EL1_ATA0 |\ ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_RES1) /* MAIR_ELx memory attributes (used by Linux) */ @@ -595,6 +624,7 @@ #define MAIR_ATTR_DEVICE_GRE UL(0x0c) #define MAIR_ATTR_NORMAL_NC UL(0x44) #define MAIR_ATTR_NORMAL_WT UL(0xbb) +#define MAIR_ATTR_NORMAL_TAGGED UL(0xf0) #define MAIR_ATTR_NORMAL UL(0xff) #define MAIR_ATTR_MASK UL(0xff) @@ -636,14 +666,22 @@ #define ID_AA64ISAR1_APA_SHIFT 4 #define ID_AA64ISAR1_DPB_SHIFT 0 -#define ID_AA64ISAR1_APA_NI 0x0 -#define ID_AA64ISAR1_APA_ARCHITECTED 0x1 -#define ID_AA64ISAR1_API_NI 0x0 -#define ID_AA64ISAR1_API_IMP_DEF 0x1 -#define ID_AA64ISAR1_GPA_NI 0x0 -#define ID_AA64ISAR1_GPA_ARCHITECTED 0x1 -#define ID_AA64ISAR1_GPI_NI 0x0 -#define ID_AA64ISAR1_GPI_IMP_DEF 0x1 +#define ID_AA64ISAR1_APA_NI 0x0 +#define ID_AA64ISAR1_APA_ARCHITECTED 0x1 +#define ID_AA64ISAR1_APA_ARCH_EPAC 0x2 +#define ID_AA64ISAR1_APA_ARCH_EPAC2 0x3 +#define ID_AA64ISAR1_APA_ARCH_EPAC2_FPAC 0x4 +#define ID_AA64ISAR1_APA_ARCH_EPAC2_FPAC_CMB 0x5 +#define ID_AA64ISAR1_API_NI 0x0 +#define ID_AA64ISAR1_API_IMP_DEF 0x1 +#define ID_AA64ISAR1_API_IMP_DEF_EPAC 0x2 +#define ID_AA64ISAR1_API_IMP_DEF_EPAC2 0x3 +#define ID_AA64ISAR1_API_IMP_DEF_EPAC2_FPAC 0x4 +#define ID_AA64ISAR1_API_IMP_DEF_EPAC2_FPAC_CMB 0x5 +#define ID_AA64ISAR1_GPA_NI 0x0 +#define ID_AA64ISAR1_GPA_ARCHITECTED 0x1 +#define ID_AA64ISAR1_GPI_NI 0x0 +#define ID_AA64ISAR1_GPI_IMP_DEF 0x1 /* id_aa64pfr0 */ #define ID_AA64PFR0_CSV3_SHIFT 60 @@ -686,6 +724,10 @@ #define ID_AA64PFR1_SSBS_PSTATE_INSNS 2 #define ID_AA64PFR1_BT_BTI 0x1 +#define ID_AA64PFR1_MTE_NI 0x0 +#define ID_AA64PFR1_MTE_EL0 0x1 +#define ID_AA64PFR1_MTE 0x2 + /* id_aa64zfr0 */ #define ID_AA64ZFR0_F64MM_SHIFT 56 #define ID_AA64ZFR0_F32MM_SHIFT 52 @@ -920,6 +962,28 @@ #define CPACR_EL1_ZEN_EL0EN (BIT(17)) /* enable EL0 access, if EL1EN set */ #define CPACR_EL1_ZEN (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN) +/* TCR EL1 Bit Definitions */ +#define SYS_TCR_EL1_TCMA1 (BIT(58)) +#define SYS_TCR_EL1_TCMA0 (BIT(57)) + +/* GCR_EL1 Definitions */ +#define SYS_GCR_EL1_RRND (BIT(16)) +#define SYS_GCR_EL1_EXCL_MASK 0xffffUL + +/* RGSR_EL1 Definitions */ +#define SYS_RGSR_EL1_TAG_MASK 0xfUL +#define SYS_RGSR_EL1_SEED_SHIFT 8 +#define SYS_RGSR_EL1_SEED_MASK 0xffffUL + +/* GMID_EL1 field definitions */ +#define SYS_GMID_EL1_BS_SHIFT 0 +#define SYS_GMID_EL1_BS_SIZE 4 + +/* TFSR{,E0}_EL1 bit definitions */ +#define SYS_TFSR_EL1_TF0_SHIFT 0 +#define SYS_TFSR_EL1_TF1_SHIFT 1 +#define SYS_TFSR_EL1_TF0 (UL(1) << SYS_TFSR_EL1_TF0_SHIFT) +#define SYS_TFSR_EL1_TF1 (UK(2) << SYS_TFSR_EL1_TF1_SHIFT) /* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */ #define SYS_MPIDR_SAFE_VAL (BIT(31)) @@ -1024,6 +1088,13 @@ write_sysreg(__scs_new, sysreg); \ } while (0) +#define sysreg_clear_set_s(sysreg, clear, set) do { \ + u64 __scs_val = read_sysreg_s(sysreg); \ + u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \ + if (__scs_new != __scs_val) \ + write_sysreg_s(__scs_new, sysreg); \ +} while (0) + #endif #endif /* __ASM_SYSREG_H */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 5e784e16ee89..1fbab854a51b 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -67,6 +67,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */ #define TIF_FSCHECK 5 /* Check FS is USER_DS on return */ +#define TIF_MTE_ASYNC_FAULT 6 /* MTE Asynchronous Tag Check Fault */ #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 9 /* syscall auditing */ #define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */ @@ -96,10 +97,11 @@ void arch_release_task_struct(struct task_struct *tsk); #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_32BIT (1 << TIF_32BIT) #define _TIF_SVE (1 << TIF_SVE) +#define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT) #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ - _TIF_UPROBE | _TIF_FSCHECK) + _TIF_UPROBE | _TIF_FSCHECK | _TIF_MTE_ASYNC_FAULT) #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index cee5928e1b7d..d96dc2c7c09d 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -24,7 +24,7 @@ struct undef_hook { void register_undef_hook(struct undef_hook *hook); void unregister_undef_hook(struct undef_hook *hook); -void force_signal_inject(int signal, int code, unsigned long address); +void force_signal_inject(int signal, int code, unsigned long address, unsigned int err); void arm64_notify_segfault(unsigned long addr); void arm64_force_sig_fault(int signo, int code, void __user *addr, const char *str); void arm64_force_sig_mceerr(int code, void __user *addr, short lsb, const char *str); diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 912162f73529..b8f41aa234ee 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -74,6 +74,6 @@ #define HWCAP2_DGH (1 << 15) #define HWCAP2_RNG (1 << 16) #define HWCAP2_BTI (1 << 17) -/* reserved for HWCAP2_MTE (1 << 18) */ +#define HWCAP2_MTE (1 << 18) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index ba85bb23f060..7d804fd0a682 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -242,6 +242,15 @@ struct kvm_vcpu_events { #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL 0 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL 1 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED 2 + +/* + * Only two states can be presented by the host kernel: + * - NOT_REQUIRED: the guest doesn't need to do anything + * - NOT_AVAIL: the guest isn't mitigated (it can still use SSBS if available) + * + * All the other values are deprecated. The host still accepts all + * values (they are ABI), but will narrow them to the above two. + */ #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 KVM_REG_ARM_FW_REG(2) #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL 0 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN 1 diff --git a/arch/arm64/include/uapi/asm/mman.h b/arch/arm64/include/uapi/asm/mman.h index 6fdd71eb644f..1e6482a838e1 100644 --- a/arch/arm64/include/uapi/asm/mman.h +++ b/arch/arm64/include/uapi/asm/mman.h @@ -5,5 +5,6 @@ #include <asm-generic/mman.h> #define PROT_BTI 0x10 /* BTI guarded page */ +#define PROT_MTE 0x20 /* Normal Tagged mapping */ #endif /* ! _UAPI__ASM_MMAN_H */ diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 42cbe34d95ce..758ae984ff97 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -51,6 +51,7 @@ #define PSR_PAN_BIT 0x00400000 #define PSR_UAO_BIT 0x00800000 #define PSR_DIT_BIT 0x01000000 +#define PSR_TCO_BIT 0x02000000 #define PSR_V_BIT 0x10000000 #define PSR_C_BIT 0x20000000 #define PSR_Z_BIT 0x40000000 @@ -75,6 +76,9 @@ /* syscall emulation path in ptrace */ #define PTRACE_SYSEMU 31 #define PTRACE_SYSEMU_SINGLESTEP 32 +/* MTE allocation tag access */ +#define PTRACE_PEEKMTETAGS 33 +#define PTRACE_POKEMTETAGS 34 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index a561cbb91d4d..bbaf0bc4ad60 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -3,8 +3,6 @@ # Makefile for the linux kernel. # -CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) -AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) CFLAGS_armv8_deprecated.o := -I$(src) CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) @@ -19,7 +17,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ return_address.o cpuinfo.o cpu_errata.o \ cpufeature.o alternative.o cacheinfo.o \ smp.o smp_spin_table.o topology.o smccc-call.o \ - syscall.o + syscall.o proton-pack.o targets += efi-entry.o @@ -59,9 +57,9 @@ arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_CRASH_CORE) += crash_core.o obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o -obj-$(CONFIG_ARM64_SSBD) += ssbd.o obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o +obj-$(CONFIG_ARM64_MTE) += mte.o obj-y += vdso/ probes/ obj-$(CONFIG_COMPAT_VDSO) += vdso32/ diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S index 4a18055b2ff9..37721eb6f9a1 100644 --- a/arch/arm64/kernel/cpu-reset.S +++ b/arch/arm64/kernel/cpu-reset.S @@ -35,6 +35,10 @@ SYM_CODE_START(__cpu_soft_restart) mov_q x13, SCTLR_ELx_FLAGS bic x12, x12, x13 pre_disable_mmu_workaround + /* + * either disable EL1&0 translation regime or disable EL2&0 translation + * regime if HCR_EL2.E2H == 1 + */ msr sctlr_el1, x12 isb diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 560ba69e13c1..24d75af344b1 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -106,365 +106,6 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap) sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); } -atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1); - -#include <asm/mmu_context.h> -#include <asm/cacheflush.h> - -DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); - -#ifdef CONFIG_KVM_INDIRECT_VECTORS -static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, - const char *hyp_vecs_end) -{ - void *dst = lm_alias(__bp_harden_hyp_vecs + slot * SZ_2K); - int i; - - for (i = 0; i < SZ_2K; i += 0x80) - memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start); - - __flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K); -} - -static void install_bp_hardening_cb(bp_hardening_cb_t fn, - const char *hyp_vecs_start, - const char *hyp_vecs_end) -{ - static DEFINE_RAW_SPINLOCK(bp_lock); - int cpu, slot = -1; - - /* - * detect_harden_bp_fw() passes NULL for the hyp_vecs start/end if - * we're a guest. Skip the hyp-vectors work. - */ - if (!hyp_vecs_start) { - __this_cpu_write(bp_hardening_data.fn, fn); - return; - } - - raw_spin_lock(&bp_lock); - for_each_possible_cpu(cpu) { - if (per_cpu(bp_hardening_data.fn, cpu) == fn) { - slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu); - break; - } - } - - if (slot == -1) { - slot = atomic_inc_return(&arm64_el2_vector_last_slot); - BUG_ON(slot >= BP_HARDEN_EL2_SLOTS); - __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end); - } - - __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot); - __this_cpu_write(bp_hardening_data.fn, fn); - raw_spin_unlock(&bp_lock); -} -#else -static void install_bp_hardening_cb(bp_hardening_cb_t fn, - const char *hyp_vecs_start, - const char *hyp_vecs_end) -{ - __this_cpu_write(bp_hardening_data.fn, fn); -} -#endif /* CONFIG_KVM_INDIRECT_VECTORS */ - -#include <linux/arm-smccc.h> - -static void __maybe_unused call_smc_arch_workaround_1(void) -{ - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); -} - -static void call_hvc_arch_workaround_1(void) -{ - arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); -} - -static void qcom_link_stack_sanitization(void) -{ - u64 tmp; - - asm volatile("mov %0, x30 \n" - ".rept 16 \n" - "bl . + 4 \n" - ".endr \n" - "mov x30, %0 \n" - : "=&r" (tmp)); -} - -static bool __nospectre_v2; -static int __init parse_nospectre_v2(char *str) -{ - __nospectre_v2 = true; - return 0; -} -early_param("nospectre_v2", parse_nospectre_v2); - -/* - * -1: No workaround - * 0: No workaround required - * 1: Workaround installed - */ -static int detect_harden_bp_fw(void) -{ - bp_hardening_cb_t cb; - void *smccc_start, *smccc_end; - struct arm_smccc_res res; - u32 midr = read_cpuid_id(); - - arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_1, &res); - - switch ((int)res.a0) { - case 1: - /* Firmware says we're just fine */ - return 0; - case 0: - break; - default: - return -1; - } - - switch (arm_smccc_1_1_get_conduit()) { - case SMCCC_CONDUIT_HVC: - cb = call_hvc_arch_workaround_1; - /* This is a guest, no need to patch KVM vectors */ - smccc_start = NULL; - smccc_end = NULL; - break; - -#if IS_ENABLED(CONFIG_KVM) - case SMCCC_CONDUIT_SMC: - cb = call_smc_arch_workaround_1; - smccc_start = __smccc_workaround_1_smc; - smccc_end = __smccc_workaround_1_smc + - __SMCCC_WORKAROUND_1_SMC_SZ; - break; -#endif - - default: - return -1; - } - - if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) || - ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) - cb = qcom_link_stack_sanitization; - - if (IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR)) - install_bp_hardening_cb(cb, smccc_start, smccc_end); - - return 1; -} - -DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); - -int ssbd_state __read_mostly = ARM64_SSBD_KERNEL; -static bool __ssb_safe = true; - -static const struct ssbd_options { - const char *str; - int state; -} ssbd_options[] = { - { "force-on", ARM64_SSBD_FORCE_ENABLE, }, - { "force-off", ARM64_SSBD_FORCE_DISABLE, }, - { "kernel", ARM64_SSBD_KERNEL, }, -}; - -static int __init ssbd_cfg(char *buf) -{ - int i; - - if (!buf || !buf[0]) - return -EINVAL; - - for (i = 0; i < ARRAY_SIZE(ssbd_options); i++) { - int len = strlen(ssbd_options[i].str); - - if (strncmp(buf, ssbd_options[i].str, len)) - continue; - - ssbd_state = ssbd_options[i].state; - return 0; - } - - return -EINVAL; -} -early_param("ssbd", ssbd_cfg); - -void __init arm64_update_smccc_conduit(struct alt_instr *alt, - __le32 *origptr, __le32 *updptr, - int nr_inst) -{ - u32 insn; - - BUG_ON(nr_inst != 1); - - switch (arm_smccc_1_1_get_conduit()) { - case SMCCC_CONDUIT_HVC: - insn = aarch64_insn_get_hvc_value(); - break; - case SMCCC_CONDUIT_SMC: - insn = aarch64_insn_get_smc_value(); - break; - default: - return; - } - - *updptr = cpu_to_le32(insn); -} - -void __init arm64_enable_wa2_handling(struct alt_instr *alt, - __le32 *origptr, __le32 *updptr, - int nr_inst) -{ - BUG_ON(nr_inst != 1); - /* - * Only allow mitigation on EL1 entry/exit and guest - * ARCH_WORKAROUND_2 handling if the SSBD state allows it to - * be flipped. - */ - if (arm64_get_ssbd_state() == ARM64_SSBD_KERNEL) - *updptr = cpu_to_le32(aarch64_insn_gen_nop()); -} - -void arm64_set_ssbd_mitigation(bool state) -{ - int conduit; - - if (!IS_ENABLED(CONFIG_ARM64_SSBD)) { - pr_info_once("SSBD disabled by kernel configuration\n"); - return; - } - - if (this_cpu_has_cap(ARM64_SSBS)) { - if (state) - asm volatile(SET_PSTATE_SSBS(0)); - else - asm volatile(SET_PSTATE_SSBS(1)); - return; - } - - conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_WORKAROUND_2, state, - NULL); - - WARN_ON_ONCE(conduit == SMCCC_CONDUIT_NONE); -} - -static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, - int scope) -{ - struct arm_smccc_res res; - bool required = true; - s32 val; - bool this_cpu_safe = false; - int conduit; - - WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); - - if (cpu_mitigations_off()) - ssbd_state = ARM64_SSBD_FORCE_DISABLE; - - /* delay setting __ssb_safe until we get a firmware response */ - if (is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list)) - this_cpu_safe = true; - - if (this_cpu_has_cap(ARM64_SSBS)) { - if (!this_cpu_safe) - __ssb_safe = false; - required = false; - goto out_printmsg; - } - - conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_2, &res); - - if (conduit == SMCCC_CONDUIT_NONE) { - ssbd_state = ARM64_SSBD_UNKNOWN; - if (!this_cpu_safe) - __ssb_safe = false; - return false; - } - - val = (s32)res.a0; - - switch (val) { - case SMCCC_RET_NOT_SUPPORTED: - ssbd_state = ARM64_SSBD_UNKNOWN; - if (!this_cpu_safe) - __ssb_safe = false; - return false; - - /* machines with mixed mitigation requirements must not return this */ - case SMCCC_RET_NOT_REQUIRED: - pr_info_once("%s mitigation not required\n", entry->desc); - ssbd_state = ARM64_SSBD_MITIGATED; - return false; - - case SMCCC_RET_SUCCESS: - __ssb_safe = false; - required = true; - break; - - case 1: /* Mitigation not required on this CPU */ - required = false; - break; - - default: - WARN_ON(1); - if (!this_cpu_safe) - __ssb_safe = false; - return false; - } - - switch (ssbd_state) { - case ARM64_SSBD_FORCE_DISABLE: - arm64_set_ssbd_mitigation(false); - required = false; - break; - - case ARM64_SSBD_KERNEL: - if (required) { - __this_cpu_write(arm64_ssbd_callback_required, 1); - arm64_set_ssbd_mitigation(true); - } - break; - - case ARM64_SSBD_FORCE_ENABLE: - arm64_set_ssbd_mitigation(true); - required = true; - break; - - default: - WARN_ON(1); - break; - } - -out_printmsg: - switch (ssbd_state) { - case ARM64_SSBD_FORCE_DISABLE: - pr_info_once("%s disabled from command-line\n", entry->desc); - break; - - case ARM64_SSBD_FORCE_ENABLE: - pr_info_once("%s forced from command-line\n", entry->desc); - break; - } - - return required; -} - -/* known invulnerable cores */ -static const struct midr_range arm64_ssb_cpus[] = { - MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), - MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), - MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), - MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), - {}, -}; - #ifdef CONFIG_ARM64_ERRATUM_1463225 DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa); @@ -519,83 +160,6 @@ cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ CAP_MIDR_RANGE_LIST(midr_list) -/* Track overall mitigation state. We are only mitigated if all cores are ok */ -static bool __hardenbp_enab = true; -static bool __spectrev2_safe = true; - -int get_spectre_v2_workaround_state(void) -{ - if (__spectrev2_safe) - return ARM64_BP_HARDEN_NOT_REQUIRED; - - if (!__hardenbp_enab) - return ARM64_BP_HARDEN_UNKNOWN; - - return ARM64_BP_HARDEN_WA_NEEDED; -} - -/* - * List of CPUs that do not need any Spectre-v2 mitigation at all. - */ -static const struct midr_range spectre_v2_safe_list[] = { - MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), - MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), - MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), - MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), - MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), - { /* sentinel */ } -}; - -/* - * Track overall bp hardening for all heterogeneous cores in the machine. - * We are only considered "safe" if all booted cores are known safe. - */ -static bool __maybe_unused -check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) -{ - int need_wa; - - WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); - - /* If the CPU has CSV2 set, we're safe */ - if (cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64PFR0_EL1), - ID_AA64PFR0_CSV2_SHIFT)) - return false; - - /* Alternatively, we have a list of unaffected CPUs */ - if (is_midr_in_range_list(read_cpuid_id(), spectre_v2_safe_list)) - return false; - - /* Fallback to firmware detection */ - need_wa = detect_harden_bp_fw(); - if (!need_wa) - return false; - - __spectrev2_safe = false; - - if (!IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR)) { - pr_warn_once("spectrev2 mitigation disabled by kernel configuration\n"); - __hardenbp_enab = false; - return false; - } - - /* forced off */ - if (__nospectre_v2 || cpu_mitigations_off()) { - pr_info_once("spectrev2 mitigation disabled by command line option\n"); - __hardenbp_enab = false; - return false; - } - - if (need_wa < 0) { - pr_warn_once("ARM_SMCCC_ARCH_WORKAROUND_1 missing from firmware\n"); - __hardenbp_enab = false; - } - - return (need_wa > 0); -} - static const __maybe_unused struct midr_range tx2_family_cpus[] = { MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), @@ -887,9 +451,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = { }, #endif { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, + .desc = "Spectre-v2", + .capability = ARM64_SPECTRE_V2, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, - .matches = check_branch_predictor, + .matches = has_spectre_v2, + .cpu_enable = spectre_v2_enable_mitigation, }, #ifdef CONFIG_RANDOMIZE_BASE { @@ -899,11 +465,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = { }, #endif { - .desc = "Speculative Store Bypass Disable", - .capability = ARM64_SSBD, + .desc = "Spectre-v4", + .capability = ARM64_SPECTRE_V4, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, - .matches = has_ssbd_mitigation, - .midr_range_list = arm64_ssb_cpus, + .matches = has_spectre_v4, + .cpu_enable = spectre_v4_enable_mitigation, }, #ifdef CONFIG_ARM64_ERRATUM_1418040 { @@ -960,40 +526,3 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { } }; - -ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, - char *buf) -{ - return sprintf(buf, "Mitigation: __user pointer sanitization\n"); -} - -ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, - char *buf) -{ - switch (get_spectre_v2_workaround_state()) { - case ARM64_BP_HARDEN_NOT_REQUIRED: - return sprintf(buf, "Not affected\n"); - case ARM64_BP_HARDEN_WA_NEEDED: - return sprintf(buf, "Mitigation: Branch predictor hardening\n"); - case ARM64_BP_HARDEN_UNKNOWN: - default: - return sprintf(buf, "Vulnerable\n"); - } -} - -ssize_t cpu_show_spec_store_bypass(struct device *dev, - struct device_attribute *attr, char *buf) -{ - if (__ssb_safe) - return sprintf(buf, "Not affected\n"); - - switch (ssbd_state) { - case ARM64_SSBD_KERNEL: - case ARM64_SSBD_FORCE_ENABLE: - if (IS_ENABLED(CONFIG_ARM64_SSBD)) - return sprintf(buf, - "Mitigation: Speculative Store Bypass disabled via prctl\n"); - } - - return sprintf(buf, "Vulnerable\n"); -} diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6424584be01e..dcc165b3fc04 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -75,6 +75,7 @@ #include <asm/cpu_ops.h> #include <asm/fpsimd.h> #include <asm/mmu_context.h> +#include <asm/mte.h> #include <asm/processor.h> #include <asm/sysreg.h> #include <asm/traps.h> @@ -197,9 +198,9 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FCMA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_API_SHIFT, 4, 0), + FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_API_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), - FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_APA_SHIFT, 4, 0), + FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_APA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DPB_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -227,7 +228,9 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_RASFRAC_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_MTE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MTE_SHIFT, 4, ID_AA64PFR1_MTE_NI), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_BTI), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_BT_SHIFT, 4, 0), ARM64_FTR_END, @@ -487,7 +490,7 @@ static const struct arm64_ftr_bits ftr_id_pfr1[] = { }; static const struct arm64_ftr_bits ftr_id_pfr2[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR2_SSBS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_SSBS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_CSV3_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -1111,6 +1114,7 @@ u64 read_sanitised_ftr_reg(u32 id) return 0; return regp->sys_val; } +EXPORT_SYMBOL_GPL(read_sanitised_ftr_reg); #define read_sysreg_case(r) \ case r: return read_sysreg_s(r) @@ -1443,6 +1447,7 @@ static inline void __cpu_enable_hw_dbm(void) write_sysreg(tcr, tcr_el1); isb(); + local_flush_tlb_all(); } static bool cpu_has_broken_dbm(void) @@ -1583,48 +1588,6 @@ static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused) WARN_ON(val & (7 << 27 | 7 << 21)); } -#ifdef CONFIG_ARM64_SSBD -static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr) -{ - if (user_mode(regs)) - return 1; - - if (instr & BIT(PSTATE_Imm_shift)) - regs->pstate |= PSR_SSBS_BIT; - else - regs->pstate &= ~PSR_SSBS_BIT; - - arm64_skip_faulting_instruction(regs, 4); - return 0; -} - -static struct undef_hook ssbs_emulation_hook = { - .instr_mask = ~(1U << PSTATE_Imm_shift), - .instr_val = 0xd500401f | PSTATE_SSBS, - .fn = ssbs_emulation_handler, -}; - -static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused) -{ - static bool undef_hook_registered = false; - static DEFINE_RAW_SPINLOCK(hook_lock); - - raw_spin_lock(&hook_lock); - if (!undef_hook_registered) { - register_undef_hook(&ssbs_emulation_hook); - undef_hook_registered = true; - } - raw_spin_unlock(&hook_lock); - - if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { - sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS); - arm64_set_ssbd_mitigation(false); - } else { - arm64_set_ssbd_mitigation(true); - } -} -#endif /* CONFIG_ARM64_SSBD */ - #ifdef CONFIG_ARM64_PAN static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) { @@ -1648,11 +1611,37 @@ static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused) #endif /* CONFIG_ARM64_RAS_EXTN */ #ifdef CONFIG_ARM64_PTR_AUTH -static bool has_address_auth(const struct arm64_cpu_capabilities *entry, - int __unused) +static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry, int scope) { - return __system_matches_cap(ARM64_HAS_ADDRESS_AUTH_ARCH) || - __system_matches_cap(ARM64_HAS_ADDRESS_AUTH_IMP_DEF); + int boot_val, sec_val; + + /* We don't expect to be called with SCOPE_SYSTEM */ + WARN_ON(scope == SCOPE_SYSTEM); + /* + * The ptr-auth feature levels are not intercompatible with lower + * levels. Hence we must match ptr-auth feature level of the secondary + * CPUs with that of the boot CPU. The level of boot cpu is fetched + * from the sanitised register whereas direct register read is done for + * the secondary CPUs. + * The sanitised feature state is guaranteed to match that of the + * boot CPU as a mismatched secondary CPU is parked before it gets + * a chance to update the state, with the capability. + */ + boot_val = cpuid_feature_extract_field(read_sanitised_ftr_reg(entry->sys_reg), + entry->field_pos, entry->sign); + if (scope & SCOPE_BOOT_CPU) + return boot_val >= entry->min_field_value; + /* Now check for the secondary CPUs with SCOPE_LOCAL_CPU scope */ + sec_val = cpuid_feature_extract_field(__read_sysreg_by_encoding(entry->sys_reg), + entry->field_pos, entry->sign); + return sec_val == boot_val; +} + +static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry, + int scope) +{ + return has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH], scope) || + has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope); } static bool has_generic_auth(const struct arm64_cpu_capabilities *entry, @@ -1702,6 +1691,22 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused) } #endif /* CONFIG_ARM64_BTI */ +#ifdef CONFIG_ARM64_MTE +static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) +{ + static bool cleared_zero_page = false; + + /* + * Clear the tags in the zero page. This needs to be done via the + * linear map which has the Tagged attribute. + */ + if (!cleared_zero_page) { + cleared_zero_page = true; + mte_clear_page_tags(lm_alias(empty_zero_page)); + } +} +#endif /* CONFIG_ARM64_MTE */ + /* Internal helper functions to match cpu capability type */ static bool cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap) @@ -1976,19 +1981,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64ISAR0_CRC32_SHIFT, .min_field_value = 1, }, -#ifdef CONFIG_ARM64_SSBD { .desc = "Speculative Store Bypassing Safe (SSBS)", .capability = ARM64_SSBS, - .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR1_EL1, .field_pos = ID_AA64PFR1_SSBS_SHIFT, .sign = FTR_UNSIGNED, .min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY, - .cpu_enable = cpu_enable_ssbs, }, -#endif #ifdef CONFIG_ARM64_CNP { .desc = "Common not Private translations", @@ -2021,7 +2023,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_APA_SHIFT, .min_field_value = ID_AA64ISAR1_APA_ARCHITECTED, - .matches = has_cpuid_feature, + .matches = has_address_auth_cpucap, }, { .desc = "Address authentication (IMP DEF algorithm)", @@ -2031,12 +2033,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_API_SHIFT, .min_field_value = ID_AA64ISAR1_API_IMP_DEF, - .matches = has_cpuid_feature, + .matches = has_address_auth_cpucap, }, { .capability = ARM64_HAS_ADDRESS_AUTH, .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, - .matches = has_address_auth, + .matches = has_address_auth_metacap, }, { .desc = "Generic authentication (architected algorithm)", @@ -2121,6 +2123,19 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, }, #endif +#ifdef CONFIG_ARM64_MTE + { + .desc = "Memory Tagging Extension", + .capability = ARM64_MTE, + .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .field_pos = ID_AA64PFR1_MTE_SHIFT, + .min_field_value = ID_AA64PFR1_MTE, + .sign = FTR_UNSIGNED, + .cpu_enable = cpu_enable_mte, + }, +#endif /* CONFIG_ARM64_MTE */ {}, }; @@ -2237,6 +2252,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA), HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG), #endif +#ifdef CONFIG_ARM64_MTE + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE), +#endif /* CONFIG_ARM64_MTE */ {}, }; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index d0076c2159e6..6a7bb3729d60 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -43,94 +43,93 @@ static const char *icache_policy_str[] = { unsigned long __icache_flags; static const char *const hwcap_str[] = { - "fp", - "asimd", - "evtstrm", - "aes", - "pmull", - "sha1", - "sha2", - "crc32", - "atomics", - "fphp", - "asimdhp", - "cpuid", - "asimdrdm", - "jscvt", - "fcma", - "lrcpc", - "dcpop", - "sha3", - "sm3", - "sm4", - "asimddp", - "sha512", - "sve", - "asimdfhm", - "dit", - "uscat", - "ilrcpc", - "flagm", - "ssbs", - "sb", - "paca", - "pacg", - "dcpodp", - "sve2", - "sveaes", - "svepmull", - "svebitperm", - "svesha3", - "svesm4", - "flagm2", - "frint", - "svei8mm", - "svef32mm", - "svef64mm", - "svebf16", - "i8mm", - "bf16", - "dgh", - "rng", - "bti", - /* reserved for "mte" */ - NULL + [KERNEL_HWCAP_FP] = "fp", + [KERNEL_HWCAP_ASIMD] = "asimd", + [KERNEL_HWCAP_EVTSTRM] = "evtstrm", + [KERNEL_HWCAP_AES] = "aes", + [KERNEL_HWCAP_PMULL] = "pmull", + [KERNEL_HWCAP_SHA1] = "sha1", + [KERNEL_HWCAP_SHA2] = "sha2", + [KERNEL_HWCAP_CRC32] = "crc32", + [KERNEL_HWCAP_ATOMICS] = "atomics", + [KERNEL_HWCAP_FPHP] = "fphp", + [KERNEL_HWCAP_ASIMDHP] = "asimdhp", + [KERNEL_HWCAP_CPUID] = "cpuid", + [KERNEL_HWCAP_ASIMDRDM] = "asimdrdm", + [KERNEL_HWCAP_JSCVT] = "jscvt", + [KERNEL_HWCAP_FCMA] = "fcma", + [KERNEL_HWCAP_LRCPC] = "lrcpc", + [KERNEL_HWCAP_DCPOP] = "dcpop", + [KERNEL_HWCAP_SHA3] = "sha3", + [KERNEL_HWCAP_SM3] = "sm3", + [KERNEL_HWCAP_SM4] = "sm4", + [KERNEL_HWCAP_ASIMDDP] = "asimddp", + [KERNEL_HWCAP_SHA512] = "sha512", + [KERNEL_HWCAP_SVE] = "sve", + [KERNEL_HWCAP_ASIMDFHM] = "asimdfhm", + [KERNEL_HWCAP_DIT] = "dit", + [KERNEL_HWCAP_USCAT] = "uscat", + [KERNEL_HWCAP_ILRCPC] = "ilrcpc", + [KERNEL_HWCAP_FLAGM] = "flagm", + [KERNEL_HWCAP_SSBS] = "ssbs", + [KERNEL_HWCAP_SB] = "sb", + [KERNEL_HWCAP_PACA] = "paca", + [KERNEL_HWCAP_PACG] = "pacg", + [KERNEL_HWCAP_DCPODP] = "dcpodp", + [KERNEL_HWCAP_SVE2] = "sve2", + [KERNEL_HWCAP_SVEAES] = "sveaes", + [KERNEL_HWCAP_SVEPMULL] = "svepmull", + [KERNEL_HWCAP_SVEBITPERM] = "svebitperm", + [KERNEL_HWCAP_SVESHA3] = "svesha3", + [KERNEL_HWCAP_SVESM4] = "svesm4", + [KERNEL_HWCAP_FLAGM2] = "flagm2", + [KERNEL_HWCAP_FRINT] = "frint", + [KERNEL_HWCAP_SVEI8MM] = "svei8mm", + [KERNEL_HWCAP_SVEF32MM] = "svef32mm", + [KERNEL_HWCAP_SVEF64MM] = "svef64mm", + [KERNEL_HWCAP_SVEBF16] = "svebf16", + [KERNEL_HWCAP_I8MM] = "i8mm", + [KERNEL_HWCAP_BF16] = "bf16", + [KERNEL_HWCAP_DGH] = "dgh", + [KERNEL_HWCAP_RNG] = "rng", + [KERNEL_HWCAP_BTI] = "bti", + [KERNEL_HWCAP_MTE] = "mte", }; #ifdef CONFIG_COMPAT +#define COMPAT_KERNEL_HWCAP(x) const_ilog2(COMPAT_HWCAP_ ## x) static const char *const compat_hwcap_str[] = { - "swp", - "half", - "thumb", - "26bit", - "fastmult", - "fpa", - "vfp", - "edsp", - "java", - "iwmmxt", - "crunch", - "thumbee", - "neon", - "vfpv3", - "vfpv3d16", - "tls", - "vfpv4", - "idiva", - "idivt", - "vfpd32", - "lpae", - "evtstrm", - NULL + [COMPAT_KERNEL_HWCAP(SWP)] = "swp", + [COMPAT_KERNEL_HWCAP(HALF)] = "half", + [COMPAT_KERNEL_HWCAP(THUMB)] = "thumb", + [COMPAT_KERNEL_HWCAP(26BIT)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(FAST_MULT)] = "fastmult", + [COMPAT_KERNEL_HWCAP(FPA)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(VFP)] = "vfp", + [COMPAT_KERNEL_HWCAP(EDSP)] = "edsp", + [COMPAT_KERNEL_HWCAP(JAVA)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(IWMMXT)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(CRUNCH)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(THUMBEE)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(NEON)] = "neon", + [COMPAT_KERNEL_HWCAP(VFPv3)] = "vfpv3", + [COMPAT_KERNEL_HWCAP(VFPV3D16)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(TLS)] = "tls", + [COMPAT_KERNEL_HWCAP(VFPv4)] = "vfpv4", + [COMPAT_KERNEL_HWCAP(IDIVA)] = "idiva", + [COMPAT_KERNEL_HWCAP(IDIVT)] = "idivt", + [COMPAT_KERNEL_HWCAP(VFPD32)] = NULL, /* Not possible on arm64 */ + [COMPAT_KERNEL_HWCAP(LPAE)] = "lpae", + [COMPAT_KERNEL_HWCAP(EVTSTRM)] = "evtstrm", }; +#define COMPAT_KERNEL_HWCAP2(x) const_ilog2(COMPAT_HWCAP2_ ## x) static const char *const compat_hwcap2_str[] = { - "aes", - "pmull", - "sha1", - "sha2", - "crc32", - NULL + [COMPAT_KERNEL_HWCAP2(AES)] = "aes", + [COMPAT_KERNEL_HWCAP2(PMULL)] = "pmull", + [COMPAT_KERNEL_HWCAP2(SHA1)] = "sha1", + [COMPAT_KERNEL_HWCAP2(SHA2)] = "sha2", + [COMPAT_KERNEL_HWCAP2(CRC32)] = "crc32", }; #endif /* CONFIG_COMPAT */ @@ -166,16 +165,25 @@ static int c_show(struct seq_file *m, void *v) seq_puts(m, "Features\t:"); if (compat) { #ifdef CONFIG_COMPAT - for (j = 0; compat_hwcap_str[j]; j++) - if (compat_elf_hwcap & (1 << j)) + for (j = 0; j < ARRAY_SIZE(compat_hwcap_str); j++) { + if (compat_elf_hwcap & (1 << j)) { + /* + * Warn once if any feature should not + * have been present on arm64 platform. + */ + if (WARN_ON_ONCE(!compat_hwcap_str[j])) + continue; + seq_printf(m, " %s", compat_hwcap_str[j]); + } + } - for (j = 0; compat_hwcap2_str[j]; j++) + for (j = 0; j < ARRAY_SIZE(compat_hwcap2_str); j++) if (compat_elf_hwcap2 & (1 << j)) seq_printf(m, " %s", compat_hwcap2_str[j]); #endif /* CONFIG_COMPAT */ } else { - for (j = 0; hwcap_str[j]; j++) + for (j = 0; j < ARRAY_SIZE(hwcap_str); j++) if (cpu_have_feature(j)) seq_printf(m, " %s", hwcap_str[j]); } diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 7310a4f7f993..fa76151de6ff 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -384,7 +384,7 @@ void __init debug_traps_init(void) hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP, TRAP_TRACE, "single-step handler"); hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP, - TRAP_BRKPT, "ptrace BRK handler"); + TRAP_BRKPT, "BRK handler"); } /* Re-enable single step for syscall restarting. */ diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index d3be9dbf5490..43d4c329775f 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -66,6 +66,13 @@ static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr) } NOKPROBE_SYMBOL(el1_dbg); +static void notrace el1_fpac(struct pt_regs *regs, unsigned long esr) +{ + local_daif_inherit(regs); + do_ptrauth_fault(regs, esr); +} +NOKPROBE_SYMBOL(el1_fpac); + asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -92,6 +99,9 @@ asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) case ESR_ELx_EC_BRK64: el1_dbg(regs, esr); break; + case ESR_ELx_EC_FPAC: + el1_fpac(regs, esr); + break; default: el1_inv(regs, esr); } @@ -227,6 +237,14 @@ static void notrace el0_svc(struct pt_regs *regs) } NOKPROBE_SYMBOL(el0_svc); +static void notrace el0_fpac(struct pt_regs *regs, unsigned long esr) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_ptrauth_fault(regs, esr); +} +NOKPROBE_SYMBOL(el0_fpac); + asmlinkage void notrace el0_sync_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -272,6 +290,9 @@ asmlinkage void notrace el0_sync_handler(struct pt_regs *regs) case ESR_ELx_EC_BRK64: el0_dbg(regs, esr); break; + case ESR_ELx_EC_FPAC: + el0_fpac(regs, esr); + break; default: el0_inv(regs, esr); } diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index f880dd63ddc3..2ca395c25448 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -32,6 +32,7 @@ SYM_FUNC_START(fpsimd_load_state) SYM_FUNC_END(fpsimd_load_state) #ifdef CONFIG_ARM64_SVE + SYM_FUNC_START(sve_save_state) sve_save 0, x1, 2 ret @@ -46,4 +47,28 @@ SYM_FUNC_START(sve_get_vl) _sve_rdvl 0, 1 ret SYM_FUNC_END(sve_get_vl) + +/* + * Load SVE state from FPSIMD state. + * + * x0 = pointer to struct fpsimd_state + * x1 = VQ - 1 + * + * Each SVE vector will be loaded with the first 128-bits taken from FPSIMD + * and the rest zeroed. All the other SVE registers will be zeroed. + */ +SYM_FUNC_START(sve_load_from_fpsimd_state) + sve_load_vq x1, x2, x3 + fpsimd_restore x0, 8 + _for n, 0, 15, _sve_pfalse \n + _sve_wrffr 0 + ret +SYM_FUNC_END(sve_load_from_fpsimd_state) + +/* Zero all SVE registers but the first 128-bits of each vector */ +SYM_FUNC_START(sve_flush_live) + sve_flush + ret +SYM_FUNC_END(sve_flush_live) + #endif /* CONFIG_ARM64_SVE */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 55af8b504b65..f30007dff35f 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -132,9 +132,8 @@ alternative_else_nop_endif * them if required. */ .macro apply_ssbd, state, tmp1, tmp2 -#ifdef CONFIG_ARM64_SSBD -alternative_cb arm64_enable_wa2_handling - b .L__asm_ssbd_skip\@ +alternative_cb spectre_v4_patch_fw_mitigation_enable + b .L__asm_ssbd_skip\@ // Patched to NOP alternative_cb_end ldr_this_cpu \tmp2, arm64_ssbd_callback_required, \tmp1 cbz \tmp2, .L__asm_ssbd_skip\@ @@ -142,10 +141,35 @@ alternative_cb_end tbnz \tmp2, #TIF_SSBD, .L__asm_ssbd_skip\@ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 mov w1, #\state -alternative_cb arm64_update_smccc_conduit +alternative_cb spectre_v4_patch_fw_mitigation_conduit nop // Patched to SMC/HVC #0 alternative_cb_end .L__asm_ssbd_skip\@: + .endm + + /* Check for MTE asynchronous tag check faults */ + .macro check_mte_async_tcf, flgs, tmp +#ifdef CONFIG_ARM64_MTE +alternative_if_not ARM64_MTE + b 1f +alternative_else_nop_endif + mrs_s \tmp, SYS_TFSRE0_EL1 + tbz \tmp, #SYS_TFSR_EL1_TF0_SHIFT, 1f + /* Asynchronous TCF occurred for TTBR0 access, set the TI flag */ + orr \flgs, \flgs, #_TIF_MTE_ASYNC_FAULT + str \flgs, [tsk, #TSK_TI_FLAGS] + msr_s SYS_TFSRE0_EL1, xzr +1: +#endif + .endm + + /* Clear the MTE asynchronous tag check faults */ + .macro clear_mte_async_tcf +#ifdef CONFIG_ARM64_MTE +alternative_if ARM64_MTE + dsb ish + msr_s SYS_TFSRE0_EL1, xzr +alternative_else_nop_endif #endif .endm @@ -182,6 +206,8 @@ alternative_cb_end ldr x19, [tsk, #TSK_TI_FLAGS] disable_step_tsk x19, x20 + /* Check for asynchronous tag check faults in user space */ + check_mte_async_tcf x19, x22 apply_ssbd 1, x22, x23 ptrauth_keys_install_kernel tsk, x20, x22, x23 @@ -233,6 +259,13 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING str x20, [sp, #S_PMR_SAVE] alternative_else_nop_endif + /* Re-enable tag checking (TCO set on exception entry) */ +#ifdef CONFIG_ARM64_MTE +alternative_if ARM64_MTE + SET_PSTATE_TCO(0) +alternative_else_nop_endif +#endif + /* * Registers that may be useful after this macro is invoked: * @@ -697,11 +730,9 @@ el0_irq_naked: bl trace_hardirqs_off #endif -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR tbz x22, #55, 1f bl do_el0_irq_bp_hardening 1: -#endif irq_handler #ifdef CONFIG_TRACE_IRQFLAGS @@ -744,6 +775,8 @@ SYM_CODE_START_LOCAL(ret_to_user) and x2, x1, #_TIF_WORK_MASK cbnz x2, work_pending finish_ret_to_user: + /* Ignore asynchronous tag check faults in the uaccess routines */ + clear_mte_async_tcf enable_step_tsk x1, x2 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK bl stackleak_erase diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 55c8f3ec6705..a6d688c10745 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -32,9 +32,11 @@ #include <linux/swab.h> #include <asm/esr.h> +#include <asm/exception.h> #include <asm/fpsimd.h> #include <asm/cpufeature.h> #include <asm/cputype.h> +#include <asm/neon.h> #include <asm/processor.h> #include <asm/simd.h> #include <asm/sigcontext.h> @@ -312,7 +314,7 @@ static void fpsimd_save(void) * re-enter user with corrupt state. * There's no way to recover, so kill it: */ - force_signal_inject(SIGKILL, SI_KERNEL, 0); + force_signal_inject(SIGKILL, SI_KERNEL, 0, 0); return; } @@ -928,7 +930,7 @@ void fpsimd_release_task(struct task_struct *dead_task) * the SVE access trap will be disabled the next time this task * reaches ret_to_user. * - * TIF_SVE should be clear on entry: otherwise, task_fpsimd_load() + * TIF_SVE should be clear on entry: otherwise, fpsimd_restore_current_state() * would have disabled the SVE access trap for userspace during * ret_to_user, making an SVE access trap impossible in that case. */ @@ -936,7 +938,7 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs) { /* Even if we chose not to use SVE, the hardware could still trap: */ if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) { - force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); return; } diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 037421c66b14..d8d9caf02834 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -36,14 +36,10 @@ #include "efi-header.S" -#define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET) +#define __PHYS_OFFSET KERNEL_START -#if (TEXT_OFFSET & 0xfff) != 0 -#error TEXT_OFFSET must be at least 4KB aligned -#elif (PAGE_OFFSET & 0x1fffff) != 0 +#if (PAGE_OFFSET & 0x1fffff) != 0 #error PAGE_OFFSET must be at least 2MB aligned -#elif TEXT_OFFSET > 0x1fffff -#error TEXT_OFFSET must be less than 2MB #endif /* @@ -55,7 +51,7 @@ * x0 = physical address to the FDT blob. * * This code is mostly position independent so you call this at - * __pa(PAGE_OFFSET + TEXT_OFFSET). + * __pa(PAGE_OFFSET). * * Note that the callee-saved registers are used for storing variables * that are useful before the MMU is enabled. The allocations are described @@ -77,7 +73,7 @@ _head: b primary_entry // branch to kernel start, magic .long 0 // reserved #endif - le64sym _kernel_offset_le // Image load offset from start of RAM, little-endian + .quad 0 // Image load offset from start of RAM, little-endian le64sym _kernel_size_le // Effective size of kernel image, little-endian le64sym _kernel_flags_le // Informative flags, little-endian .quad 0 // reserved @@ -382,7 +378,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables) * Map the kernel image (starting with PHYS_OFFSET). */ adrp x0, init_pg_dir - mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) + mov_q x5, KIMAGE_VADDR // compile time __va(_text) add x5, x5, x23 // add KASLR displacement mov x4, PTRS_PER_PGD adrp x6, _end // runtime __pa(_end) @@ -474,7 +470,7 @@ SYM_FUNC_END(__primary_switched) .pushsection ".rodata", "a" SYM_DATA_START(kimage_vaddr) - .quad _text - TEXT_OFFSET + .quad _text SYM_DATA_END(kimage_vaddr) EXPORT_SYMBOL(kimage_vaddr) .popsection diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 68e14152d6e9..42003774d261 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -21,7 +21,6 @@ #include <linux/sched.h> #include <linux/suspend.h> #include <linux/utsname.h> -#include <linux/version.h> #include <asm/barrier.h> #include <asm/cacheflush.h> @@ -31,6 +30,7 @@ #include <asm/kexec.h> #include <asm/memory.h> #include <asm/mmu_context.h> +#include <asm/mte.h> #include <asm/pgalloc.h> #include <asm/pgtable-hwdef.h> #include <asm/sections.h> @@ -285,6 +285,117 @@ static int create_safe_exec_page(void *src_start, size_t length, #define dcache_clean_range(start, end) __flush_dcache_area(start, (end - start)) +#ifdef CONFIG_ARM64_MTE + +static DEFINE_XARRAY(mte_pages); + +static int save_tags(struct page *page, unsigned long pfn) +{ + void *tag_storage, *ret; + + tag_storage = mte_allocate_tag_storage(); + if (!tag_storage) + return -ENOMEM; + + mte_save_page_tags(page_address(page), tag_storage); + + ret = xa_store(&mte_pages, pfn, tag_storage, GFP_KERNEL); + if (WARN(xa_is_err(ret), "Failed to store MTE tags")) { + mte_free_tag_storage(tag_storage); + return xa_err(ret); + } else if (WARN(ret, "swsusp: %s: Duplicate entry", __func__)) { + mte_free_tag_storage(ret); + } + + return 0; +} + +static void swsusp_mte_free_storage(void) +{ + XA_STATE(xa_state, &mte_pages, 0); + void *tags; + + xa_lock(&mte_pages); + xas_for_each(&xa_state, tags, ULONG_MAX) { + mte_free_tag_storage(tags); + } + xa_unlock(&mte_pages); + + xa_destroy(&mte_pages); +} + +static int swsusp_mte_save_tags(void) +{ + struct zone *zone; + unsigned long pfn, max_zone_pfn; + int ret = 0; + int n = 0; + + if (!system_supports_mte()) + return 0; + + for_each_populated_zone(zone) { + max_zone_pfn = zone_end_pfn(zone); + for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) { + struct page *page = pfn_to_online_page(pfn); + + if (!page) + continue; + + if (!test_bit(PG_mte_tagged, &page->flags)) + continue; + + ret = save_tags(page, pfn); + if (ret) { + swsusp_mte_free_storage(); + goto out; + } + + n++; + } + } + pr_info("Saved %d MTE pages\n", n); + +out: + return ret; +} + +static void swsusp_mte_restore_tags(void) +{ + XA_STATE(xa_state, &mte_pages, 0); + int n = 0; + void *tags; + + xa_lock(&mte_pages); + xas_for_each(&xa_state, tags, ULONG_MAX) { + unsigned long pfn = xa_state.xa_index; + struct page *page = pfn_to_online_page(pfn); + + mte_restore_page_tags(page_address(page), tags); + + mte_free_tag_storage(tags); + n++; + } + xa_unlock(&mte_pages); + + pr_info("Restored %d MTE pages\n", n); + + xa_destroy(&mte_pages); +} + +#else /* CONFIG_ARM64_MTE */ + +static int swsusp_mte_save_tags(void) +{ + return 0; +} + +static void swsusp_mte_restore_tags(void) +{ +} + +#endif /* CONFIG_ARM64_MTE */ + int swsusp_arch_suspend(void) { int ret = 0; @@ -302,6 +413,10 @@ int swsusp_arch_suspend(void) /* make the crash dump kernel image visible/saveable */ crash_prepare_suspend(); + ret = swsusp_mte_save_tags(); + if (ret) + return ret; + sleep_cpu = smp_processor_id(); ret = swsusp_save(); } else { @@ -315,6 +430,8 @@ int swsusp_arch_suspend(void) dcache_clean_range(__hyp_text_start, __hyp_text_end); } + swsusp_mte_restore_tags(); + /* make the crash dump kernel image protected again */ crash_post_resume(); @@ -332,11 +449,7 @@ int swsusp_arch_suspend(void) * mitigation off behind our back, let's set the state * to what we expect it to be. */ - switch (arm64_get_ssbd_state()) { - case ARM64_SSBD_FORCE_ENABLE: - case ARM64_SSBD_KERNEL: - arm64_set_ssbd_mitigation(true); - } + spectre_v4_enable_mitigation(NULL); } local_daif_restore(flags); diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 8982b68289b7..d0f3f35dd0d7 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -64,12 +64,10 @@ __efistub__ctype = _ctype; #define KVM_NVHE_ALIAS(sym) __kvm_nvhe_##sym = sym; /* Alternative callbacks for init-time patching of nVHE hyp code. */ -KVM_NVHE_ALIAS(arm64_enable_wa2_handling); KVM_NVHE_ALIAS(kvm_patch_vector_branch); KVM_NVHE_ALIAS(kvm_update_va_mask); /* Global kernel state accessed by nVHE hyp code. */ -KVM_NVHE_ALIAS(arm64_ssbd_callback_required); KVM_NVHE_ALIAS(kvm_host_data); KVM_NVHE_ALIAS(kvm_vgic_global_state); diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index c7d38c660372..7bc3ba897901 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -62,7 +62,6 @@ */ #define HEAD_SYMBOLS \ DEFINE_IMAGE_LE64(_kernel_size_le, _end - _text); \ - DEFINE_IMAGE_LE64(_kernel_offset_le, TEXT_OFFSET); \ DEFINE_IMAGE_LE64(_kernel_flags_le, __HEAD_FLAGS); #endif /* __ARM64_KERNEL_IMAGE_H */ diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index a107375005bc..6c0de2f60ea9 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -60,16 +60,10 @@ bool __kprobes aarch64_insn_is_steppable_hint(u32 insn) case AARCH64_INSN_HINT_XPACLRI: case AARCH64_INSN_HINT_PACIA_1716: case AARCH64_INSN_HINT_PACIB_1716: - case AARCH64_INSN_HINT_AUTIA_1716: - case AARCH64_INSN_HINT_AUTIB_1716: case AARCH64_INSN_HINT_PACIAZ: case AARCH64_INSN_HINT_PACIASP: case AARCH64_INSN_HINT_PACIBZ: case AARCH64_INSN_HINT_PACIBSP: - case AARCH64_INSN_HINT_AUTIAZ: - case AARCH64_INSN_HINT_AUTIASP: - case AARCH64_INSN_HINT_AUTIBZ: - case AARCH64_INSN_HINT_AUTIBSP: case AARCH64_INSN_HINT_BTI: case AARCH64_INSN_HINT_BTIC: case AARCH64_INSN_HINT_BTIJ: @@ -176,7 +170,7 @@ bool __kprobes aarch64_insn_uses_literal(u32 insn) bool __kprobes aarch64_insn_is_branch(u32 insn) { - /* b, bl, cb*, tb*, b.cond, br, blr */ + /* b, bl, cb*, tb*, ret*, b.cond, br*, blr* */ return aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn) || @@ -185,8 +179,11 @@ bool __kprobes aarch64_insn_is_branch(u32 insn) aarch64_insn_is_tbz(insn) || aarch64_insn_is_tbnz(insn) || aarch64_insn_is_ret(insn) || + aarch64_insn_is_ret_auth(insn) || aarch64_insn_is_br(insn) || + aarch64_insn_is_br_auth(insn) || aarch64_insn_is_blr(insn) || + aarch64_insn_is_blr_auth(insn) || aarch64_insn_is_bcond(insn); } diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c new file mode 100644 index 000000000000..52a0638ed967 --- /dev/null +++ b/arch/arm64/kernel/mte.c @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 ARM Ltd. + */ + +#include <linux/bitops.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/prctl.h> +#include <linux/sched.h> +#include <linux/sched/mm.h> +#include <linux/string.h> +#include <linux/swap.h> +#include <linux/swapops.h> +#include <linux/thread_info.h> +#include <linux/uio.h> + +#include <asm/cpufeature.h> +#include <asm/mte.h> +#include <asm/ptrace.h> +#include <asm/sysreg.h> + +static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap) +{ + pte_t old_pte = READ_ONCE(*ptep); + + if (check_swap && is_swap_pte(old_pte)) { + swp_entry_t entry = pte_to_swp_entry(old_pte); + + if (!non_swap_entry(entry) && mte_restore_tags(entry, page)) + return; + } + + mte_clear_page_tags(page_address(page)); +} + +void mte_sync_tags(pte_t *ptep, pte_t pte) +{ + struct page *page = pte_page(pte); + long i, nr_pages = compound_nr(page); + bool check_swap = nr_pages == 1; + + /* if PG_mte_tagged is set, tags have already been initialised */ + for (i = 0; i < nr_pages; i++, page++) { + if (!test_and_set_bit(PG_mte_tagged, &page->flags)) + mte_sync_page_tags(page, ptep, check_swap); + } +} + +int memcmp_pages(struct page *page1, struct page *page2) +{ + char *addr1, *addr2; + int ret; + + addr1 = page_address(page1); + addr2 = page_address(page2); + ret = memcmp(addr1, addr2, PAGE_SIZE); + + if (!system_supports_mte() || ret) + return ret; + + /* + * If the page content is identical but at least one of the pages is + * tagged, return non-zero to avoid KSM merging. If only one of the + * pages is tagged, set_pte_at() may zero or change the tags of the + * other page via mte_sync_tags(). + */ + if (test_bit(PG_mte_tagged, &page1->flags) || + test_bit(PG_mte_tagged, &page2->flags)) + return addr1 != addr2; + + return ret; +} + +static void update_sctlr_el1_tcf0(u64 tcf0) +{ + /* ISB required for the kernel uaccess routines */ + sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF0_MASK, tcf0); + isb(); +} + +static void set_sctlr_el1_tcf0(u64 tcf0) +{ + /* + * mte_thread_switch() checks current->thread.sctlr_tcf0 as an + * optimisation. Disable preemption so that it does not see + * the variable update before the SCTLR_EL1.TCF0 one. + */ + preempt_disable(); + current->thread.sctlr_tcf0 = tcf0; + update_sctlr_el1_tcf0(tcf0); + preempt_enable(); +} + +static void update_gcr_el1_excl(u64 incl) +{ + u64 excl = ~incl & SYS_GCR_EL1_EXCL_MASK; + + /* + * Note that 'incl' is an include mask (controlled by the user via + * prctl()) while GCR_EL1 accepts an exclude mask. + * No need for ISB since this only affects EL0 currently, implicit + * with ERET. + */ + sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK, excl); +} + +static void set_gcr_el1_excl(u64 incl) +{ + current->thread.gcr_user_incl = incl; + update_gcr_el1_excl(incl); +} + +void flush_mte_state(void) +{ + if (!system_supports_mte()) + return; + + /* clear any pending asynchronous tag fault */ + dsb(ish); + write_sysreg_s(0, SYS_TFSRE0_EL1); + clear_thread_flag(TIF_MTE_ASYNC_FAULT); + /* disable tag checking */ + set_sctlr_el1_tcf0(SCTLR_EL1_TCF0_NONE); + /* reset tag generation mask */ + set_gcr_el1_excl(0); +} + +void mte_thread_switch(struct task_struct *next) +{ + if (!system_supports_mte()) + return; + + /* avoid expensive SCTLR_EL1 accesses if no change */ + if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0) + update_sctlr_el1_tcf0(next->thread.sctlr_tcf0); + update_gcr_el1_excl(next->thread.gcr_user_incl); +} + +void mte_suspend_exit(void) +{ + if (!system_supports_mte()) + return; + + update_gcr_el1_excl(current->thread.gcr_user_incl); +} + +long set_mte_ctrl(struct task_struct *task, unsigned long arg) +{ + u64 tcf0; + u64 gcr_incl = (arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT; + + if (!system_supports_mte()) + return 0; + + switch (arg & PR_MTE_TCF_MASK) { + case PR_MTE_TCF_NONE: + tcf0 = SCTLR_EL1_TCF0_NONE; + break; + case PR_MTE_TCF_SYNC: + tcf0 = SCTLR_EL1_TCF0_SYNC; + break; + case PR_MTE_TCF_ASYNC: + tcf0 = SCTLR_EL1_TCF0_ASYNC; + break; + default: + return -EINVAL; + } + + if (task != current) { + task->thread.sctlr_tcf0 = tcf0; + task->thread.gcr_user_incl = gcr_incl; + } else { + set_sctlr_el1_tcf0(tcf0); + set_gcr_el1_excl(gcr_incl); + } + + return 0; +} + +long get_mte_ctrl(struct task_struct *task) +{ + unsigned long ret; + + if (!system_supports_mte()) + return 0; + + ret = task->thread.gcr_user_incl << PR_MTE_TAG_SHIFT; + + switch (task->thread.sctlr_tcf0) { + case SCTLR_EL1_TCF0_NONE: + return PR_MTE_TCF_NONE; + case SCTLR_EL1_TCF0_SYNC: + ret |= PR_MTE_TCF_SYNC; + break; + case SCTLR_EL1_TCF0_ASYNC: + ret |= PR_MTE_TCF_ASYNC; + break; + } + + return ret; +} + +/* + * Access MTE tags in another process' address space as given in mm. Update + * the number of tags copied. Return 0 if any tags copied, error otherwise. + * Inspired by __access_remote_vm(). + */ +static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, + struct iovec *kiov, unsigned int gup_flags) +{ + struct vm_area_struct *vma; + void __user *buf = kiov->iov_base; + size_t len = kiov->iov_len; + int ret; + int write = gup_flags & FOLL_WRITE; + + if (!access_ok(buf, len)) + return -EFAULT; + + if (mmap_read_lock_killable(mm)) + return -EIO; + + while (len) { + unsigned long tags, offset; + void *maddr; + struct page *page = NULL; + + ret = get_user_pages_remote(mm, addr, 1, gup_flags, &page, + &vma, NULL); + if (ret <= 0) + break; + + /* + * Only copy tags if the page has been mapped as PROT_MTE + * (PG_mte_tagged set). Otherwise the tags are not valid and + * not accessible to user. Moreover, an mprotect(PROT_MTE) + * would cause the existing tags to be cleared if the page + * was never mapped with PROT_MTE. + */ + if (!test_bit(PG_mte_tagged, &page->flags)) { + ret = -EOPNOTSUPP; + put_page(page); + break; + } + + /* limit access to the end of the page */ + offset = offset_in_page(addr); + tags = min(len, (PAGE_SIZE - offset) / MTE_GRANULE_SIZE); + + maddr = page_address(page); + if (write) { + tags = mte_copy_tags_from_user(maddr + offset, buf, tags); + set_page_dirty_lock(page); + } else { + tags = mte_copy_tags_to_user(buf, maddr + offset, tags); + } + put_page(page); + + /* error accessing the tracer's buffer */ + if (!tags) + break; + + len -= tags; + buf += tags; + addr += tags * MTE_GRANULE_SIZE; + } + mmap_read_unlock(mm); + + /* return an error if no tags copied */ + kiov->iov_len = buf - kiov->iov_base; + if (!kiov->iov_len) { + /* check for error accessing the tracee's address space */ + if (ret <= 0) + return -EIO; + else + return -EFAULT; + } + + return 0; +} + +/* + * Copy MTE tags in another process' address space at 'addr' to/from tracer's + * iovec buffer. Return 0 on success. Inspired by ptrace_access_vm(). + */ +static int access_remote_tags(struct task_struct *tsk, unsigned long addr, + struct iovec *kiov, unsigned int gup_flags) +{ + struct mm_struct *mm; + int ret; + + mm = get_task_mm(tsk); + if (!mm) + return -EPERM; + + if (!tsk->ptrace || (current != tsk->parent) || + ((get_dumpable(mm) != SUID_DUMP_USER) && + !ptracer_capable(tsk, mm->user_ns))) { + mmput(mm); + return -EPERM; + } + + ret = __access_remote_tags(mm, addr, kiov, gup_flags); + mmput(mm); + + return ret; +} + +int mte_ptrace_copy_tags(struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + int ret; + struct iovec kiov; + struct iovec __user *uiov = (void __user *)data; + unsigned int gup_flags = FOLL_FORCE; + + if (!system_supports_mte()) + return -EIO; + + if (get_user(kiov.iov_base, &uiov->iov_base) || + get_user(kiov.iov_len, &uiov->iov_len)) + return -EFAULT; + + if (request == PTRACE_POKEMTETAGS) + gup_flags |= FOLL_WRITE; + + /* align addr to the MTE tag granule */ + addr &= MTE_GRANULE_MASK; + + ret = access_remote_tags(child, addr, &kiov, gup_flags); + if (!ret) + ret = put_user(kiov.iov_len, &uiov->iov_len); + + return ret; +} diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c index b0e03e052dd1..88ff471b0bce 100644 --- a/arch/arm64/kernel/perf_callchain.c +++ b/arch/arm64/kernel/perf_callchain.c @@ -137,11 +137,11 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, * whist unwinding the stackframe and is like a subroutine return so we use * the PC. */ -static int callchain_trace(struct stackframe *frame, void *data) +static bool callchain_trace(void *data, unsigned long pc) { struct perf_callchain_entry_ctx *entry = data; - perf_callchain_store(entry, frame->pc); - return 0; + perf_callchain_store(entry, pc); + return true; } void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 462f9a9cc44b..3605f77ad4df 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -69,6 +69,9 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL, [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB, + [C(LL)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD, + [C(LL)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_LL_CACHE_RD, + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED, [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED, }; @@ -302,13 +305,33 @@ static struct attribute_group armv8_pmuv3_format_attr_group = { .attrs = armv8_pmuv3_format_attrs, }; +static ssize_t slots_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct pmu *pmu = dev_get_drvdata(dev); + struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); + u32 slots = cpu_pmu->reg_pmmir & ARMV8_PMU_SLOTS_MASK; + + return snprintf(page, PAGE_SIZE, "0x%08x\n", slots); +} + +static DEVICE_ATTR_RO(slots); + +static struct attribute *armv8_pmuv3_caps_attrs[] = { + &dev_attr_slots.attr, + NULL, +}; + +static struct attribute_group armv8_pmuv3_caps_attr_group = { + .name = "caps", + .attrs = armv8_pmuv3_caps_attrs, +}; + /* * Perf Events' indices */ #define ARMV8_IDX_CYCLE_COUNTER 0 #define ARMV8_IDX_COUNTER0 1 -#define ARMV8_IDX_COUNTER_LAST(cpu_pmu) \ - (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) /* @@ -348,6 +371,73 @@ static inline bool armv8pmu_event_is_chained(struct perf_event *event) #define ARMV8_IDX_TO_COUNTER(x) \ (((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK) +/* + * This code is really good + */ + +#define PMEVN_CASE(n, case_macro) \ + case n: case_macro(n); break + +#define PMEVN_SWITCH(x, case_macro) \ + do { \ + switch (x) { \ + PMEVN_CASE(0, case_macro); \ + PMEVN_CASE(1, case_macro); \ + PMEVN_CASE(2, case_macro); \ + PMEVN_CASE(3, case_macro); \ + PMEVN_CASE(4, case_macro); \ + PMEVN_CASE(5, case_macro); \ + PMEVN_CASE(6, case_macro); \ + PMEVN_CASE(7, case_macro); \ + PMEVN_CASE(8, case_macro); \ + PMEVN_CASE(9, case_macro); \ + PMEVN_CASE(10, case_macro); \ + PMEVN_CASE(11, case_macro); \ + PMEVN_CASE(12, case_macro); \ + PMEVN_CASE(13, case_macro); \ + PMEVN_CASE(14, case_macro); \ + PMEVN_CASE(15, case_macro); \ + PMEVN_CASE(16, case_macro); \ + PMEVN_CASE(17, case_macro); \ + PMEVN_CASE(18, case_macro); \ + PMEVN_CASE(19, case_macro); \ + PMEVN_CASE(20, case_macro); \ + PMEVN_CASE(21, case_macro); \ + PMEVN_CASE(22, case_macro); \ + PMEVN_CASE(23, case_macro); \ + PMEVN_CASE(24, case_macro); \ + PMEVN_CASE(25, case_macro); \ + PMEVN_CASE(26, case_macro); \ + PMEVN_CASE(27, case_macro); \ + PMEVN_CASE(28, case_macro); \ + PMEVN_CASE(29, case_macro); \ + PMEVN_CASE(30, case_macro); \ + default: WARN(1, "Invalid PMEV* index\n"); \ + } \ + } while (0) + +#define RETURN_READ_PMEVCNTRN(n) \ + return read_sysreg(pmevcntr##n##_el0) +static unsigned long read_pmevcntrn(int n) +{ + PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN); + return 0; +} + +#define WRITE_PMEVCNTRN(n) \ + write_sysreg(val, pmevcntr##n##_el0) +static void write_pmevcntrn(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVCNTRN); +} + +#define WRITE_PMEVTYPERN(n) \ + write_sysreg(val, pmevtyper##n##_el0) +static void write_pmevtypern(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVTYPERN); +} + static inline u32 armv8pmu_pmcr_read(void) { return read_sysreg(pmcr_el0); @@ -365,28 +455,16 @@ static inline int armv8pmu_has_overflowed(u32 pmovsr) return pmovsr & ARMV8_PMU_OVERFLOWED_MASK; } -static inline int armv8pmu_counter_valid(struct arm_pmu *cpu_pmu, int idx) -{ - return idx >= ARMV8_IDX_CYCLE_COUNTER && - idx <= ARMV8_IDX_COUNTER_LAST(cpu_pmu); -} - static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) { return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx)); } -static inline void armv8pmu_select_counter(int idx) +static inline u32 armv8pmu_read_evcntr(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); - write_sysreg(counter, pmselr_el0); - isb(); -} -static inline u64 armv8pmu_read_evcntr(int idx) -{ - armv8pmu_select_counter(idx); - return read_sysreg(pmxevcntr_el0); + return read_pmevcntrn(counter); } static inline u64 armv8pmu_read_hw_counter(struct perf_event *event) @@ -440,15 +518,11 @@ static u64 armv8pmu_unbias_long_counter(struct perf_event *event, u64 value) static u64 armv8pmu_read_counter(struct perf_event *event) { - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; u64 value = 0; - if (!armv8pmu_counter_valid(cpu_pmu, idx)) - pr_err("CPU%u reading wrong counter %d\n", - smp_processor_id(), idx); - else if (idx == ARMV8_IDX_CYCLE_COUNTER) + if (idx == ARMV8_IDX_CYCLE_COUNTER) value = read_sysreg(pmccntr_el0); else value = armv8pmu_read_hw_counter(event); @@ -458,8 +532,9 @@ static u64 armv8pmu_read_counter(struct perf_event *event) static inline void armv8pmu_write_evcntr(int idx, u64 value) { - armv8pmu_select_counter(idx); - write_sysreg(value, pmxevcntr_el0); + u32 counter = ARMV8_IDX_TO_COUNTER(idx); + + write_pmevcntrn(counter, value); } static inline void armv8pmu_write_hw_counter(struct perf_event *event, @@ -477,16 +552,12 @@ static inline void armv8pmu_write_hw_counter(struct perf_event *event, static void armv8pmu_write_counter(struct perf_event *event, u64 value) { - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; value = armv8pmu_bias_long_counter(event, value); - if (!armv8pmu_counter_valid(cpu_pmu, idx)) - pr_err("CPU%u writing wrong counter %d\n", - smp_processor_id(), idx); - else if (idx == ARMV8_IDX_CYCLE_COUNTER) + if (idx == ARMV8_IDX_CYCLE_COUNTER) write_sysreg(value, pmccntr_el0); else armv8pmu_write_hw_counter(event, value); @@ -494,9 +565,10 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value) static inline void armv8pmu_write_evtype(int idx, u32 val) { - armv8pmu_select_counter(idx); + u32 counter = ARMV8_IDX_TO_COUNTER(idx); + val &= ARMV8_PMU_EVTYPE_MASK; - write_sysreg(val, pmxevtyper_el0); + write_pmevtypern(counter, val); } static inline void armv8pmu_write_event_type(struct perf_event *event) @@ -516,7 +588,10 @@ static inline void armv8pmu_write_event_type(struct perf_event *event) armv8pmu_write_evtype(idx - 1, hwc->config_base); armv8pmu_write_evtype(idx, chain_evt); } else { - armv8pmu_write_evtype(idx, hwc->config_base); + if (idx == ARMV8_IDX_CYCLE_COUNTER) + write_sysreg(hwc->config_base, pmccfiltr_el0); + else + armv8pmu_write_evtype(idx, hwc->config_base); } } @@ -532,6 +607,11 @@ static u32 armv8pmu_event_cnten_mask(struct perf_event *event) static inline void armv8pmu_enable_counter(u32 mask) { + /* + * Make sure event configuration register writes are visible before we + * enable the counter. + * */ + isb(); write_sysreg(mask, pmcntenset_el0); } @@ -550,6 +630,11 @@ static inline void armv8pmu_enable_event_counter(struct perf_event *event) static inline void armv8pmu_disable_counter(u32 mask) { write_sysreg(mask, pmcntenclr_el0); + /* + * Make sure the effects of disabling the counter are visible before we + * start configuring the event. + */ + isb(); } static inline void armv8pmu_disable_event_counter(struct perf_event *event) @@ -606,15 +691,10 @@ static inline u32 armv8pmu_getreset_flags(void) static void armv8pmu_enable_event(struct perf_event *event) { - unsigned long flags; - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - /* * Enable counter and interrupt, and set the counter to count * the event that we're interested in. */ - raw_spin_lock_irqsave(&events->pmu_lock, flags); /* * Disable counter @@ -622,7 +702,7 @@ static void armv8pmu_enable_event(struct perf_event *event) armv8pmu_disable_event_counter(event); /* - * Set event (if destined for PMNx counters). + * Set event. */ armv8pmu_write_event_type(event); @@ -635,21 +715,10 @@ static void armv8pmu_enable_event(struct perf_event *event) * Enable counter */ armv8pmu_enable_event_counter(event); - - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } static void armv8pmu_disable_event(struct perf_event *event) { - unsigned long flags; - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - - /* - * Disable counter and interrupt - */ - raw_spin_lock_irqsave(&events->pmu_lock, flags); - /* * Disable counter */ @@ -659,30 +728,18 @@ static void armv8pmu_disable_event(struct perf_event *event) * Disable interrupt for this counter */ armv8pmu_disable_event_irq(event); - - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } static void armv8pmu_start(struct arm_pmu *cpu_pmu) { - unsigned long flags; - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - - raw_spin_lock_irqsave(&events->pmu_lock, flags); /* Enable all counters */ armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } static void armv8pmu_stop(struct arm_pmu *cpu_pmu) { - unsigned long flags; - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - - raw_spin_lock_irqsave(&events->pmu_lock, flags); /* Disable all counters */ armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) @@ -735,20 +792,16 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) if (!armpmu_event_set_period(event)) continue; + /* + * Perf event overflow will queue the processing of the event as + * an irq_work which will be taken care of in the handling of + * IPI_IRQ_WORK. + */ if (perf_event_overflow(event, &data, regs)) cpu_pmu->disable(event); } armv8pmu_start(cpu_pmu); - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - return IRQ_HANDLED; } @@ -997,6 +1050,12 @@ static void __armv8pmu_probe_pmu(void *info) bitmap_from_arr32(cpu_pmu->pmceid_ext_bitmap, pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); + + /* store PMMIR_EL1 register for sysfs */ + if (pmuver >= ID_AA64DFR0_PMUVER_8_4 && (pmceid_raw[1] & BIT(31))) + cpu_pmu->reg_pmmir = read_cpuid(PMMIR_EL1); + else + cpu_pmu->reg_pmmir = 0; } static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) @@ -1019,7 +1078,8 @@ static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name, int (*map_event)(struct perf_event *event), const struct attribute_group *events, - const struct attribute_group *format) + const struct attribute_group *format, + const struct attribute_group *caps) { int ret = armv8pmu_probe_pmu(cpu_pmu); if (ret) @@ -1044,104 +1104,112 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name, events : &armv8_pmuv3_events_attr_group; cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = format ? format : &armv8_pmuv3_format_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_CAPS] = caps ? + caps : &armv8_pmuv3_caps_attr_group; return 0; } +static int armv8_pmu_init_nogroups(struct arm_pmu *cpu_pmu, char *name, + int (*map_event)(struct perf_event *event)) +{ + return armv8_pmu_init(cpu_pmu, name, map_event, NULL, NULL, NULL); +} + static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_pmuv3", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_pmuv3", + armv8_pmuv3_map_event); } static int armv8_a34_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a34", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a34", + armv8_pmuv3_map_event); } static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a35", - armv8_a53_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a35", + armv8_a53_map_event); } static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a53", - armv8_a53_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a53", + armv8_a53_map_event); } static int armv8_a55_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a55", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a55", + armv8_pmuv3_map_event); } static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a57", - armv8_a57_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a57", + armv8_a57_map_event); } static int armv8_a65_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a65", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a65", + armv8_pmuv3_map_event); } static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a72", - armv8_a57_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a72", + armv8_a57_map_event); } static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a73", - armv8_a73_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a73", + armv8_a73_map_event); } static int armv8_a75_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a75", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a75", + armv8_pmuv3_map_event); } static int armv8_a76_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a76", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a76", + armv8_pmuv3_map_event); } static int armv8_a77_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cortex_a77", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a77", + armv8_pmuv3_map_event); } static int armv8_e1_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_neoverse_e1", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_neoverse_e1", + armv8_pmuv3_map_event); } static int armv8_n1_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_neoverse_n1", - armv8_pmuv3_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_neoverse_n1", + armv8_pmuv3_map_event); } static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_cavium_thunder", - armv8_thunder_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cavium_thunder", + armv8_thunder_map_event); } static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init(cpu_pmu, "armv8_brcm_vulcan", - armv8_vulcan_map_event, NULL, NULL); + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_brcm_vulcan", + armv8_vulcan_map_event); } static const struct of_device_id armv8_pmu_of_device_ids[] = { diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c index 666b225aeb3a..94e8718e7229 100644 --- a/arch/arm64/kernel/perf_regs.c +++ b/arch/arm64/kernel/perf_regs.c @@ -16,7 +16,7 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) /* * Our handling of compat tasks (PERF_SAMPLE_REGS_ABI_32) is weird, but - * we're stuck with it for ABI compatability reasons. + * we're stuck with it for ABI compatibility reasons. * * For a 32-bit consumer inspecting a 32-bit task, then it will look at * the first 16 registers (see arch/arm/include/uapi/asm/perf_regs.h). diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c index 263d5fba4c8a..104101f633b1 100644 --- a/arch/arm64/kernel/probes/decode-insn.c +++ b/arch/arm64/kernel/probes/decode-insn.c @@ -29,7 +29,8 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn) aarch64_insn_is_msr_imm(insn) || aarch64_insn_is_msr_reg(insn) || aarch64_insn_is_exception(insn) || - aarch64_insn_is_eret(insn)) + aarch64_insn_is_eret(insn) || + aarch64_insn_is_eret_auth(insn)) return false; /* @@ -42,8 +43,10 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn) != AARCH64_INSN_SPCLREG_DAIF; /* - * The HINT instruction is is problematic when single-stepping, - * except for the NOP case. + * The HINT instruction is steppable only if it is in whitelist + * and the rest of other such instructions are blocked for + * single stepping as they may cause exception or other + * unintended behaviour. */ if (aarch64_insn_is_hint(insn)) return aarch64_insn_is_steppable_hint(insn); diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index f1804496b935..4784011cecac 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -21,6 +21,7 @@ #include <linux/lockdep.h> #include <linux/mman.h> #include <linux/mm.h> +#include <linux/nospec.h> #include <linux/stddef.h> #include <linux/sysctl.h> #include <linux/unistd.h> @@ -52,6 +53,7 @@ #include <asm/exec.h> #include <asm/fpsimd.h> #include <asm/mmu_context.h> +#include <asm/mte.h> #include <asm/processor.h> #include <asm/pointer_auth.h> #include <asm/stacktrace.h> @@ -239,7 +241,7 @@ static void print_pstate(struct pt_regs *regs) const char *btype_str = btypes[(pstate & PSR_BTYPE_MASK) >> PSR_BTYPE_SHIFT]; - printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO BTYPE=%s)\n", + printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO %cTCO BTYPE=%s)\n", pstate, pstate & PSR_N_BIT ? 'N' : 'n', pstate & PSR_Z_BIT ? 'Z' : 'z', @@ -251,6 +253,7 @@ static void print_pstate(struct pt_regs *regs) pstate & PSR_F_BIT ? 'F' : 'f', pstate & PSR_PAN_BIT ? '+' : '-', pstate & PSR_UAO_BIT ? '+' : '-', + pstate & PSR_TCO_BIT ? '+' : '-', btype_str); } } @@ -336,6 +339,7 @@ void flush_thread(void) tls_thread_flush(); flush_ptrace_hw_breakpoint(current); flush_tagged_addr_state(); + flush_mte_state(); } void release_thread(struct task_struct *dead_task) @@ -368,6 +372,9 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) dst->thread.sve_state = NULL; clear_tsk_thread_flag(dst, TIF_SVE); + /* clear any pending asynchronous tag fault raised by the parent */ + clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT); + return 0; } @@ -421,8 +428,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, cpus_have_const_cap(ARM64_HAS_UAO)) childregs->pstate |= PSR_UAO_BIT; - if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) - set_ssbs_bit(childregs); + spectre_v4_enable_task_mitigation(p); if (system_uses_irq_prio_masking()) childregs->pmr_save = GIC_PRIO_IRQON; @@ -472,8 +478,6 @@ void uao_thread_switch(struct task_struct *next) */ static void ssbs_thread_switch(struct task_struct *next) { - struct pt_regs *regs = task_pt_regs(next); - /* * Nothing to do for kernel threads, but 'regs' may be junk * (e.g. idle task) so check the flags and bail early. @@ -485,18 +489,10 @@ static void ssbs_thread_switch(struct task_struct *next) * If all CPUs implement the SSBS extension, then we just need to * context-switch the PSTATE field. */ - if (cpu_have_feature(cpu_feature(SSBS))) + if (cpus_have_const_cap(ARM64_SSBS)) return; - /* If the mitigation is enabled, then we leave SSBS clear. */ - if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) || - test_tsk_thread_flag(next, TIF_SSBD)) - return; - - if (compat_user_mode(regs)) - set_compat_ssbs_bit(regs); - else if (user_mode(regs)) - set_ssbs_bit(regs); + spectre_v4_enable_task_mitigation(next); } /* @@ -571,6 +567,13 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, */ dsb(ish); + /* + * MTE thread switching must happen after the DSB above to ensure that + * any asynchronous tag check faults have been logged in the TFSR*_EL1 + * registers. + */ + mte_thread_switch(next); + /* the actual thread switch */ last = cpu_switch_to(prev, next); @@ -620,6 +623,11 @@ void arch_setup_new_exec(void) current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0; ptrauth_thread_init_user(current); + + if (task_spec_ssb_noexec(current)) { + arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS, + PR_SPEC_ENABLE); + } } #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI @@ -628,11 +636,18 @@ void arch_setup_new_exec(void) */ static unsigned int tagged_addr_disabled; -long set_tagged_addr_ctrl(unsigned long arg) +long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg) { - if (is_compat_task()) + unsigned long valid_mask = PR_TAGGED_ADDR_ENABLE; + struct thread_info *ti = task_thread_info(task); + + if (is_compat_thread(ti)) return -EINVAL; - if (arg & ~PR_TAGGED_ADDR_ENABLE) + + if (system_supports_mte()) + valid_mask |= PR_MTE_TCF_MASK | PR_MTE_TAG_MASK; + + if (arg & ~valid_mask) return -EINVAL; /* @@ -642,20 +657,28 @@ long set_tagged_addr_ctrl(unsigned long arg) if (arg & PR_TAGGED_ADDR_ENABLE && tagged_addr_disabled) return -EINVAL; - update_thread_flag(TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE); + if (set_mte_ctrl(task, arg) != 0) + return -EINVAL; + + update_ti_thread_flag(ti, TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE); return 0; } -long get_tagged_addr_ctrl(void) +long get_tagged_addr_ctrl(struct task_struct *task) { - if (is_compat_task()) + long ret = 0; + struct thread_info *ti = task_thread_info(task); + + if (is_compat_thread(ti)) return -EINVAL; - if (test_thread_flag(TIF_TAGGED_ADDR)) - return PR_TAGGED_ADDR_ENABLE; + if (test_ti_thread_flag(ti, TIF_TAGGED_ADDR)) + ret = PR_TAGGED_ADDR_ENABLE; - return 0; + ret |= get_mte_ctrl(task); + + return ret; } /* diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c new file mode 100644 index 000000000000..68b710f1b43f --- /dev/null +++ b/arch/arm64/kernel/proton-pack.c @@ -0,0 +1,792 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Handle detection, reporting and mitigation of Spectre v1, v2 and v4, as + * detailed at: + * + * https://developer.arm.com/support/arm-security-updates/speculative-processor-vulnerability + * + * This code was originally written hastily under an awful lot of stress and so + * aspects of it are somewhat hacky. Unfortunately, changing anything in here + * instantly makes me feel ill. Thanks, Jann. Thann. + * + * Copyright (C) 2018 ARM Ltd, All Rights Reserved. + * Copyright (C) 2020 Google LLC + * + * "If there's something strange in your neighbourhood, who you gonna call?" + * + * Authors: Will Deacon <will@kernel.org> and Marc Zyngier <maz@kernel.org> + */ + +#include <linux/arm-smccc.h> +#include <linux/cpu.h> +#include <linux/device.h> +#include <linux/nospec.h> +#include <linux/prctl.h> +#include <linux/sched/task_stack.h> + +#include <asm/spectre.h> +#include <asm/traps.h> + +/* + * We try to ensure that the mitigation state can never change as the result of + * onlining a late CPU. + */ +static void update_mitigation_state(enum mitigation_state *oldp, + enum mitigation_state new) +{ + enum mitigation_state state; + + do { + state = READ_ONCE(*oldp); + if (new <= state) + break; + + /* Userspace almost certainly can't deal with this. */ + if (WARN_ON(system_capabilities_finalized())) + break; + } while (cmpxchg_relaxed(oldp, state, new) != state); +} + +/* + * Spectre v1. + * + * The kernel can't protect userspace for this one: it's each person for + * themselves. Advertise what we're doing and be done with it. + */ +ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); +} + +/* + * Spectre v2. + * + * This one sucks. A CPU is either: + * + * - Mitigated in hardware and advertised by ID_AA64PFR0_EL1.CSV2. + * - Mitigated in hardware and listed in our "safe list". + * - Mitigated in software by firmware. + * - Mitigated in software by a CPU-specific dance in the kernel. + * - Vulnerable. + * + * It's not unlikely for different CPUs in a big.LITTLE system to fall into + * different camps. + */ +static enum mitigation_state spectre_v2_state; + +static bool __read_mostly __nospectre_v2; +static int __init parse_spectre_v2_param(char *str) +{ + __nospectre_v2 = true; + return 0; +} +early_param("nospectre_v2", parse_spectre_v2_param); + +static bool spectre_v2_mitigations_off(void) +{ + bool ret = __nospectre_v2 || cpu_mitigations_off(); + + if (ret) + pr_info_once("spectre-v2 mitigation disabled by command line option\n"); + + return ret; +} + +ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, + char *buf) +{ + switch (spectre_v2_state) { + case SPECTRE_UNAFFECTED: + return sprintf(buf, "Not affected\n"); + case SPECTRE_MITIGATED: + return sprintf(buf, "Mitigation: Branch predictor hardening\n"); + case SPECTRE_VULNERABLE: + fallthrough; + default: + return sprintf(buf, "Vulnerable\n"); + } +} + +static enum mitigation_state spectre_v2_get_cpu_hw_mitigation_state(void) +{ + u64 pfr0; + static const struct midr_range spectre_v2_safe_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), + MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), + MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), + { /* sentinel */ } + }; + + /* If the CPU has CSV2 set, we're safe */ + pfr0 = read_cpuid(ID_AA64PFR0_EL1); + if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_CSV2_SHIFT)) + return SPECTRE_UNAFFECTED; + + /* Alternatively, we have a list of unaffected CPUs */ + if (is_midr_in_range_list(read_cpuid_id(), spectre_v2_safe_list)) + return SPECTRE_UNAFFECTED; + + return SPECTRE_VULNERABLE; +} + +#define SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED (1) + +static enum mitigation_state spectre_v2_get_cpu_fw_mitigation_state(void) +{ + int ret; + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + + ret = res.a0; + switch (ret) { + case SMCCC_RET_SUCCESS: + return SPECTRE_MITIGATED; + case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED: + return SPECTRE_UNAFFECTED; + default: + fallthrough; + case SMCCC_RET_NOT_SUPPORTED: + return SPECTRE_VULNERABLE; + } +} + +bool has_spectre_v2(const struct arm64_cpu_capabilities *entry, int scope) +{ + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + + if (spectre_v2_get_cpu_hw_mitigation_state() == SPECTRE_UNAFFECTED) + return false; + + if (spectre_v2_get_cpu_fw_mitigation_state() == SPECTRE_UNAFFECTED) + return false; + + return true; +} + +DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); + +enum mitigation_state arm64_get_spectre_v2_state(void) +{ + return spectre_v2_state; +} + +#ifdef CONFIG_KVM +#include <asm/cacheflush.h> +#include <asm/kvm_asm.h> + +atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1); + +static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, + const char *hyp_vecs_end) +{ + void *dst = lm_alias(__bp_harden_hyp_vecs + slot * SZ_2K); + int i; + + for (i = 0; i < SZ_2K; i += 0x80) + memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start); + + __flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K); +} + +static void install_bp_hardening_cb(bp_hardening_cb_t fn) +{ + static DEFINE_RAW_SPINLOCK(bp_lock); + int cpu, slot = -1; + const char *hyp_vecs_start = __smccc_workaround_1_smc; + const char *hyp_vecs_end = __smccc_workaround_1_smc + + __SMCCC_WORKAROUND_1_SMC_SZ; + + /* + * detect_harden_bp_fw() passes NULL for the hyp_vecs start/end if + * we're a guest. Skip the hyp-vectors work. + */ + if (!is_hyp_mode_available()) { + __this_cpu_write(bp_hardening_data.fn, fn); + return; + } + + raw_spin_lock(&bp_lock); + for_each_possible_cpu(cpu) { + if (per_cpu(bp_hardening_data.fn, cpu) == fn) { + slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu); + break; + } + } + + if (slot == -1) { + slot = atomic_inc_return(&arm64_el2_vector_last_slot); + BUG_ON(slot >= BP_HARDEN_EL2_SLOTS); + __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end); + } + + __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot); + __this_cpu_write(bp_hardening_data.fn, fn); + raw_spin_unlock(&bp_lock); +} +#else +static void install_bp_hardening_cb(bp_hardening_cb_t fn) +{ + __this_cpu_write(bp_hardening_data.fn, fn); +} +#endif /* CONFIG_KVM */ + +static void call_smc_arch_workaround_1(void) +{ + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); +} + +static void call_hvc_arch_workaround_1(void) +{ + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); +} + +static void qcom_link_stack_sanitisation(void) +{ + u64 tmp; + + asm volatile("mov %0, x30 \n" + ".rept 16 \n" + "bl . + 4 \n" + ".endr \n" + "mov x30, %0 \n" + : "=&r" (tmp)); +} + +static enum mitigation_state spectre_v2_enable_fw_mitigation(void) +{ + bp_hardening_cb_t cb; + enum mitigation_state state; + + state = spectre_v2_get_cpu_fw_mitigation_state(); + if (state != SPECTRE_MITIGATED) + return state; + + if (spectre_v2_mitigations_off()) + return SPECTRE_VULNERABLE; + + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: + cb = call_hvc_arch_workaround_1; + break; + + case SMCCC_CONDUIT_SMC: + cb = call_smc_arch_workaround_1; + break; + + default: + return SPECTRE_VULNERABLE; + } + + install_bp_hardening_cb(cb); + return SPECTRE_MITIGATED; +} + +static enum mitigation_state spectre_v2_enable_sw_mitigation(void) +{ + u32 midr; + + if (spectre_v2_mitigations_off()) + return SPECTRE_VULNERABLE; + + midr = read_cpuid_id(); + if (((midr & MIDR_CPU_MODEL_MASK) != MIDR_QCOM_FALKOR) && + ((midr & MIDR_CPU_MODEL_MASK) != MIDR_QCOM_FALKOR_V1)) + return SPECTRE_VULNERABLE; + + install_bp_hardening_cb(qcom_link_stack_sanitisation); + return SPECTRE_MITIGATED; +} + +void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused) +{ + enum mitigation_state state; + + WARN_ON(preemptible()); + + state = spectre_v2_get_cpu_hw_mitigation_state(); + if (state == SPECTRE_VULNERABLE) + state = spectre_v2_enable_fw_mitigation(); + if (state == SPECTRE_VULNERABLE) + state = spectre_v2_enable_sw_mitigation(); + + update_mitigation_state(&spectre_v2_state, state); +} + +/* + * Spectre v4. + * + * If you thought Spectre v2 was nasty, wait until you see this mess. A CPU is + * either: + * + * - Mitigated in hardware and listed in our "safe list". + * - Mitigated in hardware via PSTATE.SSBS. + * - Mitigated in software by firmware (sometimes referred to as SSBD). + * + * Wait, that doesn't sound so bad, does it? Keep reading... + * + * A major source of headaches is that the software mitigation is enabled both + * on a per-task basis, but can also be forced on for the kernel, necessitating + * both context-switch *and* entry/exit hooks. To make it even worse, some CPUs + * allow EL0 to toggle SSBS directly, which can end up with the prctl() state + * being stale when re-entering the kernel. The usual big.LITTLE caveats apply, + * so you can have systems that have both firmware and SSBS mitigations. This + * means we actually have to reject late onlining of CPUs with mitigations if + * all of the currently onlined CPUs are safelisted, as the mitigation tends to + * be opt-in for userspace. Yes, really, the cure is worse than the disease. + * + * The only good part is that if the firmware mitigation is present, then it is + * present for all CPUs, meaning we don't have to worry about late onlining of a + * vulnerable CPU if one of the boot CPUs is using the firmware mitigation. + * + * Give me a VAX-11/780 any day of the week... + */ +static enum mitigation_state spectre_v4_state; + +/* This is the per-cpu state tracking whether we need to talk to firmware */ +DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); + +enum spectre_v4_policy { + SPECTRE_V4_POLICY_MITIGATION_DYNAMIC, + SPECTRE_V4_POLICY_MITIGATION_ENABLED, + SPECTRE_V4_POLICY_MITIGATION_DISABLED, +}; + +static enum spectre_v4_policy __read_mostly __spectre_v4_policy; + +static const struct spectre_v4_param { + const char *str; + enum spectre_v4_policy policy; +} spectre_v4_params[] = { + { "force-on", SPECTRE_V4_POLICY_MITIGATION_ENABLED, }, + { "force-off", SPECTRE_V4_POLICY_MITIGATION_DISABLED, }, + { "kernel", SPECTRE_V4_POLICY_MITIGATION_DYNAMIC, }, +}; +static int __init parse_spectre_v4_param(char *str) +{ + int i; + + if (!str || !str[0]) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(spectre_v4_params); i++) { + const struct spectre_v4_param *param = &spectre_v4_params[i]; + + if (strncmp(str, param->str, strlen(param->str))) + continue; + + __spectre_v4_policy = param->policy; + return 0; + } + + return -EINVAL; +} +early_param("ssbd", parse_spectre_v4_param); + +/* + * Because this was all written in a rush by people working in different silos, + * we've ended up with multiple command line options to control the same thing. + * Wrap these up in some helpers, which prefer disabling the mitigation if faced + * with contradictory parameters. The mitigation is always either "off", + * "dynamic" or "on". + */ +static bool spectre_v4_mitigations_off(void) +{ + bool ret = cpu_mitigations_off() || + __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED; + + if (ret) + pr_info_once("spectre-v4 mitigation disabled by command-line option\n"); + + return ret; +} + +/* Do we need to toggle the mitigation state on entry to/exit from the kernel? */ +static bool spectre_v4_mitigations_dynamic(void) +{ + return !spectre_v4_mitigations_off() && + __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DYNAMIC; +} + +static bool spectre_v4_mitigations_on(void) +{ + return !spectre_v4_mitigations_off() && + __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_ENABLED; +} + +ssize_t cpu_show_spec_store_bypass(struct device *dev, + struct device_attribute *attr, char *buf) +{ + switch (spectre_v4_state) { + case SPECTRE_UNAFFECTED: + return sprintf(buf, "Not affected\n"); + case SPECTRE_MITIGATED: + return sprintf(buf, "Mitigation: Speculative Store Bypass disabled via prctl\n"); + case SPECTRE_VULNERABLE: + fallthrough; + default: + return sprintf(buf, "Vulnerable\n"); + } +} + +enum mitigation_state arm64_get_spectre_v4_state(void) +{ + return spectre_v4_state; +} + +static enum mitigation_state spectre_v4_get_cpu_hw_mitigation_state(void) +{ + static const struct midr_range spectre_v4_safe_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), + MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), + { /* sentinel */ }, + }; + + if (is_midr_in_range_list(read_cpuid_id(), spectre_v4_safe_list)) + return SPECTRE_UNAFFECTED; + + /* CPU features are detected first */ + if (this_cpu_has_cap(ARM64_SSBS)) + return SPECTRE_MITIGATED; + + return SPECTRE_VULNERABLE; +} + +static enum mitigation_state spectre_v4_get_cpu_fw_mitigation_state(void) +{ + int ret; + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_2, &res); + + ret = res.a0; + switch (ret) { + case SMCCC_RET_SUCCESS: + return SPECTRE_MITIGATED; + case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED: + fallthrough; + case SMCCC_RET_NOT_REQUIRED: + return SPECTRE_UNAFFECTED; + default: + fallthrough; + case SMCCC_RET_NOT_SUPPORTED: + return SPECTRE_VULNERABLE; + } +} + +bool has_spectre_v4(const struct arm64_cpu_capabilities *cap, int scope) +{ + enum mitigation_state state; + + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + + state = spectre_v4_get_cpu_hw_mitigation_state(); + if (state == SPECTRE_VULNERABLE) + state = spectre_v4_get_cpu_fw_mitigation_state(); + + return state != SPECTRE_UNAFFECTED; +} + +static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr) +{ + if (user_mode(regs)) + return 1; + + if (instr & BIT(PSTATE_Imm_shift)) + regs->pstate |= PSR_SSBS_BIT; + else + regs->pstate &= ~PSR_SSBS_BIT; + + arm64_skip_faulting_instruction(regs, 4); + return 0; +} + +static struct undef_hook ssbs_emulation_hook = { + .instr_mask = ~(1U << PSTATE_Imm_shift), + .instr_val = 0xd500401f | PSTATE_SSBS, + .fn = ssbs_emulation_handler, +}; + +static enum mitigation_state spectre_v4_enable_hw_mitigation(void) +{ + static bool undef_hook_registered = false; + static DEFINE_RAW_SPINLOCK(hook_lock); + enum mitigation_state state; + + /* + * If the system is mitigated but this CPU doesn't have SSBS, then + * we must be on the safelist and there's nothing more to do. + */ + state = spectre_v4_get_cpu_hw_mitigation_state(); + if (state != SPECTRE_MITIGATED || !this_cpu_has_cap(ARM64_SSBS)) + return state; + + raw_spin_lock(&hook_lock); + if (!undef_hook_registered) { + register_undef_hook(&ssbs_emulation_hook); + undef_hook_registered = true; + } + raw_spin_unlock(&hook_lock); + + if (spectre_v4_mitigations_off()) { + sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS); + asm volatile(SET_PSTATE_SSBS(1)); + return SPECTRE_VULNERABLE; + } + + /* SCTLR_EL1.DSSBS was initialised to 0 during boot */ + asm volatile(SET_PSTATE_SSBS(0)); + return SPECTRE_MITIGATED; +} + +/* + * Patch a branch over the Spectre-v4 mitigation code with a NOP so that + * we fallthrough and check whether firmware needs to be called on this CPU. + */ +void __init spectre_v4_patch_fw_mitigation_enable(struct alt_instr *alt, + __le32 *origptr, + __le32 *updptr, int nr_inst) +{ + BUG_ON(nr_inst != 1); /* Branch -> NOP */ + + if (spectre_v4_mitigations_off()) + return; + + if (cpus_have_final_cap(ARM64_SSBS)) + return; + + if (spectre_v4_mitigations_dynamic()) + *updptr = cpu_to_le32(aarch64_insn_gen_nop()); +} + +/* + * Patch a NOP in the Spectre-v4 mitigation code with an SMC/HVC instruction + * to call into firmware to adjust the mitigation state. + */ +void __init spectre_v4_patch_fw_mitigation_conduit(struct alt_instr *alt, + __le32 *origptr, + __le32 *updptr, int nr_inst) +{ + u32 insn; + + BUG_ON(nr_inst != 1); /* NOP -> HVC/SMC */ + + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: + insn = aarch64_insn_get_hvc_value(); + break; + case SMCCC_CONDUIT_SMC: + insn = aarch64_insn_get_smc_value(); + break; + default: + return; + } + + *updptr = cpu_to_le32(insn); +} + +static enum mitigation_state spectre_v4_enable_fw_mitigation(void) +{ + enum mitigation_state state; + + state = spectre_v4_get_cpu_fw_mitigation_state(); + if (state != SPECTRE_MITIGATED) + return state; + + if (spectre_v4_mitigations_off()) { + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_WORKAROUND_2, false, NULL); + return SPECTRE_VULNERABLE; + } + + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_WORKAROUND_2, true, NULL); + + if (spectre_v4_mitigations_dynamic()) + __this_cpu_write(arm64_ssbd_callback_required, 1); + + return SPECTRE_MITIGATED; +} + +void spectre_v4_enable_mitigation(const struct arm64_cpu_capabilities *__unused) +{ + enum mitigation_state state; + + WARN_ON(preemptible()); + + state = spectre_v4_enable_hw_mitigation(); + if (state == SPECTRE_VULNERABLE) + state = spectre_v4_enable_fw_mitigation(); + + update_mitigation_state(&spectre_v4_state, state); +} + +static void __update_pstate_ssbs(struct pt_regs *regs, bool state) +{ + u64 bit = compat_user_mode(regs) ? PSR_AA32_SSBS_BIT : PSR_SSBS_BIT; + + if (state) + regs->pstate |= bit; + else + regs->pstate &= ~bit; +} + +void spectre_v4_enable_task_mitigation(struct task_struct *tsk) +{ + struct pt_regs *regs = task_pt_regs(tsk); + bool ssbs = false, kthread = tsk->flags & PF_KTHREAD; + + if (spectre_v4_mitigations_off()) + ssbs = true; + else if (spectre_v4_mitigations_dynamic() && !kthread) + ssbs = !test_tsk_thread_flag(tsk, TIF_SSBD); + + __update_pstate_ssbs(regs, ssbs); +} + +/* + * The Spectre-v4 mitigation can be controlled via a prctl() from userspace. + * This is interesting because the "speculation disabled" behaviour can be + * configured so that it is preserved across exec(), which means that the + * prctl() may be necessary even when PSTATE.SSBS can be toggled directly + * from userspace. + */ +static void ssbd_prctl_enable_mitigation(struct task_struct *task) +{ + task_clear_spec_ssb_noexec(task); + task_set_spec_ssb_disable(task); + set_tsk_thread_flag(task, TIF_SSBD); +} + +static void ssbd_prctl_disable_mitigation(struct task_struct *task) +{ + task_clear_spec_ssb_noexec(task); + task_clear_spec_ssb_disable(task); + clear_tsk_thread_flag(task, TIF_SSBD); +} + +static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) +{ + switch (ctrl) { + case PR_SPEC_ENABLE: + /* Enable speculation: disable mitigation */ + /* + * Force disabled speculation prevents it from being + * re-enabled. + */ + if (task_spec_ssb_force_disable(task)) + return -EPERM; + + /* + * If the mitigation is forced on, then speculation is forced + * off and we again prevent it from being re-enabled. + */ + if (spectre_v4_mitigations_on()) + return -EPERM; + + ssbd_prctl_disable_mitigation(task); + break; + case PR_SPEC_FORCE_DISABLE: + /* Force disable speculation: force enable mitigation */ + /* + * If the mitigation is forced off, then speculation is forced + * on and we prevent it from being disabled. + */ + if (spectre_v4_mitigations_off()) + return -EPERM; + + task_set_spec_ssb_force_disable(task); + fallthrough; + case PR_SPEC_DISABLE: + /* Disable speculation: enable mitigation */ + /* Same as PR_SPEC_FORCE_DISABLE */ + if (spectre_v4_mitigations_off()) + return -EPERM; + + ssbd_prctl_enable_mitigation(task); + break; + case PR_SPEC_DISABLE_NOEXEC: + /* Disable speculation until execve(): enable mitigation */ + /* + * If the mitigation state is forced one way or the other, then + * we must fail now before we try to toggle it on execve(). + */ + if (task_spec_ssb_force_disable(task) || + spectre_v4_mitigations_off() || + spectre_v4_mitigations_on()) { + return -EPERM; + } + + ssbd_prctl_enable_mitigation(task); + task_set_spec_ssb_noexec(task); + break; + default: + return -ERANGE; + } + + spectre_v4_enable_task_mitigation(task); + return 0; +} + +int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + unsigned long ctrl) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssbd_prctl_set(task, ctrl); + default: + return -ENODEV; + } +} + +static int ssbd_prctl_get(struct task_struct *task) +{ + switch (spectre_v4_state) { + case SPECTRE_UNAFFECTED: + return PR_SPEC_NOT_AFFECTED; + case SPECTRE_MITIGATED: + if (spectre_v4_mitigations_on()) + return PR_SPEC_NOT_AFFECTED; + + if (spectre_v4_mitigations_dynamic()) + break; + + /* Mitigations are disabled, so we're vulnerable. */ + fallthrough; + case SPECTRE_VULNERABLE: + fallthrough; + default: + return PR_SPEC_ENABLE; + } + + /* Check the mitigation state for this task */ + if (task_spec_ssb_force_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; + + if (task_spec_ssb_noexec(task)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE_NOEXEC; + + if (task_spec_ssb_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; + + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; +} + +int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssbd_prctl_get(task); + default: + return -ENODEV; + } +} diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index d8ebfd813e28..f49b349e16a3 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -34,6 +34,7 @@ #include <asm/cpufeature.h> #include <asm/debug-monitors.h> #include <asm/fpsimd.h> +#include <asm/mte.h> #include <asm/pointer_auth.h> #include <asm/stacktrace.h> #include <asm/syscall.h> @@ -1032,6 +1033,35 @@ static int pac_generic_keys_set(struct task_struct *target, #endif /* CONFIG_CHECKPOINT_RESTORE */ #endif /* CONFIG_ARM64_PTR_AUTH */ +#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI +static int tagged_addr_ctrl_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + long ctrl = get_tagged_addr_ctrl(target); + + if (IS_ERR_VALUE(ctrl)) + return ctrl; + + return membuf_write(&to, &ctrl, sizeof(ctrl)); +} + +static int tagged_addr_ctrl_set(struct task_struct *target, const struct + user_regset *regset, unsigned int pos, + unsigned int count, const void *kbuf, const + void __user *ubuf) +{ + int ret; + long ctrl; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1); + if (ret) + return ret; + + return set_tagged_addr_ctrl(target, ctrl); +} +#endif + enum aarch64_regset { REGSET_GPR, REGSET_FPR, @@ -1051,6 +1081,9 @@ enum aarch64_regset { REGSET_PACG_KEYS, #endif #endif +#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI + REGSET_TAGGED_ADDR_CTRL, +#endif }; static const struct user_regset aarch64_regsets[] = { @@ -1148,6 +1181,16 @@ static const struct user_regset aarch64_regsets[] = { }, #endif #endif +#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI + [REGSET_TAGGED_ADDR_CTRL] = { + .core_note_type = NT_ARM_TAGGED_ADDR_CTRL, + .n = 1, + .size = sizeof(long), + .align = sizeof(long), + .regset_get = tagged_addr_ctrl_get, + .set = tagged_addr_ctrl_set, + }, +#endif }; static const struct user_regset_view user_aarch64_view = { @@ -1691,6 +1734,12 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { + switch (request) { + case PTRACE_PEEKMTETAGS: + case PTRACE_POKEMTETAGS: + return mte_ptrace_copy_tags(child, request, addr, data); + } + return ptrace_request(child, request, addr, data); } @@ -1793,7 +1842,7 @@ void syscall_trace_exit(struct pt_regs *regs) * We also reserve IL for the kernel; SS is handled dynamically. */ #define SPSR_EL1_AARCH64_RES0_BITS \ - (GENMASK_ULL(63, 32) | GENMASK_ULL(27, 25) | GENMASK_ULL(23, 22) | \ + (GENMASK_ULL(63, 32) | GENMASK_ULL(27, 26) | GENMASK_ULL(23, 22) | \ GENMASK_ULL(20, 13) | GENMASK_ULL(5, 5)) #define SPSR_EL1_AARCH32_RES0_BITS \ (GENMASK_ULL(63, 32) | GENMASK_ULL(22, 22) | GENMASK_ULL(20, 20)) diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index 542d6edc6806..84eec95ec06c 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -36,18 +36,6 @@ SYM_CODE_START(arm64_relocate_new_kernel) mov x14, xzr /* x14 = entry ptr */ mov x13, xzr /* x13 = copy dest */ - /* Clear the sctlr_el2 flags. */ - mrs x0, CurrentEL - cmp x0, #CurrentEL_EL2 - b.ne 1f - mrs x0, sctlr_el2 - mov_q x1, SCTLR_ELx_FLAGS - bic x0, x0, x1 - pre_disable_mmu_workaround - msr sctlr_el2, x0 - isb -1: - /* Check if the new image needs relocation. */ tbnz x16, IND_DONE_BIT, .Ldone diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c index a5e8b3b9d798..a6d18755652f 100644 --- a/arch/arm64/kernel/return_address.c +++ b/arch/arm64/kernel/return_address.c @@ -18,16 +18,16 @@ struct return_address_data { void *addr; }; -static int save_return_addr(struct stackframe *frame, void *d) +static bool save_return_addr(void *d, unsigned long pc) { struct return_address_data *data = d; if (!data->level) { - data->addr = (void *)frame->pc; - return 1; + data->addr = (void *)pc; + return false; } else { --data->level; - return 0; + return true; } } NOKPROBE_SYMBOL(save_return_addr); diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 3b4f31f35e45..bdcaaf091e1e 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -244,7 +244,8 @@ static int preserve_sve_context(struct sve_context __user *ctx) if (vq) { /* * This assumes that the SVE state has already been saved to - * the task struct by calling preserve_fpsimd_context(). + * the task struct by calling the function + * fpsimd_signal_preserve_current_state(). */ err |= __copy_to_user((char __user *)ctx + SVE_SIG_REGS_OFFSET, current->thread.sve_state, @@ -748,6 +749,9 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, regs->pstate |= PSR_BTYPE_C; } + /* TCO (Tag Check Override) always cleared for signal handlers */ + regs->pstate &= ~PSR_TCO_BIT; + if (ka->sa.sa_flags & SA_RESTORER) sigtramp = ka->sa.sa_restorer; else @@ -932,6 +936,12 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, if (thread_flags & _TIF_UPROBE) uprobe_notify_resume(regs); + if (thread_flags & _TIF_MTE_ASYNC_FAULT) { + clear_thread_flag(TIF_MTE_ASYNC_FAULT); + send_sig_fault(SIGSEGV, SEGV_MTEAERR, + (void __user *)NULL, current); + } + if (thread_flags & _TIF_SIGPENDING) do_signal(regs); diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index c8a3fee00c11..5892e79fa429 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -83,9 +83,9 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu) /* * We write the release address as LE regardless of the native - * endianess of the kernel. Therefore, any boot-loaders that + * endianness of the kernel. Therefore, any boot-loaders that * read this address need to convert this address to the - * boot-loader's endianess before jumping. This is mandated by + * boot-loader's endianness before jumping. This is mandated by * the boot protocol. */ writeq_relaxed(__pa_symbol(secondary_holding_pen), release_addr); diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c deleted file mode 100644 index b26955f56750..000000000000 --- a/arch/arm64/kernel/ssbd.c +++ /dev/null @@ -1,129 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2018 ARM Ltd, All Rights Reserved. - */ - -#include <linux/compat.h> -#include <linux/errno.h> -#include <linux/prctl.h> -#include <linux/sched.h> -#include <linux/sched/task_stack.h> -#include <linux/thread_info.h> - -#include <asm/cpufeature.h> - -static void ssbd_ssbs_enable(struct task_struct *task) -{ - u64 val = is_compat_thread(task_thread_info(task)) ? - PSR_AA32_SSBS_BIT : PSR_SSBS_BIT; - - task_pt_regs(task)->pstate |= val; -} - -static void ssbd_ssbs_disable(struct task_struct *task) -{ - u64 val = is_compat_thread(task_thread_info(task)) ? - PSR_AA32_SSBS_BIT : PSR_SSBS_BIT; - - task_pt_regs(task)->pstate &= ~val; -} - -/* - * prctl interface for SSBD - */ -static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) -{ - int state = arm64_get_ssbd_state(); - - /* Unsupported */ - if (state == ARM64_SSBD_UNKNOWN) - return -ENODEV; - - /* Treat the unaffected/mitigated state separately */ - if (state == ARM64_SSBD_MITIGATED) { - switch (ctrl) { - case PR_SPEC_ENABLE: - return -EPERM; - case PR_SPEC_DISABLE: - case PR_SPEC_FORCE_DISABLE: - return 0; - } - } - - /* - * Things are a bit backward here: the arm64 internal API - * *enables the mitigation* when the userspace API *disables - * speculation*. So much fun. - */ - switch (ctrl) { - case PR_SPEC_ENABLE: - /* If speculation is force disabled, enable is not allowed */ - if (state == ARM64_SSBD_FORCE_ENABLE || - task_spec_ssb_force_disable(task)) - return -EPERM; - task_clear_spec_ssb_disable(task); - clear_tsk_thread_flag(task, TIF_SSBD); - ssbd_ssbs_enable(task); - break; - case PR_SPEC_DISABLE: - if (state == ARM64_SSBD_FORCE_DISABLE) - return -EPERM; - task_set_spec_ssb_disable(task); - set_tsk_thread_flag(task, TIF_SSBD); - ssbd_ssbs_disable(task); - break; - case PR_SPEC_FORCE_DISABLE: - if (state == ARM64_SSBD_FORCE_DISABLE) - return -EPERM; - task_set_spec_ssb_disable(task); - task_set_spec_ssb_force_disable(task); - set_tsk_thread_flag(task, TIF_SSBD); - ssbd_ssbs_disable(task); - break; - default: - return -ERANGE; - } - - return 0; -} - -int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, - unsigned long ctrl) -{ - switch (which) { - case PR_SPEC_STORE_BYPASS: - return ssbd_prctl_set(task, ctrl); - default: - return -ENODEV; - } -} - -static int ssbd_prctl_get(struct task_struct *task) -{ - switch (arm64_get_ssbd_state()) { - case ARM64_SSBD_UNKNOWN: - return -ENODEV; - case ARM64_SSBD_FORCE_ENABLE: - return PR_SPEC_DISABLE; - case ARM64_SSBD_KERNEL: - if (task_spec_ssb_force_disable(task)) - return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; - if (task_spec_ssb_disable(task)) - return PR_SPEC_PRCTL | PR_SPEC_DISABLE; - return PR_SPEC_PRCTL | PR_SPEC_ENABLE; - case ARM64_SSBD_FORCE_DISABLE: - return PR_SPEC_ENABLE; - default: - return PR_SPEC_NOT_AFFECTED; - } -} - -int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) -{ - switch (which) { - case PR_SPEC_STORE_BYPASS: - return ssbd_prctl_get(task); - default: - return -ENODEV; - } -} diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 2dd8e3b8b94b..fa56af1a59c3 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -118,12 +118,12 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) NOKPROBE_SYMBOL(unwind_frame); void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame, - int (*fn)(struct stackframe *, void *), void *data) + bool (*fn)(void *, unsigned long), void *data) { while (1) { int ret; - if (fn(frame, data)) + if (!fn(data, frame->pc)) break; ret = unwind_frame(tsk, frame); if (ret < 0) @@ -132,84 +132,89 @@ void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame, } NOKPROBE_SYMBOL(walk_stackframe); -#ifdef CONFIG_STACKTRACE -struct stack_trace_data { - struct stack_trace *trace; - unsigned int no_sched_functions; - unsigned int skip; -}; - -static int save_trace(struct stackframe *frame, void *d) +static void dump_backtrace_entry(unsigned long where, const char *loglvl) { - struct stack_trace_data *data = d; - struct stack_trace *trace = data->trace; - unsigned long addr = frame->pc; - - if (data->no_sched_functions && in_sched_functions(addr)) - return 0; - if (data->skip) { - data->skip--; - return 0; - } - - trace->entries[trace->nr_entries++] = addr; - - return trace->nr_entries >= trace->max_entries; + printk("%s %pS\n", loglvl, (void *)where); } -void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) +void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, + const char *loglvl) { - struct stack_trace_data data; struct stackframe frame; + int skip = 0; - data.trace = trace; - data.skip = trace->skip; - data.no_sched_functions = 0; + pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); - start_backtrace(&frame, regs->regs[29], regs->pc); - walk_stackframe(current, &frame, save_trace, &data); -} -EXPORT_SYMBOL_GPL(save_stack_trace_regs); + if (regs) { + if (user_mode(regs)) + return; + skip = 1; + } -static noinline void __save_stack_trace(struct task_struct *tsk, - struct stack_trace *trace, unsigned int nosched) -{ - struct stack_trace_data data; - struct stackframe frame; + if (!tsk) + tsk = current; if (!try_get_task_stack(tsk)) return; - data.trace = trace; - data.skip = trace->skip; - data.no_sched_functions = nosched; - - if (tsk != current) { - start_backtrace(&frame, thread_saved_fp(tsk), - thread_saved_pc(tsk)); - } else { - /* We don't want this function nor the caller */ - data.skip += 2; + if (tsk == current) { start_backtrace(&frame, (unsigned long)__builtin_frame_address(0), - (unsigned long)__save_stack_trace); + (unsigned long)dump_backtrace); + } else { + /* + * task blocked in __switch_to + */ + start_backtrace(&frame, + thread_saved_fp(tsk), + thread_saved_pc(tsk)); } - walk_stackframe(tsk, &frame, save_trace, &data); + printk("%sCall trace:\n", loglvl); + do { + /* skip until specified stack frame */ + if (!skip) { + dump_backtrace_entry(frame.pc, loglvl); + } else if (frame.fp == regs->regs[29]) { + skip = 0; + /* + * Mostly, this is the case where this function is + * called in panic/abort. As exception handler's + * stack frame does not contain the corresponding pc + * at which an exception has taken place, use regs->pc + * instead. + */ + dump_backtrace_entry(regs->pc, loglvl); + } + } while (!unwind_frame(tsk, &frame)); put_task_stack(tsk); } -void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) { - __save_stack_trace(tsk, trace, 1); + dump_backtrace(NULL, tsk, loglvl); + barrier(); } -EXPORT_SYMBOL_GPL(save_stack_trace_tsk); -void save_stack_trace(struct stack_trace *trace) +#ifdef CONFIG_STACKTRACE + +void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, + struct task_struct *task, struct pt_regs *regs) { - __save_stack_trace(current, trace, 0); + struct stackframe frame; + + if (regs) + start_backtrace(&frame, regs->regs[29], regs->pc); + else if (task == current) + start_backtrace(&frame, + (unsigned long)__builtin_frame_address(0), + (unsigned long)arch_stack_walk); + else + start_backtrace(&frame, thread_saved_fp(task), + thread_saved_pc(task)); + + walk_stackframe(task, &frame, consume_entry, cookie); } -EXPORT_SYMBOL_GPL(save_stack_trace); #endif diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index c1dee9066ff9..96cd347c7a46 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -10,6 +10,7 @@ #include <asm/daifflags.h> #include <asm/debug-monitors.h> #include <asm/exec.h> +#include <asm/mte.h> #include <asm/memory.h> #include <asm/mmu_context.h> #include <asm/smp_plat.h> @@ -72,8 +73,10 @@ void notrace __cpu_suspend_exit(void) * have turned the mitigation on. If the user has forcefully * disabled it, make sure their wishes are obeyed. */ - if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) - arm64_set_ssbd_mitigation(false); + spectre_v4_enable_mitigation(NULL); + + /* Restore additional MTE-specific configuration */ + mte_suspend_exit(); } /* diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 5f0c04863d2c..e4c0dadf0d92 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -123,6 +123,16 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, local_daif_restore(DAIF_PROCCTX); user_exit(); + if (system_supports_mte() && (flags & _TIF_MTE_ASYNC_FAULT)) { + /* + * Process the asynchronous tag check fault before the actual + * syscall. do_notify_resume() will send a signal to userspace + * before the syscall is restarted. + */ + regs->regs[0] = -ERESTARTNOINTR; + return; + } + if (has_syscall_work(flags)) { /* * The de-facto standard way to skip a system call using ptrace diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 0801a0f3c156..ff1dd1dbfe64 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -36,21 +36,23 @@ void store_cpu_topology(unsigned int cpuid) if (mpidr & MPIDR_UP_BITMASK) return; - /* Create cpu topology mapping based on MPIDR. */ - if (mpidr & MPIDR_MT_BITMASK) { - /* Multiprocessor system : Multi-threads per core */ - cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); - cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); - cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) | - MPIDR_AFFINITY_LEVEL(mpidr, 3) << 8; - } else { - /* Multiprocessor system : Single-thread per core */ - cpuid_topo->thread_id = -1; - cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); - cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 1) | - MPIDR_AFFINITY_LEVEL(mpidr, 2) << 8 | - MPIDR_AFFINITY_LEVEL(mpidr, 3) << 16; - } + /* + * This would be the place to create cpu topology based on MPIDR. + * + * However, it cannot be trusted to depict the actual topology; some + * pieces of the architecture enforce an artificial cap on Aff0 values + * (e.g. GICv3's ICC_SGI1R_EL1 limits it to 15), leading to an + * artificial cycling of Aff1, Aff2 and Aff3 values. IOW, these end up + * having absolutely no relationship to the actual underlying system + * topology, and cannot be reasonably used as core / package ID. + * + * If the MT bit is set, Aff0 *could* be used to define a thread ID, but + * we still wouldn't be able to obtain a sane core ID. This means we + * need to entirely ignore MPIDR for any topology deduction. + */ + cpuid_topo->thread_id = -1; + cpuid_topo->core_id = cpuid; + cpuid_topo->package_id = cpu_to_node(cpuid); pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n", cpuid, cpuid_topo->package_id, cpuid_topo->core_id, diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 13ebd5ca2070..8af4e0e85736 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -34,6 +34,7 @@ #include <asm/daifflags.h> #include <asm/debug-monitors.h> #include <asm/esr.h> +#include <asm/extable.h> #include <asm/insn.h> #include <asm/kprobes.h> #include <asm/traps.h> @@ -53,11 +54,6 @@ static const char *handler[]= { int show_unhandled_signals = 0; -static void dump_backtrace_entry(unsigned long where, const char *loglvl) -{ - printk("%s %pS\n", loglvl, (void *)where); -} - static void dump_kernel_instr(const char *lvl, struct pt_regs *regs) { unsigned long addr = instruction_pointer(regs); @@ -83,66 +79,6 @@ static void dump_kernel_instr(const char *lvl, struct pt_regs *regs) printk("%sCode: %s\n", lvl, str); } -void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, - const char *loglvl) -{ - struct stackframe frame; - int skip = 0; - - pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); - - if (regs) { - if (user_mode(regs)) - return; - skip = 1; - } - - if (!tsk) - tsk = current; - - if (!try_get_task_stack(tsk)) - return; - - if (tsk == current) { - start_backtrace(&frame, - (unsigned long)__builtin_frame_address(0), - (unsigned long)dump_backtrace); - } else { - /* - * task blocked in __switch_to - */ - start_backtrace(&frame, - thread_saved_fp(tsk), - thread_saved_pc(tsk)); - } - - printk("%sCall trace:\n", loglvl); - do { - /* skip until specified stack frame */ - if (!skip) { - dump_backtrace_entry(frame.pc, loglvl); - } else if (frame.fp == regs->regs[29]) { - skip = 0; - /* - * Mostly, this is the case where this function is - * called in panic/abort. As exception handler's - * stack frame does not contain the corresponding pc - * at which an exception has taken place, use regs->pc - * instead. - */ - dump_backtrace_entry(regs->pc, loglvl); - } - } while (!unwind_frame(tsk, &frame)); - - put_task_stack(tsk); -} - -void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) -{ - dump_backtrace(NULL, tsk, loglvl); - barrier(); -} - #ifdef CONFIG_PREEMPT #define S_PREEMPT " PREEMPT" #elif defined(CONFIG_PREEMPT_RT) @@ -200,9 +136,9 @@ void die(const char *str, struct pt_regs *regs, int err) oops_exit(); if (in_interrupt()) - panic("Fatal exception in interrupt"); + panic("%s: Fatal exception in interrupt", str); if (panic_on_oops) - panic("Fatal exception"); + panic("%s: Fatal exception", str); raw_spin_unlock_irqrestore(&die_lock, flags); @@ -412,7 +348,7 @@ exit: return fn ? fn(regs, instr) : 1; } -void force_signal_inject(int signal, int code, unsigned long address) +void force_signal_inject(int signal, int code, unsigned long address, unsigned int err) { const char *desc; struct pt_regs *regs = current_pt_regs(); @@ -438,7 +374,7 @@ void force_signal_inject(int signal, int code, unsigned long address) signal = SIGKILL; } - arm64_notify_die(desc, regs, signal, code, (void __user *)address, 0); + arm64_notify_die(desc, regs, signal, code, (void __user *)address, err); } /* @@ -455,7 +391,7 @@ void arm64_notify_segfault(unsigned long addr) code = SEGV_ACCERR; mmap_read_unlock(current->mm); - force_signal_inject(SIGSEGV, code, addr); + force_signal_inject(SIGSEGV, code, addr, 0); } void do_undefinstr(struct pt_regs *regs) @@ -468,17 +404,28 @@ void do_undefinstr(struct pt_regs *regs) return; BUG_ON(!user_mode(regs)); - force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); } NOKPROBE_SYMBOL(do_undefinstr); void do_bti(struct pt_regs *regs) { BUG_ON(!user_mode(regs)); - force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); } NOKPROBE_SYMBOL(do_bti); +void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr) +{ + /* + * Unexpected FPAC exception or pointer authentication failure in + * the kernel: kill the task before it does any more harm. + */ + BUG_ON(!user_mode(regs)); + force_signal_inject(SIGILL, ILL_ILLOPN, regs->pc, esr); +} +NOKPROBE_SYMBOL(do_ptrauth_fault); + #define __user_cache_maint(insn, address, res) \ if (address >= user_addr_max()) { \ res = -EFAULT; \ @@ -528,7 +475,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) __user_cache_maint("ic ivau", address, ret); break; default: - force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); return; } @@ -581,7 +528,7 @@ static void mrs_handler(unsigned int esr, struct pt_regs *regs) sysreg = esr_sys64_to_sysreg(esr); if (do_emulate_mrs(regs, sysreg, rt) != 0) - force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); } static void wfi_handler(unsigned int esr, struct pt_regs *regs) @@ -775,6 +722,7 @@ static const char *esr_class_str[] = { [ESR_ELx_EC_SYS64] = "MSR/MRS (AArch64)", [ESR_ELx_EC_SVE] = "SVE", [ESR_ELx_EC_ERET] = "ERET/ERETAA/ERETAB", + [ESR_ELx_EC_FPAC] = "FPAC", [ESR_ELx_EC_IMP_DEF] = "EL3 IMP DEF", [ESR_ELx_EC_IABT_LOW] = "IABT (lower EL)", [ESR_ELx_EC_IABT_CUR] = "IABT (current EL)", @@ -935,26 +883,6 @@ asmlinkage void enter_from_user_mode(void) } NOKPROBE_SYMBOL(enter_from_user_mode); -void __pte_error(const char *file, int line, unsigned long val) -{ - pr_err("%s:%d: bad pte %016lx.\n", file, line, val); -} - -void __pmd_error(const char *file, int line, unsigned long val) -{ - pr_err("%s:%d: bad pmd %016lx.\n", file, line, val); -} - -void __pud_error(const char *file, int line, unsigned long val) -{ - pr_err("%s:%d: bad pud %016lx.\n", file, line, val); -} - -void __pgd_error(const char *file, int line, unsigned long val) -{ - pr_err("%s:%d: bad pgd %016lx.\n", file, line, val); -} - /* GENERIC_BUG traps */ int is_valid_bugaddr(unsigned long addr) @@ -994,6 +922,21 @@ static struct break_hook bug_break_hook = { .imm = BUG_BRK_IMM, }; +static int reserved_fault_handler(struct pt_regs *regs, unsigned int esr) +{ + pr_err("%s generated an invalid instruction at %pS!\n", + in_bpf_jit(regs) ? "BPF JIT" : "Kernel text patching", + (void *)instruction_pointer(regs)); + + /* We cannot handle this */ + return DBG_HOOK_ERROR; +} + +static struct break_hook fault_break_hook = { + .fn = reserved_fault_handler, + .imm = FAULT_BRK_IMM, +}; + #ifdef CONFIG_KASAN_SW_TAGS #define KASAN_ESR_RECOVER 0x20 @@ -1059,6 +1002,7 @@ int __init early_brk64(unsigned long addr, unsigned int esr, void __init trap_init(void) { register_kernel_break_hook(&bug_break_hook); + register_kernel_break_hook(&fault_break_hook); #ifdef CONFIG_KASAN_SW_TAGS register_kernel_break_hook(&kasan_break_hook); #endif diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index d4202a32abc9..debb8995d57f 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -30,15 +30,11 @@ #include <asm/vdso.h> extern char vdso_start[], vdso_end[]; -#ifdef CONFIG_COMPAT_VDSO extern char vdso32_start[], vdso32_end[]; -#endif /* CONFIG_COMPAT_VDSO */ enum vdso_abi { VDSO_ABI_AA64, -#ifdef CONFIG_COMPAT_VDSO VDSO_ABI_AA32, -#endif /* CONFIG_COMPAT_VDSO */ }; enum vvar_pages { @@ -284,21 +280,17 @@ up_fail: /* * Create and map the vectors page for AArch32 tasks. */ -#ifdef CONFIG_COMPAT_VDSO static int aarch32_vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { return __vdso_remap(VDSO_ABI_AA32, sm, new_vma); } -#endif /* CONFIG_COMPAT_VDSO */ enum aarch32_map { AA32_MAP_VECTORS, /* kuser helpers */ -#ifdef CONFIG_COMPAT_VDSO + AA32_MAP_SIGPAGE, AA32_MAP_VVAR, AA32_MAP_VDSO, -#endif - AA32_MAP_SIGPAGE }; static struct page *aarch32_vectors_page __ro_after_init; @@ -309,7 +301,10 @@ static struct vm_special_mapping aarch32_vdso_maps[] = { .name = "[vectors]", /* ABI */ .pages = &aarch32_vectors_page, }, -#ifdef CONFIG_COMPAT_VDSO + [AA32_MAP_SIGPAGE] = { + .name = "[sigpage]", /* ABI */ + .pages = &aarch32_sig_page, + }, [AA32_MAP_VVAR] = { .name = "[vvar]", .fault = vvar_fault, @@ -319,11 +314,6 @@ static struct vm_special_mapping aarch32_vdso_maps[] = { .name = "[vdso]", .mremap = aarch32_vdso_mremap, }, -#endif /* CONFIG_COMPAT_VDSO */ - [AA32_MAP_SIGPAGE] = { - .name = "[sigpage]", /* ABI */ - .pages = &aarch32_sig_page, - }, }; static int aarch32_alloc_kuser_vdso_page(void) @@ -362,25 +352,25 @@ static int aarch32_alloc_sigpage(void) return 0; } -#ifdef CONFIG_COMPAT_VDSO static int __aarch32_alloc_vdso_pages(void) { + + if (!IS_ENABLED(CONFIG_COMPAT_VDSO)) + return 0; + vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_maps[AA32_MAP_VVAR]; vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_maps[AA32_MAP_VDSO]; return __vdso_init(VDSO_ABI_AA32); } -#endif /* CONFIG_COMPAT_VDSO */ static int __init aarch32_alloc_vdso_pages(void) { int ret; -#ifdef CONFIG_COMPAT_VDSO ret = __aarch32_alloc_vdso_pages(); if (ret) return ret; -#endif ret = aarch32_alloc_sigpage(); if (ret) @@ -449,14 +439,12 @@ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (ret) goto out; -#ifdef CONFIG_COMPAT_VDSO - ret = __setup_additional_pages(VDSO_ABI_AA32, - mm, - bprm, - uses_interp); - if (ret) - goto out; -#endif /* CONFIG_COMPAT_VDSO */ + if (IS_ENABLED(CONFIG_COMPAT_VDSO)) { + ret = __setup_additional_pages(VDSO_ABI_AA32, mm, bprm, + uses_interp); + if (ret) + goto out; + } ret = aarch32_sigreturn_setup(mm); out: @@ -497,8 +485,7 @@ static int __init vdso_init(void) } arch_initcall(vdso_init); -int arch_setup_additional_pages(struct linux_binprm *bprm, - int uses_interp) +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; int ret; @@ -506,11 +493,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, if (mmap_write_lock_killable(mm)) return -EINTR; - ret = __setup_additional_pages(VDSO_ABI_AA64, - mm, - bprm, - uses_interp); - + ret = __setup_additional_pages(VDSO_ABI_AA64, mm, bprm, uses_interp); mmap_write_unlock(mm); return ret; diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 7cba7623fcec..82801d98a2b7 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -105,7 +105,7 @@ SECTIONS *(.eh_frame) } - . = KIMAGE_VADDR + TEXT_OFFSET; + . = KIMAGE_VADDR; .head.text : { _text = .; @@ -274,4 +274,4 @@ ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE, /* * If padding is applied before .head.text, virt<->phys conversions will fail. */ -ASSERT(_text == (KIMAGE_VADDR + TEXT_OFFSET), "HEAD is misaligned") +ASSERT(_text == KIMAGE_VADDR, "HEAD is misaligned") diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 318c8f2df245..043756db8f6e 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -57,9 +57,6 @@ config KVM_ARM_PMU Adds support for a virtual Performance Monitoring Unit (PMU) in virtual machines. -config KVM_INDIRECT_VECTORS - def_bool HARDEN_BRANCH_PREDICTOR || RANDOMIZE_BASE - endif # KVM endif # VIRTUALIZATION diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index b588c3b5c2f0..acf9a993dfb6 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1259,6 +1259,40 @@ long kvm_arch_vm_ioctl(struct file *filp, } } +static int kvm_map_vectors(void) +{ + /* + * SV2 = ARM64_SPECTRE_V2 + * HEL2 = ARM64_HARDEN_EL2_VECTORS + * + * !SV2 + !HEL2 -> use direct vectors + * SV2 + !HEL2 -> use hardened vectors in place + * !SV2 + HEL2 -> allocate one vector slot and use exec mapping + * SV2 + HEL2 -> use hardened vectors and use exec mapping + */ + if (cpus_have_const_cap(ARM64_SPECTRE_V2)) { + __kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs); + __kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base); + } + + if (cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) { + phys_addr_t vect_pa = __pa_symbol(__bp_harden_hyp_vecs); + unsigned long size = __BP_HARDEN_HYP_VECS_SZ; + + /* + * Always allocate a spare vector slot, as we don't + * know yet which CPUs have a BP hardening slot that + * we can reuse. + */ + __kvm_harden_el2_vector_slot = atomic_inc_return(&arm64_el2_vector_last_slot); + BUG_ON(__kvm_harden_el2_vector_slot >= BP_HARDEN_EL2_SLOTS); + return create_hyp_exec_mappings(vect_pa, size, + &__kvm_bp_vect_base); + } + + return 0; +} + static void cpu_init_hyp_mode(void) { phys_addr_t pgd_ptr; @@ -1295,7 +1329,7 @@ static void cpu_init_hyp_mode(void) * at EL2. */ if (this_cpu_has_cap(ARM64_SSBS) && - arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { + arm64_get_spectre_v4_state() == SPECTRE_VULNERABLE) { kvm_call_hyp_nvhe(__kvm_enable_ssbs); } } @@ -1552,10 +1586,6 @@ static int init_hyp_mode(void) } } - err = hyp_map_aux_data(); - if (err) - kvm_err("Cannot map host auxiliary data: %d\n", err); - return 0; out_err: diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index f54f0e89a71c..d898f0da5802 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -10,5 +10,4 @@ subdir-ccflags-y := -I$(incdir) \ -DDISABLE_BRANCH_PROFILING \ $(DISABLE_STACKLEAK_PLUGIN) -obj-$(CONFIG_KVM) += vhe/ nvhe/ -obj-$(CONFIG_KVM_INDIRECT_VECTORS) += smccc_wa.o +obj-$(CONFIG_KVM) += vhe/ nvhe/ smccc_wa.o diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 46b4dab933d0..7ea277b82967 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -116,35 +116,6 @@ el1_hvc_guest: ARM_SMCCC_ARCH_WORKAROUND_2) cbnz w1, el1_trap -#ifdef CONFIG_ARM64_SSBD -alternative_cb arm64_enable_wa2_handling - b wa2_end -alternative_cb_end - get_vcpu_ptr x2, x0 - ldr x0, [x2, #VCPU_WORKAROUND_FLAGS] - - // Sanitize the argument and update the guest flags - ldr x1, [sp, #8] // Guest's x1 - clz w1, w1 // Murphy's device: - lsr w1, w1, #5 // w1 = !!w1 without using - eor w1, w1, #1 // the flags... - bfi x0, x1, #VCPU_WORKAROUND_2_FLAG_SHIFT, #1 - str x0, [x2, #VCPU_WORKAROUND_FLAGS] - - /* Check that we actually need to perform the call */ - hyp_ldr_this_cpu x0, arm64_ssbd_callback_required, x2 - cbz x0, wa2_end - - mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 - smc #0 - - /* Don't leak data from the SMC call */ - mov x3, xzr -wa2_end: - mov x2, xzr - mov x1, xzr -#endif - wa_epilogue: mov x0, xzr add sp, sp, #16 @@ -288,7 +259,6 @@ SYM_CODE_START(__kvm_hyp_vector) valid_vect el1_error // Error 32-bit EL1 SYM_CODE_END(__kvm_hyp_vector) -#ifdef CONFIG_KVM_INDIRECT_VECTORS .macro hyp_ventry .align 7 1: esb @@ -338,4 +308,3 @@ SYM_CODE_START(__bp_harden_hyp_vecs) 1: .org __bp_harden_hyp_vecs + __BP_HARDEN_HYP_VECS_SZ .org 1b SYM_CODE_END(__bp_harden_hyp_vecs) -#endif diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 0261308bf944..d0f07e8cc3ff 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -479,39 +479,6 @@ exit: return false; } -static inline bool __needs_ssbd_off(struct kvm_vcpu *vcpu) -{ - if (!cpus_have_final_cap(ARM64_SSBD)) - return false; - - return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG); -} - -static inline void __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_ARM64_SSBD - /* - * The host runs with the workaround always present. If the - * guest wants it disabled, so be it... - */ - if (__needs_ssbd_off(vcpu) && - __hyp_this_cpu_read(arm64_ssbd_callback_required)) - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL); -#endif -} - -static inline void __set_host_arch_workaround_state(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_ARM64_SSBD - /* - * If the guest has disabled the workaround, bring it back on. - */ - if (__needs_ssbd_off(vcpu) && - __hyp_this_cpu_read(arm64_ssbd_callback_required)) - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL); -#endif -} - static inline void __kvm_unexpected_el2_exception(void) { unsigned long addr, fixup; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 0970442d2dbc..8d3dd4f47924 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -202,8 +202,6 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __debug_switch_to_guest(vcpu); - __set_guest_arch_workaround_state(vcpu); - do { /* Jump in the fire! */ exit_code = __guest_enter(vcpu, host_ctxt); @@ -211,8 +209,6 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) /* And we're baaack! */ } while (fixup_guest_exit(vcpu, &exit_code)); - __set_host_arch_workaround_state(vcpu); - __sysreg_save_state_nvhe(guest_ctxt); __sysreg32_save_state(vcpu); __timer_disable_traps(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index c1da4f86ccac..ecf67e678203 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -131,8 +131,6 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) sysreg_restore_guest_state_vhe(guest_ctxt); __debug_switch_to_guest(vcpu); - __set_guest_arch_workaround_state(vcpu); - do { /* Jump in the fire! */ exit_code = __guest_enter(vcpu, host_ctxt); @@ -140,8 +138,6 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) /* And we're baaack! */ } while (fixup_guest_exit(vcpu, &exit_code)); - __set_host_arch_workaround_state(vcpu); - sysreg_save_guest_state_vhe(guest_ctxt); __deactivate_traps(vcpu); diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 550dfa3e53cd..9824025ccc5c 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -24,27 +24,36 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) feature = smccc_get_arg1(vcpu); switch (feature) { case ARM_SMCCC_ARCH_WORKAROUND_1: - switch (kvm_arm_harden_branch_predictor()) { - case KVM_BP_HARDEN_UNKNOWN: + switch (arm64_get_spectre_v2_state()) { + case SPECTRE_VULNERABLE: break; - case KVM_BP_HARDEN_WA_NEEDED: + case SPECTRE_MITIGATED: val = SMCCC_RET_SUCCESS; break; - case KVM_BP_HARDEN_NOT_REQUIRED: + case SPECTRE_UNAFFECTED: val = SMCCC_RET_NOT_REQUIRED; break; } break; case ARM_SMCCC_ARCH_WORKAROUND_2: - switch (kvm_arm_have_ssbd()) { - case KVM_SSBD_FORCE_DISABLE: - case KVM_SSBD_UNKNOWN: + switch (arm64_get_spectre_v4_state()) { + case SPECTRE_VULNERABLE: break; - case KVM_SSBD_KERNEL: - val = SMCCC_RET_SUCCESS; - break; - case KVM_SSBD_FORCE_ENABLE: - case KVM_SSBD_MITIGATED: + case SPECTRE_MITIGATED: + /* + * SSBS everywhere: Indicate no firmware + * support, as the SSBS support will be + * indicated to the guest and the default is + * safe. + * + * Otherwise, expose a permanent mitigation + * to the guest, and hide SSBS so that the + * guest stays protected. + */ + if (cpus_have_final_cap(ARM64_SSBS)) + break; + fallthrough; + case SPECTRE_UNAFFECTED: val = SMCCC_RET_NOT_REQUIRED; break; } diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index f0d0312c0a55..81916e360b1e 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -269,6 +269,7 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) kvm_pmu_release_perf_event(&pmu->pmc[i]); + irq_work_sync(&vcpu->arch.pmu.overflow_work); } u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) @@ -434,6 +435,22 @@ void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) } /** + * When perf interrupt is an NMI, we cannot safely notify the vcpu corresponding + * to the event. + * This is why we need a callback to do it once outside of the NMI context. + */ +static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work) +{ + struct kvm_vcpu *vcpu; + struct kvm_pmu *pmu; + + pmu = container_of(work, struct kvm_pmu, overflow_work); + vcpu = kvm_pmc_to_vcpu(pmu->pmc); + + kvm_vcpu_kick(vcpu); +} + +/** * When the perf event overflows, set the overflow status and inform the vcpu. */ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, @@ -465,7 +482,11 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, if (kvm_pmu_overflow_status(vcpu)) { kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); - kvm_vcpu_kick(vcpu); + + if (!in_nmi()) + kvm_vcpu_kick(vcpu); + else + irq_work_queue(&vcpu->arch.pmu.overflow_work); } cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD); @@ -764,6 +785,9 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) return ret; } + init_irq_work(&vcpu->arch.pmu.overflow_work, + kvm_pmu_perf_overflow_notify_vcpu); + vcpu->arch.pmu.created = true; return 0; } diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 83415e96b589..db4056ecccfd 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -425,27 +425,30 @@ static int get_kernel_wa_level(u64 regid) { switch (regid) { case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: - switch (kvm_arm_harden_branch_predictor()) { - case KVM_BP_HARDEN_UNKNOWN: + switch (arm64_get_spectre_v2_state()) { + case SPECTRE_VULNERABLE: return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL; - case KVM_BP_HARDEN_WA_NEEDED: + case SPECTRE_MITIGATED: return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL; - case KVM_BP_HARDEN_NOT_REQUIRED: + case SPECTRE_UNAFFECTED: return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED; } return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL; case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: - switch (kvm_arm_have_ssbd()) { - case KVM_SSBD_FORCE_DISABLE: - return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; - case KVM_SSBD_KERNEL: - return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL; - case KVM_SSBD_FORCE_ENABLE: - case KVM_SSBD_MITIGATED: + switch (arm64_get_spectre_v4_state()) { + case SPECTRE_MITIGATED: + /* + * As for the hypercall discovery, we pretend we + * don't have any FW mitigation if SSBS is there at + * all times. + */ + if (cpus_have_final_cap(ARM64_SSBS)) + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; + fallthrough; + case SPECTRE_UNAFFECTED: return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED; - case KVM_SSBD_UNKNOWN: - default: - return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN; + case SPECTRE_VULNERABLE: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; } } @@ -462,14 +465,8 @@ int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) val = kvm_psci_version(vcpu, vcpu->kvm); break; case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: - val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK; - break; case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK; - - if (val == KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL && - kvm_arm_get_vcpu_workaround_2_flag(vcpu)) - val |= KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED; break; default: return -ENOENT; @@ -527,34 +524,35 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED)) return -EINVAL; - wa_level = val & KVM_REG_FEATURE_LEVEL_MASK; - - if (get_kernel_wa_level(reg->id) < wa_level) - return -EINVAL; - /* The enabled bit must not be set unless the level is AVAIL. */ - if (wa_level != KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL && - wa_level != val) + if ((val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED) && + (val & KVM_REG_FEATURE_LEVEL_MASK) != KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL) return -EINVAL; - /* Are we finished or do we need to check the enable bit ? */ - if (kvm_arm_have_ssbd() != KVM_SSBD_KERNEL) - return 0; - /* - * If this kernel supports the workaround to be switched on - * or off, make sure it matches the requested setting. + * Map all the possible incoming states to the only two we + * really want to deal with. */ - switch (wa_level) { - case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: - kvm_arm_set_vcpu_workaround_2_flag(vcpu, - val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED); + switch (val & KVM_REG_FEATURE_LEVEL_MASK) { + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL: + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN: + wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; break; + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: - kvm_arm_set_vcpu_workaround_2_flag(vcpu, true); + wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED; break; + default: + return -EINVAL; } + /* + * We can deal with NOT_AVAIL on NOT_REQUIRED, but not the + * other way around. + */ + if (get_kernel_wa_level(reg->id) < wa_level) + return -EINVAL; + return 0; default: return -ENOENT; diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index ee33875c5c2a..f6e8b4a75cbb 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -319,10 +319,6 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) vcpu->arch.reset_state.reset = false; } - /* Default workaround setup is enabled (if supported) */ - if (kvm_arm_have_ssbd() == KVM_SSBD_KERNEL) - vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG; - /* Reset timer */ ret = kvm_timer_vcpu_reset(vcpu); out: diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 077293b5115f..9ca270603980 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1131,6 +1131,11 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, if (!vcpu_has_sve(vcpu)) val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT); val &= ~(0xfUL << ID_AA64PFR0_AMU_SHIFT); + if (!(val & (0xfUL << ID_AA64PFR0_CSV2_SHIFT)) && + arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED) + val |= (1UL << ID_AA64PFR0_CSV2_SHIFT); + } else if (id == SYS_ID_AA64PFR1_EL1) { + val &= ~(0xfUL << ID_AA64PFR1_MTE_SHIFT); } else if (id == SYS_ID_AA64ISAR1_EL1 && !vcpu_has_ptrauth(vcpu)) { val &= ~((0xfUL << ID_AA64ISAR1_APA_SHIFT) | (0xfUL << ID_AA64ISAR1_API_SHIFT) | @@ -1382,6 +1387,13 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return true; } +static bool access_mte_regs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + kvm_inject_undefined(vcpu); + return false; +} + /* sys_reg_desc initialiser for known cpufeature ID registers */ #define ID_SANITISED(name) { \ SYS_DESC(SYS_##name), \ @@ -1547,6 +1559,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_SCTLR_EL1), access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 }, { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 }, { SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 }, + + { SYS_DESC(SYS_RGSR_EL1), access_mte_regs }, + { SYS_DESC(SYS_GCR_EL1), access_mte_regs }, + { SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility }, { SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 }, { SYS_DESC(SYS_TTBR1_EL1), access_vm_reg, reset_unknown, TTBR1_EL1 }, @@ -1571,6 +1587,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi }, { SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi }, + { SYS_DESC(SYS_TFSR_EL1), access_mte_regs }, + { SYS_DESC(SYS_TFSRE0_EL1), access_mte_regs }, + { SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 }, { SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 }, diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 5c786b915cd3..52d6f24f65dc 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -1001,8 +1001,8 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1) raw_spin_lock_irqsave(&irq->irq_lock, flags); /* - * An access targetting Group0 SGIs can only generate - * those, while an access targetting Group1 SGIs can + * An access targeting Group0 SGIs can only generate + * those, while an access targeting Group1 SGIs can * generate interrupts of either group. */ if (!irq->group || allow_group1) { diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 2fc253466dbf..d31e1169d9b8 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -16,3 +16,5 @@ lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o obj-$(CONFIG_CRC32) += crc32.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o + +obj-$(CONFIG_ARM64_MTE) += mte.o diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S new file mode 100644 index 000000000000..03ca6d8b8670 --- /dev/null +++ b/arch/arm64/lib/mte.S @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 ARM Ltd. + */ +#include <linux/linkage.h> + +#include <asm/alternative.h> +#include <asm/assembler.h> +#include <asm/mte.h> +#include <asm/page.h> +#include <asm/sysreg.h> + + .arch armv8.5-a+memtag + +/* + * multitag_transfer_size - set \reg to the block size that is accessed by the + * LDGM/STGM instructions. + */ + .macro multitag_transfer_size, reg, tmp + mrs_s \reg, SYS_GMID_EL1 + ubfx \reg, \reg, #SYS_GMID_EL1_BS_SHIFT, #SYS_GMID_EL1_BS_SIZE + mov \tmp, #4 + lsl \reg, \tmp, \reg + .endm + +/* + * Clear the tags in a page + * x0 - address of the page to be cleared + */ +SYM_FUNC_START(mte_clear_page_tags) + multitag_transfer_size x1, x2 +1: stgm xzr, [x0] + add x0, x0, x1 + tst x0, #(PAGE_SIZE - 1) + b.ne 1b + ret +SYM_FUNC_END(mte_clear_page_tags) + +/* + * Copy the tags from the source page to the destination one + * x0 - address of the destination page + * x1 - address of the source page + */ +SYM_FUNC_START(mte_copy_page_tags) + mov x2, x0 + mov x3, x1 + multitag_transfer_size x5, x6 +1: ldgm x4, [x3] + stgm x4, [x2] + add x2, x2, x5 + add x3, x3, x5 + tst x2, #(PAGE_SIZE - 1) + b.ne 1b + ret +SYM_FUNC_END(mte_copy_page_tags) + +/* + * Read tags from a user buffer (one tag per byte) and set the corresponding + * tags at the given kernel address. Used by PTRACE_POKEMTETAGS. + * x0 - kernel address (to) + * x1 - user buffer (from) + * x2 - number of tags/bytes (n) + * Returns: + * x0 - number of tags read/set + */ +SYM_FUNC_START(mte_copy_tags_from_user) + mov x3, x1 + cbz x2, 2f +1: + uao_user_alternative 2f, ldrb, ldtrb, w4, x1, 0 + lsl x4, x4, #MTE_TAG_SHIFT + stg x4, [x0], #MTE_GRANULE_SIZE + add x1, x1, #1 + subs x2, x2, #1 + b.ne 1b + + // exception handling and function return +2: sub x0, x1, x3 // update the number of tags set + ret +SYM_FUNC_END(mte_copy_tags_from_user) + +/* + * Get the tags from a kernel address range and write the tag values to the + * given user buffer (one tag per byte). Used by PTRACE_PEEKMTETAGS. + * x0 - user buffer (to) + * x1 - kernel address (from) + * x2 - number of tags/bytes (n) + * Returns: + * x0 - number of tags read/set + */ +SYM_FUNC_START(mte_copy_tags_to_user) + mov x3, x0 + cbz x2, 2f +1: + ldg x4, [x1] + ubfx x4, x4, #MTE_TAG_SHIFT, #MTE_TAG_SIZE + uao_user_alternative 2f, strb, sttrb, w4, x0, 0 + add x0, x0, #1 + add x1, x1, #MTE_GRANULE_SIZE + subs x2, x2, #1 + b.ne 1b + + // exception handling and function return +2: sub x0, x0, x3 // update the number of tags copied + ret +SYM_FUNC_END(mte_copy_tags_to_user) + +/* + * Save the tags in a page + * x0 - page address + * x1 - tag storage + */ +SYM_FUNC_START(mte_save_page_tags) + multitag_transfer_size x7, x5 +1: + mov x2, #0 +2: + ldgm x5, [x0] + orr x2, x2, x5 + add x0, x0, x7 + tst x0, #0xFF // 16 tag values fit in a register, + b.ne 2b // which is 16*16=256 bytes + + str x2, [x1], #8 + + tst x0, #(PAGE_SIZE - 1) + b.ne 1b + + ret +SYM_FUNC_END(mte_save_page_tags) + +/* + * Restore the tags in a page + * x0 - page address + * x1 - tag storage + */ +SYM_FUNC_START(mte_restore_page_tags) + multitag_transfer_size x7, x5 +1: + ldr x2, [x1], #8 +2: + stgm x2, [x0] + add x0, x0, x7 + tst x0, #0xFF + b.ne 2b + + tst x0, #(PAGE_SIZE - 1) + b.ne 1b + + ret +SYM_FUNC_END(mte_restore_page_tags) diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index d91030f0ffee..5ead3c3de3b6 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -4,10 +4,11 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ ioremap.o mmap.o pgd.o mmu.o \ context.o proc.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_PTDUMP_CORE) += dump.o +obj-$(CONFIG_PTDUMP_CORE) += ptdump.o obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o +obj-$(CONFIG_ARM64_MTE) += mteswap.o KASAN_SANITIZE_physaddr.o += n obj-$(CONFIG_KASAN) += kasan_init.o diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 9b11c096a042..001737a8f309 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -27,6 +27,10 @@ static DEFINE_PER_CPU(atomic64_t, active_asids); static DEFINE_PER_CPU(u64, reserved_asids); static cpumask_t tlb_flush_pending; +static unsigned long max_pinned_asids; +static unsigned long nr_pinned_asids; +static unsigned long *pinned_asid_map; + #define ASID_MASK (~GENMASK(asid_bits - 1, 0)) #define ASID_FIRST_VERSION (1UL << asid_bits) @@ -72,7 +76,7 @@ void verify_cpu_asid_bits(void) } } -static void set_kpti_asid_bits(void) +static void set_kpti_asid_bits(unsigned long *map) { unsigned int len = BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(unsigned long); /* @@ -81,13 +85,15 @@ static void set_kpti_asid_bits(void) * is set, then the ASID will map only userspace. Thus * mark even as reserved for kernel. */ - memset(asid_map, 0xaa, len); + memset(map, 0xaa, len); } static void set_reserved_asid_bits(void) { - if (arm64_kernel_unmapped_at_el0()) - set_kpti_asid_bits(); + if (pinned_asid_map) + bitmap_copy(asid_map, pinned_asid_map, NUM_USER_ASIDS); + else if (arm64_kernel_unmapped_at_el0()) + set_kpti_asid_bits(asid_map); else bitmap_clear(asid_map, 0, NUM_USER_ASIDS); } @@ -166,6 +172,14 @@ static u64 new_context(struct mm_struct *mm) return newasid; /* + * If it is pinned, we can keep using it. Note that reserved + * takes priority, because even if it is also pinned, we need to + * update the generation into the reserved_asids. + */ + if (refcount_read(&mm->context.pinned)) + return newasid; + + /* * We had a valid ASID in a previous life, so try to re-use * it if possible. */ @@ -256,6 +270,71 @@ switch_mm_fastpath: cpu_switch_mm(mm->pgd, mm); } +unsigned long arm64_mm_context_get(struct mm_struct *mm) +{ + unsigned long flags; + u64 asid; + + if (!pinned_asid_map) + return 0; + + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + + asid = atomic64_read(&mm->context.id); + + if (refcount_inc_not_zero(&mm->context.pinned)) + goto out_unlock; + + if (nr_pinned_asids >= max_pinned_asids) { + asid = 0; + goto out_unlock; + } + + if (!asid_gen_match(asid)) { + /* + * We went through one or more rollover since that ASID was + * used. Ensure that it is still valid, or generate a new one. + */ + asid = new_context(mm); + atomic64_set(&mm->context.id, asid); + } + + nr_pinned_asids++; + __set_bit(asid2idx(asid), pinned_asid_map); + refcount_set(&mm->context.pinned, 1); + +out_unlock: + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); + + asid &= ~ASID_MASK; + + /* Set the equivalent of USER_ASID_BIT */ + if (asid && arm64_kernel_unmapped_at_el0()) + asid |= 1; + + return asid; +} +EXPORT_SYMBOL_GPL(arm64_mm_context_get); + +void arm64_mm_context_put(struct mm_struct *mm) +{ + unsigned long flags; + u64 asid = atomic64_read(&mm->context.id); + + if (!pinned_asid_map) + return; + + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + + if (refcount_dec_and_test(&mm->context.pinned)) { + __clear_bit(asid2idx(asid), pinned_asid_map); + nr_pinned_asids--; + } + + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); +} +EXPORT_SYMBOL_GPL(arm64_mm_context_put); + /* Errata workaround post TTBRx_EL1 update. */ asmlinkage void post_ttbr_update_workaround(void) { @@ -296,8 +375,11 @@ static int asids_update_limit(void) { unsigned long num_available_asids = NUM_USER_ASIDS; - if (arm64_kernel_unmapped_at_el0()) + if (arm64_kernel_unmapped_at_el0()) { num_available_asids /= 2; + if (pinned_asid_map) + set_kpti_asid_bits(pinned_asid_map); + } /* * Expect allocation after rollover to fail if we don't have at least * one more ASID than CPUs. ASID #0 is reserved for init_mm. @@ -305,6 +387,13 @@ static int asids_update_limit(void) WARN_ON(num_available_asids - 1 <= num_possible_cpus()); pr_info("ASID allocator initialised with %lu entries\n", num_available_asids); + + /* + * There must always be an ASID available after rollover. Ensure that, + * even if all CPUs have a reserved ASID and the maximum number of ASIDs + * are pinned, there still is at least one empty slot in the ASID map. + */ + max_pinned_asids = num_available_asids - num_possible_cpus() - 2; return 0; } arch_initcall(asids_update_limit); @@ -319,13 +408,17 @@ static int asids_init(void) panic("Failed to allocate bitmap for %lu ASIDs\n", NUM_USER_ASIDS); + pinned_asid_map = kcalloc(BITS_TO_LONGS(NUM_USER_ASIDS), + sizeof(*pinned_asid_map), GFP_KERNEL); + nr_pinned_asids = 0; + /* * We cannot call set_reserved_asid_bits() here because CPU * caps are not finalized yet, so it is safer to assume KPTI * and reserve kernel ASID's from beginning. */ if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) - set_kpti_asid_bits(); + set_kpti_asid_bits(asid_map); return 0; } early_initcall(asids_init); diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index 2ee7b73433a5..70a71f38b6a9 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -6,21 +6,32 @@ * Copyright (C) 2012 ARM Ltd. */ +#include <linux/bitops.h> #include <linux/mm.h> #include <asm/page.h> #include <asm/cacheflush.h> +#include <asm/cpufeature.h> +#include <asm/mte.h> -void __cpu_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr) +void copy_highpage(struct page *to, struct page *from) { - struct page *page = virt_to_page(kto); + struct page *kto = page_address(to); + struct page *kfrom = page_address(from); + copy_page(kto, kfrom); - flush_dcache_page(page); + + if (system_supports_mte() && test_bit(PG_mte_tagged, &from->flags)) { + set_bit(PG_mte_tagged, &to->flags); + mte_copy_page_tags(kto, kfrom); + } } -EXPORT_SYMBOL_GPL(__cpu_copy_user_page); +EXPORT_SYMBOL(copy_highpage); -void __cpu_clear_user_page(void *kaddr, unsigned long vaddr) +void copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) { - clear_page(kaddr); + copy_highpage(to, from); + flush_dcache_page(to); } -EXPORT_SYMBOL_GPL(__cpu_clear_user_page); +EXPORT_SYMBOL_GPL(copy_user_highpage); diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index eee1732ab6cd..aa0060178343 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -14,9 +14,7 @@ int fixup_exception(struct pt_regs *regs) if (!fixup) return 0; - if (IS_ENABLED(CONFIG_BPF_JIT) && - regs->pc >= BPF_JIT_REGION_START && - regs->pc < BPF_JIT_REGION_END) + if (in_bpf_jit(regs)) return arm64_bpf_fixup_exception(fixup, regs); regs->pc = (unsigned long)&fixup->fixup + fixup->fixup; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index f07333e86c2f..94c99c1c19e3 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -218,7 +218,9 @@ int ptep_set_access_flags(struct vm_area_struct *vma, pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval); } while (pteval != old_pteval); - flush_tlb_fix_spurious_fault(vma, address); + /* Invalidate a stale read-only entry */ + if (dirty) + flush_tlb_page(vma, address); return 1; } @@ -641,6 +643,13 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) return 0; } +static int do_tag_check_fault(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ + do_bad_area(addr, esr, regs); + return 0; +} + static const struct fault_info fault_info[] = { { do_bad, SIGKILL, SI_KERNEL, "ttbr address size fault" }, { do_bad, SIGKILL, SI_KERNEL, "level 1 address size fault" }, @@ -659,7 +668,7 @@ static const struct fault_info fault_info[] = { { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, { do_sea, SIGBUS, BUS_OBJERR, "synchronous external abort" }, - { do_bad, SIGKILL, SI_KERNEL, "unknown 17" }, + { do_tag_check_fault, SIGSEGV, SEGV_MTESERR, "synchronous tag check fault" }, { do_bad, SIGKILL, SI_KERNEL, "unknown 18" }, { do_bad, SIGKILL, SI_KERNEL, "unknown 19" }, { do_sea, SIGKILL, SI_KERNEL, "level 0 (translation table walk)" }, diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 75df62fea1b6..936c4762dadf 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -122,7 +122,7 @@ static bool pgattr_change_is_safe(u64 old, u64 new) * The following mapping attributes may be updated in live * kernel mappings without the need for break-before-make. */ - static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG; + pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG; /* creating or taking down mappings is always safe */ if (old == 0 || new == 0) @@ -136,6 +136,17 @@ static bool pgattr_change_is_safe(u64 old, u64 new) if (old & ~new & PTE_NG) return false; + /* + * Changing the memory type between Normal and Normal-Tagged is safe + * since Tagged is considered a permission attribute from the + * mismatched attribute aliases perspective. + */ + if (((old & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL) || + (old & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL_TAGGED)) && + ((new & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL) || + (new & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL_TAGGED))) + mask |= PTE_ATTRINDX_MASK; + return ((old ^ new) & ~mask) == 0; } @@ -491,7 +502,12 @@ static void __init map_mem(pgd_t *pgdp) if (memblock_is_nomap(reg)) continue; - __map_memblock(pgdp, start, end, PAGE_KERNEL, flags); + /* + * The linear map must allow allocation tags reading/writing + * if MTE is present. Otherwise, it has the same attributes as + * PAGE_KERNEL. + */ + __map_memblock(pgdp, start, end, PAGE_KERNEL_TAGGED, flags); } /* diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c new file mode 100644 index 000000000000..c52c1847079c --- /dev/null +++ b/arch/arm64/mm/mteswap.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/pagemap.h> +#include <linux/xarray.h> +#include <linux/slab.h> +#include <linux/swap.h> +#include <linux/swapops.h> +#include <asm/mte.h> + +static DEFINE_XARRAY(mte_pages); + +void *mte_allocate_tag_storage(void) +{ + /* tags granule is 16 bytes, 2 tags stored per byte */ + return kmalloc(PAGE_SIZE / 16 / 2, GFP_KERNEL); +} + +void mte_free_tag_storage(char *storage) +{ + kfree(storage); +} + +int mte_save_tags(struct page *page) +{ + void *tag_storage, *ret; + + if (!test_bit(PG_mte_tagged, &page->flags)) + return 0; + + tag_storage = mte_allocate_tag_storage(); + if (!tag_storage) + return -ENOMEM; + + mte_save_page_tags(page_address(page), tag_storage); + + /* page_private contains the swap entry.val set in do_swap_page */ + ret = xa_store(&mte_pages, page_private(page), tag_storage, GFP_KERNEL); + if (WARN(xa_is_err(ret), "Failed to store MTE tags")) { + mte_free_tag_storage(tag_storage); + return xa_err(ret); + } else if (ret) { + /* Entry is being replaced, free the old entry */ + mte_free_tag_storage(ret); + } + + return 0; +} + +bool mte_restore_tags(swp_entry_t entry, struct page *page) +{ + void *tags = xa_load(&mte_pages, entry.val); + + if (!tags) + return false; + + mte_restore_page_tags(page_address(page), tags); + + return true; +} + +void mte_invalidate_tags(int type, pgoff_t offset) +{ + swp_entry_t entry = swp_entry(type, offset); + void *tags = xa_erase(&mte_pages, entry.val); + + mte_free_tag_storage(tags); +} + +void mte_invalidate_tags_area(int type) +{ + swp_entry_t entry = swp_entry(type, 0); + swp_entry_t last_entry = swp_entry(type + 1, 0); + void *tags; + + XA_STATE(xa_state, &mte_pages, entry.val); + + xa_lock(&mte_pages); + xas_for_each(&xa_state, tags, last_entry.val - 1) { + __xa_erase(&mte_pages, xa_state.xa_index); + mte_free_tag_storage(tags); + } + xa_unlock(&mte_pages); +} diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 73f8b49d485c..676deb220b99 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -46,7 +46,11 @@ EXPORT_SYMBOL(node_to_cpumask_map); */ const struct cpumask *cpumask_of_node(int node) { - if (WARN_ON(node >= nr_node_ids)) + + if (node == NUMA_NO_NODE) + return cpu_all_mask; + + if (WARN_ON(node < 0 || node >= nr_node_ids)) return cpu_none_mask; if (WARN_ON(node_to_cpumask_map[node] == NULL)) @@ -448,7 +452,7 @@ static int __init dummy_numa_init(void) * arm64_numa_init() - Initialize NUMA * * Try each configured NUMA initialization method until one succeeds. The - * last fallback is dummy single node config encomapssing whole memory. + * last fallback is dummy single node config encompassing whole memory. */ void __init arm64_numa_init(void) { diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 23f648c2a199..1b94f5b82654 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -8,6 +8,7 @@ #include <linux/sched.h> #include <linux/vmalloc.h> +#include <asm/cacheflush.h> #include <asm/set_memory.h> #include <asm/tlbflush.h> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 796e47a571e6..23c326a06b2d 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -18,6 +18,7 @@ #include <asm/cpufeature.h> #include <asm/alternative.h> #include <asm/smp.h> +#include <asm/sysreg.h> #ifdef CONFIG_ARM64_64K_PAGES #define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K @@ -44,14 +45,18 @@ #define TCR_KASAN_FLAGS 0 #endif -/* Default MAIR_EL1 */ +/* + * Default MAIR_EL1. MT_NORMAL_TAGGED is initially mapped as Normal memory and + * changed during __cpu_setup to Normal Tagged if the system supports MTE. + */ #define MAIR_EL1_SET \ (MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) | \ MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) | \ MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) | \ MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) | \ MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \ - MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT)) + MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL_TAGGED)) #ifdef CONFIG_CPU_PM /** @@ -421,6 +426,29 @@ SYM_FUNC_START(__cpu_setup) * Memory region attributes */ mov_q x5, MAIR_EL1_SET +#ifdef CONFIG_ARM64_MTE + /* + * Update MAIR_EL1, GCR_EL1 and TFSR*_EL1 if MTE is supported + * (ID_AA64PFR1_EL1[11:8] > 1). + */ + mrs x10, ID_AA64PFR1_EL1 + ubfx x10, x10, #ID_AA64PFR1_MTE_SHIFT, #4 + cmp x10, #ID_AA64PFR1_MTE + b.lt 1f + + /* Normal Tagged memory type at the corresponding MAIR index */ + mov x10, #MAIR_ATTR_NORMAL_TAGGED + bfi x5, x10, #(8 * MT_NORMAL_TAGGED), #8 + + /* initialize GCR_EL1: all non-zero tags excluded by default */ + mov x10, #(SYS_GCR_EL1_RRND | SYS_GCR_EL1_EXCL_MASK) + msr_s SYS_GCR_EL1, x10 + + /* clear any pending tag check faults in TFSR*_EL1 */ + msr_s SYS_TFSR_EL1, xzr + msr_s SYS_TFSRE0_EL1, xzr +1: +#endif msr mair_el1, x5 /* * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/ptdump.c index 0b8da1cc1c07..807dc634bbd2 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/ptdump.c @@ -41,6 +41,8 @@ static struct addr_marker address_markers[] = { { 0 /* KASAN_SHADOW_START */, "Kasan shadow start" }, { KASAN_SHADOW_END, "Kasan shadow end" }, #endif + { BPF_JIT_REGION_START, "BPF start" }, + { BPF_JIT_REGION_END, "BPF end" }, { MODULES_VADDR, "Modules start" }, { MODULES_END, "Modules end" }, { VMALLOC_START, "vmalloc() area" }, @@ -169,6 +171,10 @@ static const struct prot_bits pte_bits[] = { .mask = PTE_ATTRINDX_MASK, .val = PTE_ATTRINDX(MT_NORMAL), .set = "MEM/NORMAL", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_NORMAL_TAGGED), + .set = "MEM/NORMAL-TAGGED", } }; diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index fc5419ac64c8..7f1266c24f6b 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -19,7 +19,7 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, unwind_for_each_frame(&state, task, regs, 0) { addr = unwind_get_return_address(&state); - if (!addr || !consume_entry(cookie, addr, false)) + if (!addr || !consume_entry(cookie, addr)) break; } } @@ -56,7 +56,7 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, return -EINVAL; #endif - if (!consume_entry(cookie, addr, false)) + if (!consume_entry(cookie, addr)) return -EINVAL; } diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index 9ccbf0576cd0..a7f3e12cfbdb 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -27,7 +27,7 @@ static inline void signal_compat_build_tests(void) */ BUILD_BUG_ON(NSIGILL != 11); BUILD_BUG_ON(NSIGFPE != 15); - BUILD_BUG_ON(NSIGSEGV != 7); + BUILD_BUG_ON(NSIGSEGV != 9); BUILD_BUG_ON(NSIGBUS != 5); BUILD_BUG_ON(NSIGTRAP != 5); BUILD_BUG_ON(NSIGCHLD != 6); diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 2fd698e28e4d..8627fda8d993 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -18,13 +18,13 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct unwind_state state; unsigned long addr; - if (regs && !consume_entry(cookie, regs->ip, false)) + if (regs && !consume_entry(cookie, regs->ip)) return; for (unwind_start(&state, task, regs, NULL); !unwind_done(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); - if (!addr || !consume_entry(cookie, addr, false)) + if (!addr || !consume_entry(cookie, addr)) break; } } @@ -72,7 +72,7 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, if (!addr) return -EINVAL; - if (!consume_entry(cookie, addr, false)) + if (!consume_entry(cookie, addr)) return -EINVAL; } @@ -114,7 +114,7 @@ void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie, { const void __user *fp = (const void __user *)regs->bp; - if (!consume_entry(cookie, regs->ip, false)) + if (!consume_entry(cookie, regs->ip)) return; while (1) { @@ -128,7 +128,7 @@ void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie, break; if (!frame.ret_addr) break; - if (!consume_entry(cookie, frame.ret_addr, false)) + if (!consume_entry(cookie, frame.ret_addr)) break; fp = frame.next_fp; } diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index ec782e4a0fe4..e670785a6201 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -811,8 +811,7 @@ static inline const struct iommu_ops *iort_fwspec_iommu_ops(struct device *dev) return (fwspec && fwspec->ops) ? fwspec->ops : NULL; } -static inline int iort_add_device_replay(const struct iommu_ops *ops, - struct device *dev) +static inline int iort_add_device_replay(struct device *dev) { int err = 0; @@ -1072,7 +1071,7 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev, */ if (!err) { ops = iort_fwspec_iommu_ops(dev); - err = iort_add_device_replay(ops, dev); + err = iort_add_device_replay(dev); } /* Ignore all other errors apart from EPROBE_DEFER */ @@ -1087,11 +1086,6 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev, } #else -static inline const struct iommu_ops *iort_fwspec_iommu_ops(struct device *dev) -{ return NULL; } -static inline int iort_add_device_replay(const struct iommu_ops *ops, - struct device *dev) -{ return 0; } int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) { return 0; } const struct iommu_ops *iort_iommu_configure_id(struct device *dev, diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index b4b9ce97f415..840754dcc6ca 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -78,11 +78,26 @@ struct sdei_crosscall_args { int first_error; }; -#define CROSSCALL_INIT(arg, event) (arg.event = event, \ - arg.first_error = 0, \ - atomic_set(&arg.errors, 0)) +#define CROSSCALL_INIT(arg, event) \ + do { \ + arg.event = event; \ + arg.first_error = 0; \ + atomic_set(&arg.errors, 0); \ + } while (0) + +static inline int sdei_do_local_call(smp_call_func_t fn, + struct sdei_event *event) +{ + struct sdei_crosscall_args arg; + + CROSSCALL_INIT(arg, event); + fn(&arg); -static inline int sdei_do_cross_call(void *fn, struct sdei_event * event) + return arg.first_error; +} + +static inline int sdei_do_cross_call(smp_call_func_t fn, + struct sdei_event *event) { struct sdei_crosscall_args arg; @@ -114,26 +129,7 @@ static int sdei_to_linux_errno(unsigned long sdei_err) return -ENOMEM; } - /* Not an error value ... */ - return sdei_err; -} - -/* - * If x0 is any of these values, then the call failed, use sdei_to_linux_errno() - * to translate. - */ -static int sdei_is_err(struct arm_smccc_res *res) -{ - switch (res->a0) { - case SDEI_NOT_SUPPORTED: - case SDEI_INVALID_PARAMETERS: - case SDEI_DENIED: - case SDEI_PENDING: - case SDEI_OUT_OF_RESOURCE: - return true; - } - - return false; + return 0; } static int invoke_sdei_fn(unsigned long function_id, unsigned long arg0, @@ -141,14 +137,13 @@ static int invoke_sdei_fn(unsigned long function_id, unsigned long arg0, unsigned long arg3, unsigned long arg4, u64 *result) { - int err = 0; + int err; struct arm_smccc_res res; if (sdei_firmware_call) { sdei_firmware_call(function_id, arg0, arg1, arg2, arg3, arg4, &res); - if (sdei_is_err(&res)) - err = sdei_to_linux_errno(res.a0); + err = sdei_to_linux_errno(res.a0); } else { /* * !sdei_firmware_call means we failed to probe or called @@ -210,36 +205,34 @@ static struct sdei_event *sdei_event_create(u32 event_num, lockdep_assert_held(&sdei_events_lock); event = kzalloc(sizeof(*event), GFP_KERNEL); - if (!event) - return ERR_PTR(-ENOMEM); + if (!event) { + err = -ENOMEM; + goto fail; + } INIT_LIST_HEAD(&event->list); event->event_num = event_num; err = sdei_api_event_get_info(event_num, SDEI_EVENT_INFO_EV_PRIORITY, &result); - if (err) { - kfree(event); - return ERR_PTR(err); - } + if (err) + goto fail; event->priority = result; err = sdei_api_event_get_info(event_num, SDEI_EVENT_INFO_EV_TYPE, &result); - if (err) { - kfree(event); - return ERR_PTR(err); - } + if (err) + goto fail; event->type = result; if (event->type == SDEI_EVENT_TYPE_SHARED) { reg = kzalloc(sizeof(*reg), GFP_KERNEL); if (!reg) { - kfree(event); - return ERR_PTR(-ENOMEM); + err = -ENOMEM; + goto fail; } - reg->event_num = event_num; + reg->event_num = event->event_num; reg->priority = event->priority; reg->callback = cb; @@ -251,8 +244,8 @@ static struct sdei_event *sdei_event_create(u32 event_num, regs = alloc_percpu(struct sdei_registered_event); if (!regs) { - kfree(event); - return ERR_PTR(-ENOMEM); + err = -ENOMEM; + goto fail; } for_each_possible_cpu(cpu) { @@ -272,6 +265,10 @@ static struct sdei_event *sdei_event_create(u32 event_num, spin_unlock(&sdei_list_lock); return event; + +fail: + kfree(event); + return ERR_PTR(err); } static void sdei_event_destroy_llocked(struct sdei_event *event) @@ -490,16 +487,6 @@ static void _local_event_unregister(void *data) sdei_cross_call_return(arg, err); } -static int _sdei_event_unregister(struct sdei_event *event) -{ - lockdep_assert_held(&sdei_events_lock); - - if (event->type == SDEI_EVENT_TYPE_SHARED) - return sdei_api_event_unregister(event->event_num); - - return sdei_do_cross_call(_local_event_unregister, event); -} - int sdei_event_unregister(u32 event_num) { int err; @@ -509,24 +496,27 @@ int sdei_event_unregister(u32 event_num) mutex_lock(&sdei_events_lock); event = sdei_event_find(event_num); - do { - if (!event) { - pr_warn("Event %u not registered\n", event_num); - err = -ENOENT; - break; - } + if (!event) { + pr_warn("Event %u not registered\n", event_num); + err = -ENOENT; + goto unlock; + } - spin_lock(&sdei_list_lock); - event->reregister = false; - event->reenable = false; - spin_unlock(&sdei_list_lock); + spin_lock(&sdei_list_lock); + event->reregister = false; + event->reenable = false; + spin_unlock(&sdei_list_lock); - err = _sdei_event_unregister(event); - if (err) - break; + if (event->type == SDEI_EVENT_TYPE_SHARED) + err = sdei_api_event_unregister(event->event_num); + else + err = sdei_do_cross_call(_local_event_unregister, event); - sdei_event_destroy(event); - } while (0); + if (err) + goto unlock; + + sdei_event_destroy(event); +unlock: mutex_unlock(&sdei_events_lock); return err; @@ -547,7 +537,7 @@ static int sdei_unregister_shared(void) if (event->type != SDEI_EVENT_TYPE_SHARED) continue; - err = _sdei_event_unregister(event); + err = sdei_api_event_unregister(event->event_num); if (err) break; } @@ -581,25 +571,6 @@ static void _local_event_register(void *data) sdei_cross_call_return(arg, err); } -static int _sdei_event_register(struct sdei_event *event) -{ - int err; - - lockdep_assert_held(&sdei_events_lock); - - if (event->type == SDEI_EVENT_TYPE_SHARED) - return sdei_api_event_register(event->event_num, - sdei_entry_point, - event->registered, - SDEI_EVENT_REGISTER_RM_ANY, 0); - - err = sdei_do_cross_call(_local_event_register, event); - if (err) - sdei_do_cross_call(_local_event_unregister, event); - - return err; -} - int sdei_event_register(u32 event_num, sdei_event_callback *cb, void *arg) { int err; @@ -608,63 +579,44 @@ int sdei_event_register(u32 event_num, sdei_event_callback *cb, void *arg) WARN_ON(in_nmi()); mutex_lock(&sdei_events_lock); - do { - if (sdei_event_find(event_num)) { - pr_warn("Event %u already registered\n", event_num); - err = -EBUSY; - break; - } - - event = sdei_event_create(event_num, cb, arg); - if (IS_ERR(event)) { - err = PTR_ERR(event); - pr_warn("Failed to create event %u: %d\n", event_num, - err); - break; - } - - cpus_read_lock(); - err = _sdei_event_register(event); - if (err) { - sdei_event_destroy(event); - pr_warn("Failed to register event %u: %d\n", event_num, - err); - } else { - spin_lock(&sdei_list_lock); - event->reregister = true; - spin_unlock(&sdei_list_lock); - } - cpus_read_unlock(); - } while (0); - mutex_unlock(&sdei_events_lock); - - return err; -} - -static int sdei_reregister_event_llocked(struct sdei_event *event) -{ - int err; - - lockdep_assert_held(&sdei_events_lock); - lockdep_assert_held(&sdei_list_lock); + if (sdei_event_find(event_num)) { + pr_warn("Event %u already registered\n", event_num); + err = -EBUSY; + goto unlock; + } - err = _sdei_event_register(event); - if (err) { - pr_err("Failed to re-register event %u\n", event->event_num); - sdei_event_destroy_llocked(event); - return err; + event = sdei_event_create(event_num, cb, arg); + if (IS_ERR(event)) { + err = PTR_ERR(event); + pr_warn("Failed to create event %u: %d\n", event_num, err); + goto unlock; } - if (event->reenable) { - if (event->type == SDEI_EVENT_TYPE_SHARED) - err = sdei_api_event_enable(event->event_num); - else - err = sdei_do_cross_call(_local_event_enable, event); + cpus_read_lock(); + if (event->type == SDEI_EVENT_TYPE_SHARED) { + err = sdei_api_event_register(event->event_num, + sdei_entry_point, + event->registered, + SDEI_EVENT_REGISTER_RM_ANY, 0); + } else { + err = sdei_do_cross_call(_local_event_register, event); + if (err) + sdei_do_cross_call(_local_event_unregister, event); } - if (err) - pr_err("Failed to re-enable event %u\n", event->event_num); + if (err) { + sdei_event_destroy(event); + pr_warn("Failed to register event %u: %d\n", event_num, err); + goto cpu_unlock; + } + spin_lock(&sdei_list_lock); + event->reregister = true; + spin_unlock(&sdei_list_lock); +cpu_unlock: + cpus_read_unlock(); +unlock: + mutex_unlock(&sdei_events_lock); return err; } @@ -680,9 +632,24 @@ static int sdei_reregister_shared(void) continue; if (event->reregister) { - err = sdei_reregister_event_llocked(event); - if (err) + err = sdei_api_event_register(event->event_num, + sdei_entry_point, event->registered, + SDEI_EVENT_REGISTER_RM_ANY, 0); + if (err) { + pr_err("Failed to re-register event %u\n", + event->event_num); + sdei_event_destroy_llocked(event); break; + } + } + + if (event->reenable) { + err = sdei_api_event_enable(event->event_num); + if (err) { + pr_err("Failed to re-enable event %u\n", + event->event_num); + break; + } } } spin_unlock(&sdei_list_lock); @@ -694,7 +661,7 @@ static int sdei_reregister_shared(void) static int sdei_cpuhp_down(unsigned int cpu) { struct sdei_event *event; - struct sdei_crosscall_args arg; + int err; /* un-register private events */ spin_lock(&sdei_list_lock); @@ -702,12 +669,11 @@ static int sdei_cpuhp_down(unsigned int cpu) if (event->type == SDEI_EVENT_TYPE_SHARED) continue; - CROSSCALL_INIT(arg, event); - /* call the cross-call function locally... */ - _local_event_unregister(&arg); - if (arg.first_error) + err = sdei_do_local_call(_local_event_unregister, event); + if (err) { pr_err("Failed to unregister event %u: %d\n", - event->event_num, arg.first_error); + event->event_num, err); + } } spin_unlock(&sdei_list_lock); @@ -717,7 +683,7 @@ static int sdei_cpuhp_down(unsigned int cpu) static int sdei_cpuhp_up(unsigned int cpu) { struct sdei_event *event; - struct sdei_crosscall_args arg; + int err; /* re-register/enable private events */ spin_lock(&sdei_list_lock); @@ -726,20 +692,19 @@ static int sdei_cpuhp_up(unsigned int cpu) continue; if (event->reregister) { - CROSSCALL_INIT(arg, event); - /* call the cross-call function locally... */ - _local_event_register(&arg); - if (arg.first_error) + err = sdei_do_local_call(_local_event_register, event); + if (err) { pr_err("Failed to re-register event %u: %d\n", - event->event_num, arg.first_error); + event->event_num, err); + } } if (event->reenable) { - CROSSCALL_INIT(arg, event); - _local_event_enable(&arg); - if (arg.first_error) + err = sdei_do_local_call(_local_event_enable, event); + if (err) { pr_err("Failed to re-enable event %u: %d\n", - event->event_num, arg.first_error); + event->event_num, err); + } } } spin_unlock(&sdei_list_lock); @@ -976,7 +941,7 @@ static int sdei_get_conduit(struct platform_device *pdev) } pr_warn("invalid \"method\" property: %s\n", method); - } else if (IS_ENABLED(CONFIG_ACPI) && !acpi_disabled) { + } else if (!acpi_disabled) { if (acpi_psci_use_hvc()) { sdei_firmware_call = &sdei_smccc_hvc; return SMCCC_CONDUIT_HVC; @@ -1000,8 +965,6 @@ static int sdei_probe(struct platform_device *pdev) return 0; err = sdei_api_get_version(&ver); - if (err == -EOPNOTSUPP) - pr_err("advertised but not implemented in platform firmware\n"); if (err) { pr_err("Failed to get SDEI version: %d\n", err); sdei_mark_interface_broken(); @@ -1099,16 +1062,20 @@ static bool __init sdei_present_acpi(void) static int __init sdei_init(void) { - int ret = platform_driver_register(&sdei_driver); - - if (!ret && sdei_present_acpi()) { - struct platform_device *pdev; - - pdev = platform_device_register_simple(sdei_driver.driver.name, - 0, NULL, 0); - if (IS_ERR(pdev)) - pr_info("Failed to register ACPI:SDEI platform device %ld\n", - PTR_ERR(pdev)); + struct platform_device *pdev; + int ret; + + ret = platform_driver_register(&sdei_driver); + if (ret || !sdei_present_acpi()) + return ret; + + pdev = platform_device_register_simple(sdei_driver.driver.name, + 0, NULL, 0); + if (IS_ERR(pdev)) { + ret = PTR_ERR(pdev); + platform_driver_unregister(&sdei_driver); + pr_info("Failed to register ACPI:SDEI platform device %d\n", + ret); } return ret; diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 296b18fbd7a2..5a80cf6bd606 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -64,7 +64,6 @@ lib-$(CONFIG_ARM) += arm32-stub.o lib-$(CONFIG_ARM64) += arm64-stub.o lib-$(CONFIG_X86) += x86-stub.o CFLAGS_arm32-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) -CFLAGS_arm64-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) # # For x86, bootloaders like systemd-boot or grub-efi do not zero-initialize the diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index e5bfac79e5ac..fc178398f1ac 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -77,7 +77,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, kernel_size = _edata - _text; kernel_memsize = kernel_size + (_end - _edata); - *reserve_size = kernel_memsize + TEXT_OFFSET % min_kimg_align(); + *reserve_size = kernel_memsize; if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && phys_seed != 0) { /* @@ -91,7 +91,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, } if (status != EFI_SUCCESS) { - if (IS_ALIGNED((u64)_text - TEXT_OFFSET, min_kimg_align())) { + if (IS_ALIGNED((u64)_text, min_kimg_align())) { /* * Just execute from wherever we were loaded by the * UEFI PE/COFF loader if the alignment is suitable. @@ -111,7 +111,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, } } - *image_addr = *reserve_addr + TEXT_OFFSET % min_kimg_align(); + *image_addr = *reserve_addr; memcpy((void *)*image_addr, _text, kernel_size); return EFI_SUCCESS; diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 7305d57d1890..130327ff0b0e 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -41,6 +41,13 @@ config ARM_CCN PMU (perf) driver supporting the ARM CCN (Cache Coherent Network) interconnect. +config ARM_CMN + tristate "Arm CMN-600 PMU support" + depends on ARM64 || (COMPILE_TEST && 64BIT) + help + Support for PMU events monitoring on the Arm CMN-600 Coherent Mesh + Network interconnect. + config ARM_PMU depends on ARM || ARM64 bool "ARM PMU framework" diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 2ebb4de17815..5365fd56f88f 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_ARM_CCI_PMU) += arm-cci.o obj-$(CONFIG_ARM_CCN) += arm-ccn.o +obj-$(CONFIG_ARM_CMN) += arm-cmn.o obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c new file mode 100644 index 000000000000..a76ff594f3ca --- /dev/null +++ b/drivers/perf/arm-cmn.c @@ -0,0 +1,1641 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2016-2020 Arm Limited +// CMN-600 Coherent Mesh Network PMU driver + +#include <linux/acpi.h> +#include <linux/bitfield.h> +#include <linux/bitops.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/perf_event.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/sort.h> + +/* Common register stuff */ +#define CMN_NODE_INFO 0x0000 +#define CMN_NI_NODE_TYPE GENMASK_ULL(15, 0) +#define CMN_NI_NODE_ID GENMASK_ULL(31, 16) +#define CMN_NI_LOGICAL_ID GENMASK_ULL(47, 32) + +#define CMN_NODEID_DEVID(reg) ((reg) & 3) +#define CMN_NODEID_PID(reg) (((reg) >> 2) & 1) +#define CMN_NODEID_X(reg, bits) ((reg) >> (3 + (bits))) +#define CMN_NODEID_Y(reg, bits) (((reg) >> 3) & ((1U << (bits)) - 1)) + +#define CMN_CHILD_INFO 0x0080 +#define CMN_CI_CHILD_COUNT GENMASK_ULL(15, 0) +#define CMN_CI_CHILD_PTR_OFFSET GENMASK_ULL(31, 16) + +#define CMN_CHILD_NODE_ADDR GENMASK(27,0) +#define CMN_CHILD_NODE_EXTERNAL BIT(31) + +#define CMN_ADDR_NODE_PTR GENMASK(27, 14) + +#define CMN_NODE_PTR_DEVID(ptr) (((ptr) >> 2) & 3) +#define CMN_NODE_PTR_PID(ptr) ((ptr) & 1) +#define CMN_NODE_PTR_X(ptr, bits) ((ptr) >> (6 + (bits))) +#define CMN_NODE_PTR_Y(ptr, bits) (((ptr) >> 6) & ((1U << (bits)) - 1)) + +#define CMN_MAX_XPS (8 * 8) + +/* The CFG node has one other useful purpose */ +#define CMN_CFGM_PERIPH_ID_2 0x0010 +#define CMN_CFGM_PID2_REVISION GENMASK(7, 4) + +/* PMU registers occupy the 3rd 4KB page of each node's 16KB space */ +#define CMN_PMU_OFFSET 0x2000 + +/* For most nodes, this is all there is */ +#define CMN_PMU_EVENT_SEL 0x000 +#define CMN_PMU_EVENTn_ID_SHIFT(n) ((n) * 8) + +/* DTMs live in the PMU space of XP registers */ +#define CMN_DTM_WPn(n) (0x1A0 + (n) * 0x18) +#define CMN_DTM_WPn_CONFIG(n) (CMN_DTM_WPn(n) + 0x00) +#define CMN_DTM_WPn_CONFIG_WP_COMBINE BIT(6) +#define CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE BIT(5) +#define CMN_DTM_WPn_CONFIG_WP_GRP BIT(4) +#define CMN_DTM_WPn_CONFIG_WP_CHN_SEL GENMASK_ULL(3, 1) +#define CMN_DTM_WPn_CONFIG_WP_DEV_SEL BIT(0) +#define CMN_DTM_WPn_VAL(n) (CMN_DTM_WPn(n) + 0x08) +#define CMN_DTM_WPn_MASK(n) (CMN_DTM_WPn(n) + 0x10) + +#define CMN_DTM_PMU_CONFIG 0x210 +#define CMN__PMEVCNT0_INPUT_SEL GENMASK_ULL(37, 32) +#define CMN__PMEVCNT0_INPUT_SEL_WP 0x00 +#define CMN__PMEVCNT0_INPUT_SEL_XP 0x04 +#define CMN__PMEVCNT0_INPUT_SEL_DEV 0x10 +#define CMN__PMEVCNT0_GLOBAL_NUM GENMASK_ULL(18, 16) +#define CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(n) ((n) * 4) +#define CMN__PMEVCNT_PAIRED(n) BIT(4 + (n)) +#define CMN__PMEVCNT23_COMBINED BIT(2) +#define CMN__PMEVCNT01_COMBINED BIT(1) +#define CMN_DTM_PMU_CONFIG_PMU_EN BIT(0) + +#define CMN_DTM_PMEVCNT 0x220 + +#define CMN_DTM_PMEVCNTSR 0x240 + +#define CMN_DTM_NUM_COUNTERS 4 + +/* The DTC node is where the magic happens */ +#define CMN_DT_DTC_CTL 0x0a00 +#define CMN_DT_DTC_CTL_DT_EN BIT(0) + +/* DTC counters are paired in 64-bit registers on a 16-byte stride. Yuck */ +#define _CMN_DT_CNT_REG(n) ((((n) / 2) * 4 + (n) % 2) * 4) +#define CMN_DT_PMEVCNT(n) (CMN_PMU_OFFSET + _CMN_DT_CNT_REG(n)) +#define CMN_DT_PMCCNTR (CMN_PMU_OFFSET + 0x40) + +#define CMN_DT_PMEVCNTSR(n) (CMN_PMU_OFFSET + 0x50 + _CMN_DT_CNT_REG(n)) +#define CMN_DT_PMCCNTRSR (CMN_PMU_OFFSET + 0x90) + +#define CMN_DT_PMCR (CMN_PMU_OFFSET + 0x100) +#define CMN_DT_PMCR_PMU_EN BIT(0) +#define CMN_DT_PMCR_CNTR_RST BIT(5) +#define CMN_DT_PMCR_OVFL_INTR_EN BIT(6) + +#define CMN_DT_PMOVSR (CMN_PMU_OFFSET + 0x118) +#define CMN_DT_PMOVSR_CLR (CMN_PMU_OFFSET + 0x120) + +#define CMN_DT_PMSSR (CMN_PMU_OFFSET + 0x128) +#define CMN_DT_PMSSR_SS_STATUS(n) BIT(n) + +#define CMN_DT_PMSRR (CMN_PMU_OFFSET + 0x130) +#define CMN_DT_PMSRR_SS_REQ BIT(0) + +#define CMN_DT_NUM_COUNTERS 8 +#define CMN_MAX_DTCS 4 + +/* + * Even in the worst case a DTC counter can't wrap in fewer than 2^42 cycles, + * so throwing away one bit to make overflow handling easy is no big deal. + */ +#define CMN_COUNTER_INIT 0x80000000 +/* Similarly for the 40-bit cycle counter */ +#define CMN_CC_INIT 0x8000000000ULL + + +/* Event attributes */ +#define CMN_CONFIG_TYPE GENMASK(15, 0) +#define CMN_CONFIG_EVENTID GENMASK(23, 16) +#define CMN_CONFIG_OCCUPID GENMASK(27, 24) +#define CMN_CONFIG_BYNODEID BIT(31) +#define CMN_CONFIG_NODEID GENMASK(47, 32) + +#define CMN_EVENT_TYPE(event) FIELD_GET(CMN_CONFIG_TYPE, (event)->attr.config) +#define CMN_EVENT_EVENTID(event) FIELD_GET(CMN_CONFIG_EVENTID, (event)->attr.config) +#define CMN_EVENT_OCCUPID(event) FIELD_GET(CMN_CONFIG_OCCUPID, (event)->attr.config) +#define CMN_EVENT_BYNODEID(event) FIELD_GET(CMN_CONFIG_BYNODEID, (event)->attr.config) +#define CMN_EVENT_NODEID(event) FIELD_GET(CMN_CONFIG_NODEID, (event)->attr.config) + +#define CMN_CONFIG_WP_COMBINE GENMASK(27, 24) +#define CMN_CONFIG_WP_DEV_SEL BIT(48) +#define CMN_CONFIG_WP_CHN_SEL GENMASK(50, 49) +#define CMN_CONFIG_WP_GRP BIT(52) +#define CMN_CONFIG_WP_EXCLUSIVE BIT(53) +#define CMN_CONFIG1_WP_VAL GENMASK(63, 0) +#define CMN_CONFIG2_WP_MASK GENMASK(63, 0) + +#define CMN_EVENT_WP_COMBINE(event) FIELD_GET(CMN_CONFIG_WP_COMBINE, (event)->attr.config) +#define CMN_EVENT_WP_DEV_SEL(event) FIELD_GET(CMN_CONFIG_WP_DEV_SEL, (event)->attr.config) +#define CMN_EVENT_WP_CHN_SEL(event) FIELD_GET(CMN_CONFIG_WP_CHN_SEL, (event)->attr.config) +#define CMN_EVENT_WP_GRP(event) FIELD_GET(CMN_CONFIG_WP_GRP, (event)->attr.config) +#define CMN_EVENT_WP_EXCLUSIVE(event) FIELD_GET(CMN_CONFIG_WP_EXCLUSIVE, (event)->attr.config) +#define CMN_EVENT_WP_VAL(event) FIELD_GET(CMN_CONFIG1_WP_VAL, (event)->attr.config1) +#define CMN_EVENT_WP_MASK(event) FIELD_GET(CMN_CONFIG2_WP_MASK, (event)->attr.config2) + +/* Made-up event IDs for watchpoint direction */ +#define CMN_WP_UP 0 +#define CMN_WP_DOWN 2 + + +/* r0px probably don't exist in silicon, thankfully */ +enum cmn_revision { + CMN600_R1P0, + CMN600_R1P1, + CMN600_R1P2, + CMN600_R1P3, + CMN600_R2P0, + CMN600_R3P0, +}; + +enum cmn_node_type { + CMN_TYPE_INVALID, + CMN_TYPE_DVM, + CMN_TYPE_CFG, + CMN_TYPE_DTC, + CMN_TYPE_HNI, + CMN_TYPE_HNF, + CMN_TYPE_XP, + CMN_TYPE_SBSX, + CMN_TYPE_RNI = 0xa, + CMN_TYPE_RND = 0xd, + CMN_TYPE_RNSAM = 0xf, + CMN_TYPE_CXRA = 0x100, + CMN_TYPE_CXHA = 0x101, + CMN_TYPE_CXLA = 0x102, + /* Not a real node type */ + CMN_TYPE_WP = 0x7770 +}; + +struct arm_cmn_node { + void __iomem *pmu_base; + u16 id, logid; + enum cmn_node_type type; + + union { + /* Device node */ + struct { + int to_xp; + /* DN/HN-F/CXHA */ + unsigned int occupid_val; + unsigned int occupid_count; + }; + /* XP */ + struct { + int dtc; + u32 pmu_config_low; + union { + u8 input_sel[4]; + __le32 pmu_config_high; + }; + s8 wp_event[4]; + }; + }; + + union { + u8 event[4]; + __le32 event_sel; + }; +}; + +struct arm_cmn_dtc { + void __iomem *base; + int irq; + int irq_friend; + bool cc_active; + + struct perf_event *counters[CMN_DT_NUM_COUNTERS]; + struct perf_event *cycles; +}; + +#define CMN_STATE_DISABLED BIT(0) +#define CMN_STATE_TXN BIT(1) + +struct arm_cmn { + struct device *dev; + void __iomem *base; + + enum cmn_revision rev; + u8 mesh_x; + u8 mesh_y; + u16 num_xps; + u16 num_dns; + struct arm_cmn_node *xps; + struct arm_cmn_node *dns; + + struct arm_cmn_dtc *dtc; + unsigned int num_dtcs; + + int cpu; + struct hlist_node cpuhp_node; + + unsigned int state; + struct pmu pmu; +}; + +#define to_cmn(p) container_of(p, struct arm_cmn, pmu) + +static int arm_cmn_hp_state; + +struct arm_cmn_hw_event { + struct arm_cmn_node *dn; + u64 dtm_idx[2]; + unsigned int dtc_idx; + u8 dtcs_used; + u8 num_dns; +}; + +#define for_each_hw_dn(hw, dn, i) \ + for (i = 0, dn = hw->dn; i < hw->num_dns; i++, dn++) + +static struct arm_cmn_hw_event *to_cmn_hw(struct perf_event *event) +{ + BUILD_BUG_ON(sizeof(struct arm_cmn_hw_event) > offsetof(struct hw_perf_event, target)); + return (struct arm_cmn_hw_event *)&event->hw; +} + +static void arm_cmn_set_index(u64 x[], unsigned int pos, unsigned int val) +{ + x[pos / 32] |= (u64)val << ((pos % 32) * 2); +} + +static unsigned int arm_cmn_get_index(u64 x[], unsigned int pos) +{ + return (x[pos / 32] >> ((pos % 32) * 2)) & 3; +} + +struct arm_cmn_event_attr { + struct device_attribute attr; + enum cmn_node_type type; + u8 eventid; + u8 occupid; +}; + +struct arm_cmn_format_attr { + struct device_attribute attr; + u64 field; + int config; +}; + +static int arm_cmn_xyidbits(const struct arm_cmn *cmn) +{ + return cmn->mesh_x > 4 || cmn->mesh_y > 4 ? 3 : 2; +} + +static void arm_cmn_init_node_to_xp(const struct arm_cmn *cmn, + struct arm_cmn_node *dn) +{ + int bits = arm_cmn_xyidbits(cmn); + int x = CMN_NODEID_X(dn->id, bits); + int y = CMN_NODEID_Y(dn->id, bits); + int xp_idx = cmn->mesh_x * y + x; + + dn->to_xp = (cmn->xps + xp_idx) - dn; +} + +static struct arm_cmn_node *arm_cmn_node_to_xp(struct arm_cmn_node *dn) +{ + return dn->type == CMN_TYPE_XP ? dn : dn + dn->to_xp; +} + +static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn, + enum cmn_node_type type) +{ + int i; + + for (i = 0; i < cmn->num_dns; i++) + if (cmn->dns[i].type == type) + return &cmn->dns[i]; + return NULL; +} + +#define CMN_EVENT_ATTR(_name, _type, _eventid, _occupid) \ + (&((struct arm_cmn_event_attr[]) {{ \ + .attr = __ATTR(_name, 0444, arm_cmn_event_show, NULL), \ + .type = _type, \ + .eventid = _eventid, \ + .occupid = _occupid, \ + }})[0].attr.attr) + +static bool arm_cmn_is_occup_event(enum cmn_node_type type, unsigned int id) +{ + return (type == CMN_TYPE_DVM && id == 0x05) || + (type == CMN_TYPE_HNF && id == 0x0f); +} + +static ssize_t arm_cmn_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct arm_cmn_event_attr *eattr; + + eattr = container_of(attr, typeof(*eattr), attr); + + if (eattr->type == CMN_TYPE_DTC) + return snprintf(buf, PAGE_SIZE, "type=0x%x\n", eattr->type); + + if (eattr->type == CMN_TYPE_WP) + return snprintf(buf, PAGE_SIZE, + "type=0x%x,eventid=0x%x,wp_dev_sel=?,wp_chn_sel=?,wp_grp=?,wp_val=?,wp_mask=?\n", + eattr->type, eattr->eventid); + + if (arm_cmn_is_occup_event(eattr->type, eattr->eventid)) + return snprintf(buf, PAGE_SIZE, "type=0x%x,eventid=0x%x,occupid=0x%x\n", + eattr->type, eattr->eventid, eattr->occupid); + + return snprintf(buf, PAGE_SIZE, "type=0x%x,eventid=0x%x\n", + eattr->type, eattr->eventid); +} + +static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, + struct attribute *attr, + int unused) +{ + struct device *dev = kobj_to_dev(kobj); + struct arm_cmn *cmn = to_cmn(dev_get_drvdata(dev)); + struct arm_cmn_event_attr *eattr; + enum cmn_node_type type; + + eattr = container_of(attr, typeof(*eattr), attr.attr); + type = eattr->type; + + /* Watchpoints aren't nodes */ + if (type == CMN_TYPE_WP) + type = CMN_TYPE_XP; + + /* Revision-specific differences */ + if (cmn->rev < CMN600_R1P2) { + if (type == CMN_TYPE_HNF && eattr->eventid == 0x1b) + return 0; + } + + if (!arm_cmn_node(cmn, type)) + return 0; + + return attr->mode; +} + +#define _CMN_EVENT_DVM(_name, _event, _occup) \ + CMN_EVENT_ATTR(dn_##_name, CMN_TYPE_DVM, _event, _occup) +#define CMN_EVENT_DTC(_name) \ + CMN_EVENT_ATTR(dtc_##_name, CMN_TYPE_DTC, 0, 0) +#define _CMN_EVENT_HNF(_name, _event, _occup) \ + CMN_EVENT_ATTR(hnf_##_name, CMN_TYPE_HNF, _event, _occup) +#define CMN_EVENT_HNI(_name, _event) \ + CMN_EVENT_ATTR(hni_##_name, CMN_TYPE_HNI, _event, 0) +#define __CMN_EVENT_XP(_name, _event) \ + CMN_EVENT_ATTR(mxp_##_name, CMN_TYPE_XP, _event, 0) +#define CMN_EVENT_SBSX(_name, _event) \ + CMN_EVENT_ATTR(sbsx_##_name, CMN_TYPE_SBSX, _event, 0) +#define CMN_EVENT_RNID(_name, _event) \ + CMN_EVENT_ATTR(rnid_##_name, CMN_TYPE_RNI, _event, 0) + +#define CMN_EVENT_DVM(_name, _event) \ + _CMN_EVENT_DVM(_name, _event, 0) +#define CMN_EVENT_HNF(_name, _event) \ + _CMN_EVENT_HNF(_name, _event, 0) +#define _CMN_EVENT_XP(_name, _event) \ + __CMN_EVENT_XP(e_##_name, (_event) | (0 << 2)), \ + __CMN_EVENT_XP(w_##_name, (_event) | (1 << 2)), \ + __CMN_EVENT_XP(n_##_name, (_event) | (2 << 2)), \ + __CMN_EVENT_XP(s_##_name, (_event) | (3 << 2)), \ + __CMN_EVENT_XP(p0_##_name, (_event) | (4 << 2)), \ + __CMN_EVENT_XP(p1_##_name, (_event) | (5 << 2)) + +/* Good thing there are only 3 fundamental XP events... */ +#define CMN_EVENT_XP(_name, _event) \ + _CMN_EVENT_XP(req_##_name, (_event) | (0 << 5)), \ + _CMN_EVENT_XP(rsp_##_name, (_event) | (1 << 5)), \ + _CMN_EVENT_XP(snp_##_name, (_event) | (2 << 5)), \ + _CMN_EVENT_XP(dat_##_name, (_event) | (3 << 5)) + + +static struct attribute *arm_cmn_event_attrs[] = { + CMN_EVENT_DTC(cycles), + + /* + * DVM node events conflict with HN-I events in the equivalent PMU + * slot, but our lazy short-cut of using the DTM counter index for + * the PMU index as well happens to avoid that by construction. + */ + CMN_EVENT_DVM(rxreq_dvmop, 0x01), + CMN_EVENT_DVM(rxreq_dvmsync, 0x02), + CMN_EVENT_DVM(rxreq_dvmop_vmid_filtered, 0x03), + CMN_EVENT_DVM(rxreq_retried, 0x04), + _CMN_EVENT_DVM(rxreq_trk_occupancy_all, 0x05, 0), + _CMN_EVENT_DVM(rxreq_trk_occupancy_dvmop, 0x05, 1), + _CMN_EVENT_DVM(rxreq_trk_occupancy_dvmsync, 0x05, 2), + + CMN_EVENT_HNF(cache_miss, 0x01), + CMN_EVENT_HNF(slc_sf_cache_access, 0x02), + CMN_EVENT_HNF(cache_fill, 0x03), + CMN_EVENT_HNF(pocq_retry, 0x04), + CMN_EVENT_HNF(pocq_reqs_recvd, 0x05), + CMN_EVENT_HNF(sf_hit, 0x06), + CMN_EVENT_HNF(sf_evictions, 0x07), + CMN_EVENT_HNF(dir_snoops_sent, 0x08), + CMN_EVENT_HNF(brd_snoops_sent, 0x09), + CMN_EVENT_HNF(slc_eviction, 0x0a), + CMN_EVENT_HNF(slc_fill_invalid_way, 0x0b), + CMN_EVENT_HNF(mc_retries, 0x0c), + CMN_EVENT_HNF(mc_reqs, 0x0d), + CMN_EVENT_HNF(qos_hh_retry, 0x0e), + _CMN_EVENT_HNF(qos_pocq_occupancy_all, 0x0f, 0), + _CMN_EVENT_HNF(qos_pocq_occupancy_read, 0x0f, 1), + _CMN_EVENT_HNF(qos_pocq_occupancy_write, 0x0f, 2), + _CMN_EVENT_HNF(qos_pocq_occupancy_atomic, 0x0f, 3), + _CMN_EVENT_HNF(qos_pocq_occupancy_stash, 0x0f, 4), + CMN_EVENT_HNF(pocq_addrhaz, 0x10), + CMN_EVENT_HNF(pocq_atomic_addrhaz, 0x11), + CMN_EVENT_HNF(ld_st_swp_adq_full, 0x12), + CMN_EVENT_HNF(cmp_adq_full, 0x13), + CMN_EVENT_HNF(txdat_stall, 0x14), + CMN_EVENT_HNF(txrsp_stall, 0x15), + CMN_EVENT_HNF(seq_full, 0x16), + CMN_EVENT_HNF(seq_hit, 0x17), + CMN_EVENT_HNF(snp_sent, 0x18), + CMN_EVENT_HNF(sfbi_dir_snp_sent, 0x19), + CMN_EVENT_HNF(sfbi_brd_snp_sent, 0x1a), + CMN_EVENT_HNF(snp_sent_untrk, 0x1b), + CMN_EVENT_HNF(intv_dirty, 0x1c), + CMN_EVENT_HNF(stash_snp_sent, 0x1d), + CMN_EVENT_HNF(stash_data_pull, 0x1e), + CMN_EVENT_HNF(snp_fwded, 0x1f), + + CMN_EVENT_HNI(rrt_rd_occ_cnt_ovfl, 0x20), + CMN_EVENT_HNI(rrt_wr_occ_cnt_ovfl, 0x21), + CMN_EVENT_HNI(rdt_rd_occ_cnt_ovfl, 0x22), + CMN_EVENT_HNI(rdt_wr_occ_cnt_ovfl, 0x23), + CMN_EVENT_HNI(wdb_occ_cnt_ovfl, 0x24), + CMN_EVENT_HNI(rrt_rd_alloc, 0x25), + CMN_EVENT_HNI(rrt_wr_alloc, 0x26), + CMN_EVENT_HNI(rdt_rd_alloc, 0x27), + CMN_EVENT_HNI(rdt_wr_alloc, 0x28), + CMN_EVENT_HNI(wdb_alloc, 0x29), + CMN_EVENT_HNI(txrsp_retryack, 0x2a), + CMN_EVENT_HNI(arvalid_no_arready, 0x2b), + CMN_EVENT_HNI(arready_no_arvalid, 0x2c), + CMN_EVENT_HNI(awvalid_no_awready, 0x2d), + CMN_EVENT_HNI(awready_no_awvalid, 0x2e), + CMN_EVENT_HNI(wvalid_no_wready, 0x2f), + CMN_EVENT_HNI(txdat_stall, 0x30), + CMN_EVENT_HNI(nonpcie_serialization, 0x31), + CMN_EVENT_HNI(pcie_serialization, 0x32), + + CMN_EVENT_XP(txflit_valid, 0x01), + CMN_EVENT_XP(txflit_stall, 0x02), + CMN_EVENT_XP(partial_dat_flit, 0x03), + /* We treat watchpoints as a special made-up class of XP events */ + CMN_EVENT_ATTR(watchpoint_up, CMN_TYPE_WP, 0, 0), + CMN_EVENT_ATTR(watchpoint_down, CMN_TYPE_WP, 2, 0), + + CMN_EVENT_SBSX(rd_req, 0x01), + CMN_EVENT_SBSX(wr_req, 0x02), + CMN_EVENT_SBSX(cmo_req, 0x03), + CMN_EVENT_SBSX(txrsp_retryack, 0x04), + CMN_EVENT_SBSX(txdat_flitv, 0x05), + CMN_EVENT_SBSX(txrsp_flitv, 0x06), + CMN_EVENT_SBSX(rd_req_trkr_occ_cnt_ovfl, 0x11), + CMN_EVENT_SBSX(wr_req_trkr_occ_cnt_ovfl, 0x12), + CMN_EVENT_SBSX(cmo_req_trkr_occ_cnt_ovfl, 0x13), + CMN_EVENT_SBSX(wdb_occ_cnt_ovfl, 0x14), + CMN_EVENT_SBSX(rd_axi_trkr_occ_cnt_ovfl, 0x15), + CMN_EVENT_SBSX(cmo_axi_trkr_occ_cnt_ovfl, 0x16), + CMN_EVENT_SBSX(arvalid_no_arready, 0x21), + CMN_EVENT_SBSX(awvalid_no_awready, 0x22), + CMN_EVENT_SBSX(wvalid_no_wready, 0x23), + CMN_EVENT_SBSX(txdat_stall, 0x24), + CMN_EVENT_SBSX(txrsp_stall, 0x25), + + CMN_EVENT_RNID(s0_rdata_beats, 0x01), + CMN_EVENT_RNID(s1_rdata_beats, 0x02), + CMN_EVENT_RNID(s2_rdata_beats, 0x03), + CMN_EVENT_RNID(rxdat_flits, 0x04), + CMN_EVENT_RNID(txdat_flits, 0x05), + CMN_EVENT_RNID(txreq_flits_total, 0x06), + CMN_EVENT_RNID(txreq_flits_retried, 0x07), + CMN_EVENT_RNID(rrt_occ_ovfl, 0x08), + CMN_EVENT_RNID(wrt_occ_ovfl, 0x09), + CMN_EVENT_RNID(txreq_flits_replayed, 0x0a), + CMN_EVENT_RNID(wrcancel_sent, 0x0b), + CMN_EVENT_RNID(s0_wdata_beats, 0x0c), + CMN_EVENT_RNID(s1_wdata_beats, 0x0d), + CMN_EVENT_RNID(s2_wdata_beats, 0x0e), + CMN_EVENT_RNID(rrt_alloc, 0x0f), + CMN_EVENT_RNID(wrt_alloc, 0x10), + CMN_EVENT_RNID(rdb_unord, 0x11), + CMN_EVENT_RNID(rdb_replay, 0x12), + CMN_EVENT_RNID(rdb_hybrid, 0x13), + CMN_EVENT_RNID(rdb_ord, 0x14), + + NULL +}; + +static const struct attribute_group arm_cmn_event_attrs_group = { + .name = "events", + .attrs = arm_cmn_event_attrs, + .is_visible = arm_cmn_event_attr_is_visible, +}; + +static ssize_t arm_cmn_format_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct arm_cmn_format_attr *fmt = container_of(attr, typeof(*fmt), attr); + int lo = __ffs(fmt->field), hi = __fls(fmt->field); + + if (lo == hi) + return snprintf(buf, PAGE_SIZE, "config:%d\n", lo); + + if (!fmt->config) + return snprintf(buf, PAGE_SIZE, "config:%d-%d\n", lo, hi); + + return snprintf(buf, PAGE_SIZE, "config%d:%d-%d\n", fmt->config, lo, hi); +} + +#define _CMN_FORMAT_ATTR(_name, _cfg, _fld) \ + (&((struct arm_cmn_format_attr[]) {{ \ + .attr = __ATTR(_name, 0444, arm_cmn_format_show, NULL), \ + .config = _cfg, \ + .field = _fld, \ + }})[0].attr.attr) +#define CMN_FORMAT_ATTR(_name, _fld) _CMN_FORMAT_ATTR(_name, 0, _fld) + +static struct attribute *arm_cmn_format_attrs[] = { + CMN_FORMAT_ATTR(type, CMN_CONFIG_TYPE), + CMN_FORMAT_ATTR(eventid, CMN_CONFIG_EVENTID), + CMN_FORMAT_ATTR(occupid, CMN_CONFIG_OCCUPID), + CMN_FORMAT_ATTR(bynodeid, CMN_CONFIG_BYNODEID), + CMN_FORMAT_ATTR(nodeid, CMN_CONFIG_NODEID), + + CMN_FORMAT_ATTR(wp_dev_sel, CMN_CONFIG_WP_DEV_SEL), + CMN_FORMAT_ATTR(wp_chn_sel, CMN_CONFIG_WP_CHN_SEL), + CMN_FORMAT_ATTR(wp_grp, CMN_CONFIG_WP_GRP), + CMN_FORMAT_ATTR(wp_exclusive, CMN_CONFIG_WP_EXCLUSIVE), + CMN_FORMAT_ATTR(wp_combine, CMN_CONFIG_WP_COMBINE), + + _CMN_FORMAT_ATTR(wp_val, 1, CMN_CONFIG1_WP_VAL), + _CMN_FORMAT_ATTR(wp_mask, 2, CMN_CONFIG2_WP_MASK), + + NULL +}; + +static const struct attribute_group arm_cmn_format_attrs_group = { + .name = "format", + .attrs = arm_cmn_format_attrs, +}; + +static ssize_t arm_cmn_cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct arm_cmn *cmn = to_cmn(dev_get_drvdata(dev)); + + return cpumap_print_to_pagebuf(true, buf, cpumask_of(cmn->cpu)); +} + +static struct device_attribute arm_cmn_cpumask_attr = + __ATTR(cpumask, 0444, arm_cmn_cpumask_show, NULL); + +static struct attribute *arm_cmn_cpumask_attrs[] = { + &arm_cmn_cpumask_attr.attr, + NULL, +}; + +static struct attribute_group arm_cmn_cpumask_attr_group = { + .attrs = arm_cmn_cpumask_attrs, +}; + +static const struct attribute_group *arm_cmn_attr_groups[] = { + &arm_cmn_event_attrs_group, + &arm_cmn_format_attrs_group, + &arm_cmn_cpumask_attr_group, + NULL +}; + +static int arm_cmn_wp_idx(struct perf_event *event) +{ + return CMN_EVENT_EVENTID(event) + CMN_EVENT_WP_GRP(event); +} + +static u32 arm_cmn_wp_config(struct perf_event *event) +{ + u32 config; + u32 dev = CMN_EVENT_WP_DEV_SEL(event); + u32 chn = CMN_EVENT_WP_CHN_SEL(event); + u32 grp = CMN_EVENT_WP_GRP(event); + u32 exc = CMN_EVENT_WP_EXCLUSIVE(event); + u32 combine = CMN_EVENT_WP_COMBINE(event); + + config = FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL, dev) | + FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_CHN_SEL, chn) | + FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_GRP, grp) | + FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE, exc); + if (combine && !grp) + config |= CMN_DTM_WPn_CONFIG_WP_COMBINE; + + return config; +} + +static void arm_cmn_set_state(struct arm_cmn *cmn, u32 state) +{ + if (!cmn->state) + writel_relaxed(0, cmn->dtc[0].base + CMN_DT_PMCR); + cmn->state |= state; +} + +static void arm_cmn_clear_state(struct arm_cmn *cmn, u32 state) +{ + cmn->state &= ~state; + if (!cmn->state) + writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN, + cmn->dtc[0].base + CMN_DT_PMCR); +} + +static void arm_cmn_pmu_enable(struct pmu *pmu) +{ + arm_cmn_clear_state(to_cmn(pmu), CMN_STATE_DISABLED); +} + +static void arm_cmn_pmu_disable(struct pmu *pmu) +{ + arm_cmn_set_state(to_cmn(pmu), CMN_STATE_DISABLED); +} + +static u64 arm_cmn_read_dtm(struct arm_cmn *cmn, struct arm_cmn_hw_event *hw, + bool snapshot) +{ + struct arm_cmn_node *dn; + unsigned int i, offset; + u64 count = 0; + + offset = snapshot ? CMN_DTM_PMEVCNTSR : CMN_DTM_PMEVCNT; + for_each_hw_dn(hw, dn, i) { + struct arm_cmn_node *xp = arm_cmn_node_to_xp(dn); + int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i); + u64 reg = readq_relaxed(xp->pmu_base + offset); + u16 dtm_count = reg >> (dtm_idx * 16); + + count += dtm_count; + } + return count; +} + +static u64 arm_cmn_read_cc(struct arm_cmn_dtc *dtc) +{ + u64 val = readq_relaxed(dtc->base + CMN_DT_PMCCNTR); + + writeq_relaxed(CMN_CC_INIT, dtc->base + CMN_DT_PMCCNTR); + return (val - CMN_CC_INIT) & ((CMN_CC_INIT << 1) - 1); +} + +static u32 arm_cmn_read_counter(struct arm_cmn_dtc *dtc, int idx) +{ + u32 val, pmevcnt = CMN_DT_PMEVCNT(idx); + + val = readl_relaxed(dtc->base + pmevcnt); + writel_relaxed(CMN_COUNTER_INIT, dtc->base + pmevcnt); + return val - CMN_COUNTER_INIT; +} + +static void arm_cmn_init_counter(struct perf_event *event) +{ + struct arm_cmn *cmn = to_cmn(event->pmu); + struct arm_cmn_hw_event *hw = to_cmn_hw(event); + unsigned int i, pmevcnt = CMN_DT_PMEVCNT(hw->dtc_idx); + u64 count; + + for (i = 0; hw->dtcs_used & (1U << i); i++) { + writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + pmevcnt); + cmn->dtc[i].counters[hw->dtc_idx] = event; + } + + count = arm_cmn_read_dtm(cmn, hw, false); + local64_set(&event->hw.prev_count, count); +} + +static void arm_cmn_event_read(struct perf_event *event) +{ + struct arm_cmn *cmn = to_cmn(event->pmu); + struct arm_cmn_hw_event *hw = to_cmn_hw(event); + u64 delta, new, prev; + unsigned long flags; + unsigned int i; + + if (hw->dtc_idx == CMN_DT_NUM_COUNTERS) { + i = __ffs(hw->dtcs_used); + delta = arm_cmn_read_cc(cmn->dtc + i); + local64_add(delta, &event->count); + return; + } + new = arm_cmn_read_dtm(cmn, hw, false); + prev = local64_xchg(&event->hw.prev_count, new); + + delta = new - prev; + + local_irq_save(flags); + for (i = 0; hw->dtcs_used & (1U << i); i++) { + new = arm_cmn_read_counter(cmn->dtc + i, hw->dtc_idx); + delta += new << 16; + } + local_irq_restore(flags); + local64_add(delta, &event->count); +} + +static void arm_cmn_event_start(struct perf_event *event, int flags) +{ + struct arm_cmn *cmn = to_cmn(event->pmu); + struct arm_cmn_hw_event *hw = to_cmn_hw(event); + struct arm_cmn_node *dn; + enum cmn_node_type type = CMN_EVENT_TYPE(event); + int i; + + if (type == CMN_TYPE_DTC) { + i = __ffs(hw->dtcs_used); + writeq_relaxed(CMN_CC_INIT, cmn->dtc[i].base + CMN_DT_PMCCNTR); + cmn->dtc[i].cc_active = true; + } else if (type == CMN_TYPE_WP) { + int wp_idx = arm_cmn_wp_idx(event); + u64 val = CMN_EVENT_WP_VAL(event); + u64 mask = CMN_EVENT_WP_MASK(event); + + for_each_hw_dn(hw, dn, i) { + writeq_relaxed(val, dn->pmu_base + CMN_DTM_WPn_VAL(wp_idx)); + writeq_relaxed(mask, dn->pmu_base + CMN_DTM_WPn_MASK(wp_idx)); + } + } else for_each_hw_dn(hw, dn, i) { + int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i); + + dn->event[dtm_idx] = CMN_EVENT_EVENTID(event); + writel_relaxed(le32_to_cpu(dn->event_sel), dn->pmu_base + CMN_PMU_EVENT_SEL); + } +} + +static void arm_cmn_event_stop(struct perf_event *event, int flags) +{ + struct arm_cmn *cmn = to_cmn(event->pmu); + struct arm_cmn_hw_event *hw = to_cmn_hw(event); + struct arm_cmn_node *dn; + enum cmn_node_type type = CMN_EVENT_TYPE(event); + int i; + + if (type == CMN_TYPE_DTC) { + i = __ffs(hw->dtcs_used); + cmn->dtc[i].cc_active = false; + } else if (type == CMN_TYPE_WP) { + int wp_idx = arm_cmn_wp_idx(event); + + for_each_hw_dn(hw, dn, i) { + writeq_relaxed(0, dn->pmu_base + CMN_DTM_WPn_MASK(wp_idx)); + writeq_relaxed(~0ULL, dn->pmu_base + CMN_DTM_WPn_VAL(wp_idx)); + } + } else for_each_hw_dn(hw, dn, i) { + int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i); + + dn->event[dtm_idx] = 0; + writel_relaxed(le32_to_cpu(dn->event_sel), dn->pmu_base + CMN_PMU_EVENT_SEL); + } + + arm_cmn_event_read(event); +} + +struct arm_cmn_val { + u8 dtm_count[CMN_MAX_XPS]; + u8 occupid[CMN_MAX_XPS]; + u8 wp[CMN_MAX_XPS][4]; + int dtc_count; + bool cycles; +}; + +static void arm_cmn_val_add_event(struct arm_cmn_val *val, struct perf_event *event) +{ + struct arm_cmn_hw_event *hw = to_cmn_hw(event); + struct arm_cmn_node *dn; + enum cmn_node_type type; + int i; + u8 occupid; + + if (is_software_event(event)) + return; + + type = CMN_EVENT_TYPE(event); + if (type == CMN_TYPE_DTC) { + val->cycles = true; + return; + } + + val->dtc_count++; + if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event))) + occupid = CMN_EVENT_OCCUPID(event) + 1; + else + occupid = 0; + + for_each_hw_dn(hw, dn, i) { + int wp_idx, xp = arm_cmn_node_to_xp(dn)->logid; + + val->dtm_count[xp]++; + val->occupid[xp] = occupid; + + if (type != CMN_TYPE_WP) + continue; + + wp_idx = arm_cmn_wp_idx(event); + val->wp[xp][wp_idx] = CMN_EVENT_WP_COMBINE(event) + 1; + } +} + +static int arm_cmn_validate_group(struct perf_event *event) +{ + struct arm_cmn_hw_event *hw = to_cmn_hw(event); + struct arm_cmn_node *dn; + struct perf_event *sibling, *leader = event->group_leader; + enum cmn_node_type type; + struct arm_cmn_val val; + int i; + u8 occupid; + + if (leader == event) + return 0; + + if (event->pmu != leader->pmu && !is_software_event(leader)) + return -EINVAL; + + memset(&val, 0, sizeof(val)); + + arm_cmn_val_add_event(&val, leader); + for_each_sibling_event(sibling, leader) + arm_cmn_val_add_event(&val, sibling); + + type = CMN_EVENT_TYPE(event); + if (type == CMN_TYPE_DTC) + return val.cycles ? -EINVAL : 0; + + if (val.dtc_count == CMN_DT_NUM_COUNTERS) + return -EINVAL; + + if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event))) + occupid = CMN_EVENT_OCCUPID(event) + 1; + else + occupid = 0; + + for_each_hw_dn(hw, dn, i) { + int wp_idx, wp_cmb, xp = arm_cmn_node_to_xp(dn)->logid; + + if (val.dtm_count[xp] == CMN_DTM_NUM_COUNTERS) + return -EINVAL; + + if (occupid && val.occupid[xp] && occupid != val.occupid[xp]) + return -EINVAL; + + if (type != CMN_TYPE_WP) + continue; + + wp_idx = arm_cmn_wp_idx(event); + if (val.wp[xp][wp_idx]) + return -EINVAL; + + wp_cmb = val.wp[xp][wp_idx ^ 1]; + if (wp_cmb && wp_cmb != CMN_EVENT_WP_COMBINE(event) + 1) + return -EINVAL; + } + + return 0; +} + +static int arm_cmn_event_init(struct perf_event *event) +{ + struct arm_cmn *cmn = to_cmn(event->pmu); + struct arm_cmn_hw_event *hw = to_cmn_hw(event); + enum cmn_node_type type; + unsigned int i; + bool bynodeid; + u16 nodeid, eventid; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) + return -EINVAL; + + event->cpu = cmn->cpu; + if (event->cpu < 0) + return -EINVAL; + + type = CMN_EVENT_TYPE(event); + /* DTC events (i.e. cycles) already have everything they need */ + if (type == CMN_TYPE_DTC) + return 0; + + /* For watchpoints we need the actual XP node here */ + if (type == CMN_TYPE_WP) { + type = CMN_TYPE_XP; + /* ...and we need a "real" direction */ + eventid = CMN_EVENT_EVENTID(event); + if (eventid != CMN_WP_UP && eventid != CMN_WP_DOWN) + return -EINVAL; + } + + bynodeid = CMN_EVENT_BYNODEID(event); + nodeid = CMN_EVENT_NODEID(event); + + hw->dn = arm_cmn_node(cmn, type); + for (i = hw->dn - cmn->dns; i < cmn->num_dns && cmn->dns[i].type == type; i++) { + if (!bynodeid) { + hw->num_dns++; + } else if (cmn->dns[i].id != nodeid) { + hw->dn++; + } else { + hw->num_dns = 1; + break; + } + } + + if (!hw->num_dns) { + int bits = arm_cmn_xyidbits(cmn); + + dev_dbg(cmn->dev, "invalid node 0x%x (%d,%d,%d,%d) type 0x%x\n", + nodeid, CMN_NODEID_X(nodeid, bits), CMN_NODEID_Y(nodeid, bits), + CMN_NODEID_PID(nodeid), CMN_NODEID_DEVID(nodeid), type); + return -EINVAL; + } + /* + * By assuming events count in all DTC domains, we cunningly avoid + * needing to know anything about how XPs are assigned to domains. + */ + hw->dtcs_used = (1U << cmn->num_dtcs) - 1; + + return arm_cmn_validate_group(event); +} + +static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event, + int i) +{ + struct arm_cmn_hw_event *hw = to_cmn_hw(event); + enum cmn_node_type type = CMN_EVENT_TYPE(event); + + while (i--) { + struct arm_cmn_node *xp = arm_cmn_node_to_xp(hw->dn + i); + unsigned int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i); + + if (type == CMN_TYPE_WP) + hw->dn[i].wp_event[arm_cmn_wp_idx(event)] = -1; + + if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event))) + hw->dn[i].occupid_count--; + + xp->pmu_config_low &= ~CMN__PMEVCNT_PAIRED(dtm_idx); + writel_relaxed(xp->pmu_config_low, xp->pmu_base + CMN_DTM_PMU_CONFIG); + } + memset(hw->dtm_idx, 0, sizeof(hw->dtm_idx)); + + for (i = 0; hw->dtcs_used & (1U << i); i++) + cmn->dtc[i].counters[hw->dtc_idx] = NULL; +} + +static int arm_cmn_event_add(struct perf_event *event, int flags) +{ + struct arm_cmn *cmn = to_cmn(event->pmu); + struct arm_cmn_hw_event *hw = to_cmn_hw(event); + struct arm_cmn_dtc *dtc = &cmn->dtc[0]; + struct arm_cmn_node *dn; + enum cmn_node_type type = CMN_EVENT_TYPE(event); + unsigned int i, dtc_idx, input_sel; + + if (type == CMN_TYPE_DTC) { + i = 0; + while (cmn->dtc[i].cycles) + if (++i == cmn->num_dtcs) + return -ENOSPC; + + cmn->dtc[i].cycles = event; + hw->dtc_idx = CMN_DT_NUM_COUNTERS; + hw->dtcs_used = 1U << i; + + if (flags & PERF_EF_START) + arm_cmn_event_start(event, 0); + return 0; + } + + /* Grab a free global counter first... */ + dtc_idx = 0; + while (dtc->counters[dtc_idx]) + if (++dtc_idx == CMN_DT_NUM_COUNTERS) + return -ENOSPC; + + hw->dtc_idx = dtc_idx; + + /* ...then the local counters to feed it. */ + for_each_hw_dn(hw, dn, i) { + struct arm_cmn_node *xp = arm_cmn_node_to_xp(dn); + unsigned int dtm_idx, shift; + u64 reg; + + dtm_idx = 0; + while (xp->pmu_config_low & CMN__PMEVCNT_PAIRED(dtm_idx)) + if (++dtm_idx == CMN_DTM_NUM_COUNTERS) + goto free_dtms; + + if (type == CMN_TYPE_XP) { + input_sel = CMN__PMEVCNT0_INPUT_SEL_XP + dtm_idx; + } else if (type == CMN_TYPE_WP) { + int tmp, wp_idx = arm_cmn_wp_idx(event); + u32 cfg = arm_cmn_wp_config(event); + + if (dn->wp_event[wp_idx] >= 0) + goto free_dtms; + + tmp = dn->wp_event[wp_idx ^ 1]; + if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) != + CMN_EVENT_WP_COMBINE(dtc->counters[tmp])) + goto free_dtms; + + input_sel = CMN__PMEVCNT0_INPUT_SEL_WP + wp_idx; + dn->wp_event[wp_idx] = dtc_idx; + writel_relaxed(cfg, dn->pmu_base + CMN_DTM_WPn_CONFIG(wp_idx)); + } else { + unsigned int port = CMN_NODEID_PID(dn->id); + unsigned int dev = CMN_NODEID_DEVID(dn->id); + + input_sel = CMN__PMEVCNT0_INPUT_SEL_DEV + dtm_idx + + (port << 4) + (dev << 2); + + if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event))) { + int occupid = CMN_EVENT_OCCUPID(event); + + if (dn->occupid_count == 0) { + dn->occupid_val = occupid; + writel_relaxed(occupid, + dn->pmu_base + CMN_PMU_EVENT_SEL + 4); + } else if (dn->occupid_val != occupid) { + goto free_dtms; + } + dn->occupid_count++; + } + } + + arm_cmn_set_index(hw->dtm_idx, i, dtm_idx); + + xp->input_sel[dtm_idx] = input_sel; + shift = CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(dtm_idx); + xp->pmu_config_low &= ~(CMN__PMEVCNT0_GLOBAL_NUM << shift); + xp->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, dtc_idx) << shift; + xp->pmu_config_low |= CMN__PMEVCNT_PAIRED(dtm_idx); + reg = (u64)le32_to_cpu(xp->pmu_config_high) << 32 | xp->pmu_config_low; + writeq_relaxed(reg, xp->pmu_base + CMN_DTM_PMU_CONFIG); + } + + /* Go go go! */ + arm_cmn_init_counter(event); + + if (flags & PERF_EF_START) + arm_cmn_event_start(event, 0); + + return 0; + +free_dtms: + arm_cmn_event_clear(cmn, event, i); + return -ENOSPC; +} + +static void arm_cmn_event_del(struct perf_event *event, int flags) +{ + struct arm_cmn *cmn = to_cmn(event->pmu); + struct arm_cmn_hw_event *hw = to_cmn_hw(event); + enum cmn_node_type type = CMN_EVENT_TYPE(event); + + arm_cmn_event_stop(event, PERF_EF_UPDATE); + + if (type == CMN_TYPE_DTC) + cmn->dtc[__ffs(hw->dtcs_used)].cycles = NULL; + else + arm_cmn_event_clear(cmn, event, hw->num_dns); +} + +/* + * We stop the PMU for both add and read, to avoid skew across DTM counters. + * In theory we could use snapshots to read without stopping, but then it + * becomes a lot trickier to deal with overlow and racing against interrupts, + * plus it seems they don't work properly on some hardware anyway :( + */ +static void arm_cmn_start_txn(struct pmu *pmu, unsigned int flags) +{ + arm_cmn_set_state(to_cmn(pmu), CMN_STATE_TXN); +} + +static void arm_cmn_end_txn(struct pmu *pmu) +{ + arm_cmn_clear_state(to_cmn(pmu), CMN_STATE_TXN); +} + +static int arm_cmn_commit_txn(struct pmu *pmu) +{ + arm_cmn_end_txn(pmu); + return 0; +} + +static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct arm_cmn *cmn; + unsigned int target; + + cmn = hlist_entry_safe(node, struct arm_cmn, cpuhp_node); + if (cpu != cmn->cpu) + return 0; + + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) + return 0; + + perf_pmu_migrate_context(&cmn->pmu, cpu, target); + cmn->cpu = target; + return 0; +} + +static irqreturn_t arm_cmn_handle_irq(int irq, void *dev_id) +{ + struct arm_cmn_dtc *dtc = dev_id; + irqreturn_t ret = IRQ_NONE; + + for (;;) { + u32 status = readl_relaxed(dtc->base + CMN_DT_PMOVSR); + u64 delta; + int i; + + for (i = 0; i < CMN_DTM_NUM_COUNTERS; i++) { + if (status & (1U << i)) { + ret = IRQ_HANDLED; + if (WARN_ON(!dtc->counters[i])) + continue; + delta = (u64)arm_cmn_read_counter(dtc, i) << 16; + local64_add(delta, &dtc->counters[i]->count); + } + } + + if (status & (1U << CMN_DT_NUM_COUNTERS)) { + ret = IRQ_HANDLED; + if (dtc->cc_active && !WARN_ON(!dtc->cycles)) { + delta = arm_cmn_read_cc(dtc); + local64_add(delta, &dtc->cycles->count); + } + } + + writel_relaxed(status, dtc->base + CMN_DT_PMOVSR_CLR); + + if (!dtc->irq_friend) + return ret; + dtc += dtc->irq_friend; + } +} + +/* We can reasonably accommodate DTCs of the same CMN sharing IRQs */ +static int arm_cmn_init_irqs(struct arm_cmn *cmn) +{ + int i, j, irq, err; + + for (i = 0; i < cmn->num_dtcs; i++) { + irq = cmn->dtc[i].irq; + for (j = i; j--; ) { + if (cmn->dtc[j].irq == irq) { + cmn->dtc[j].irq_friend = j - i; + goto next; + } + } + err = devm_request_irq(cmn->dev, irq, arm_cmn_handle_irq, + IRQF_NOBALANCING | IRQF_NO_THREAD, + dev_name(cmn->dev), &cmn->dtc[i]); + if (err) + return err; + + err = irq_set_affinity_hint(irq, cpumask_of(cmn->cpu)); + if (err) + return err; + next: + ; /* isn't C great? */ + } + return 0; +} + +static void arm_cmn_init_dtm(struct arm_cmn_node *xp) +{ + int i; + + for (i = 0; i < 4; i++) { + xp->wp_event[i] = -1; + writeq_relaxed(0, xp->pmu_base + CMN_DTM_WPn_MASK(i)); + writeq_relaxed(~0ULL, xp->pmu_base + CMN_DTM_WPn_VAL(i)); + } + xp->pmu_config_low = CMN_DTM_PMU_CONFIG_PMU_EN; + xp->dtc = -1; +} + +static int arm_cmn_init_dtc(struct arm_cmn *cmn, struct arm_cmn_node *dn, int idx) +{ + struct arm_cmn_dtc *dtc = cmn->dtc + idx; + struct arm_cmn_node *xp; + + dtc->base = dn->pmu_base - CMN_PMU_OFFSET; + dtc->irq = platform_get_irq(to_platform_device(cmn->dev), idx); + if (dtc->irq < 0) + return dtc->irq; + + writel_relaxed(0, dtc->base + CMN_DT_PMCR); + writel_relaxed(0x1ff, dtc->base + CMN_DT_PMOVSR_CLR); + writel_relaxed(CMN_DT_PMCR_OVFL_INTR_EN, dtc->base + CMN_DT_PMCR); + + /* We do at least know that a DTC's XP must be in that DTC's domain */ + xp = arm_cmn_node_to_xp(dn); + xp->dtc = idx; + + return 0; +} + +static int arm_cmn_node_cmp(const void *a, const void *b) +{ + const struct arm_cmn_node *dna = a, *dnb = b; + int cmp; + + cmp = dna->type - dnb->type; + if (!cmp) + cmp = dna->logid - dnb->logid; + return cmp; +} + +static int arm_cmn_init_dtcs(struct arm_cmn *cmn) +{ + struct arm_cmn_node *dn; + int dtc_idx = 0; + + cmn->dtc = devm_kcalloc(cmn->dev, cmn->num_dtcs, sizeof(cmn->dtc[0]), GFP_KERNEL); + if (!cmn->dtc) + return -ENOMEM; + + sort(cmn->dns, cmn->num_dns, sizeof(cmn->dns[0]), arm_cmn_node_cmp, NULL); + + cmn->xps = arm_cmn_node(cmn, CMN_TYPE_XP); + + for (dn = cmn->dns; dn < cmn->dns + cmn->num_dns; dn++) { + if (dn->type != CMN_TYPE_XP) + arm_cmn_init_node_to_xp(cmn, dn); + else if (cmn->num_dtcs == 1) + dn->dtc = 0; + + if (dn->type == CMN_TYPE_DTC) + arm_cmn_init_dtc(cmn, dn, dtc_idx++); + + /* To the PMU, RN-Ds don't add anything over RN-Is, so smoosh them together */ + if (dn->type == CMN_TYPE_RND) + dn->type = CMN_TYPE_RNI; + } + + writel_relaxed(CMN_DT_DTC_CTL_DT_EN, cmn->dtc[0].base + CMN_DT_DTC_CTL); + + return 0; +} + +static void arm_cmn_init_node_info(struct arm_cmn *cmn, u32 offset, struct arm_cmn_node *node) +{ + int level; + u64 reg = readq_relaxed(cmn->base + offset + CMN_NODE_INFO); + + node->type = FIELD_GET(CMN_NI_NODE_TYPE, reg); + node->id = FIELD_GET(CMN_NI_NODE_ID, reg); + node->logid = FIELD_GET(CMN_NI_LOGICAL_ID, reg); + + node->pmu_base = cmn->base + offset + CMN_PMU_OFFSET; + + if (node->type == CMN_TYPE_CFG) + level = 0; + else if (node->type == CMN_TYPE_XP) + level = 1; + else + level = 2; + + dev_dbg(cmn->dev, "node%*c%#06hx%*ctype:%-#6x id:%-4hd off:%#x\n", + (level * 2) + 1, ' ', node->id, 5 - (level * 2), ' ', + node->type, node->logid, offset); +} + +static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) +{ + void __iomem *cfg_region; + struct arm_cmn_node cfg, *dn; + u16 child_count, child_poff; + u32 xp_offset[CMN_MAX_XPS]; + u64 reg; + int i, j; + + cfg_region = cmn->base + rgn_offset; + reg = readl_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_2); + cmn->rev = FIELD_GET(CMN_CFGM_PID2_REVISION, reg); + dev_dbg(cmn->dev, "periph_id_2 revision: %d\n", cmn->rev); + + arm_cmn_init_node_info(cmn, rgn_offset, &cfg); + if (cfg.type != CMN_TYPE_CFG) + return -ENODEV; + + reg = readq_relaxed(cfg_region + CMN_CHILD_INFO); + child_count = FIELD_GET(CMN_CI_CHILD_COUNT, reg); + child_poff = FIELD_GET(CMN_CI_CHILD_PTR_OFFSET, reg); + + cmn->num_xps = child_count; + cmn->num_dns = cmn->num_xps; + + /* Pass 1: visit the XPs, enumerate their children */ + for (i = 0; i < cmn->num_xps; i++) { + reg = readq_relaxed(cfg_region + child_poff + i * 8); + xp_offset[i] = reg & CMN_CHILD_NODE_ADDR; + + reg = readq_relaxed(cmn->base + xp_offset[i] + CMN_CHILD_INFO); + cmn->num_dns += FIELD_GET(CMN_CI_CHILD_COUNT, reg); + } + + /* Cheeky +1 to help terminate pointer-based iteration */ + cmn->dns = devm_kcalloc(cmn->dev, cmn->num_dns + 1, + sizeof(*cmn->dns), GFP_KERNEL); + if (!cmn->dns) + return -ENOMEM; + + /* Pass 2: now we can actually populate the nodes */ + dn = cmn->dns; + for (i = 0; i < cmn->num_xps; i++) { + void __iomem *xp_region = cmn->base + xp_offset[i]; + struct arm_cmn_node *xp = dn++; + + arm_cmn_init_node_info(cmn, xp_offset[i], xp); + arm_cmn_init_dtm(xp); + /* + * Thanks to the order in which XP logical IDs seem to be + * assigned, we can handily infer the mesh X dimension by + * looking out for the XP at (0,1) without needing to know + * the exact node ID format, which we can later derive. + */ + if (xp->id == (1 << 3)) + cmn->mesh_x = xp->logid; + + reg = readq_relaxed(xp_region + CMN_CHILD_INFO); + child_count = FIELD_GET(CMN_CI_CHILD_COUNT, reg); + child_poff = FIELD_GET(CMN_CI_CHILD_PTR_OFFSET, reg); + + for (j = 0; j < child_count; j++) { + reg = readq_relaxed(xp_region + child_poff + j * 8); + /* + * Don't even try to touch anything external, since in general + * we haven't a clue how to power up arbitrary CHI requesters. + * As of CMN-600r1 these could only be RN-SAMs or CXLAs, + * neither of which have any PMU events anyway. + * (Actually, CXLAs do seem to have grown some events in r1p2, + * but they don't go to regular XP DTMs, and they depend on + * secure configuration which we can't easily deal with) + */ + if (reg & CMN_CHILD_NODE_EXTERNAL) { + dev_dbg(cmn->dev, "ignoring external node %llx\n", reg); + continue; + } + + arm_cmn_init_node_info(cmn, reg & CMN_CHILD_NODE_ADDR, dn); + + switch (dn->type) { + case CMN_TYPE_DTC: + cmn->num_dtcs++; + dn++; + break; + /* These guys have PMU events */ + case CMN_TYPE_DVM: + case CMN_TYPE_HNI: + case CMN_TYPE_HNF: + case CMN_TYPE_SBSX: + case CMN_TYPE_RNI: + case CMN_TYPE_RND: + case CMN_TYPE_CXRA: + case CMN_TYPE_CXHA: + dn++; + break; + /* Nothing to see here */ + case CMN_TYPE_RNSAM: + case CMN_TYPE_CXLA: + break; + /* Something has gone horribly wrong */ + default: + dev_err(cmn->dev, "invalid device node type: 0x%x\n", dn->type); + return -ENODEV; + } + } + } + + /* Correct for any nodes we skipped */ + cmn->num_dns = dn - cmn->dns; + + /* + * If mesh_x wasn't set during discovery then we never saw + * an XP at (0,1), thus we must have an Nx1 configuration. + */ + if (!cmn->mesh_x) + cmn->mesh_x = cmn->num_xps; + cmn->mesh_y = cmn->num_xps / cmn->mesh_x; + + dev_dbg(cmn->dev, "mesh %dx%d, ID width %d\n", + cmn->mesh_x, cmn->mesh_y, arm_cmn_xyidbits(cmn)); + + return 0; +} + +static int arm_cmn_acpi_probe(struct platform_device *pdev, struct arm_cmn *cmn) +{ + struct resource *cfg, *root; + + cfg = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!cfg) + return -EINVAL; + + root = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!root) + return -EINVAL; + + if (!resource_contains(cfg, root)) + swap(cfg, root); + /* + * Note that devm_ioremap_resource() is dumb and won't let the platform + * device claim cfg when the ACPI companion device has already claimed + * root within it. But since they *are* already both claimed in the + * appropriate name, we don't really need to do it again here anyway. + */ + cmn->base = devm_ioremap(cmn->dev, cfg->start, resource_size(cfg)); + if (!cmn->base) + return -ENOMEM; + + return root->start - cfg->start; +} + +static int arm_cmn_of_probe(struct platform_device *pdev, struct arm_cmn *cmn) +{ + struct device_node *np = pdev->dev.of_node; + u32 rootnode; + int ret; + + cmn->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(cmn->base)) + return PTR_ERR(cmn->base); + + ret = of_property_read_u32(np, "arm,root-node", &rootnode); + if (ret) + return ret; + + return rootnode; +} + +static int arm_cmn_probe(struct platform_device *pdev) +{ + struct arm_cmn *cmn; + const char *name; + static atomic_t id; + int err, rootnode, this_id; + + cmn = devm_kzalloc(&pdev->dev, sizeof(*cmn), GFP_KERNEL); + if (!cmn) + return -ENOMEM; + + cmn->dev = &pdev->dev; + platform_set_drvdata(pdev, cmn); + + if (has_acpi_companion(cmn->dev)) + rootnode = arm_cmn_acpi_probe(pdev, cmn); + else + rootnode = arm_cmn_of_probe(pdev, cmn); + if (rootnode < 0) + return rootnode; + + err = arm_cmn_discover(cmn, rootnode); + if (err) + return err; + + err = arm_cmn_init_dtcs(cmn); + if (err) + return err; + + err = arm_cmn_init_irqs(cmn); + if (err) + return err; + + cmn->cpu = raw_smp_processor_id(); + cmn->pmu = (struct pmu) { + .module = THIS_MODULE, + .attr_groups = arm_cmn_attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .task_ctx_nr = perf_invalid_context, + .pmu_enable = arm_cmn_pmu_enable, + .pmu_disable = arm_cmn_pmu_disable, + .event_init = arm_cmn_event_init, + .add = arm_cmn_event_add, + .del = arm_cmn_event_del, + .start = arm_cmn_event_start, + .stop = arm_cmn_event_stop, + .read = arm_cmn_event_read, + .start_txn = arm_cmn_start_txn, + .commit_txn = arm_cmn_commit_txn, + .cancel_txn = arm_cmn_end_txn, + }; + + this_id = atomic_fetch_inc(&id); + if (this_id == 0) { + name = "arm_cmn"; + } else { + name = devm_kasprintf(cmn->dev, GFP_KERNEL, "arm_cmn_%d", this_id); + if (!name) + return -ENOMEM; + } + + err = cpuhp_state_add_instance(arm_cmn_hp_state, &cmn->cpuhp_node); + if (err) + return err; + + err = perf_pmu_register(&cmn->pmu, name, -1); + if (err) + cpuhp_state_remove_instance(arm_cmn_hp_state, &cmn->cpuhp_node); + return err; +} + +static int arm_cmn_remove(struct platform_device *pdev) +{ + struct arm_cmn *cmn = platform_get_drvdata(pdev); + int i; + + writel_relaxed(0, cmn->dtc[0].base + CMN_DT_DTC_CTL); + + perf_pmu_unregister(&cmn->pmu); + cpuhp_state_remove_instance(arm_cmn_hp_state, &cmn->cpuhp_node); + + for (i = 0; i < cmn->num_dtcs; i++) + irq_set_affinity_hint(cmn->dtc[i].irq, NULL); + + return 0; +} + +#ifdef CONFIG_OF +static const struct of_device_id arm_cmn_of_match[] = { + { .compatible = "arm,cmn-600", }, + {} +}; +MODULE_DEVICE_TABLE(of, arm_cmn_of_match); +#endif + +#ifdef CONFIG_ACPI +static const struct acpi_device_id arm_cmn_acpi_match[] = { + { "ARMHC600", }, + {} +}; +MODULE_DEVICE_TABLE(acpi, arm_cmn_acpi_match); +#endif + +static struct platform_driver arm_cmn_driver = { + .driver = { + .name = "arm-cmn", + .of_match_table = of_match_ptr(arm_cmn_of_match), + .acpi_match_table = ACPI_PTR(arm_cmn_acpi_match), + }, + .probe = arm_cmn_probe, + .remove = arm_cmn_remove, +}; + +static int __init arm_cmn_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "perf/arm/cmn:online", NULL, + arm_cmn_pmu_offline_cpu); + if (ret < 0) + return ret; + + arm_cmn_hp_state = ret; + ret = platform_driver_register(&arm_cmn_driver); + if (ret) + cpuhp_remove_multi_state(arm_cmn_hp_state); + return ret; +} + +static void __exit arm_cmn_exit(void) +{ + platform_driver_unregister(&arm_cmn_driver); + cpuhp_remove_multi_state(arm_cmn_hp_state); +} + +module_init(arm_cmn_init); +module_exit(arm_cmn_exit); + +MODULE_AUTHOR("Robin Murphy <robin.murphy@arm.com>"); +MODULE_DESCRIPTION("Arm CMN-600 PMU driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c index 96ed93cc78e6..98e68ed7db85 100644 --- a/drivers/perf/arm_dsu_pmu.c +++ b/drivers/perf/arm_dsu_pmu.c @@ -11,6 +11,7 @@ #define DRVNAME PMUNAME "_pmu" #define pr_fmt(fmt) DRVNAME ": " fmt +#include <linux/acpi.h> #include <linux/bitmap.h> #include <linux/bitops.h> #include <linux/bug.h> @@ -603,18 +604,19 @@ static struct dsu_pmu *dsu_pmu_alloc(struct platform_device *pdev) } /** - * dsu_pmu_dt_get_cpus: Get the list of CPUs in the cluster. + * dsu_pmu_dt_get_cpus: Get the list of CPUs in the cluster + * from device tree. */ -static int dsu_pmu_dt_get_cpus(struct device_node *dev, cpumask_t *mask) +static int dsu_pmu_dt_get_cpus(struct device *dev, cpumask_t *mask) { int i = 0, n, cpu; struct device_node *cpu_node; - n = of_count_phandle_with_args(dev, "cpus", NULL); + n = of_count_phandle_with_args(dev->of_node, "cpus", NULL); if (n <= 0) return -ENODEV; for (; i < n; i++) { - cpu_node = of_parse_phandle(dev, "cpus", i); + cpu_node = of_parse_phandle(dev->of_node, "cpus", i); if (!cpu_node) break; cpu = of_cpu_node_to_id(cpu_node); @@ -631,6 +633,36 @@ static int dsu_pmu_dt_get_cpus(struct device_node *dev, cpumask_t *mask) return 0; } +/** + * dsu_pmu_acpi_get_cpus: Get the list of CPUs in the cluster + * from ACPI. + */ +static int dsu_pmu_acpi_get_cpus(struct device *dev, cpumask_t *mask) +{ +#ifdef CONFIG_ACPI + int cpu; + + /* + * A dsu pmu node is inside a cluster parent node along with cpu nodes. + * We need to find out all cpus that have the same parent with this pmu. + */ + for_each_possible_cpu(cpu) { + struct acpi_device *acpi_dev; + struct device *cpu_dev = get_cpu_device(cpu); + + if (!cpu_dev) + continue; + + acpi_dev = ACPI_COMPANION(cpu_dev); + if (acpi_dev && + acpi_dev->parent == ACPI_COMPANION(dev)->parent) + cpumask_set_cpu(cpu, mask); + } +#endif + + return 0; +} + /* * dsu_pmu_probe_pmu: Probe the PMU details on a CPU in the cluster. */ @@ -676,6 +708,7 @@ static int dsu_pmu_device_probe(struct platform_device *pdev) { int irq, rc; struct dsu_pmu *dsu_pmu; + struct fwnode_handle *fwnode = dev_fwnode(&pdev->dev); char *name; static atomic_t pmu_idx = ATOMIC_INIT(-1); @@ -683,7 +716,16 @@ static int dsu_pmu_device_probe(struct platform_device *pdev) if (IS_ERR(dsu_pmu)) return PTR_ERR(dsu_pmu); - rc = dsu_pmu_dt_get_cpus(pdev->dev.of_node, &dsu_pmu->associated_cpus); + if (IS_ERR_OR_NULL(fwnode)) + return -ENOENT; + + if (is_of_node(fwnode)) + rc = dsu_pmu_dt_get_cpus(&pdev->dev, &dsu_pmu->associated_cpus); + else if (is_acpi_device_node(fwnode)) + rc = dsu_pmu_acpi_get_cpus(&pdev->dev, &dsu_pmu->associated_cpus); + else + return -ENOENT; + if (rc) { dev_warn(&pdev->dev, "Failed to parse the CPUs\n"); return rc; @@ -752,11 +794,21 @@ static const struct of_device_id dsu_pmu_of_match[] = { { .compatible = "arm,dsu-pmu", }, {}, }; +MODULE_DEVICE_TABLE(of, dsu_pmu_of_match); + +#ifdef CONFIG_ACPI +static const struct acpi_device_id dsu_pmu_acpi_match[] = { + { "ARMHD500", 0}, + {}, +}; +MODULE_DEVICE_TABLE(acpi, dsu_pmu_acpi_match); +#endif static struct platform_driver dsu_pmu_driver = { .driver = { .name = DRVNAME, .of_match_table = of_match_ptr(dsu_pmu_of_match), + .acpi_match_table = ACPI_PTR(dsu_pmu_acpi_match), .suppress_bind_attrs = true, }, .probe = dsu_pmu_device_probe, @@ -826,7 +878,6 @@ static void __exit dsu_pmu_exit(void) module_init(dsu_pmu_init); module_exit(dsu_pmu_exit); -MODULE_DEVICE_TABLE(of, dsu_pmu_of_match); MODULE_DESCRIPTION("Perf driver for ARM DynamIQ Shared Unit"); MODULE_AUTHOR("Suzuki K Poulose <suzuki.poulose@arm.com>"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index df352b334ea7..cb2f55f450e4 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -26,8 +26,84 @@ #include <asm/irq_regs.h> +static int armpmu_count_irq_users(const int irq); + +struct pmu_irq_ops { + void (*enable_pmuirq)(unsigned int irq); + void (*disable_pmuirq)(unsigned int irq); + void (*free_pmuirq)(unsigned int irq, int cpu, void __percpu *devid); +}; + +static void armpmu_free_pmuirq(unsigned int irq, int cpu, void __percpu *devid) +{ + free_irq(irq, per_cpu_ptr(devid, cpu)); +} + +static const struct pmu_irq_ops pmuirq_ops = { + .enable_pmuirq = enable_irq, + .disable_pmuirq = disable_irq_nosync, + .free_pmuirq = armpmu_free_pmuirq +}; + +static void armpmu_free_pmunmi(unsigned int irq, int cpu, void __percpu *devid) +{ + free_nmi(irq, per_cpu_ptr(devid, cpu)); +} + +static const struct pmu_irq_ops pmunmi_ops = { + .enable_pmuirq = enable_nmi, + .disable_pmuirq = disable_nmi_nosync, + .free_pmuirq = armpmu_free_pmunmi +}; + +static void armpmu_enable_percpu_pmuirq(unsigned int irq) +{ + enable_percpu_irq(irq, IRQ_TYPE_NONE); +} + +static void armpmu_free_percpu_pmuirq(unsigned int irq, int cpu, + void __percpu *devid) +{ + if (armpmu_count_irq_users(irq) == 1) + free_percpu_irq(irq, devid); +} + +static const struct pmu_irq_ops percpu_pmuirq_ops = { + .enable_pmuirq = armpmu_enable_percpu_pmuirq, + .disable_pmuirq = disable_percpu_irq, + .free_pmuirq = armpmu_free_percpu_pmuirq +}; + +static void armpmu_enable_percpu_pmunmi(unsigned int irq) +{ + if (!prepare_percpu_nmi(irq)) + enable_percpu_nmi(irq, IRQ_TYPE_NONE); +} + +static void armpmu_disable_percpu_pmunmi(unsigned int irq) +{ + disable_percpu_nmi(irq); + teardown_percpu_nmi(irq); +} + +static void armpmu_free_percpu_pmunmi(unsigned int irq, int cpu, + void __percpu *devid) +{ + if (armpmu_count_irq_users(irq) == 1) + free_percpu_nmi(irq, devid); +} + +static const struct pmu_irq_ops percpu_pmunmi_ops = { + .enable_pmuirq = armpmu_enable_percpu_pmunmi, + .disable_pmuirq = armpmu_disable_percpu_pmunmi, + .free_pmuirq = armpmu_free_percpu_pmunmi +}; + static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu); static DEFINE_PER_CPU(int, cpu_irq); +static DEFINE_PER_CPU(const struct pmu_irq_ops *, cpu_irq_ops); + +static bool has_nmi; static inline u64 arm_pmu_event_max_period(struct perf_event *event) { @@ -544,6 +620,23 @@ static int armpmu_count_irq_users(const int irq) return count; } +static const struct pmu_irq_ops *armpmu_find_irq_ops(int irq) +{ + const struct pmu_irq_ops *ops = NULL; + int cpu; + + for_each_possible_cpu(cpu) { + if (per_cpu(cpu_irq, cpu) != irq) + continue; + + ops = per_cpu(cpu_irq_ops, cpu); + if (ops) + break; + } + + return ops; +} + void armpmu_free_irq(int irq, int cpu) { if (per_cpu(cpu_irq, cpu) == 0) @@ -551,18 +644,18 @@ void armpmu_free_irq(int irq, int cpu) if (WARN_ON(irq != per_cpu(cpu_irq, cpu))) return; - if (!irq_is_percpu_devid(irq)) - free_irq(irq, per_cpu_ptr(&cpu_armpmu, cpu)); - else if (armpmu_count_irq_users(irq) == 1) - free_percpu_irq(irq, &cpu_armpmu); + per_cpu(cpu_irq_ops, cpu)->free_pmuirq(irq, cpu, &cpu_armpmu); per_cpu(cpu_irq, cpu) = 0; + per_cpu(cpu_irq_ops, cpu) = NULL; } int armpmu_request_irq(int irq, int cpu) { int err = 0; const irq_handler_t handler = armpmu_dispatch_irq; + const struct pmu_irq_ops *irq_ops; + if (!irq) return 0; @@ -582,17 +675,44 @@ int armpmu_request_irq(int irq, int cpu) IRQF_NO_THREAD; irq_set_status_flags(irq, IRQ_NOAUTOEN); - err = request_irq(irq, handler, irq_flags, "arm-pmu", + + err = request_nmi(irq, handler, irq_flags, "arm-pmu", per_cpu_ptr(&cpu_armpmu, cpu)); + + /* If cannot get an NMI, get a normal interrupt */ + if (err) { + err = request_irq(irq, handler, irq_flags, "arm-pmu", + per_cpu_ptr(&cpu_armpmu, cpu)); + irq_ops = &pmuirq_ops; + } else { + has_nmi = true; + irq_ops = &pmunmi_ops; + } } else if (armpmu_count_irq_users(irq) == 0) { - err = request_percpu_irq(irq, handler, "arm-pmu", - &cpu_armpmu); + err = request_percpu_nmi(irq, handler, "arm-pmu", &cpu_armpmu); + + /* If cannot get an NMI, get a normal interrupt */ + if (err) { + err = request_percpu_irq(irq, handler, "arm-pmu", + &cpu_armpmu); + irq_ops = &percpu_pmuirq_ops; + } else { + has_nmi= true; + irq_ops = &percpu_pmunmi_ops; + } + } else { + /* Per cpudevid irq was already requested by another CPU */ + irq_ops = armpmu_find_irq_ops(irq); + + if (WARN_ON(!irq_ops)) + err = -EINVAL; } if (err) goto err_out; per_cpu(cpu_irq, cpu) = irq; + per_cpu(cpu_irq_ops, cpu) = irq_ops; return 0; err_out: @@ -625,12 +745,8 @@ static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node) per_cpu(cpu_armpmu, cpu) = pmu; irq = armpmu_get_cpu_irq(pmu, cpu); - if (irq) { - if (irq_is_percpu_devid(irq)) - enable_percpu_irq(irq, IRQ_TYPE_NONE); - else - enable_irq(irq); - } + if (irq) + per_cpu(cpu_irq_ops, cpu)->enable_pmuirq(irq); return 0; } @@ -644,12 +760,8 @@ static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node) return 0; irq = armpmu_get_cpu_irq(pmu, cpu); - if (irq) { - if (irq_is_percpu_devid(irq)) - disable_percpu_irq(irq); - else - disable_irq_nosync(irq); - } + if (irq) + per_cpu(cpu_irq_ops, cpu)->disable_pmuirq(irq); per_cpu(cpu_armpmu, cpu) = NULL; @@ -870,8 +982,9 @@ int armpmu_register(struct arm_pmu *pmu) if (!__oprofile_cpu_pmu) __oprofile_cpu_pmu = pmu; - pr_info("enabled with %s PMU driver, %d counters available\n", - pmu->name, pmu->num_events); + pr_info("enabled with %s PMU driver, %d counters available%s\n", + pmu->name, pmu->num_events, + has_nmi ? ", using NMIs" : ""); return 0; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 25b0c97b3eb0..b59ec22169ab 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -14,6 +14,7 @@ #include <linux/cpumask.h> #include <linux/device.h> #include <linux/kernel.h> +#include <linux/module.h> #include <linux/perf_event.h> #include <linux/types.h> diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c index aac9823b0c6b..e116815fa809 100644 --- a/drivers/perf/thunderx2_pmu.c +++ b/drivers/perf/thunderx2_pmu.c @@ -805,14 +805,17 @@ static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev, list_for_each_entry(rentry, &list, node) { if (resource_type(rentry->res) == IORESOURCE_MEM) { res = *rentry->res; + rentry = NULL; break; } } + acpi_dev_free_resource_list(&list); - if (!rentry->res) + if (rentry) { + dev_err(dev, "PMU type %d: Fail to find resource\n", type); return NULL; + } - acpi_dev_free_resource_list(&list); base = devm_ioremap_resource(dev, &res); if (IS_ERR(base)) { dev_err(dev, "PMU type %d: Fail to map resource\n", type); diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index edac28cd25dd..633cf07ba672 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -1453,17 +1453,6 @@ static char *xgene_pmu_dev_name(struct device *dev, u32 type, int id) } #if defined(CONFIG_ACPI) -static int acpi_pmu_dev_add_resource(struct acpi_resource *ares, void *data) -{ - struct resource *res = data; - - if (ares->type == ACPI_RESOURCE_TYPE_FIXED_MEMORY32) - acpi_dev_resource_memory(ares, res); - - /* Always tell the ACPI core to skip this resource */ - return 1; -} - static struct xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu, struct acpi_device *adev, u32 type) @@ -1475,6 +1464,7 @@ xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu, struct hw_pmu_info *inf; void __iomem *dev_csr; struct resource res; + struct resource_entry *rentry; int enable_bit; int rc; @@ -1483,11 +1473,23 @@ xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu, return NULL; INIT_LIST_HEAD(&resource_list); - rc = acpi_dev_get_resources(adev, &resource_list, - acpi_pmu_dev_add_resource, &res); + rc = acpi_dev_get_resources(adev, &resource_list, NULL, NULL); + if (rc <= 0) { + dev_err(dev, "PMU type %d: No resources found\n", type); + return NULL; + } + + list_for_each_entry(rentry, &resource_list, node) { + if (resource_type(rentry->res) == IORESOURCE_MEM) { + res = *rentry->res; + rentry = NULL; + break; + } + } acpi_dev_free_resource_list(&resource_list); - if (rc < 0) { - dev_err(dev, "PMU type %d: No resource address found\n", type); + + if (rentry) { + dev_err(dev, "PMU type %d: No memory resource found\n", type); return NULL; } diff --git a/fs/namespace.c b/fs/namespace.c index bae0e95b3713..32a0b9146757 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3075,7 +3075,7 @@ static void shrink_submounts(struct mount *mnt) void *copy_mount_options(const void __user * data) { char *copy; - unsigned size; + unsigned left, offset; if (!data) return NULL; @@ -3084,16 +3084,27 @@ void *copy_mount_options(const void __user * data) if (!copy) return ERR_PTR(-ENOMEM); - size = PAGE_SIZE - offset_in_page(data); + left = copy_from_user(copy, data, PAGE_SIZE); - if (copy_from_user(copy, data, size)) { + /* + * Not all architectures have an exact copy_from_user(). Resort to + * byte at a time. + */ + offset = PAGE_SIZE - left; + while (left) { + char c; + if (get_user(c, (const char __user *)data + offset)) + break; + copy[offset] = c; + left--; + offset++; + } + + if (left == PAGE_SIZE) { kfree(copy); return ERR_PTR(-EFAULT); } - if (size != PAGE_SIZE) { - if (copy_from_user(copy + size, data + size, PAGE_SIZE - size)) - memset(copy + size, 0, PAGE_SIZE - size); - } + return copy; } diff --git a/fs/proc/page.c b/fs/proc/page.c index f909243d4a66..9f1077d94cde 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -217,6 +217,9 @@ u64 stable_page_flags(struct page *page) u |= kpf_copy_bit(k, KPF_PRIVATE_2, PG_private_2); u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE, PG_owner_priv_1); u |= kpf_copy_bit(k, KPF_ARCH, PG_arch_1); +#ifdef CONFIG_64BIT + u |= kpf_copy_bit(k, KPF_ARCH_2, PG_arch_2); +#endif return u; }; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 5066b0251ed8..35172a91148e 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -653,6 +653,10 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) [ilog2(VM_MERGEABLE)] = "mg", [ilog2(VM_UFFD_MISSING)]= "um", [ilog2(VM_UFFD_WP)] = "uw", +#ifdef CONFIG_ARM64_MTE + [ilog2(VM_MTE)] = "mt", + [ilog2(VM_MTE_ALLOWED)] = "", +#endif #ifdef CONFIG_ARCH_HAS_PKEYS /* These come out via ProtectionKey: */ [ilog2(VM_PKEY_BIT0)] = "", diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 6db030439e29..dbf4f08d42e5 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -27,6 +27,7 @@ struct kvm_pmu { bool ready; bool created; bool irq_level; + struct irq_work overflow_work; }; #define kvm_arm_pmu_v3_ready(v) ((v)->arch.pmu.ready) diff --git a/include/linux/kernel-page-flags.h b/include/linux/kernel-page-flags.h index abd20ef93c98..eee1877a354e 100644 --- a/include/linux/kernel-page-flags.h +++ b/include/linux/kernel-page-flags.h @@ -17,5 +17,6 @@ #define KPF_ARCH 38 #define KPF_UNCACHED 39 #define KPF_SOFTDIRTY 40 +#define KPF_ARCH_2 41 #endif /* LINUX_KERNEL_PAGE_FLAGS_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 16b799a0522c..13dc9b9ccf8e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -342,6 +342,14 @@ extern unsigned int kobjsize(const void *objp); # define VM_MAPPED_COPY VM_ARCH_1 /* T if mapped copy of data (nommu mmap) */ #endif +#if defined(CONFIG_ARM64_MTE) +# define VM_MTE VM_HIGH_ARCH_0 /* Use Tagged memory for access control */ +# define VM_MTE_ALLOWED VM_HIGH_ARCH_1 /* Tagged memory permitted */ +#else +# define VM_MTE VM_NONE +# define VM_MTE_ALLOWED VM_NONE +#endif + #ifndef VM_GROWSUP # define VM_GROWSUP VM_NONE #endif diff --git a/include/linux/mman.h b/include/linux/mman.h index 6f34c33075f9..629cefc4ecba 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -78,13 +78,18 @@ static inline void vm_unacct_memory(long pages) } /* - * Allow architectures to handle additional protection bits + * Allow architectures to handle additional protection and flag bits. The + * overriding macros must be defined in the arch-specific asm/mman.h file. */ #ifndef arch_calc_vm_prot_bits #define arch_calc_vm_prot_bits(prot, pkey) 0 #endif +#ifndef arch_calc_vm_flag_bits +#define arch_calc_vm_flag_bits(flags) 0 +#endif + #ifndef arch_vm_get_page_prot #define arch_vm_get_page_prot(vm_flags) __pgprot(0) #endif @@ -103,6 +108,19 @@ static inline bool arch_validate_prot(unsigned long prot, unsigned long addr) #define arch_validate_prot arch_validate_prot #endif +#ifndef arch_validate_flags +/* + * This is called from mmap() and mprotect() with the updated vma->vm_flags. + * + * Returns true if the VM_* flags are valid. + */ +static inline bool arch_validate_flags(unsigned long flags) +{ + return true; +} +#define arch_validate_flags arch_validate_flags +#endif + /* * Optimisation macro. It is equivalent to: * (x & bit1) ? bit2 : 0 @@ -135,7 +153,8 @@ calc_vm_flag_bits(unsigned long flags) return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | - _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ); + _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | + arch_calc_vm_flag_bits(flags); } unsigned long vm_commit_limit(void); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 6be1aa559b1e..276140c94f4a 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -136,6 +136,9 @@ enum pageflags { PG_young, PG_idle, #endif +#ifdef CONFIG_64BIT + PG_arch_2, +#endif __NR_PAGEFLAGS, /* Filesystems */ diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 5b616dde9a4c..505480217cf1 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -73,6 +73,7 @@ enum armpmu_attr_groups { ARMPMU_ATTR_GROUP_COMMON, ARMPMU_ATTR_GROUP_EVENTS, ARMPMU_ATTR_GROUP_FORMATS, + ARMPMU_ATTR_GROUP_CAPS, ARMPMU_NR_ATTR_GROUPS }; @@ -109,6 +110,8 @@ struct arm_pmu { struct notifier_block cpu_pm_nb; /* the attr_groups array must be NULL-terminated */ const struct attribute_group *attr_groups[ARMPMU_NR_ATTR_GROUPS + 1]; + /* store the PMMIR_EL1 to expose slots */ + u64 reg_pmmir; /* Only to be used by ACPI probing code */ unsigned long acpi_cpuid; diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 90654cb63e9e..38c33eabea89 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -633,6 +633,34 @@ static inline int arch_unmap_one(struct mm_struct *mm, } #endif +/* + * Allow architectures to preserve additional metadata associated with + * swapped-out pages. The corresponding __HAVE_ARCH_SWAP_* macros and function + * prototypes must be defined in the arch-specific asm/pgtable.h file. + */ +#ifndef __HAVE_ARCH_PREPARE_TO_SWAP +static inline int arch_prepare_to_swap(struct page *page) +{ + return 0; +} +#endif + +#ifndef __HAVE_ARCH_SWAP_INVALIDATE +static inline void arch_swap_invalidate_page(int type, pgoff_t offset) +{ +} + +static inline void arch_swap_invalidate_area(int type) +{ +} +#endif + +#ifndef __HAVE_ARCH_SWAP_RESTORE +static inline void arch_swap_restore(swp_entry_t entry, struct page *page) +{ +} +#endif + #ifndef __HAVE_ARCH_PGD_OFFSET_GATE #define pgd_offset_gate(mm, addr) pgd_offset(mm, addr) #endif diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index b7af8cc13eda..50e2df30b0aa 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -29,14 +29,11 @@ unsigned int stack_trace_save_user(unsigned long *store, unsigned int size); * stack_trace_consume_fn - Callback for arch_stack_walk() * @cookie: Caller supplied pointer handed back by arch_stack_walk() * @addr: The stack entry address to consume - * @reliable: True when the stack entry is reliable. Required by - * some printk based consumers. * * Return: True, if the entry was consumed or skipped * False, if there is no space left to store */ -typedef bool (*stack_trace_consume_fn)(void *cookie, unsigned long addr, - bool reliable); +typedef bool (*stack_trace_consume_fn)(void *cookie, unsigned long addr); /** * arch_stack_walk - Architecture specific function to walk the stack * @consume_entry: Callback which is invoked by the architecture code for diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 5fb752034386..67018d367b9f 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -79,6 +79,12 @@ #define IF_HAVE_PG_IDLE(flag,string) #endif +#ifdef CONFIG_64BIT +#define IF_HAVE_PG_ARCH_2(flag,string) ,{1UL << flag, string} +#else +#define IF_HAVE_PG_ARCH_2(flag,string) +#endif + #define __def_pageflag_names \ {1UL << PG_locked, "locked" }, \ {1UL << PG_waiters, "waiters" }, \ @@ -105,7 +111,8 @@ IF_HAVE_PG_MLOCK(PG_mlocked, "mlocked" ) \ IF_HAVE_PG_UNCACHED(PG_uncached, "uncached" ) \ IF_HAVE_PG_HWPOISON(PG_hwpoison, "hwpoison" ) \ IF_HAVE_PG_IDLE(PG_young, "young" ) \ -IF_HAVE_PG_IDLE(PG_idle, "idle" ) +IF_HAVE_PG_IDLE(PG_idle, "idle" ) \ +IF_HAVE_PG_ARCH_2(PG_arch_2, "arch_2" ) #define show_page_flags(flags) \ (flags) ? __print_flags(flags, "|", \ diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h index cb3d6c267181..7aacf9389010 100644 --- a/include/uapi/asm-generic/siginfo.h +++ b/include/uapi/asm-generic/siginfo.h @@ -229,7 +229,9 @@ typedef struct siginfo { #define SEGV_ACCADI 5 /* ADI not enabled for mapped object */ #define SEGV_ADIDERR 6 /* Disrupting MCD error */ #define SEGV_ADIPERR 7 /* Precise MCD exception */ -#define NSIGSEGV 7 +#define SEGV_MTEAERR 8 /* Asynchronous ARM MTE error */ +#define SEGV_MTESERR 9 /* Synchronous ARM MTE exception */ +#define NSIGSEGV 9 /* * SIGBUS si_codes diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 22220945a5fd..30f68b42eeb5 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -425,6 +425,7 @@ typedef struct elf64_shdr { #define NT_ARM_PAC_MASK 0x406 /* ARM pointer authentication code masks */ #define NT_ARM_PACA_KEYS 0x407 /* ARM pointer authentication address keys */ #define NT_ARM_PACG_KEYS 0x408 /* ARM pointer authentication generic key */ +#define NT_ARM_TAGGED_ADDR_CTRL 0x409 /* arm64 tagged address control (prctl()) */ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 07b4f8131e36..7f0827705c9a 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -233,6 +233,15 @@ struct prctl_mm_map { #define PR_SET_TAGGED_ADDR_CTRL 55 #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) +/* MTE tag check fault modes */ +# define PR_MTE_TCF_SHIFT 1 +# define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) +/* MTE tag inclusion mask */ +# define PR_MTE_TAG_SHIFT 3 +# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index 946f44a9e86a..9f8117c7cfdd 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c @@ -78,8 +78,7 @@ struct stacktrace_cookie { unsigned int len; }; -static bool stack_trace_consume_entry(void *cookie, unsigned long addr, - bool reliable) +static bool stack_trace_consume_entry(void *cookie, unsigned long addr) { struct stacktrace_cookie *c = cookie; @@ -94,12 +93,11 @@ static bool stack_trace_consume_entry(void *cookie, unsigned long addr, return c->len < c->size; } -static bool stack_trace_consume_entry_nosched(void *cookie, unsigned long addr, - bool reliable) +static bool stack_trace_consume_entry_nosched(void *cookie, unsigned long addr) { if (in_sched_functions(addr)) return true; - return stack_trace_consume_entry(cookie, addr, reliable); + return stack_trace_consume_entry(cookie, addr); } /** diff --git a/mm/huge_memory.c b/mm/huge_memory.c index da397779a6d4..ec0f0cc49545 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2370,6 +2370,9 @@ static void __split_huge_page_tail(struct page *head, int tail, (1L << PG_workingset) | (1L << PG_locked) | (1L << PG_unevictable) | +#ifdef CONFIG_64BIT + (1L << PG_arch_2) | +#endif (1L << PG_dirty))); /* ->mapping in first tail page is compound_mapcount */ diff --git a/mm/mmap.c b/mm/mmap.c index bdd19f5b994e..f793eb72a060 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1816,6 +1816,15 @@ unsigned long mmap_region(struct file *file, unsigned long addr, vma_set_anonymous(vma); } + /* Allow architectures to sanity-check the vm_flags */ + if (!arch_validate_flags(vma->vm_flags)) { + error = -EINVAL; + if (file) + goto unmap_and_free_vma; + else + goto free_vma; + } + vma_link(mm, vma, prev, rb_link, rb_parent); /* Once vma denies write, undo our temporary denial count */ if (file) { diff --git a/mm/mprotect.c b/mm/mprotect.c index ce8b8a5eacbb..56c02beb6041 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -603,6 +603,12 @@ static int do_mprotect_pkey(unsigned long start, size_t len, goto out; } + /* Allow architectures to sanity-check the new flags */ + if (!arch_validate_flags(newflags)) { + error = -EINVAL; + goto out; + } + error = security_file_mprotect(vma, reqprot, prot); if (error) goto out; diff --git a/mm/page_io.c b/mm/page_io.c index e485a6e8a6cd..4ca28aad0d94 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -252,6 +252,16 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) unlock_page(page); goto out; } + /* + * Arch code may have to preserve more data than just the page + * contents, e.g. memory tags. + */ + ret = arch_prepare_to_swap(page); + if (ret) { + set_page_dirty(page); + unlock_page(page); + goto out; + } if (frontswap_store(page) == 0) { set_page_writeback(page); unlock_page(page); diff --git a/mm/shmem.c b/mm/shmem.c index 8e2b35ba93ad..d42c27e4769f 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1736,6 +1736,12 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, } wait_on_page_writeback(page); + /* + * Some architectures may have to restore extra metadata to the + * physical page after reading from swap. + */ + arch_swap_restore(swap, page); + if (shmem_should_replace_page(page, gfp)) { error = shmem_replace_page(&page, gfp, info, index); if (error) @@ -2269,6 +2275,9 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_flags &= ~(VM_MAYWRITE); } + /* arm64 - allow memory tagging on RAM-based files */ + vma->vm_flags |= VM_MTE_ALLOWED; + file_accessed(file); vma->vm_ops = &shmem_vm_ops; if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && diff --git a/mm/swapfile.c b/mm/swapfile.c index debc94155f74..4951f5339a2f 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -717,6 +717,7 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset, else swap_slot_free_notify = NULL; while (offset <= end) { + arch_swap_invalidate_page(si->type, offset); frontswap_invalidate_page(si->type, offset); if (swap_slot_free_notify) swap_slot_free_notify(si->bdev, offset); @@ -2682,6 +2683,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) frontswap_map = frontswap_map_get(p); spin_unlock(&p->lock); spin_unlock(&swap_lock); + arch_swap_invalidate_area(p->type); frontswap_invalidate_area(p->type); frontswap_map_set(p, NULL); mutex_unlock(&swapon_mutex); diff --git a/mm/util.c b/mm/util.c index 5ef378a2a038..4e21fe7eae27 100644 --- a/mm/util.c +++ b/mm/util.c @@ -957,7 +957,7 @@ out: return res; } -int memcmp_pages(struct page *page1, struct page *page2) +int __weak memcmp_pages(struct page *page1, struct page *page2) { char *addr1, *addr2; int ret; diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index 93b567d23c8b..2c9d012797a7 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -4,7 +4,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),aarch64 arm64)) -ARM64_SUBTARGETS ?= tags signal +ARM64_SUBTARGETS ?= tags signal pauth fp mte else ARM64_SUBTARGETS := endif diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore new file mode 100644 index 000000000000..d66f76d2a650 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/.gitignore @@ -0,0 +1,5 @@ +fpsimd-test +sve-probe-vls +sve-ptrace +sve-test +vlset diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile new file mode 100644 index 000000000000..a57009d3a0dc --- /dev/null +++ b/tools/testing/selftests/arm64/fp/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += -I../../../../../usr/include/ +TEST_GEN_PROGS := sve-ptrace sve-probe-vls +TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress sve-test sve-stress vlset + +all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED) + +fpsimd-test: fpsimd-test.o + $(CC) -nostdlib $^ -o $@ +sve-ptrace: sve-ptrace.o sve-ptrace-asm.o +sve-probe-vls: sve-probe-vls.o +sve-test: sve-test.o + $(CC) -nostdlib $^ -o $@ +vlset: vlset.o + +include ../../lib.mk diff --git a/tools/testing/selftests/arm64/fp/README b/tools/testing/selftests/arm64/fp/README new file mode 100644 index 000000000000..03e3dad865d8 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/README @@ -0,0 +1,100 @@ +This directory contains a mix of tests integrated with kselftest and +standalone stress tests. + +kselftest tests +=============== + +sve-probe-vls - Checks the SVE vector length enumeration interface +sve-ptrace - Checks the SVE ptrace interface + +Running the non-kselftest tests +=============================== + +sve-stress performs an SVE context switch stress test, as described +below. + +(The fpsimd-stress test works the same way; just substitute "fpsimd" for +"sve" in the following commands.) + + +The test runs until killed by the user. + +If no context switch error was detected, you will see output such as +the following: + +$ ./sve-stress +(wait for some time) +^C +Vector length: 512 bits +PID: 1573 +Terminated by signal 15, no error, iterations=9467, signals=1014 +Vector length: 512 bits +PID: 1575 +Terminated by signal 15, no error, iterations=9448, signals=1028 +Vector length: 512 bits +PID: 1577 +Terminated by signal 15, no error, iterations=9436, signals=1039 +Vector length: 512 bits +PID: 1579 +Terminated by signal 15, no error, iterations=9421, signals=1039 +Vector length: 512 bits +PID: 1581 +Terminated by signal 15, no error, iterations=9403, signals=1039 +Vector length: 512 bits +PID: 1583 +Terminated by signal 15, no error, iterations=9385, signals=1036 +Vector length: 512 bits +PID: 1585 +Terminated by signal 15, no error, iterations=9376, signals=1039 +Vector length: 512 bits +PID: 1587 +Terminated by signal 15, no error, iterations=9361, signals=1039 +Vector length: 512 bits +PID: 1589 +Terminated by signal 15, no error, iterations=9350, signals=1039 + + +If an error was detected, details of the mismatch will be printed +instead of "no error". + +Ideally, the test should be allowed to run for many minutes or hours +to maximise test coverage. + + +KVM stress testing +================== + +To try to reproduce the bugs that we have been observing, sve-stress +should be run in parallel in two KVM guests, while simultaneously +running on the host. + +1) Start 2 guests, using the following command for each: + +$ lkvm run --console=virtio -pconsole=hvc0 --sve Image + +(Depending on the hardware GIC implementation, you may also need +--irqchip=gicv3. New kvmtool defaults to that if appropriate, but I +can't remember whether my branch is new enough for that. Try without +the option first.) + +Kvmtool occupies the terminal until you kill it (Ctrl+A x), +or until the guest terminates. It is therefore recommended to run +each instance in separate terminal (use screen or ssh etc.) This +allows multiple guests to be run in parallel while running other +commands on the host. + +Within the guest, the host filesystem is accessible, mounted on /host. + +2) Run the sve-stress on *each* guest with the Vector-Length set to 32: +guest$ ./vlset --inherit 32 ./sve-stress + +3) Run the sve-stress on the host with the maximum Vector-Length: +host$ ./vlset --inherit --max ./sve-stress + + +Again, the test should be allowed to run for many minutes or hours to +maximise test coverage. + +If no error is detected, you will see output from each sve-stress +instance similar to that illustrated above; otherwise details of the +observed mismatches will be printed. diff --git a/tools/testing/selftests/arm64/fp/asm-offsets.h b/tools/testing/selftests/arm64/fp/asm-offsets.h new file mode 100644 index 000000000000..a180851496ec --- /dev/null +++ b/tools/testing/selftests/arm64/fp/asm-offsets.h @@ -0,0 +1,11 @@ +#define sa_sz 32 +#define sa_flags 8 +#define sa_handler 0 +#define sa_mask_sz 8 +#define SIGUSR1 10 +#define SIGTERM 15 +#define SIGINT 2 +#define SIGABRT 6 +#define SA_NODEFER 1073741824 +#define SA_SIGINFO 4 +#define ucontext_regs 184 diff --git a/tools/testing/selftests/arm64/fp/assembler.h b/tools/testing/selftests/arm64/fp/assembler.h new file mode 100644 index 000000000000..8944f2189206 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/assembler.h @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2019 ARM Limited. +// Original author: Dave Martin <Dave.Martin@arm.com> + +#ifndef ASSEMBLER_H +#define ASSEMBLER_H + +.macro __for from:req, to:req + .if (\from) == (\to) + _for__body %\from + .else + __for \from, %(\from) + ((\to) - (\from)) / 2 + __for %(\from) + ((\to) - (\from)) / 2 + 1, \to + .endif +.endm + +.macro _for var:req, from:req, to:req, insn:vararg + .macro _for__body \var:req + .noaltmacro + \insn + .altmacro + .endm + + .altmacro + __for \from, \to + .noaltmacro + + .purgem _for__body +.endm + +.macro function name + .macro endfunction + .type \name, @function + .purgem endfunction + .endm +\name: +.endm + +.macro define_accessor name, num, insn + .macro \name\()_entry n + \insn \n, 1 + ret + .endm + +function \name + adr x2, .L__accessor_tbl\@ + add x2, x2, x0, lsl #3 + br x2 + +.L__accessor_tbl\@: + _for x, 0, (\num) - 1, \name\()_entry \x +endfunction + + .purgem \name\()_entry +.endm + +#endif /* ! ASSEMBLER_H */ diff --git a/tools/testing/selftests/arm64/fp/fpsimd-stress b/tools/testing/selftests/arm64/fp/fpsimd-stress new file mode 100755 index 000000000000..781b5b022eaf --- /dev/null +++ b/tools/testing/selftests/arm64/fp/fpsimd-stress @@ -0,0 +1,60 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2015-2019 ARM Limited. +# Original author: Dave Martin <Dave.Martin@arm.com> + +set -ue + +NR_CPUS=`nproc` + +pids= +logs= + +cleanup () { + trap - INT TERM CHLD + set +e + + if [ -n "$pids" ]; then + kill $pids + wait $pids + pids= + fi + + if [ -n "$logs" ]; then + cat $logs + rm $logs + logs= + fi +} + +interrupt () { + cleanup + exit 0 +} + +child_died () { + cleanup + exit 1 +} + +trap interrupt INT TERM EXIT +trap child_died CHLD + +for x in `seq 0 $((NR_CPUS * 4))`; do + log=`mktemp` + logs=$logs\ $log + ./fpsimd-test >$log & + pids=$pids\ $! +done + +# Wait for all child processes to be created: +sleep 10 + +while :; do + kill -USR1 $pids +done & +pids=$pids\ $! + +wait + +exit 1 diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S new file mode 100644 index 000000000000..1c5556bdd11d --- /dev/null +++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S @@ -0,0 +1,482 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2019 ARM Limited. +// Original author: Dave Martin <Dave.Martin@arm.com> +// +// Simple FPSIMD context switch test +// Repeatedly writes unique test patterns into each FPSIMD register +// and reads them back to verify integrity. +// +// for x in `seq 1 NR_CPUS`; do fpsimd-test & pids=$pids\ $! ; done +// (leave it running for as long as you want...) +// kill $pids + +#include <asm/unistd.h> +#include "assembler.h" +#include "asm-offsets.h" + +#define NVR 32 +#define MAXVL_B (128 / 8) + +.macro _vldr Vn:req, Xt:req + ld1 {v\Vn\().2d}, [x\Xt] +.endm + +.macro _vstr Vn:req, Xt:req + st1 {v\Vn\().2d}, [x\Xt] +.endm + +// Generate accessor functions to read/write programmatically selected +// FPSIMD registers. +// x0 is the register index to access +// x1 is the memory address to read from (getv,setp) or store to (setv,setp) +// All clobber x0-x2 +define_accessor setv, NVR, _vldr +define_accessor getv, NVR, _vstr + +// Print a single character x0 to stdout +// Clobbers x0-x2,x8 +function putc + str x0, [sp, #-16]! + + mov x0, #1 // STDOUT_FILENO + mov x1, sp + mov x2, #1 + mov x8, #__NR_write + svc #0 + + add sp, sp, #16 + ret +endfunction + +// Print a NUL-terminated string starting at address x0 to stdout +// Clobbers x0-x3,x8 +function puts + mov x1, x0 + + mov x2, #0 +0: ldrb w3, [x0], #1 + cbz w3, 1f + add x2, x2, #1 + b 0b + +1: mov w0, #1 // STDOUT_FILENO + mov x8, #__NR_write + svc #0 + + ret +endfunction + +// Utility macro to print a literal string +// Clobbers x0-x4,x8 +.macro puts string + .pushsection .rodata.str1.1, "aMS", 1 +.L__puts_literal\@: .string "\string" + .popsection + + ldr x0, =.L__puts_literal\@ + bl puts +.endm + +// Print an unsigned decimal number x0 to stdout +// Clobbers x0-x4,x8 +function putdec + mov x1, sp + str x30, [sp, #-32]! // Result can't be > 20 digits + + mov x2, #0 + strb w2, [x1, #-1]! // Write the NUL terminator + + mov x2, #10 +0: udiv x3, x0, x2 // div-mod loop to generate the digits + msub x0, x3, x2, x0 + add w0, w0, #'0' + strb w0, [x1, #-1]! + mov x0, x3 + cbnz x3, 0b + + ldrb w0, [x1] + cbnz w0, 1f + mov w0, #'0' // Print "0" for 0, not "" + strb w0, [x1, #-1]! + +1: mov x0, x1 + bl puts + + ldr x30, [sp], #32 + ret +endfunction + +// Print an unsigned decimal number x0 to stdout, followed by a newline +// Clobbers x0-x5,x8 +function putdecn + mov x5, x30 + + bl putdec + mov x0, #'\n' + bl putc + + ret x5 +endfunction + + +// Clobbers x0-x3,x8 +function puthexb + str x30, [sp, #-0x10]! + + mov w3, w0 + lsr w0, w0, #4 + bl puthexnibble + mov w0, w3 + + ldr x30, [sp], #0x10 + // fall through to puthexnibble +endfunction +// Clobbers x0-x2,x8 +function puthexnibble + and w0, w0, #0xf + cmp w0, #10 + blo 1f + add w0, w0, #'a' - ('9' + 1) +1: add w0, w0, #'0' + b putc +endfunction + +// x0=data in, x1=size in, clobbers x0-x5,x8 +function dumphex + str x30, [sp, #-0x10]! + + mov x4, x0 + mov x5, x1 + +0: subs x5, x5, #1 + b.lo 1f + ldrb w0, [x4], #1 + bl puthexb + b 0b + +1: ldr x30, [sp], #0x10 + ret +endfunction + +// Declare some storate space to shadow the SVE register contents: +.pushsection .text +.data +.align 4 +vref: + .space MAXVL_B * NVR +scratch: + .space MAXVL_B +.popsection + +// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0. +// Clobbers x0-x3 +function memcpy + cmp x2, #0 + b.eq 1f +0: ldrb w3, [x1], #1 + strb w3, [x0], #1 + subs x2, x2, #1 + b.ne 0b +1: ret +endfunction + +// Generate a test pattern for storage in SVE registers +// x0: pid (16 bits) +// x1: register number (6 bits) +// x2: generation (4 bits) +function pattern + orr w1, w0, w1, lsl #16 + orr w2, w1, w2, lsl #28 + + ldr x0, =scratch + mov w1, #MAXVL_B / 4 + +0: str w2, [x0], #4 + add w2, w2, #(1 << 22) + subs w1, w1, #1 + bne 0b + + ret +endfunction + +// Get the address of shadow data for FPSIMD V-register V<xn> +.macro _adrv xd, xn, nrtmp + ldr \xd, =vref + mov x\nrtmp, #16 + madd \xd, x\nrtmp, \xn, \xd +.endm + +// Set up test pattern in a FPSIMD V-register +// x0: pid +// x1: register number +// x2: generation +function setup_vreg + mov x4, x30 + + mov x6, x1 + bl pattern + _adrv x0, x6, 2 + mov x5, x0 + ldr x1, =scratch + bl memcpy + + mov x0, x6 + mov x1, x5 + bl setv + + ret x4 +endfunction + +// Fill x1 bytes starting at x0 with 0xae (for canary purposes) +// Clobbers x1, x2. +function memfill_ae + mov w2, #0xae + b memfill +endfunction + +// Fill x1 bytes starting at x0 with 0. +// Clobbers x1, x2. +function memclr + mov w2, #0 +endfunction + // fall through to memfill + +// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2 +// Clobbers x1 +function memfill + cmp x1, #0 + b.eq 1f + +0: strb w2, [x0], #1 + subs x1, x1, #1 + b.ne 0b + +1: ret +endfunction + +// Trivial memory compare: compare x2 bytes starting at address x0 with +// bytes starting at address x1. +// Returns only if all bytes match; otherwise, the program is aborted. +// Clobbers x0-x5. +function memcmp + cbz x2, 1f + + mov x5, #0 +0: ldrb w3, [x0, x5] + ldrb w4, [x1, x5] + add x5, x5, #1 + cmp w3, w4 + b.ne barf + subs x2, x2, #1 + b.ne 0b + +1: ret +endfunction + +// Verify that a FPSIMD V-register matches its shadow in memory, else abort +// x0: reg number +// Clobbers x0-x5. +function check_vreg + mov x3, x30 + + _adrv x5, x0, 6 + mov x4, x0 + ldr x7, =scratch + + mov x0, x7 + mov x1, x6 + bl memfill_ae + + mov x0, x4 + mov x1, x7 + bl getv + + mov x0, x5 + mov x1, x7 + mov x2, x6 + mov x30, x3 + b memcmp +endfunction + +// Any SVE register modified here can cause corruption in the main +// thread -- but *only* the registers modified here. +function irritator_handler + // Increment the irritation signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + // Corrupt some random V-regs + adr x0, .text + (irritator_handler - .text) / 16 * 16 + movi v0.8b, #7 + movi v9.16b, #9 + movi v31.8b, #31 + + ret +endfunction + +function terminate_handler + mov w21, w0 + mov x20, x2 + + puts "Terminated by signal " + mov w0, w21 + bl putdec + puts ", no error, iterations=" + ldr x0, [x20, #ucontext_regs + 8 * 22] + bl putdec + puts ", signals=" + ldr x0, [x20, #ucontext_regs + 8 * 23] + bl putdecn + + mov x0, #0 + mov x8, #__NR_exit + svc #0 +endfunction + +// w0: signal number +// x1: sa_action +// w2: sa_flags +// Clobbers x0-x6,x8 +function setsignal + str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]! + + mov w4, w0 + mov x5, x1 + mov w6, w2 + + add x0, sp, #16 + mov x1, #sa_sz + bl memclr + + mov w0, w4 + add x1, sp, #16 + str w6, [x1, #sa_flags] + str x5, [x1, #sa_handler] + mov x2, #0 + mov x3, #sa_mask_sz + mov x8, #__NR_rt_sigaction + svc #0 + + cbz w0, 1f + + puts "sigaction failure\n" + b .Labort + +1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16) + ret +endfunction + +// Main program entry point +.globl _start +function _start +_start: + // Sanity-check and report the vector length + + mov x19, #128 + cmp x19, #128 + b.lo 1f + cmp x19, #2048 + b.hi 1f + tst x19, #(8 - 1) + b.eq 2f + +1: puts "Bad vector length: " + mov x0, x19 + bl putdecn + b .Labort + +2: puts "Vector length:\t" + mov x0, x19 + bl putdec + puts " bits\n" + + // Obtain our PID, to ensure test pattern uniqueness between processes + + mov x8, #__NR_getpid + svc #0 + mov x20, x0 + + puts "PID:\t" + mov x0, x20 + bl putdecn + + mov x23, #0 // Irritation signal count + + mov w0, #SIGINT + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGTERM + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGUSR1 + adr x1, irritator_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + mov x22, #0 // generation number, increments per iteration +.Ltest_loop: + + mov x21, #0 // Set up V-regs & shadow with test pattern +0: mov x0, x20 + mov x1, x21 + and x2, x22, #0xf + bl setup_vreg + add x21, x21, #1 + cmp x21, #NVR + b.lo 0b + +// Can't do this when SVE state is volatile across SVC: + mov x8, #__NR_sched_yield // Encourage preemption + svc #0 + + mov x21, #0 +0: mov x0, x21 + bl check_vreg + add x21, x21, #1 + cmp x21, #NVR + b.lo 0b + + add x22, x22, #1 + b .Ltest_loop + +.Labort: + mov x0, #0 + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +endfunction + +function barf + mov x10, x0 // expected data + mov x11, x1 // actual data + mov x12, x2 // data size + + puts "Mistatch: PID=" + mov x0, x20 + bl putdec + puts ", iteration=" + mov x0, x22 + bl putdec + puts ", reg=" + mov x0, x21 + bl putdecn + puts "\tExpected [" + mov x0, x10 + mov x1, x12 + bl dumphex + puts "]\n\tGot [" + mov x0, x11 + mov x1, x12 + bl dumphex + puts "]\n" + + mov x8, #__NR_exit + mov x1, #1 + svc #0 +endfunction diff --git a/tools/testing/selftests/arm64/fp/sve-probe-vls.c b/tools/testing/selftests/arm64/fp/sve-probe-vls.c new file mode 100644 index 000000000000..b29cbc642c57 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-probe-vls.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015-2020 ARM Limited. + * Original author: Dave Martin <Dave.Martin@arm.com> + */ +#include <assert.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/auxv.h> +#include <sys/prctl.h> +#include <asm/sigcontext.h> + +#include "../../kselftest.h" + +int main(int argc, char **argv) +{ + unsigned int vq; + int vl; + static unsigned int vqs[SVE_VQ_MAX]; + unsigned int nvqs = 0; + + ksft_print_header(); + ksft_set_plan(2); + + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) + ksft_exit_skip("SVE not available"); + + /* + * Enumerate up to SVE_VQ_MAX vector lengths + */ + for (vq = SVE_VQ_MAX; vq > 0; --vq) { + vl = prctl(PR_SVE_SET_VL, vq * 16); + if (vl == -1) + ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n", + strerror(errno), errno); + + vl &= PR_SVE_VL_LEN_MASK; + + if (!sve_vl_valid(vl)) + ksft_exit_fail_msg("VL %d invalid\n", vl); + vq = sve_vq_from_vl(vl); + + if (!(nvqs < SVE_VQ_MAX)) + ksft_exit_fail_msg("Too many VLs %u >= SVE_VQ_MAX\n", + nvqs); + vqs[nvqs++] = vq; + } + ksft_test_result_pass("Enumerated %d vector lengths\n", nvqs); + ksft_test_result_pass("All vector lengths valid\n"); + + /* Print out the vector lengths in ascending order: */ + while (nvqs--) + ksft_print_msg("%u\n", 16 * vqs[nvqs]); + + ksft_exit_pass(); +} diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S new file mode 100644 index 000000000000..3e81f9fab574 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2019 ARM Limited. +// Original author: Dave Martin <Dave.Martin@arm.com> +#include <asm/unistd.h> + +.arch_extension sve + +.globl sve_store_patterns + +sve_store_patterns: + mov x1, x0 + + index z0.b, #0, #1 + str q0, [x1] + + mov w8, #__NR_getpid + svc #0 + str q0, [x1, #0x10] + + mov z1.d, z0.d + str q0, [x1, #0x20] + + mov w8, #__NR_getpid + svc #0 + str q0, [x1, #0x30] + + mov z1.d, z0.d + str q0, [x1, #0x40] + + ret + +.size sve_store_patterns, . - sve_store_patterns +.type sve_store_patterns, @function diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c new file mode 100644 index 000000000000..b2282be6f938 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015-2020 ARM Limited. + * Original author: Dave Martin <Dave.Martin@arm.com> + */ +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/auxv.h> +#include <sys/ptrace.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/wait.h> +#include <asm/sigcontext.h> +#include <asm/ptrace.h> + +#include "../../kselftest.h" + +/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */ +#ifndef NT_ARM_SVE +#define NT_ARM_SVE 0x405 +#endif + +/* Number of registers filled in by sve_store_patterns */ +#define NR_VREGS 5 + +void sve_store_patterns(__uint128_t v[NR_VREGS]); + +static void dump(const void *buf, size_t size) +{ + size_t i; + const unsigned char *p = buf; + + for (i = 0; i < size; ++i) + printf(" %.2x", *p++); +} + +static int check_vregs(const __uint128_t vregs[NR_VREGS]) +{ + int i; + int ok = 1; + + for (i = 0; i < NR_VREGS; ++i) { + printf("# v[%d]:", i); + dump(&vregs[i], sizeof vregs[i]); + putchar('\n'); + + if (vregs[i] != vregs[0]) + ok = 0; + } + + return ok; +} + +static int do_child(void) +{ + if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) + ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno)); + + if (raise(SIGSTOP)) + ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno)); + + return EXIT_SUCCESS; +} + +static struct user_sve_header *get_sve(pid_t pid, void **buf, size_t *size) +{ + struct user_sve_header *sve; + void *p; + size_t sz = sizeof *sve; + struct iovec iov; + + while (1) { + if (*size < sz) { + p = realloc(*buf, sz); + if (!p) { + errno = ENOMEM; + goto error; + } + + *buf = p; + *size = sz; + } + + iov.iov_base = *buf; + iov.iov_len = sz; + if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov)) + goto error; + + sve = *buf; + if (sve->size <= sz) + break; + + sz = sve->size; + } + + return sve; + +error: + return NULL; +} + +static int set_sve(pid_t pid, const struct user_sve_header *sve) +{ + struct iovec iov; + + iov.iov_base = (void *)sve; + iov.iov_len = sve->size; + return ptrace(PTRACE_SETREGSET, pid, NT_ARM_SVE, &iov); +} + +static void dump_sve_regs(const struct user_sve_header *sve, unsigned int num, + unsigned int vlmax) +{ + unsigned int vq; + unsigned int i; + + if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE) + ksft_exit_fail_msg("Dumping non-SVE register\n"); + + if (vlmax > sve->vl) + vlmax = sve->vl; + + vq = sve_vq_from_vl(sve->vl); + for (i = 0; i < num; ++i) { + printf("# z%u:", i); + dump((const char *)sve + SVE_PT_SVE_ZREG_OFFSET(vq, i), + vlmax); + printf("%s\n", vlmax == sve->vl ? "" : " ..."); + } +} + +static int do_parent(pid_t child) +{ + int ret = EXIT_FAILURE; + pid_t pid; + int status; + siginfo_t si; + void *svebuf = NULL, *newsvebuf; + size_t svebufsz = 0, newsvebufsz; + struct user_sve_header *sve, *new_sve; + struct user_fpsimd_state *fpsimd; + unsigned int i, j; + unsigned char *p; + unsigned int vq; + + /* Attach to the child */ + while (1) { + int sig; + + pid = wait(&status); + if (pid == -1) { + perror("wait"); + goto error; + } + + /* + * This should never happen but it's hard to flag in + * the framework. + */ + if (pid != child) + continue; + + if (WIFEXITED(status) || WIFSIGNALED(status)) + ksft_exit_fail_msg("Child died unexpectedly\n"); + + ksft_test_result(WIFSTOPPED(status), "WIFSTOPPED(%d)\n", + status); + if (!WIFSTOPPED(status)) + goto error; + + sig = WSTOPSIG(status); + + if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) { + if (errno == ESRCH) + goto disappeared; + + if (errno == EINVAL) { + sig = 0; /* bust group-stop */ + goto cont; + } + + ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n", + strerror(errno)); + goto error; + } + + if (sig == SIGSTOP && si.si_code == SI_TKILL && + si.si_pid == pid) + break; + + cont: + if (ptrace(PTRACE_CONT, pid, NULL, sig)) { + if (errno == ESRCH) + goto disappeared; + + ksft_test_result_fail("PTRACE_CONT: %s\n", + strerror(errno)); + goto error; + } + } + + sve = get_sve(pid, &svebuf, &svebufsz); + if (!sve) { + int e = errno; + + ksft_test_result_fail("get_sve: %s\n", strerror(errno)); + if (e == ESRCH) + goto disappeared; + + goto error; + } else { + ksft_test_result_pass("get_sve\n"); + } + + ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD, + "FPSIMD registers\n"); + if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD) + goto error; + + fpsimd = (struct user_fpsimd_state *)((char *)sve + + SVE_PT_FPSIMD_OFFSET); + for (i = 0; i < 32; ++i) { + p = (unsigned char *)&fpsimd->vregs[i]; + + for (j = 0; j < sizeof fpsimd->vregs[i]; ++j) + p[j] = j; + } + + if (set_sve(pid, sve)) { + int e = errno; + + ksft_test_result_fail("set_sve(FPSIMD): %s\n", + strerror(errno)); + if (e == ESRCH) + goto disappeared; + + goto error; + } + + vq = sve_vq_from_vl(sve->vl); + + newsvebufsz = SVE_PT_SVE_ZREG_OFFSET(vq, 1); + new_sve = newsvebuf = malloc(newsvebufsz); + if (!new_sve) { + errno = ENOMEM; + perror(NULL); + goto error; + } + + *new_sve = *sve; + new_sve->flags &= ~SVE_PT_REGS_MASK; + new_sve->flags |= SVE_PT_REGS_SVE; + memset((char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 0), + 0, SVE_PT_SVE_ZREG_SIZE(vq)); + new_sve->size = SVE_PT_SVE_ZREG_OFFSET(vq, 1); + if (set_sve(pid, new_sve)) { + int e = errno; + + ksft_test_result_fail("set_sve(ZREG): %s\n", strerror(errno)); + if (e == ESRCH) + goto disappeared; + + goto error; + } + + new_sve = get_sve(pid, &newsvebuf, &newsvebufsz); + if (!new_sve) { + int e = errno; + + ksft_test_result_fail("get_sve(ZREG): %s\n", strerror(errno)); + if (e == ESRCH) + goto disappeared; + + goto error; + } + + ksft_test_result((new_sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE, + "SVE registers\n"); + if ((new_sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE) + goto error; + + dump_sve_regs(new_sve, 3, sizeof fpsimd->vregs[0]); + + p = (unsigned char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 1); + for (i = 0; i < sizeof fpsimd->vregs[0]; ++i) { + unsigned char expected = i; + + if (__BYTE_ORDER == __BIG_ENDIAN) + expected = sizeof fpsimd->vregs[0] - 1 - expected; + + ksft_test_result(p[i] == expected, "p[%d] == expected\n", i); + if (p[i] != expected) + goto error; + } + + ret = EXIT_SUCCESS; + +error: + kill(child, SIGKILL); + +disappeared: + return ret; +} + +int main(void) +{ + int ret = EXIT_SUCCESS; + __uint128_t v[NR_VREGS]; + pid_t child; + + ksft_print_header(); + ksft_set_plan(20); + + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) + ksft_exit_skip("SVE not available\n"); + + sve_store_patterns(v); + + if (!check_vregs(v)) + ksft_exit_fail_msg("Initial check_vregs() failed\n"); + + child = fork(); + if (!child) + return do_child(); + + if (do_parent(child)) + ret = EXIT_FAILURE; + + ksft_print_cnts(); + + return 0; +} diff --git a/tools/testing/selftests/arm64/fp/sve-stress b/tools/testing/selftests/arm64/fp/sve-stress new file mode 100755 index 000000000000..24dd0922cc02 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-stress @@ -0,0 +1,59 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2015-2019 ARM Limited. +# Original author: Dave Martin <Dave.Martin@arm.com> + +set -ue + +NR_CPUS=`nproc` + +pids= +logs= + +cleanup () { + trap - INT TERM CHLD + set +e + + if [ -n "$pids" ]; then + kill $pids + wait $pids + pids= + fi + + if [ -n "$logs" ]; then + cat $logs + rm $logs + logs= + fi +} + +interrupt () { + cleanup + exit 0 +} + +child_died () { + cleanup + exit 1 +} + +trap interrupt INT TERM EXIT + +for x in `seq 0 $((NR_CPUS * 4))`; do + log=`mktemp` + logs=$logs\ $log + ./sve-test >$log & + pids=$pids\ $! +done + +# Wait for all child processes to be created: +sleep 10 + +while :; do + kill -USR1 $pids +done & +pids=$pids\ $! + +wait + +exit 1 diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S new file mode 100644 index 000000000000..f95074c9b48b --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -0,0 +1,672 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2019 ARM Limited. +// Original author: Dave Martin <Dave.Martin@arm.com> +// +// Simple Scalable Vector Extension context switch test +// Repeatedly writes unique test patterns into each SVE register +// and reads them back to verify integrity. +// +// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done +// (leave it running for as long as you want...) +// kill $pids + +#include <asm/unistd.h> +#include "assembler.h" +#include "asm-offsets.h" + +#define NZR 32 +#define NPR 16 +#define MAXVL_B (2048 / 8) + +.arch_extension sve + +.macro _sve_ldr_v zt, xn + ldr z\zt, [x\xn] +.endm + +.macro _sve_str_v zt, xn + str z\zt, [x\xn] +.endm + +.macro _sve_ldr_p pt, xn + ldr p\pt, [x\xn] +.endm + +.macro _sve_str_p pt, xn + str p\pt, [x\xn] +.endm + +// Generate accessor functions to read/write programmatically selected +// SVE registers. +// x0 is the register index to access +// x1 is the memory address to read from (getz,setp) or store to (setz,setp) +// All clobber x0-x2 +define_accessor setz, NZR, _sve_ldr_v +define_accessor getz, NZR, _sve_str_v +define_accessor setp, NPR, _sve_ldr_p +define_accessor getp, NPR, _sve_str_p + +// Print a single character x0 to stdout +// Clobbers x0-x2,x8 +function putc + str x0, [sp, #-16]! + + mov x0, #1 // STDOUT_FILENO + mov x1, sp + mov x2, #1 + mov x8, #__NR_write + svc #0 + + add sp, sp, #16 + ret +endfunction + +// Print a NUL-terminated string starting at address x0 to stdout +// Clobbers x0-x3,x8 +function puts + mov x1, x0 + + mov x2, #0 +0: ldrb w3, [x0], #1 + cbz w3, 1f + add x2, x2, #1 + b 0b + +1: mov w0, #1 // STDOUT_FILENO + mov x8, #__NR_write + svc #0 + + ret +endfunction + +// Utility macro to print a literal string +// Clobbers x0-x4,x8 +.macro puts string + .pushsection .rodata.str1.1, "aMS", 1 +.L__puts_literal\@: .string "\string" + .popsection + + ldr x0, =.L__puts_literal\@ + bl puts +.endm + +// Print an unsigned decimal number x0 to stdout +// Clobbers x0-x4,x8 +function putdec + mov x1, sp + str x30, [sp, #-32]! // Result can't be > 20 digits + + mov x2, #0 + strb w2, [x1, #-1]! // Write the NUL terminator + + mov x2, #10 +0: udiv x3, x0, x2 // div-mod loop to generate the digits + msub x0, x3, x2, x0 + add w0, w0, #'0' + strb w0, [x1, #-1]! + mov x0, x3 + cbnz x3, 0b + + ldrb w0, [x1] + cbnz w0, 1f + mov w0, #'0' // Print "0" for 0, not "" + strb w0, [x1, #-1]! + +1: mov x0, x1 + bl puts + + ldr x30, [sp], #32 + ret +endfunction + +// Print an unsigned decimal number x0 to stdout, followed by a newline +// Clobbers x0-x5,x8 +function putdecn + mov x5, x30 + + bl putdec + mov x0, #'\n' + bl putc + + ret x5 +endfunction + +// Clobbers x0-x3,x8 +function puthexb + str x30, [sp, #-0x10]! + + mov w3, w0 + lsr w0, w0, #4 + bl puthexnibble + mov w0, w3 + + ldr x30, [sp], #0x10 + // fall through to puthexnibble +endfunction +// Clobbers x0-x2,x8 +function puthexnibble + and w0, w0, #0xf + cmp w0, #10 + blo 1f + add w0, w0, #'a' - ('9' + 1) +1: add w0, w0, #'0' + b putc +endfunction + +// x0=data in, x1=size in, clobbers x0-x5,x8 +function dumphex + str x30, [sp, #-0x10]! + + mov x4, x0 + mov x5, x1 + +0: subs x5, x5, #1 + b.lo 1f + ldrb w0, [x4], #1 + bl puthexb + b 0b + +1: ldr x30, [sp], #0x10 + ret +endfunction + +// Declare some storate space to shadow the SVE register contents: +.pushsection .text +.data +.align 4 +zref: + .space MAXVL_B * NZR +pref: + .space MAXVL_B / 8 * NPR +ffrref: + .space MAXVL_B / 8 +scratch: + .space MAXVL_B +.popsection + +// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0. +// Clobbers x0-x3 +function memcpy + cmp x2, #0 + b.eq 1f +0: ldrb w3, [x1], #1 + strb w3, [x0], #1 + subs x2, x2, #1 + b.ne 0b +1: ret +endfunction + +// Generate a test pattern for storage in SVE registers +// x0: pid (16 bits) +// x1: register number (6 bits) +// x2: generation (4 bits) + +// These values are used to constuct a 32-bit pattern that is repeated in the +// scratch buffer as many times as will fit: +// bits 31:28 generation number (increments once per test_loop) +// bits 27:22 32-bit lane index +// bits 21:16 register number +// bits 15: 0 pid + +function pattern + orr w1, w0, w1, lsl #16 + orr w2, w1, w2, lsl #28 + + ldr x0, =scratch + mov w1, #MAXVL_B / 4 + +0: str w2, [x0], #4 + add w2, w2, #(1 << 22) + subs w1, w1, #1 + bne 0b + + ret +endfunction + +// Get the address of shadow data for SVE Z-register Z<xn> +.macro _adrz xd, xn, nrtmp + ldr \xd, =zref + rdvl x\nrtmp, #1 + madd \xd, x\nrtmp, \xn, \xd +.endm + +// Get the address of shadow data for SVE P-register P<xn - NZR> +.macro _adrp xd, xn, nrtmp + ldr \xd, =pref + rdvl x\nrtmp, #1 + lsr x\nrtmp, x\nrtmp, #3 + sub \xn, \xn, #NZR + madd \xd, x\nrtmp, \xn, \xd +.endm + +// Set up test pattern in a SVE Z-register +// x0: pid +// x1: register number +// x2: generation +function setup_zreg + mov x4, x30 + + mov x6, x1 + bl pattern + _adrz x0, x6, 2 + mov x5, x0 + ldr x1, =scratch + bl memcpy + + mov x0, x6 + mov x1, x5 + bl setz + + ret x4 +endfunction + +// Set up test pattern in a SVE P-register +// x0: pid +// x1: register number +// x2: generation +function setup_preg + mov x4, x30 + + mov x6, x1 + bl pattern + _adrp x0, x6, 2 + mov x5, x0 + ldr x1, =scratch + bl memcpy + + mov x0, x6 + mov x1, x5 + bl setp + + ret x4 +endfunction + +// Set up test pattern in the FFR +// x0: pid +// x2: generation +// Beware: corrupts P0. +function setup_ffr + mov x4, x30 + + bl pattern + ldr x0, =ffrref + ldr x1, =scratch + rdvl x2, #1 + lsr x2, x2, #3 + bl memcpy + + mov x0, #0 + ldr x1, =ffrref + bl setp + + wrffr p0.b + + ret x4 +endfunction + +// Fill x1 bytes starting at x0 with 0xae (for canary purposes) +// Clobbers x1, x2. +function memfill_ae + mov w2, #0xae + b memfill +endfunction + +// Fill x1 bytes starting at x0 with 0. +// Clobbers x1, x2. +function memclr + mov w2, #0 +endfunction + // fall through to memfill + +// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2 +// Clobbers x1 +function memfill + cmp x1, #0 + b.eq 1f + +0: strb w2, [x0], #1 + subs x1, x1, #1 + b.ne 0b + +1: ret +endfunction + +// Trivial memory compare: compare x2 bytes starting at address x0 with +// bytes starting at address x1. +// Returns only if all bytes match; otherwise, the program is aborted. +// Clobbers x0-x5. +function memcmp + cbz x2, 2f + + stp x0, x1, [sp, #-0x20]! + str x2, [sp, #0x10] + + mov x5, #0 +0: ldrb w3, [x0, x5] + ldrb w4, [x1, x5] + add x5, x5, #1 + cmp w3, w4 + b.ne 1f + subs x2, x2, #1 + b.ne 0b + +1: ldr x2, [sp, #0x10] + ldp x0, x1, [sp], #0x20 + b.ne barf + +2: ret +endfunction + +// Verify that a SVE Z-register matches its shadow in memory, else abort +// x0: reg number +// Clobbers x0-x7. +function check_zreg + mov x3, x30 + + _adrz x5, x0, 6 + mov x4, x0 + ldr x7, =scratch + + mov x0, x7 + mov x1, x6 + bl memfill_ae + + mov x0, x4 + mov x1, x7 + bl getz + + mov x0, x5 + mov x1, x7 + mov x2, x6 + mov x30, x3 + b memcmp +endfunction + +// Verify that a SVE P-register matches its shadow in memory, else abort +// x0: reg number +// Clobbers x0-x7. +function check_preg + mov x3, x30 + + _adrp x5, x0, 6 + mov x4, x0 + ldr x7, =scratch + + mov x0, x7 + mov x1, x6 + bl memfill_ae + + mov x0, x4 + mov x1, x7 + bl getp + + mov x0, x5 + mov x1, x7 + mov x2, x6 + mov x30, x3 + b memcmp +endfunction + +// Verify that the FFR matches its shadow in memory, else abort +// Beware -- corrupts P0. +// Clobbers x0-x5. +function check_ffr + mov x3, x30 + + ldr x4, =scratch + rdvl x5, #1 + lsr x5, x5, #3 + + mov x0, x4 + mov x1, x5 + bl memfill_ae + + rdffr p0.b + mov x0, #0 + mov x1, x4 + bl getp + + ldr x0, =ffrref + mov x1, x4 + mov x2, x5 + mov x30, x3 + b memcmp +endfunction + +// Any SVE register modified here can cause corruption in the main +// thread -- but *only* the registers modified here. +function irritator_handler + // Increment the irritation signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + // Corrupt some random Z-regs + adr x0, .text + (irritator_handler - .text) / 16 * 16 + movi v0.8b, #1 + movi v9.16b, #2 + movi v31.8b, #3 + // And P0 + rdffr p0.b + // And FFR + wrffr p15.b + + ret +endfunction + +function terminate_handler + mov w21, w0 + mov x20, x2 + + puts "Terminated by signal " + mov w0, w21 + bl putdec + puts ", no error, iterations=" + ldr x0, [x20, #ucontext_regs + 8 * 22] + bl putdec + puts ", signals=" + ldr x0, [x20, #ucontext_regs + 8 * 23] + bl putdecn + + mov x0, #0 + mov x8, #__NR_exit + svc #0 +endfunction + +// w0: signal number +// x1: sa_action +// w2: sa_flags +// Clobbers x0-x6,x8 +function setsignal + str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]! + + mov w4, w0 + mov x5, x1 + mov w6, w2 + + add x0, sp, #16 + mov x1, #sa_sz + bl memclr + + mov w0, w4 + add x1, sp, #16 + str w6, [x1, #sa_flags] + str x5, [x1, #sa_handler] + mov x2, #0 + mov x3, #sa_mask_sz + mov x8, #__NR_rt_sigaction + svc #0 + + cbz w0, 1f + + puts "sigaction failure\n" + b .Labort + +1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16) + ret +endfunction + +// Main program entry point +.globl _start +function _start +_start: + // Sanity-check and report the vector length + + rdvl x19, #8 + cmp x19, #128 + b.lo 1f + cmp x19, #2048 + b.hi 1f + tst x19, #(8 - 1) + b.eq 2f + +1: puts "Bad vector length: " + mov x0, x19 + bl putdecn + b .Labort + +2: puts "Vector length:\t" + mov x0, x19 + bl putdec + puts " bits\n" + + // Obtain our PID, to ensure test pattern uniqueness between processes + + mov x8, #__NR_getpid + svc #0 + mov x20, x0 + + puts "PID:\t" + mov x0, x20 + bl putdecn + + mov x23, #0 // Irritation signal count + + mov w0, #SIGINT + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGTERM + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGUSR1 + adr x1, irritator_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + mov x22, #0 // generation number, increments per iteration +.Ltest_loop: + rdvl x0, #8 + cmp x0, x19 + b.ne vl_barf + + mov x21, #0 // Set up Z-regs & shadow with test pattern +0: mov x0, x20 + mov x1, x21 + and x2, x22, #0xf + bl setup_zreg + add x21, x21, #1 + cmp x21, #NZR + b.lo 0b + + mov x0, x20 // Set up FFR & shadow with test pattern + mov x1, #NZR + NPR + and x2, x22, #0xf + bl setup_ffr + +0: mov x0, x20 // Set up P-regs & shadow with test pattern + mov x1, x21 + and x2, x22, #0xf + bl setup_preg + add x21, x21, #1 + cmp x21, #NZR + NPR + b.lo 0b + +// Can't do this when SVE state is volatile across SVC: +// mov x8, #__NR_sched_yield // Encourage preemption +// svc #0 + + mov x21, #0 +0: mov x0, x21 + bl check_zreg + add x21, x21, #1 + cmp x21, #NZR + b.lo 0b + +0: mov x0, x21 + bl check_preg + add x21, x21, #1 + cmp x21, #NZR + NPR + b.lo 0b + + bl check_ffr + + add x22, x22, #1 + b .Ltest_loop + +.Labort: + mov x0, #0 + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +endfunction + +function barf +// fpsimd.c acitivty log dump hack +// ldr w0, =0xdeadc0de +// mov w8, #__NR_exit +// svc #0 +// end hack + mov x10, x0 // expected data + mov x11, x1 // actual data + mov x12, x2 // data size + + puts "Mistatch: PID=" + mov x0, x20 + bl putdec + puts ", iteration=" + mov x0, x22 + bl putdec + puts ", reg=" + mov x0, x21 + bl putdecn + puts "\tExpected [" + mov x0, x10 + mov x1, x12 + bl dumphex + puts "]\n\tGot [" + mov x0, x11 + mov x1, x12 + bl dumphex + puts "]\n" + + mov x8, #__NR_getpid + svc #0 +// fpsimd.c acitivty log dump hack +// ldr w0, =0xdeadc0de +// mov w8, #__NR_exit +// svc #0 +// ^ end of hack + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +// mov x8, #__NR_exit +// mov x1, #1 +// svc #0 +endfunction + +function vl_barf + mov x10, x0 + + puts "Bad active VL: " + mov x0, x10 + bl putdecn + + mov x8, #__NR_exit + mov x1, #1 + svc #0 +endfunction diff --git a/tools/testing/selftests/arm64/fp/vlset.c b/tools/testing/selftests/arm64/fp/vlset.c new file mode 100644 index 000000000000..308d27a68226 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/vlset.c @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015-2019 ARM Limited. + * Original author: Dave Martin <Dave.Martin@arm.com> + */ +#define _GNU_SOURCE +#include <assert.h> +#include <errno.h> +#include <limits.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include <unistd.h> +#include <sys/auxv.h> +#include <sys/prctl.h> +#include <asm/hwcap.h> +#include <asm/sigcontext.h> + +static int inherit = 0; +static int no_inherit = 0; +static int force = 0; +static unsigned long vl; + +static const struct option options[] = { + { "force", no_argument, NULL, 'f' }, + { "inherit", no_argument, NULL, 'i' }, + { "max", no_argument, NULL, 'M' }, + { "no-inherit", no_argument, &no_inherit, 1 }, + { "help", no_argument, NULL, '?' }, + {} +}; + +static char const *program_name; + +static int parse_options(int argc, char **argv) +{ + int c; + char *rest; + + program_name = strrchr(argv[0], '/'); + if (program_name) + ++program_name; + else + program_name = argv[0]; + + while ((c = getopt_long(argc, argv, "Mfhi", options, NULL)) != -1) + switch (c) { + case 'M': vl = SVE_VL_MAX; break; + case 'f': force = 1; break; + case 'i': inherit = 1; break; + case 0: break; + default: goto error; + } + + if (inherit && no_inherit) + goto error; + + if (!vl) { + /* vector length */ + if (optind >= argc) + goto error; + + errno = 0; + vl = strtoul(argv[optind], &rest, 0); + if (*rest) { + vl = ULONG_MAX; + errno = EINVAL; + } + if (vl == ULONG_MAX && errno) { + fprintf(stderr, "%s: %s: %s\n", + program_name, argv[optind], strerror(errno)); + goto error; + } + + ++optind; + } + + /* command */ + if (optind >= argc) + goto error; + + return 0; + +error: + fprintf(stderr, + "Usage: %s [-f | --force] " + "[-i | --inherit | --no-inherit] " + "{-M | --max | <vector length>} " + "<command> [<arguments> ...]\n", + program_name); + return -1; +} + +int main(int argc, char **argv) +{ + int ret = 126; /* same as sh(1) command-not-executable error */ + long flags; + char *path; + int t, e; + + if (parse_options(argc, argv)) + return 2; /* same as sh(1) builtin incorrect-usage */ + + if (vl & ~(vl & PR_SVE_VL_LEN_MASK)) { + fprintf(stderr, "%s: Invalid vector length %lu\n", + program_name, vl); + return 2; /* same as sh(1) builtin incorrect-usage */ + } + + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) { + fprintf(stderr, "%s: Scalable Vector Extension not present\n", + program_name); + + if (!force) + goto error; + + fputs("Going ahead anyway (--force): " + "This is a debug option. Don't rely on it.\n", + stderr); + } + + flags = PR_SVE_SET_VL_ONEXEC; + if (inherit) + flags |= PR_SVE_VL_INHERIT; + + t = prctl(PR_SVE_SET_VL, vl | flags); + if (t < 0) { + fprintf(stderr, "%s: PR_SVE_SET_VL: %s\n", + program_name, strerror(errno)); + goto error; + } + + t = prctl(PR_SVE_GET_VL); + if (t == -1) { + fprintf(stderr, "%s: PR_SVE_GET_VL: %s\n", + program_name, strerror(errno)); + goto error; + } + flags = PR_SVE_VL_LEN_MASK; + flags = t & ~flags; + + assert(optind < argc); + path = argv[optind]; + + execvp(path, &argv[optind]); + e = errno; + if (errno == ENOENT) + ret = 127; /* same as sh(1) not-found error */ + fprintf(stderr, "%s: %s: %s\n", program_name, path, strerror(e)); + +error: + return ret; /* same as sh(1) not-executable error */ +} diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore new file mode 100644 index 000000000000..bc3ac63f3314 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/.gitignore @@ -0,0 +1,6 @@ +check_buffer_fill +check_tags_inclusion +check_child_memory +check_mmap_options +check_ksm_options +check_user_mem diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile new file mode 100644 index 000000000000..2480226dfe57 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/Makefile @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020 ARM Limited + +CFLAGS += -std=gnu99 -I. +SRCS := $(filter-out mte_common_util.c,$(wildcard *.c)) +PROGS := $(patsubst %.c,%,$(SRCS)) + +#Add mte compiler option +ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep gcc),) +CFLAGS += -march=armv8.5-a+memtag +endif + +#check if the compiler works well +mte_cc_support := $(shell if ($(CC) $(CFLAGS) -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi) + +ifeq ($(mte_cc_support),1) +# Generated binaries to be installed by top KSFT script +TEST_GEN_PROGS := $(PROGS) + +# Get Kernel headers installed and use them. +KSFT_KHDR_INSTALL := 1 +endif + +# Include KSFT lib.mk. +include ../../lib.mk + +ifeq ($(mte_cc_support),1) +$(TEST_GEN_PROGS): mte_common_util.c mte_common_util.h mte_helper.S +endif diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c new file mode 100644 index 000000000000..242635d79035 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c @@ -0,0 +1,475 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include <stddef.h> +#include <stdio.h> +#include <string.h> + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define OVERFLOW_RANGE MT_GRANULE_SIZE + +static int sizes[] = { + 1, 555, 1033, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE, + /* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0 +}; + +enum mte_block_test_alloc { + UNTAGGED_TAGGED, + TAGGED_UNTAGGED, + TAGGED_TAGGED, + BLOCK_ALLOC_MAX, +}; + +static int check_buffer_by_byte(int mem_type, int mode) +{ + char *ptr; + int i, j, item; + bool err; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + item = sizeof(sizes)/sizeof(int); + + for (i = 0; i < item; i++) { + ptr = (char *)mte_allocate_memory(sizes[i], mem_type, 0, true); + if (check_allocated_memory(ptr, sizes[i], mem_type, true) != KSFT_PASS) + return KSFT_FAIL; + mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[i]); + /* Set some value in tagged memory */ + for (j = 0; j < sizes[i]; j++) + ptr[j] = '1'; + mte_wait_after_trig(); + err = cur_mte_cxt.fault_valid; + /* Check the buffer whether it is filled. */ + for (j = 0; j < sizes[i] && !err; j++) { + if (ptr[j] != '1') + err = true; + } + mte_free_memory((void *)ptr, sizes[i], mem_type, true); + + if (err) + break; + } + if (!err) + return KSFT_PASS; + else + return KSFT_FAIL; +} + +static int check_buffer_underflow_by_byte(int mem_type, int mode, + int underflow_range) +{ + char *ptr; + int i, j, item, last_index; + bool err; + char *und_ptr = NULL; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + item = sizeof(sizes)/sizeof(int); + for (i = 0; i < item; i++) { + ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0, + underflow_range, 0); + if (check_allocated_memory_range(ptr, sizes[i], mem_type, + underflow_range, 0) != KSFT_PASS) + return KSFT_FAIL; + + mte_initialize_current_context(mode, (uintptr_t)ptr, -underflow_range); + last_index = 0; + /* Set some value in tagged memory and make the buffer underflow */ + for (j = sizes[i] - 1; (j >= -underflow_range) && + (cur_mte_cxt.fault_valid == false); j--) { + ptr[j] = '1'; + last_index = j; + } + mte_wait_after_trig(); + err = false; + /* Check whether the buffer is filled */ + for (j = 0; j < sizes[i]; j++) { + if (ptr[j] != '1') { + err = true; + ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n", + j, ptr); + break; + } + } + if (err) + goto check_buffer_underflow_by_byte_err; + + switch (mode) { + case MTE_NONE_ERR: + if (cur_mte_cxt.fault_valid == true || last_index != -underflow_range) { + err = true; + break; + } + /* There were no fault so the underflow area should be filled */ + und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr - underflow_range); + for (j = 0 ; j < underflow_range; j++) { + if (und_ptr[j] != '1') { + err = true; + break; + } + } + break; + case MTE_ASYNC_ERR: + /* Imprecise fault should occur otherwise return error */ + if (cur_mte_cxt.fault_valid == false) { + err = true; + break; + } + /* + * The imprecise fault is checked after the write to the buffer, + * so the underflow area before the fault should be filled. + */ + und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr); + for (j = last_index ; j < 0 ; j++) { + if (und_ptr[j] != '1') { + err = true; + break; + } + } + break; + case MTE_SYNC_ERR: + /* Precise fault should occur otherwise return error */ + if (!cur_mte_cxt.fault_valid || (last_index != (-1))) { + err = true; + break; + } + /* Underflow area should not be filled */ + und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr); + if (und_ptr[-1] == '1') + err = true; + break; + default: + err = true; + break; + } +check_buffer_underflow_by_byte_err: + mte_free_memory_tag_range((void *)ptr, sizes[i], mem_type, underflow_range, 0); + if (err) + break; + } + return (err ? KSFT_FAIL : KSFT_PASS); +} + +static int check_buffer_overflow_by_byte(int mem_type, int mode, + int overflow_range) +{ + char *ptr; + int i, j, item, last_index; + bool err; + size_t tagged_size, overflow_size; + char *over_ptr = NULL; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + item = sizeof(sizes)/sizeof(int); + for (i = 0; i < item; i++) { + ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0, + 0, overflow_range); + if (check_allocated_memory_range(ptr, sizes[i], mem_type, + 0, overflow_range) != KSFT_PASS) + return KSFT_FAIL; + + tagged_size = MT_ALIGN_UP(sizes[i]); + + mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[i] + overflow_range); + + /* Set some value in tagged memory and make the buffer underflow */ + for (j = 0, last_index = 0 ; (j < (sizes[i] + overflow_range)) && + (cur_mte_cxt.fault_valid == false); j++) { + ptr[j] = '1'; + last_index = j; + } + mte_wait_after_trig(); + err = false; + /* Check whether the buffer is filled */ + for (j = 0; j < sizes[i]; j++) { + if (ptr[j] != '1') { + err = true; + ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n", + j, ptr); + break; + } + } + if (err) + goto check_buffer_overflow_by_byte_err; + + overflow_size = overflow_range - (tagged_size - sizes[i]); + + switch (mode) { + case MTE_NONE_ERR: + if ((cur_mte_cxt.fault_valid == true) || + (last_index != (sizes[i] + overflow_range - 1))) { + err = true; + break; + } + /* There were no fault so the overflow area should be filled */ + over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr + tagged_size); + for (j = 0 ; j < overflow_size; j++) { + if (over_ptr[j] != '1') { + err = true; + break; + } + } + break; + case MTE_ASYNC_ERR: + /* Imprecise fault should occur otherwise return error */ + if (cur_mte_cxt.fault_valid == false) { + err = true; + break; + } + /* + * The imprecise fault is checked after the write to the buffer, + * so the overflow area should be filled before the fault. + */ + over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr); + for (j = tagged_size ; j < last_index; j++) { + if (over_ptr[j] != '1') { + err = true; + break; + } + } + break; + case MTE_SYNC_ERR: + /* Precise fault should occur otherwise return error */ + if (!cur_mte_cxt.fault_valid || (last_index != tagged_size)) { + err = true; + break; + } + /* Underflow area should not be filled */ + over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr + tagged_size); + for (j = 0 ; j < overflow_size; j++) { + if (over_ptr[j] == '1') + err = true; + } + break; + default: + err = true; + break; + } +check_buffer_overflow_by_byte_err: + mte_free_memory_tag_range((void *)ptr, sizes[i], mem_type, 0, overflow_range); + if (err) + break; + } + return (err ? KSFT_FAIL : KSFT_PASS); +} + +static int check_buffer_by_block_iterate(int mem_type, int mode, size_t size) +{ + char *src, *dst; + int j, result = KSFT_PASS; + enum mte_block_test_alloc alloc_type = UNTAGGED_TAGGED; + + for (alloc_type = UNTAGGED_TAGGED; alloc_type < (int) BLOCK_ALLOC_MAX; alloc_type++) { + switch (alloc_type) { + case UNTAGGED_TAGGED: + src = (char *)mte_allocate_memory(size, mem_type, 0, false); + if (check_allocated_memory(src, size, mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + dst = (char *)mte_allocate_memory(size, mem_type, 0, true); + if (check_allocated_memory(dst, size, mem_type, true) != KSFT_PASS) { + mte_free_memory((void *)src, size, mem_type, false); + return KSFT_FAIL; + } + + break; + case TAGGED_UNTAGGED: + dst = (char *)mte_allocate_memory(size, mem_type, 0, false); + if (check_allocated_memory(dst, size, mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + src = (char *)mte_allocate_memory(size, mem_type, 0, true); + if (check_allocated_memory(src, size, mem_type, true) != KSFT_PASS) { + mte_free_memory((void *)dst, size, mem_type, false); + return KSFT_FAIL; + } + break; + case TAGGED_TAGGED: + src = (char *)mte_allocate_memory(size, mem_type, 0, true); + if (check_allocated_memory(src, size, mem_type, true) != KSFT_PASS) + return KSFT_FAIL; + + dst = (char *)mte_allocate_memory(size, mem_type, 0, true); + if (check_allocated_memory(dst, size, mem_type, true) != KSFT_PASS) { + mte_free_memory((void *)src, size, mem_type, true); + return KSFT_FAIL; + } + break; + default: + return KSFT_FAIL; + } + + cur_mte_cxt.fault_valid = false; + result = KSFT_PASS; + mte_initialize_current_context(mode, (uintptr_t)dst, size); + /* Set some value in memory and copy*/ + memset((void *)src, (int)'1', size); + memcpy((void *)dst, (void *)src, size); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid) { + result = KSFT_FAIL; + goto check_buffer_by_block_err; + } + /* Check the buffer whether it is filled. */ + for (j = 0; j < size; j++) { + if (src[j] != dst[j] || src[j] != '1') { + result = KSFT_FAIL; + break; + } + } +check_buffer_by_block_err: + mte_free_memory((void *)src, size, mem_type, + MT_FETCH_TAG((uintptr_t)src) ? true : false); + mte_free_memory((void *)dst, size, mem_type, + MT_FETCH_TAG((uintptr_t)dst) ? true : false); + if (result != KSFT_PASS) + return result; + } + return result; +} + +static int check_buffer_by_block(int mem_type, int mode) +{ + int i, item, result = KSFT_PASS; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + item = sizeof(sizes)/sizeof(int); + cur_mte_cxt.fault_valid = false; + for (i = 0; i < item; i++) { + result = check_buffer_by_block_iterate(mem_type, mode, sizes[i]); + if (result != KSFT_PASS) + break; + } + return result; +} + +static int compare_memory_tags(char *ptr, size_t size, int tag) +{ + int i, new_tag; + + for (i = 0 ; i < size ; i += MT_GRANULE_SIZE) { + new_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i))); + if (tag != new_tag) { + ksft_print_msg("FAIL: child mte tag mismatch\n"); + return KSFT_FAIL; + } + } + return KSFT_PASS; +} + +static int check_memory_initial_tags(int mem_type, int mode, int mapping) +{ + char *ptr; + int run, fd; + int total = sizeof(sizes)/sizeof(int); + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + for (run = 0; run < total; run++) { + /* check initial tags for anonymous mmap */ + ptr = (char *)mte_allocate_memory(sizes[run], mem_type, mapping, false); + if (check_allocated_memory(ptr, sizes[run], mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + if (compare_memory_tags(ptr, sizes[run], 0) != KSFT_PASS) { + mte_free_memory((void *)ptr, sizes[run], mem_type, false); + return KSFT_FAIL; + } + mte_free_memory((void *)ptr, sizes[run], mem_type, false); + + /* check initial tags for file mmap */ + fd = create_temp_file(); + if (fd == -1) + return KSFT_FAIL; + ptr = (char *)mte_allocate_file_memory(sizes[run], mem_type, mapping, false, fd); + if (check_allocated_memory(ptr, sizes[run], mem_type, false) != KSFT_PASS) { + close(fd); + return KSFT_FAIL; + } + if (compare_memory_tags(ptr, sizes[run], 0) != KSFT_PASS) { + mte_free_memory((void *)ptr, sizes[run], mem_type, false); + close(fd); + return KSFT_FAIL; + } + mte_free_memory((void *)ptr, sizes[run], mem_type, false); + close(fd); + } + return KSFT_PASS; +} + +int main(int argc, char *argv[]) +{ + int err; + size_t page_size = getpagesize(); + int item = sizeof(sizes)/sizeof(int); + + sizes[item - 3] = page_size - 1; + sizes[item - 2] = page_size; + sizes[item - 1] = page_size + 1; + + err = mte_default_setup(); + if (err) + return err; + + /* Register SIGSEGV handler */ + mte_register_signal(SIGSEGV, mte_default_handler); + + /* Buffer by byte tests */ + evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_SYNC_ERR), + "Check buffer correctness by byte with sync err mode and mmap memory\n"); + evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_ASYNC_ERR), + "Check buffer correctness by byte with async err mode and mmap memory\n"); + evaluate_test(check_buffer_by_byte(USE_MPROTECT, MTE_SYNC_ERR), + "Check buffer correctness by byte with sync err mode and mmap/mprotect memory\n"); + evaluate_test(check_buffer_by_byte(USE_MPROTECT, MTE_ASYNC_ERR), + "Check buffer correctness by byte with async err mode and mmap/mprotect memory\n"); + + /* Check buffer underflow with underflow size as 16 */ + evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_SYNC_ERR, MT_GRANULE_SIZE), + "Check buffer write underflow by byte with sync mode and mmap memory\n"); + evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, MT_GRANULE_SIZE), + "Check buffer write underflow by byte with async mode and mmap memory\n"); + evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_NONE_ERR, MT_GRANULE_SIZE), + "Check buffer write underflow by byte with tag check fault ignore and mmap memory\n"); + + /* Check buffer underflow with underflow size as page size */ + evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_SYNC_ERR, page_size), + "Check buffer write underflow by byte with sync mode and mmap memory\n"); + evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, page_size), + "Check buffer write underflow by byte with async mode and mmap memory\n"); + evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_NONE_ERR, page_size), + "Check buffer write underflow by byte with tag check fault ignore and mmap memory\n"); + + /* Check buffer overflow with overflow size as 16 */ + evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_SYNC_ERR, MT_GRANULE_SIZE), + "Check buffer write overflow by byte with sync mode and mmap memory\n"); + evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, MT_GRANULE_SIZE), + "Check buffer write overflow by byte with async mode and mmap memory\n"); + evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_NONE_ERR, MT_GRANULE_SIZE), + "Check buffer write overflow by byte with tag fault ignore mode and mmap memory\n"); + + /* Buffer by block tests */ + evaluate_test(check_buffer_by_block(USE_MMAP, MTE_SYNC_ERR), + "Check buffer write correctness by block with sync mode and mmap memory\n"); + evaluate_test(check_buffer_by_block(USE_MMAP, MTE_ASYNC_ERR), + "Check buffer write correctness by block with async mode and mmap memory\n"); + evaluate_test(check_buffer_by_block(USE_MMAP, MTE_NONE_ERR), + "Check buffer write correctness by block with tag fault ignore and mmap memory\n"); + + /* Initial tags are supposed to be 0 */ + evaluate_test(check_memory_initial_tags(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), + "Check initial tags with private mapping, sync error mode and mmap memory\n"); + evaluate_test(check_memory_initial_tags(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE), + "Check initial tags with private mapping, sync error mode and mmap/mprotect memory\n"); + evaluate_test(check_memory_initial_tags(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), + "Check initial tags with shared mapping, sync error mode and mmap memory\n"); + evaluate_test(check_memory_initial_tags(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED), + "Check initial tags with shared mapping, sync error mode and mmap/mprotect memory\n"); + + mte_restore_setup(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; +} diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c new file mode 100644 index 000000000000..97bebdecd29e --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_child_memory.c @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ucontext.h> +#include <sys/wait.h> + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define BUFFER_SIZE (5 * MT_GRANULE_SIZE) +#define RUNS (MT_TAG_COUNT) +#define UNDERFLOW MT_GRANULE_SIZE +#define OVERFLOW MT_GRANULE_SIZE + +static size_t page_size; +static int sizes[] = { + 1, 537, 989, 1269, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE, + /* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0 +}; + +static int check_child_tag_inheritance(char *ptr, int size, int mode) +{ + int i, parent_tag, child_tag, fault, child_status; + pid_t child; + + parent_tag = MT_FETCH_TAG((uintptr_t)ptr); + fault = 0; + + child = fork(); + if (child == -1) { + ksft_print_msg("FAIL: child process creation\n"); + return KSFT_FAIL; + } else if (child == 0) { + mte_initialize_current_context(mode, (uintptr_t)ptr, size); + /* Do copy on write */ + memset(ptr, '1', size); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == true) { + fault = 1; + goto check_child_tag_inheritance_err; + } + for (i = 0 ; i < size ; i += MT_GRANULE_SIZE) { + child_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i))); + if (parent_tag != child_tag) { + ksft_print_msg("FAIL: child mte tag mismatch\n"); + fault = 1; + goto check_child_tag_inheritance_err; + } + } + mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW); + memset(ptr - UNDERFLOW, '2', UNDERFLOW); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == false) { + fault = 1; + goto check_child_tag_inheritance_err; + } + mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW); + memset(ptr + size, '3', OVERFLOW); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == false) { + fault = 1; + goto check_child_tag_inheritance_err; + } +check_child_tag_inheritance_err: + _exit(fault); + } + /* Wait for child process to terminate */ + wait(&child_status); + if (WIFEXITED(child_status)) + fault = WEXITSTATUS(child_status); + else + fault = 1; + return (fault) ? KSFT_FAIL : KSFT_PASS; +} + +static int check_child_memory_mapping(int mem_type, int mode, int mapping) +{ + char *ptr; + int run, result; + int item = sizeof(sizes)/sizeof(int); + + item = sizeof(sizes)/sizeof(int); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + for (run = 0; run < item; run++) { + ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping, + UNDERFLOW, OVERFLOW); + if (check_allocated_memory_range(ptr, sizes[run], mem_type, + UNDERFLOW, OVERFLOW) != KSFT_PASS) + return KSFT_FAIL; + result = check_child_tag_inheritance(ptr, sizes[run], mode); + mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW); + if (result == KSFT_FAIL) + return result; + } + return KSFT_PASS; +} + +static int check_child_file_mapping(int mem_type, int mode, int mapping) +{ + char *ptr, *map_ptr; + int run, fd, map_size, result = KSFT_PASS; + int total = sizeof(sizes)/sizeof(int); + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + for (run = 0; run < total; run++) { + fd = create_temp_file(); + if (fd == -1) + return KSFT_FAIL; + + map_size = sizes[run] + OVERFLOW + UNDERFLOW; + map_ptr = (char *)mte_allocate_file_memory(map_size, mem_type, mapping, false, fd); + if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) { + close(fd); + return KSFT_FAIL; + } + ptr = map_ptr + UNDERFLOW; + mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]); + /* Only mte enabled memory will allow tag insertion */ + ptr = mte_insert_tags((void *)ptr, sizes[run]); + if (!ptr || cur_mte_cxt.fault_valid == true) { + ksft_print_msg("FAIL: Insert tags on file based memory\n"); + munmap((void *)map_ptr, map_size); + close(fd); + return KSFT_FAIL; + } + result = check_child_tag_inheritance(ptr, sizes[run], mode); + mte_clear_tags((void *)ptr, sizes[run]); + munmap((void *)map_ptr, map_size); + close(fd); + if (result != KSFT_PASS) + return KSFT_FAIL; + } + return KSFT_PASS; +} + +int main(int argc, char *argv[]) +{ + int err; + int item = sizeof(sizes)/sizeof(int); + + page_size = getpagesize(); + if (!page_size) { + ksft_print_msg("ERR: Unable to get page size\n"); + return KSFT_FAIL; + } + sizes[item - 3] = page_size - 1; + sizes[item - 2] = page_size; + sizes[item - 1] = page_size + 1; + + err = mte_default_setup(); + if (err) + return err; + + /* Register SIGSEGV handler */ + mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGBUS, mte_default_handler); + + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), + "Check child anonymous memory with private mapping, precise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), + "Check child anonymous memory with shared mapping, precise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE), + "Check child anonymous memory with private mapping, imprecise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED), + "Check child anonymous memory with shared mapping, imprecise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE), + "Check child anonymous memory with private mapping, precise mode and mmap/mprotect memory\n"); + evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED), + "Check child anonymous memory with shared mapping, precise mode and mmap/mprotect memory\n"); + + evaluate_test(check_child_file_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), + "Check child file memory with private mapping, precise mode and mmap memory\n"); + evaluate_test(check_child_file_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), + "Check child file memory with shared mapping, precise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE), + "Check child file memory with private mapping, imprecise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED), + "Check child file memory with shared mapping, imprecise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE), + "Check child file memory with private mapping, precise mode and mmap/mprotect memory\n"); + evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED), + "Check child file memory with shared mapping, precise mode and mmap/mprotect memory\n"); + + mte_restore_setup(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; +} diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c new file mode 100644 index 000000000000..bc41ae630c86 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ucontext.h> +#include <sys/mman.h> + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define TEST_UNIT 10 +#define PATH_KSM "/sys/kernel/mm/ksm/" +#define MAX_LOOP 4 + +static size_t page_sz; +static unsigned long ksm_sysfs[5]; + +static unsigned long read_sysfs(char *str) +{ + FILE *f; + unsigned long val = 0; + + f = fopen(str, "r"); + if (!f) { + ksft_print_msg("ERR: missing %s\n", str); + return 0; + } + fscanf(f, "%lu", &val); + fclose(f); + return val; +} + +static void write_sysfs(char *str, unsigned long val) +{ + FILE *f; + + f = fopen(str, "w"); + if (!f) { + ksft_print_msg("ERR: missing %s\n", str); + return; + } + fprintf(f, "%lu", val); + fclose(f); +} + +static void mte_ksm_setup(void) +{ + ksm_sysfs[0] = read_sysfs(PATH_KSM "merge_across_nodes"); + write_sysfs(PATH_KSM "merge_across_nodes", 1); + ksm_sysfs[1] = read_sysfs(PATH_KSM "sleep_millisecs"); + write_sysfs(PATH_KSM "sleep_millisecs", 0); + ksm_sysfs[2] = read_sysfs(PATH_KSM "run"); + write_sysfs(PATH_KSM "run", 1); + ksm_sysfs[3] = read_sysfs(PATH_KSM "max_page_sharing"); + write_sysfs(PATH_KSM "max_page_sharing", ksm_sysfs[3] + TEST_UNIT); + ksm_sysfs[4] = read_sysfs(PATH_KSM "pages_to_scan"); + write_sysfs(PATH_KSM "pages_to_scan", ksm_sysfs[4] + TEST_UNIT); +} + +static void mte_ksm_restore(void) +{ + write_sysfs(PATH_KSM "merge_across_nodes", ksm_sysfs[0]); + write_sysfs(PATH_KSM "sleep_millisecs", ksm_sysfs[1]); + write_sysfs(PATH_KSM "run", ksm_sysfs[2]); + write_sysfs(PATH_KSM "max_page_sharing", ksm_sysfs[3]); + write_sysfs(PATH_KSM "pages_to_scan", ksm_sysfs[4]); +} + +static void mte_ksm_scan(void) +{ + int cur_count = read_sysfs(PATH_KSM "full_scans"); + int scan_count = cur_count + 1; + int max_loop_count = MAX_LOOP; + + while ((cur_count < scan_count) && max_loop_count) { + sleep(1); + cur_count = read_sysfs(PATH_KSM "full_scans"); + max_loop_count--; + } +#ifdef DEBUG + ksft_print_msg("INFO: pages_shared=%lu pages_sharing=%lu\n", + read_sysfs(PATH_KSM "pages_shared"), + read_sysfs(PATH_KSM "pages_sharing")); +#endif +} + +static int check_madvise_options(int mem_type, int mode, int mapping) +{ + char *ptr; + int err, ret; + + err = KSFT_FAIL; + if (access(PATH_KSM, F_OK) == -1) { + ksft_print_msg("ERR: Kernel KSM config not enabled\n"); + return err; + } + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + ptr = mte_allocate_memory(TEST_UNIT * page_sz, mem_type, mapping, true); + if (check_allocated_memory(ptr, TEST_UNIT * page_sz, mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + /* Insert same data in all the pages */ + memset(ptr, 'A', TEST_UNIT * page_sz); + ret = madvise(ptr, TEST_UNIT * page_sz, MADV_MERGEABLE); + if (ret) { + ksft_print_msg("ERR: madvise failed to set MADV_UNMERGEABLE\n"); + goto madvise_err; + } + mte_ksm_scan(); + /* Tagged pages should not merge */ + if ((read_sysfs(PATH_KSM "pages_shared") < 1) || + (read_sysfs(PATH_KSM "pages_sharing") < (TEST_UNIT - 1))) + err = KSFT_PASS; +madvise_err: + mte_free_memory(ptr, TEST_UNIT * page_sz, mem_type, true); + return err; +} + +int main(int argc, char *argv[]) +{ + int err; + + err = mte_default_setup(); + if (err) + return err; + page_sz = getpagesize(); + if (!page_sz) { + ksft_print_msg("ERR: Unable to get page size\n"); + return KSFT_FAIL; + } + /* Register signal handlers */ + mte_register_signal(SIGBUS, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler); + /* Enable KSM */ + mte_ksm_setup(); + + evaluate_test(check_madvise_options(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), + "Check KSM mte page merge for private mapping, sync mode and mmap memory\n"); + evaluate_test(check_madvise_options(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE), + "Check KSM mte page merge for private mapping, async mode and mmap memory\n"); + evaluate_test(check_madvise_options(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), + "Check KSM mte page merge for shared mapping, sync mode and mmap memory\n"); + evaluate_test(check_madvise_options(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED), + "Check KSM mte page merge for shared mapping, async mode and mmap memory\n"); + + mte_ksm_restore(); + mte_restore_setup(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; +} diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c new file mode 100644 index 000000000000..33b13b86199b --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ucontext.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define RUNS (MT_TAG_COUNT) +#define UNDERFLOW MT_GRANULE_SIZE +#define OVERFLOW MT_GRANULE_SIZE +#define TAG_CHECK_ON 0 +#define TAG_CHECK_OFF 1 + +static size_t page_size; +static int sizes[] = { + 1, 537, 989, 1269, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE, + /* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0 +}; + +static int check_mte_memory(char *ptr, int size, int mode, int tag_check) +{ + mte_initialize_current_context(mode, (uintptr_t)ptr, size); + memset(ptr, '1', size); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == true) + return KSFT_FAIL; + + mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW); + memset(ptr - UNDERFLOW, '2', UNDERFLOW); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == false && tag_check == TAG_CHECK_ON) + return KSFT_FAIL; + if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF) + return KSFT_FAIL; + + mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW); + memset(ptr + size, '3', OVERFLOW); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == false && tag_check == TAG_CHECK_ON) + return KSFT_FAIL; + if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF) + return KSFT_FAIL; + + return KSFT_PASS; +} + +static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, int tag_check) +{ + char *ptr, *map_ptr; + int run, result, map_size; + int item = sizeof(sizes)/sizeof(int); + + item = sizeof(sizes)/sizeof(int); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + for (run = 0; run < item; run++) { + map_size = sizes[run] + OVERFLOW + UNDERFLOW; + map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false); + if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + ptr = map_ptr + UNDERFLOW; + mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]); + /* Only mte enabled memory will allow tag insertion */ + ptr = mte_insert_tags((void *)ptr, sizes[run]); + if (!ptr || cur_mte_cxt.fault_valid == true) { + ksft_print_msg("FAIL: Insert tags on anonymous mmap memory\n"); + munmap((void *)map_ptr, map_size); + return KSFT_FAIL; + } + result = check_mte_memory(ptr, sizes[run], mode, tag_check); + mte_clear_tags((void *)ptr, sizes[run]); + mte_free_memory((void *)map_ptr, map_size, mem_type, false); + if (result == KSFT_FAIL) + return KSFT_FAIL; + } + return KSFT_PASS; +} + +static int check_file_memory_mapping(int mem_type, int mode, int mapping, int tag_check) +{ + char *ptr, *map_ptr; + int run, fd, map_size; + int total = sizeof(sizes)/sizeof(int); + int result = KSFT_PASS; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + for (run = 0; run < total; run++) { + fd = create_temp_file(); + if (fd == -1) + return KSFT_FAIL; + + map_size = sizes[run] + UNDERFLOW + OVERFLOW; + map_ptr = (char *)mte_allocate_file_memory(map_size, mem_type, mapping, false, fd); + if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) { + close(fd); + return KSFT_FAIL; + } + ptr = map_ptr + UNDERFLOW; + mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]); + /* Only mte enabled memory will allow tag insertion */ + ptr = mte_insert_tags((void *)ptr, sizes[run]); + if (!ptr || cur_mte_cxt.fault_valid == true) { + ksft_print_msg("FAIL: Insert tags on file based memory\n"); + munmap((void *)map_ptr, map_size); + close(fd); + return KSFT_FAIL; + } + result = check_mte_memory(ptr, sizes[run], mode, tag_check); + mte_clear_tags((void *)ptr, sizes[run]); + munmap((void *)map_ptr, map_size); + close(fd); + if (result == KSFT_FAIL) + break; + } + return result; +} + +static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) +{ + char *ptr, *map_ptr; + int run, prot_flag, result, fd, map_size; + int total = sizeof(sizes)/sizeof(int); + + prot_flag = PROT_READ | PROT_WRITE; + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + for (run = 0; run < total; run++) { + map_size = sizes[run] + OVERFLOW + UNDERFLOW; + ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping, + UNDERFLOW, OVERFLOW); + if (check_allocated_memory_range(ptr, sizes[run], mem_type, + UNDERFLOW, OVERFLOW) != KSFT_PASS) + return KSFT_FAIL; + map_ptr = ptr - UNDERFLOW; + /* Try to clear PROT_MTE property and verify it by tag checking */ + if (mprotect(map_ptr, map_size, prot_flag)) { + mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, + UNDERFLOW, OVERFLOW); + ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n"); + return KSFT_FAIL; + } + result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON); + mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW); + if (result != KSFT_PASS) + return KSFT_FAIL; + + fd = create_temp_file(); + if (fd == -1) + return KSFT_FAIL; + ptr = (char *)mte_allocate_file_memory_tag_range(sizes[run], mem_type, mapping, + UNDERFLOW, OVERFLOW, fd); + if (check_allocated_memory_range(ptr, sizes[run], mem_type, + UNDERFLOW, OVERFLOW) != KSFT_PASS) { + close(fd); + return KSFT_FAIL; + } + map_ptr = ptr - UNDERFLOW; + /* Try to clear PROT_MTE property and verify it by tag checking */ + if (mprotect(map_ptr, map_size, prot_flag)) { + ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n"); + mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, + UNDERFLOW, OVERFLOW); + close(fd); + return KSFT_FAIL; + } + result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON); + mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW); + close(fd); + if (result != KSFT_PASS) + return KSFT_FAIL; + } + return KSFT_PASS; +} + +int main(int argc, char *argv[]) +{ + int err; + int item = sizeof(sizes)/sizeof(int); + + err = mte_default_setup(); + if (err) + return err; + page_size = getpagesize(); + if (!page_size) { + ksft_print_msg("ERR: Unable to get page size\n"); + return KSFT_FAIL; + } + sizes[item - 3] = page_size - 1; + sizes[item - 2] = page_size; + sizes[item - 1] = page_size + 1; + + /* Register signal handlers */ + mte_register_signal(SIGBUS, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler); + + mte_enable_pstate_tco(); + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF), + "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check off\n"); + evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF), + "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check off\n"); + + mte_disable_pstate_tco(); + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF), + "Check anonymous memory with private mapping, no error mode, mmap memory and tag check off\n"); + evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF), + "Check file memory with private mapping, no error mode, mmap/mprotect memory and tag check off\n"); + + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check anonymous memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check anonymous memory with shared mapping, sync error mode, mmap memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check anonymous memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check anonymous memory with private mapping, async error mode, mmap memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check anonymous memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check anonymous memory with shared mapping, async error mode, mmap memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check anonymous memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n"); + + evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check file memory with private mapping, sync error mode, mmap memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check file memory with shared mapping, sync error mode, mmap memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check file memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check file memory with private mapping, async error mode, mmap memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check file memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check file memory with shared mapping, async error mode, mmap memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check file memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n"); + + evaluate_test(check_clear_prot_mte_flag(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), + "Check clear PROT_MTE flags with private mapping, sync error mode and mmap memory\n"); + evaluate_test(check_clear_prot_mte_flag(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE), + "Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n"); + + mte_restore_setup(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; +} diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c new file mode 100644 index 000000000000..94d245a0ed56 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ucontext.h> +#include <sys/wait.h> + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define BUFFER_SIZE (5 * MT_GRANULE_SIZE) +#define RUNS (MT_TAG_COUNT * 2) +#define MTE_LAST_TAG_MASK (0x7FFF) + +static int verify_mte_pointer_validity(char *ptr, int mode) +{ + mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE); + /* Check the validity of the tagged pointer */ + memset((void *)ptr, '1', BUFFER_SIZE); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid) + return KSFT_FAIL; + /* Proceed further for nonzero tags */ + if (!MT_FETCH_TAG((uintptr_t)ptr)) + return KSFT_PASS; + mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE + 1); + /* Check the validity outside the range */ + ptr[BUFFER_SIZE] = '2'; + mte_wait_after_trig(); + if (!cur_mte_cxt.fault_valid) + return KSFT_FAIL; + else + return KSFT_PASS; +} + +static int check_single_included_tags(int mem_type, int mode) +{ + char *ptr; + int tag, run, result = KSFT_PASS; + + ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false); + if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE, + mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + for (tag = 0; (tag < MT_TAG_COUNT) && (result == KSFT_PASS); tag++) { + mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag)); + /* Try to catch a excluded tag by a number of tries. */ + for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) { + ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE); + /* Check tag value */ + if (MT_FETCH_TAG((uintptr_t)ptr) == tag) { + ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n", + MT_FETCH_TAG((uintptr_t)ptr), + MT_INCLUDE_VALID_TAG(tag)); + result = KSFT_FAIL; + break; + } + result = verify_mte_pointer_validity(ptr, mode); + } + } + mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE); + return result; +} + +static int check_multiple_included_tags(int mem_type, int mode) +{ + char *ptr; + int tag, run, result = KSFT_PASS; + unsigned long excl_mask = 0; + + ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false); + if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE, + mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + for (tag = 0; (tag < MT_TAG_COUNT - 1) && (result == KSFT_PASS); tag++) { + excl_mask |= 1 << tag; + mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask)); + /* Try to catch a excluded tag by a number of tries. */ + for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) { + ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE); + /* Check tag value */ + if (MT_FETCH_TAG((uintptr_t)ptr) < tag) { + ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n", + MT_FETCH_TAG((uintptr_t)ptr), + MT_INCLUDE_VALID_TAGS(excl_mask)); + result = KSFT_FAIL; + break; + } + result = verify_mte_pointer_validity(ptr, mode); + } + } + mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE); + return result; +} + +static int check_all_included_tags(int mem_type, int mode) +{ + char *ptr; + int run, result = KSFT_PASS; + + ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false); + if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE, + mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + mte_switch_mode(mode, MT_INCLUDE_TAG_MASK); + /* Try to catch a excluded tag by a number of tries. */ + for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) { + ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE); + /* + * Here tag byte can be between 0x0 to 0xF (full allowed range) + * so no need to match so just verify if it is writable. + */ + result = verify_mte_pointer_validity(ptr, mode); + } + mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE); + return result; +} + +static int check_none_included_tags(int mem_type, int mode) +{ + char *ptr; + int run; + + ptr = (char *)mte_allocate_memory(BUFFER_SIZE, mem_type, 0, false); + if (check_allocated_memory(ptr, BUFFER_SIZE, mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK); + /* Try to catch a excluded tag by a number of tries. */ + for (run = 0; run < RUNS; run++) { + ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE); + /* Here all tags exluded so tag value generated should be 0 */ + if (MT_FETCH_TAG((uintptr_t)ptr)) { + ksft_print_msg("FAIL: included tag value found\n"); + mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, true); + return KSFT_FAIL; + } + mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE); + /* Check the write validity of the untagged pointer */ + memset((void *)ptr, '1', BUFFER_SIZE); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid) + break; + } + mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, false); + if (cur_mte_cxt.fault_valid) + return KSFT_FAIL; + else + return KSFT_PASS; +} + +int main(int argc, char *argv[]) +{ + int err; + + err = mte_default_setup(); + if (err) + return err; + + /* Register SIGSEGV handler */ + mte_register_signal(SIGSEGV, mte_default_handler); + + evaluate_test(check_single_included_tags(USE_MMAP, MTE_SYNC_ERR), + "Check an included tag value with sync mode\n"); + evaluate_test(check_multiple_included_tags(USE_MMAP, MTE_SYNC_ERR), + "Check different included tags value with sync mode\n"); + evaluate_test(check_none_included_tags(USE_MMAP, MTE_SYNC_ERR), + "Check none included tags value with sync mode\n"); + evaluate_test(check_all_included_tags(USE_MMAP, MTE_SYNC_ERR), + "Check all included tags value with sync mode\n"); + + mte_restore_setup(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; +} diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c new file mode 100644 index 000000000000..594e98e76880 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_user_mem.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <ucontext.h> +#include <unistd.h> +#include <sys/mman.h> + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +static size_t page_sz; + +static int check_usermem_access_fault(int mem_type, int mode, int mapping) +{ + int fd, i, err; + char val = 'A'; + size_t len, read_len; + void *ptr, *ptr_next; + + err = KSFT_FAIL; + len = 2 * page_sz; + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + fd = create_temp_file(); + if (fd == -1) + return KSFT_FAIL; + for (i = 0; i < len; i++) + write(fd, &val, sizeof(val)); + lseek(fd, 0, 0); + ptr = mte_allocate_memory(len, mem_type, mapping, true); + if (check_allocated_memory(ptr, len, mem_type, true) != KSFT_PASS) { + close(fd); + return KSFT_FAIL; + } + mte_initialize_current_context(mode, (uintptr_t)ptr, len); + /* Copy from file into buffer with valid tag */ + read_len = read(fd, ptr, len); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid || read_len < len) + goto usermem_acc_err; + /* Verify same pattern is read */ + for (i = 0; i < len; i++) + if (*(char *)(ptr + i) != val) + break; + if (i < len) + goto usermem_acc_err; + + /* Tag the next half of memory with different value */ + ptr_next = (void *)((unsigned long)ptr + page_sz); + ptr_next = mte_insert_new_tag(ptr_next); + mte_set_tag_address_range(ptr_next, page_sz); + + lseek(fd, 0, 0); + /* Copy from file into buffer with invalid tag */ + read_len = read(fd, ptr, len); + mte_wait_after_trig(); + /* + * Accessing user memory in kernel with invalid tag should fail in sync + * mode without fault but may not fail in async mode as per the + * implemented MTE userspace support in Arm64 kernel. + */ + if (mode == MTE_SYNC_ERR && + !cur_mte_cxt.fault_valid && read_len < len) { + err = KSFT_PASS; + } else if (mode == MTE_ASYNC_ERR && + !cur_mte_cxt.fault_valid && read_len == len) { + err = KSFT_PASS; + } +usermem_acc_err: + mte_free_memory((void *)ptr, len, mem_type, true); + close(fd); + return err; +} + +int main(int argc, char *argv[]) +{ + int err; + + page_sz = getpagesize(); + if (!page_sz) { + ksft_print_msg("ERR: Unable to get page size\n"); + return KSFT_FAIL; + } + err = mte_default_setup(); + if (err) + return err; + /* Register signal handlers */ + mte_register_signal(SIGSEGV, mte_default_handler); + + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), + "Check memory access from kernel in sync mode, private mapping and mmap memory\n"); + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), + "Check memory access from kernel in sync mode, shared mapping and mmap memory\n"); + + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE), + "Check memory access from kernel in async mode, private mapping and mmap memory\n"); + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED), + "Check memory access from kernel in async mode, shared mapping and mmap memory\n"); + + mte_restore_setup(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; +} diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c new file mode 100644 index 000000000000..39f8908988ea --- /dev/null +++ b/tools/testing/selftests/arm64/mte/mte_common_util.c @@ -0,0 +1,341 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#include <fcntl.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <linux/auxvec.h> +#include <sys/auxv.h> +#include <sys/mman.h> +#include <sys/prctl.h> + +#include <asm/hwcap.h> + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define INIT_BUFFER_SIZE 256 + +struct mte_fault_cxt cur_mte_cxt; +static unsigned int mte_cur_mode; +static unsigned int mte_cur_pstate_tco; + +void mte_default_handler(int signum, siginfo_t *si, void *uc) +{ + unsigned long addr = (unsigned long)si->si_addr; + + if (signum == SIGSEGV) { +#ifdef DEBUG + ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx\n", + ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code); +#endif + if (si->si_code == SEGV_MTEAERR) { + if (cur_mte_cxt.trig_si_code == si->si_code) + cur_mte_cxt.fault_valid = true; + return; + } + /* Compare the context for precise error */ + else if (si->si_code == SEGV_MTESERR) { + if (cur_mte_cxt.trig_si_code == si->si_code && + ((cur_mte_cxt.trig_range >= 0 && + addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && + addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) || + (cur_mte_cxt.trig_range < 0 && + addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && + addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) { + cur_mte_cxt.fault_valid = true; + /* Adjust the pc by 4 */ + ((ucontext_t *)uc)->uc_mcontext.pc += 4; + } else { + ksft_print_msg("Invalid MTE synchronous exception caught!\n"); + exit(1); + } + } else { + ksft_print_msg("Unknown SIGSEGV exception caught!\n"); + exit(1); + } + } else if (signum == SIGBUS) { + ksft_print_msg("INFO: SIGBUS signal at pc=%lx, fault addr=%lx, si_code=%lx\n", + ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code); + if ((cur_mte_cxt.trig_range >= 0 && + addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && + addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) || + (cur_mte_cxt.trig_range < 0 && + addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && + addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) { + cur_mte_cxt.fault_valid = true; + /* Adjust the pc by 4 */ + ((ucontext_t *)uc)->uc_mcontext.pc += 4; + } + } +} + +void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *)) +{ + struct sigaction sa; + + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO; + sigemptyset(&sa.sa_mask); + sigaction(signal, &sa, NULL); +} + +void mte_wait_after_trig(void) +{ + sched_yield(); +} + +void *mte_insert_tags(void *ptr, size_t size) +{ + void *tag_ptr; + int align_size; + + if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) { + ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr); + return NULL; + } + align_size = MT_ALIGN_UP(size); + tag_ptr = mte_insert_random_tag(ptr); + mte_set_tag_address_range(tag_ptr, align_size); + return tag_ptr; +} + +void mte_clear_tags(void *ptr, size_t size) +{ + if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) { + ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr); + return; + } + size = MT_ALIGN_UP(size); + ptr = (void *)MT_CLEAR_TAG((unsigned long)ptr); + mte_clear_tag_address_range(ptr, size); +} + +static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping, + size_t range_before, size_t range_after, + bool tags, int fd) +{ + void *ptr; + int prot_flag, map_flag; + size_t entire_size = size + range_before + range_after; + + if (mem_type != USE_MALLOC && mem_type != USE_MMAP && + mem_type != USE_MPROTECT) { + ksft_print_msg("FAIL: Invalid allocate request\n"); + return NULL; + } + if (mem_type == USE_MALLOC) + return malloc(entire_size) + range_before; + + prot_flag = PROT_READ | PROT_WRITE; + if (mem_type == USE_MMAP) + prot_flag |= PROT_MTE; + + map_flag = mapping; + if (fd == -1) + map_flag = MAP_ANONYMOUS | map_flag; + if (!(mapping & MAP_SHARED)) + map_flag |= MAP_PRIVATE; + ptr = mmap(NULL, entire_size, prot_flag, map_flag, fd, 0); + if (ptr == MAP_FAILED) { + ksft_print_msg("FAIL: mmap allocation\n"); + return NULL; + } + if (mem_type == USE_MPROTECT) { + if (mprotect(ptr, entire_size, prot_flag | PROT_MTE)) { + munmap(ptr, size); + ksft_print_msg("FAIL: mprotect PROT_MTE property\n"); + return NULL; + } + } + if (tags) + ptr = mte_insert_tags(ptr + range_before, size); + return ptr; +} + +void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping, + size_t range_before, size_t range_after) +{ + return __mte_allocate_memory_range(size, mem_type, mapping, range_before, + range_after, true, -1); +} + +void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags) +{ + return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, -1); +} + +void *mte_allocate_file_memory(size_t size, int mem_type, int mapping, bool tags, int fd) +{ + int index; + char buffer[INIT_BUFFER_SIZE]; + + if (mem_type != USE_MPROTECT && mem_type != USE_MMAP) { + ksft_print_msg("FAIL: Invalid mmap file request\n"); + return NULL; + } + /* Initialize the file for mappable size */ + lseek(fd, 0, SEEK_SET); + for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE) + write(fd, buffer, INIT_BUFFER_SIZE); + index -= INIT_BUFFER_SIZE; + write(fd, buffer, size - index); + return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, fd); +} + +void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping, + size_t range_before, size_t range_after, int fd) +{ + int index; + char buffer[INIT_BUFFER_SIZE]; + int map_size = size + range_before + range_after; + + if (mem_type != USE_MPROTECT && mem_type != USE_MMAP) { + ksft_print_msg("FAIL: Invalid mmap file request\n"); + return NULL; + } + /* Initialize the file for mappable size */ + lseek(fd, 0, SEEK_SET); + for (index = INIT_BUFFER_SIZE; index < map_size; index += INIT_BUFFER_SIZE) + write(fd, buffer, INIT_BUFFER_SIZE); + index -= INIT_BUFFER_SIZE; + write(fd, buffer, map_size - index); + return __mte_allocate_memory_range(size, mem_type, mapping, range_before, + range_after, true, fd); +} + +static void __mte_free_memory_range(void *ptr, size_t size, int mem_type, + size_t range_before, size_t range_after, bool tags) +{ + switch (mem_type) { + case USE_MALLOC: + free(ptr - range_before); + break; + case USE_MMAP: + case USE_MPROTECT: + if (tags) + mte_clear_tags(ptr, size); + munmap(ptr - range_before, size + range_before + range_after); + break; + default: + ksft_print_msg("FAIL: Invalid free request\n"); + break; + } +} + +void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type, + size_t range_before, size_t range_after) +{ + __mte_free_memory_range(ptr, size, mem_type, range_before, range_after, true); +} + +void mte_free_memory(void *ptr, size_t size, int mem_type, bool tags) +{ + __mte_free_memory_range(ptr, size, mem_type, 0, 0, tags); +} + +void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range) +{ + cur_mte_cxt.fault_valid = false; + cur_mte_cxt.trig_addr = ptr; + cur_mte_cxt.trig_range = range; + if (mode == MTE_SYNC_ERR) + cur_mte_cxt.trig_si_code = SEGV_MTESERR; + else if (mode == MTE_ASYNC_ERR) + cur_mte_cxt.trig_si_code = SEGV_MTEAERR; + else + cur_mte_cxt.trig_si_code = 0; +} + +int mte_switch_mode(int mte_option, unsigned long incl_mask) +{ + unsigned long en = 0; + + if (!(mte_option == MTE_SYNC_ERR || mte_option == MTE_ASYNC_ERR || + mte_option == MTE_NONE_ERR || incl_mask <= MTE_ALLOW_NON_ZERO_TAG)) { + ksft_print_msg("FAIL: Invalid mte config option\n"); + return -EINVAL; + } + en = PR_TAGGED_ADDR_ENABLE; + if (mte_option == MTE_SYNC_ERR) + en |= PR_MTE_TCF_SYNC; + else if (mte_option == MTE_ASYNC_ERR) + en |= PR_MTE_TCF_ASYNC; + else if (mte_option == MTE_NONE_ERR) + en |= PR_MTE_TCF_NONE; + + en |= (incl_mask << PR_MTE_TAG_SHIFT); + /* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */ + if (!prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) == 0) { + ksft_print_msg("FAIL:prctl PR_SET_TAGGED_ADDR_CTRL for mte mode\n"); + return -EINVAL; + } + return 0; +} + +#define ID_AA64PFR1_MTE_SHIFT 8 +#define ID_AA64PFR1_MTE 2 + +int mte_default_setup(void) +{ + unsigned long hwcaps = getauxval(AT_HWCAP); + unsigned long en = 0; + int ret; + + if (!(hwcaps & HWCAP_CPUID)) { + ksft_print_msg("FAIL: CPUID registers unavailable\n"); + return KSFT_FAIL; + } + /* Read ID_AA64PFR1_EL1 register */ + asm volatile("mrs %0, id_aa64pfr1_el1" : "=r"(hwcaps) : : "memory"); + if (((hwcaps >> ID_AA64PFR1_MTE_SHIFT) & MT_TAG_MASK) != ID_AA64PFR1_MTE) { + ksft_print_msg("FAIL: MTE features unavailable\n"); + return KSFT_SKIP; + } + /* Get current mte mode */ + ret = prctl(PR_GET_TAGGED_ADDR_CTRL, en, 0, 0, 0); + if (ret < 0) { + ksft_print_msg("FAIL:prctl PR_GET_TAGGED_ADDR_CTRL with error =%d\n", ret); + return KSFT_FAIL; + } + if (ret & PR_MTE_TCF_SYNC) + mte_cur_mode = MTE_SYNC_ERR; + else if (ret & PR_MTE_TCF_ASYNC) + mte_cur_mode = MTE_ASYNC_ERR; + else if (ret & PR_MTE_TCF_NONE) + mte_cur_mode = MTE_NONE_ERR; + + mte_cur_pstate_tco = mte_get_pstate_tco(); + /* Disable PSTATE.TCO */ + mte_disable_pstate_tco(); + return 0; +} + +void mte_restore_setup(void) +{ + mte_switch_mode(mte_cur_mode, MTE_ALLOW_NON_ZERO_TAG); + if (mte_cur_pstate_tco == MT_PSTATE_TCO_EN) + mte_enable_pstate_tco(); + else if (mte_cur_pstate_tco == MT_PSTATE_TCO_DIS) + mte_disable_pstate_tco(); +} + +int create_temp_file(void) +{ + int fd; + char filename[] = "/dev/shm/tmp_XXXXXX"; + + /* Create a file in the tmpfs filesystem */ + fd = mkstemp(&filename[0]); + if (fd == -1) { + ksft_print_msg("FAIL: Unable to open temporary file\n"); + return 0; + } + unlink(&filename[0]); + return fd; +} diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h new file mode 100644 index 000000000000..195a7d1879e6 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/mte_common_util.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020 ARM Limited */ + +#ifndef _MTE_COMMON_UTIL_H +#define _MTE_COMMON_UTIL_H + +#include <signal.h> +#include <stdbool.h> +#include <stdlib.h> +#include <sys/auxv.h> +#include <sys/mman.h> +#include <sys/prctl.h> +#include "mte_def.h" +#include "kselftest.h" + +enum mte_mem_type { + USE_MALLOC, + USE_MMAP, + USE_MPROTECT, +}; + +enum mte_mode { + MTE_NONE_ERR, + MTE_SYNC_ERR, + MTE_ASYNC_ERR, +}; + +struct mte_fault_cxt { + /* Address start which triggers mte tag fault */ + unsigned long trig_addr; + /* Address range for mte tag fault and negative value means underflow */ + ssize_t trig_range; + /* siginfo si code */ + unsigned long trig_si_code; + /* Flag to denote if correct fault caught */ + bool fault_valid; +}; + +extern struct mte_fault_cxt cur_mte_cxt; + +/* MTE utility functions */ +void mte_default_handler(int signum, siginfo_t *si, void *uc); +void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *)); +void mte_wait_after_trig(void); +void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags); +void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping, + size_t range_before, size_t range_after); +void *mte_allocate_file_memory(size_t size, int mem_type, int mapping, + bool tags, int fd); +void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping, + size_t range_before, size_t range_after, int fd); +void mte_free_memory(void *ptr, size_t size, int mem_type, bool tags); +void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type, + size_t range_before, size_t range_after); +void *mte_insert_tags(void *ptr, size_t size); +void mte_clear_tags(void *ptr, size_t size); +int mte_default_setup(void); +void mte_restore_setup(void); +int mte_switch_mode(int mte_option, unsigned long incl_mask); +void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range); + +/* Common utility functions */ +int create_temp_file(void); + +/* Assembly MTE utility functions */ +void *mte_insert_random_tag(void *ptr); +void *mte_insert_new_tag(void *ptr); +void *mte_get_tag_address(void *ptr); +void mte_set_tag_address_range(void *ptr, int range); +void mte_clear_tag_address_range(void *ptr, int range); +void mte_disable_pstate_tco(void); +void mte_enable_pstate_tco(void); +unsigned int mte_get_pstate_tco(void); + +/* Test framework static inline functions/macros */ +static inline void evaluate_test(int err, const char *msg) +{ + if (err == KSFT_PASS) + ksft_test_result_pass(msg); + else if (err == KSFT_FAIL) + ksft_test_result_fail(msg); +} + +static inline int check_allocated_memory(void *ptr, size_t size, + int mem_type, bool tags) +{ + if (ptr == NULL) { + ksft_print_msg("FAIL: memory allocation\n"); + return KSFT_FAIL; + } + + if (tags && !MT_FETCH_TAG((uintptr_t)ptr)) { + ksft_print_msg("FAIL: tag not found at addr(%p)\n", ptr); + mte_free_memory((void *)ptr, size, mem_type, false); + return KSFT_FAIL; + } + + return KSFT_PASS; +} + +static inline int check_allocated_memory_range(void *ptr, size_t size, int mem_type, + size_t range_before, size_t range_after) +{ + if (ptr == NULL) { + ksft_print_msg("FAIL: memory allocation\n"); + return KSFT_FAIL; + } + + if (!MT_FETCH_TAG((uintptr_t)ptr)) { + ksft_print_msg("FAIL: tag not found at addr(%p)\n", ptr); + mte_free_memory_tag_range((void *)ptr, size, mem_type, range_before, + range_after); + return KSFT_FAIL; + } + return KSFT_PASS; +} + +#endif /* _MTE_COMMON_UTIL_H */ diff --git a/tools/testing/selftests/arm64/mte/mte_def.h b/tools/testing/selftests/arm64/mte/mte_def.h new file mode 100644 index 000000000000..9b188254b61a --- /dev/null +++ b/tools/testing/selftests/arm64/mte/mte_def.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020 ARM Limited */ + +/* + * Below definitions may be found in kernel headers, However, they are + * redefined here to decouple the MTE selftests compilations from them. + */ +#ifndef SEGV_MTEAERR +#define SEGV_MTEAERR 8 +#endif +#ifndef SEGV_MTESERR +#define SEGV_MTESERR 9 +#endif +#ifndef PROT_MTE +#define PROT_MTE 0x20 +#endif +#ifndef HWCAP2_MTE +#define HWCAP2_MTE (1 << 18) +#endif + +#ifndef PR_MTE_TCF_SHIFT +#define PR_MTE_TCF_SHIFT 1 +#endif +#ifndef PR_MTE_TCF_NONE +#define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) +#endif +#ifndef PR_MTE_TCF_SYNC +#define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) +#endif +#ifndef PR_MTE_TCF_ASYNC +#define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) +#endif +#ifndef PR_MTE_TAG_SHIFT +#define PR_MTE_TAG_SHIFT 3 +#endif + +/* MTE Hardware feature definitions below. */ +#define MT_TAG_SHIFT 56 +#define MT_TAG_MASK 0xFUL +#define MT_FREE_TAG 0x0UL +#define MT_GRANULE_SIZE 16 +#define MT_TAG_COUNT 16 +#define MT_INCLUDE_TAG_MASK 0xFFFF +#define MT_EXCLUDE_TAG_MASK 0x0 + +#define MT_ALIGN_GRANULE (MT_GRANULE_SIZE - 1) +#define MT_CLEAR_TAG(x) ((x) & ~(MT_TAG_MASK << MT_TAG_SHIFT)) +#define MT_SET_TAG(x, y) ((x) | (y << MT_TAG_SHIFT)) +#define MT_FETCH_TAG(x) ((x >> MT_TAG_SHIFT) & (MT_TAG_MASK)) +#define MT_ALIGN_UP(x) ((x + MT_ALIGN_GRANULE) & ~(MT_ALIGN_GRANULE)) + +#define MT_PSTATE_TCO_SHIFT 25 +#define MT_PSTATE_TCO_MASK ~(0x1 << MT_PSTATE_TCO_SHIFT) +#define MT_PSTATE_TCO_EN 1 +#define MT_PSTATE_TCO_DIS 0 + +#define MT_EXCLUDE_TAG(x) (1 << (x)) +#define MT_INCLUDE_VALID_TAG(x) (MT_INCLUDE_TAG_MASK ^ MT_EXCLUDE_TAG(x)) +#define MT_INCLUDE_VALID_TAGS(x) (MT_INCLUDE_TAG_MASK ^ (x)) +#define MTE_ALLOW_NON_ZERO_TAG MT_INCLUDE_VALID_TAG(0) diff --git a/tools/testing/selftests/arm64/mte/mte_helper.S b/tools/testing/selftests/arm64/mte/mte_helper.S new file mode 100644 index 000000000000..a02c04cd0aac --- /dev/null +++ b/tools/testing/selftests/arm64/mte/mte_helper.S @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020 ARM Limited */ + +#include "mte_def.h" + +#define ENTRY(name) \ + .globl name ;\ + .p2align 2;\ + .type name, @function ;\ +name: + +#define ENDPROC(name) \ + .size name, .-name ; + + .text +/* + * mte_insert_random_tag: Insert random tag and might be same as the source tag if + * the source pointer has it. + * Input: + * x0 - source pointer with a tag/no-tag + * Return: + * x0 - pointer with random tag + */ +ENTRY(mte_insert_random_tag) + irg x0, x0, xzr + ret +ENDPROC(mte_insert_random_tag) + +/* + * mte_insert_new_tag: Insert new tag and different from the source tag if + * source pointer has it. + * Input: + * x0 - source pointer with a tag/no-tag + * Return: + * x0 - pointer with random tag + */ +ENTRY(mte_insert_new_tag) + gmi x1, x0, xzr + irg x0, x0, x1 + ret +ENDPROC(mte_insert_new_tag) + +/* + * mte_get_tag_address: Get the tag from given address. + * Input: + * x0 - source pointer + * Return: + * x0 - pointer with appended tag + */ +ENTRY(mte_get_tag_address) + ldg x0, [x0] + ret +ENDPROC(mte_get_tag_address) + +/* + * mte_set_tag_address_range: Set the tag range from the given address + * Input: + * x0 - source pointer with tag data + * x1 - range + * Return: + * none + */ +ENTRY(mte_set_tag_address_range) + cbz x1, 2f +1: + stg x0, [x0, #0x0] + add x0, x0, #MT_GRANULE_SIZE + sub x1, x1, #MT_GRANULE_SIZE + cbnz x1, 1b +2: + ret +ENDPROC(mte_set_tag_address_range) + +/* + * mt_clear_tag_address_range: Clear the tag range from the given address + * Input: + * x0 - source pointer with tag data + * x1 - range + * Return: + * none + */ +ENTRY(mte_clear_tag_address_range) + cbz x1, 2f +1: + stzg x0, [x0, #0x0] + add x0, x0, #MT_GRANULE_SIZE + sub x1, x1, #MT_GRANULE_SIZE + cbnz x1, 1b +2: + ret +ENDPROC(mte_clear_tag_address_range) + +/* + * mte_enable_pstate_tco: Enable PSTATE.TCO (tag check override) field + * Input: + * none + * Return: + * none + */ +ENTRY(mte_enable_pstate_tco) + msr tco, #MT_PSTATE_TCO_EN + ret +ENDPROC(mte_enable_pstate_tco) + +/* + * mte_disable_pstate_tco: Disable PSTATE.TCO (tag check override) field + * Input: + * none + * Return: + * none + */ +ENTRY(mte_disable_pstate_tco) + msr tco, #MT_PSTATE_TCO_DIS + ret +ENDPROC(mte_disable_pstate_tco) + +/* + * mte_get_pstate_tco: Get PSTATE.TCO (tag check override) field + * Input: + * none + * Return: + * x0 + */ +ENTRY(mte_get_pstate_tco) + mrs x0, tco + ubfx x0, x0, #MT_PSTATE_TCO_SHIFT, #1 + ret +ENDPROC(mte_get_pstate_tco) diff --git a/tools/testing/selftests/arm64/pauth/.gitignore b/tools/testing/selftests/arm64/pauth/.gitignore new file mode 100644 index 000000000000..155137d92722 --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/.gitignore @@ -0,0 +1,2 @@ +exec_target +pac diff --git a/tools/testing/selftests/arm64/pauth/Makefile b/tools/testing/selftests/arm64/pauth/Makefile new file mode 100644 index 000000000000..72e290b0b10c --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/Makefile @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020 ARM Limited + +# preserve CC value from top level Makefile +ifeq ($(CC),cc) +CC := $(CROSS_COMPILE)gcc +endif + +CFLAGS += -mbranch-protection=pac-ret +# check if the compiler supports ARMv8.3 and branch protection with PAuth +pauth_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.3-a -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi) + +ifeq ($(pauth_cc_support),1) +TEST_GEN_PROGS := pac +TEST_GEN_FILES := pac_corruptor.o helper.o +TEST_GEN_PROGS_EXTENDED := exec_target +endif + +include ../../lib.mk + +ifeq ($(pauth_cc_support),1) +# pac* and aut* instructions are not available on architectures berfore +# ARMv8.3. Therefore target ARMv8.3 wherever they are used directly +$(OUTPUT)/pac_corruptor.o: pac_corruptor.S + $(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a + +$(OUTPUT)/helper.o: helper.c + $(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a + +# when -mbranch-protection is enabled and the target architecture is ARMv8.3 or +# greater, gcc emits pac* instructions which are not in HINT NOP space, +# preventing the tests from occurring at all. Compile for ARMv8.2 so tests can +# run on earlier targets and print a meaningful error messages +$(OUTPUT)/exec_target: exec_target.c $(OUTPUT)/helper.o + $(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a + +$(OUTPUT)/pac: pac.c $(OUTPUT)/pac_corruptor.o $(OUTPUT)/helper.o + $(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a +endif diff --git a/tools/testing/selftests/arm64/pauth/exec_target.c b/tools/testing/selftests/arm64/pauth/exec_target.c new file mode 100644 index 000000000000..4435600ca400 --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/exec_target.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#include <stdio.h> +#include <stdlib.h> +#include <sys/auxv.h> + +#include "helper.h" + +int main(void) +{ + struct signatures signed_vals; + unsigned long hwcaps; + size_t val; + + fread(&val, sizeof(size_t), 1, stdin); + + /* don't try to execute illegal (unimplemented) instructions) caller + * should have checked this and keep worker simple + */ + hwcaps = getauxval(AT_HWCAP); + + if (hwcaps & HWCAP_PACA) { + signed_vals.keyia = keyia_sign(val); + signed_vals.keyib = keyib_sign(val); + signed_vals.keyda = keyda_sign(val); + signed_vals.keydb = keydb_sign(val); + } + signed_vals.keyg = (hwcaps & HWCAP_PACG) ? keyg_sign(val) : 0; + + fwrite(&signed_vals, sizeof(struct signatures), 1, stdout); + + return 0; +} diff --git a/tools/testing/selftests/arm64/pauth/helper.c b/tools/testing/selftests/arm64/pauth/helper.c new file mode 100644 index 000000000000..2c201e7d0d50 --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/helper.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#include "helper.h" + +size_t keyia_sign(size_t ptr) +{ + asm volatile("paciza %0" : "+r" (ptr)); + return ptr; +} + +size_t keyib_sign(size_t ptr) +{ + asm volatile("pacizb %0" : "+r" (ptr)); + return ptr; +} + +size_t keyda_sign(size_t ptr) +{ + asm volatile("pacdza %0" : "+r" (ptr)); + return ptr; +} + +size_t keydb_sign(size_t ptr) +{ + asm volatile("pacdzb %0" : "+r" (ptr)); + return ptr; +} + +size_t keyg_sign(size_t ptr) +{ + /* output is encoded in the upper 32 bits */ + size_t dest = 0; + size_t modifier = 0; + + asm volatile("pacga %0, %1, %2" : "=r" (dest) : "r" (ptr), "r" (modifier)); + + return dest; +} diff --git a/tools/testing/selftests/arm64/pauth/helper.h b/tools/testing/selftests/arm64/pauth/helper.h new file mode 100644 index 000000000000..652496c7b411 --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/helper.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020 ARM Limited */ + +#ifndef _HELPER_H_ +#define _HELPER_H_ + +#include <stdlib.h> + +#define NKEYS 5 + +struct signatures { + size_t keyia; + size_t keyib; + size_t keyda; + size_t keydb; + size_t keyg; +}; + +void pac_corruptor(void); + +/* PAuth sign a value with key ia and modifier value 0 */ +size_t keyia_sign(size_t val); +size_t keyib_sign(size_t val); +size_t keyda_sign(size_t val); +size_t keydb_sign(size_t val); +size_t keyg_sign(size_t val); + +#endif diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c new file mode 100644 index 000000000000..592fe538506e --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/pac.c @@ -0,0 +1,368 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include <sys/auxv.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <signal.h> +#include <setjmp.h> +#include <sched.h> + +#include "../../kselftest_harness.h" +#include "helper.h" + +#define PAC_COLLISION_ATTEMPTS 10 +/* + * The kernel sets TBID by default. So bits 55 and above should remain + * untouched no matter what. + * The VA space size is 48 bits. Bigger is opt-in. + */ +#define PAC_MASK (~0xff80ffffffffffff) +#define ARBITRARY_VALUE (0x1234) +#define ASSERT_PAUTH_ENABLED() \ +do { \ + unsigned long hwcaps = getauxval(AT_HWCAP); \ + /* data key instructions are not in NOP space. This prevents a SIGILL */ \ + ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); \ +} while (0) +#define ASSERT_GENERIC_PAUTH_ENABLED() \ +do { \ + unsigned long hwcaps = getauxval(AT_HWCAP); \ + /* generic key instructions are not in NOP space. This prevents a SIGILL */ \ + ASSERT_NE(0, hwcaps & HWCAP_PACG) TH_LOG("Generic PAUTH not enabled"); \ +} while (0) + +void sign_specific(struct signatures *sign, size_t val) +{ + sign->keyia = keyia_sign(val); + sign->keyib = keyib_sign(val); + sign->keyda = keyda_sign(val); + sign->keydb = keydb_sign(val); +} + +void sign_all(struct signatures *sign, size_t val) +{ + sign->keyia = keyia_sign(val); + sign->keyib = keyib_sign(val); + sign->keyda = keyda_sign(val); + sign->keydb = keydb_sign(val); + sign->keyg = keyg_sign(val); +} + +int n_same(struct signatures *old, struct signatures *new, int nkeys) +{ + int res = 0; + + res += old->keyia == new->keyia; + res += old->keyib == new->keyib; + res += old->keyda == new->keyda; + res += old->keydb == new->keydb; + if (nkeys == NKEYS) + res += old->keyg == new->keyg; + + return res; +} + +int n_same_single_set(struct signatures *sign, int nkeys) +{ + size_t vals[nkeys]; + int same = 0; + + vals[0] = sign->keyia & PAC_MASK; + vals[1] = sign->keyib & PAC_MASK; + vals[2] = sign->keyda & PAC_MASK; + vals[3] = sign->keydb & PAC_MASK; + + if (nkeys >= 4) + vals[4] = sign->keyg & PAC_MASK; + + for (int i = 0; i < nkeys - 1; i++) { + for (int j = i + 1; j < nkeys; j++) { + if (vals[i] == vals[j]) + same += 1; + } + } + return same; +} + +int exec_sign_all(struct signatures *signed_vals, size_t val) +{ + int new_stdin[2]; + int new_stdout[2]; + int status; + int i; + ssize_t ret; + pid_t pid; + cpu_set_t mask; + + ret = pipe(new_stdin); + if (ret == -1) { + perror("pipe returned error"); + return -1; + } + + ret = pipe(new_stdout); + if (ret == -1) { + perror("pipe returned error"); + return -1; + } + + /* + * pin this process and all its children to a single CPU, so it can also + * guarantee a context switch with its child + */ + sched_getaffinity(0, sizeof(mask), &mask); + + for (i = 0; i < sizeof(cpu_set_t); i++) + if (CPU_ISSET(i, &mask)) + break; + + CPU_ZERO(&mask); + CPU_SET(i, &mask); + sched_setaffinity(0, sizeof(mask), &mask); + + pid = fork(); + // child + if (pid == 0) { + dup2(new_stdin[0], STDIN_FILENO); + if (ret == -1) { + perror("dup2 returned error"); + exit(1); + } + + dup2(new_stdout[1], STDOUT_FILENO); + if (ret == -1) { + perror("dup2 returned error"); + exit(1); + } + + close(new_stdin[0]); + close(new_stdin[1]); + close(new_stdout[0]); + close(new_stdout[1]); + + ret = execl("exec_target", "exec_target", (char *)NULL); + if (ret == -1) { + perror("exec returned error"); + exit(1); + } + } + + close(new_stdin[0]); + close(new_stdout[1]); + + ret = write(new_stdin[1], &val, sizeof(size_t)); + if (ret == -1) { + perror("write returned error"); + return -1; + } + + /* + * wait for the worker to finish, so that read() reads all data + * will also context switch with worker so that this function can be used + * for context switch tests + */ + waitpid(pid, &status, 0); + if (WIFEXITED(status) == 0) { + fprintf(stderr, "worker exited unexpectedly\n"); + return -1; + } + if (WEXITSTATUS(status) != 0) { + fprintf(stderr, "worker exited with error\n"); + return -1; + } + + ret = read(new_stdout[0], signed_vals, sizeof(struct signatures)); + if (ret == -1) { + perror("read returned error"); + return -1; + } + + return 0; +} + +sigjmp_buf jmpbuf; +void pac_signal_handler(int signum, siginfo_t *si, void *uc) +{ + if (signum == SIGSEGV || signum == SIGILL) + siglongjmp(jmpbuf, 1); +} + +/* check that a corrupted PAC results in SIGSEGV or SIGILL */ +TEST(corrupt_pac) +{ + struct sigaction sa; + + ASSERT_PAUTH_ENABLED(); + if (sigsetjmp(jmpbuf, 1) == 0) { + sa.sa_sigaction = pac_signal_handler; + sa.sa_flags = SA_SIGINFO | SA_RESETHAND; + sigemptyset(&sa.sa_mask); + + sigaction(SIGSEGV, &sa, NULL); + sigaction(SIGILL, &sa, NULL); + + pac_corruptor(); + ASSERT_TRUE(0) TH_LOG("SIGSEGV/SIGILL signal did not occur"); + } +} + +/* + * There are no separate pac* and aut* controls so checking only the pac* + * instructions is sufficient + */ +TEST(pac_instructions_not_nop) +{ + size_t keyia = 0; + size_t keyib = 0; + size_t keyda = 0; + size_t keydb = 0; + + ASSERT_PAUTH_ENABLED(); + + for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) { + keyia |= keyia_sign(i) & PAC_MASK; + keyib |= keyib_sign(i) & PAC_MASK; + keyda |= keyda_sign(i) & PAC_MASK; + keydb |= keydb_sign(i) & PAC_MASK; + } + + ASSERT_NE(0, keyia) TH_LOG("keyia instructions did nothing"); + ASSERT_NE(0, keyib) TH_LOG("keyib instructions did nothing"); + ASSERT_NE(0, keyda) TH_LOG("keyda instructions did nothing"); + ASSERT_NE(0, keydb) TH_LOG("keydb instructions did nothing"); +} + +TEST(pac_instructions_not_nop_generic) +{ + size_t keyg = 0; + + ASSERT_GENERIC_PAUTH_ENABLED(); + + for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) + keyg |= keyg_sign(i) & PAC_MASK; + + ASSERT_NE(0, keyg) TH_LOG("keyg instructions did nothing"); +} + +TEST(single_thread_different_keys) +{ + int same = 10; + int nkeys = NKEYS; + int tmp; + struct signatures signed_vals; + unsigned long hwcaps = getauxval(AT_HWCAP); + + /* generic and data key instructions are not in NOP space. This prevents a SIGILL */ + ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); + if (!(hwcaps & HWCAP_PACG)) { + TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks"); + nkeys = NKEYS - 1; + } + + /* + * In Linux the PAC field can be up to 7 bits wide. Even if keys are + * different, there is about 5% chance for PACs to collide with + * different addresses. This chance rapidly increases with fewer bits + * allocated for the PAC (e.g. wider address). A comparison of the keys + * directly will be more reliable. + * All signed values need to be different at least once out of n + * attempts to be certain that the keys are different + */ + for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) { + if (nkeys == NKEYS) + sign_all(&signed_vals, i); + else + sign_specific(&signed_vals, i); + + tmp = n_same_single_set(&signed_vals, nkeys); + if (tmp < same) + same = tmp; + } + + ASSERT_EQ(0, same) TH_LOG("%d keys clashed every time", same); +} + +/* + * fork() does not change keys. Only exec() does so call a worker program. + * Its only job is to sign a value and report back the resutls + */ +TEST(exec_changed_keys) +{ + struct signatures new_keys; + struct signatures old_keys; + int ret; + int same = 10; + int nkeys = NKEYS; + unsigned long hwcaps = getauxval(AT_HWCAP); + + /* generic and data key instructions are not in NOP space. This prevents a SIGILL */ + ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); + if (!(hwcaps & HWCAP_PACG)) { + TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks"); + nkeys = NKEYS - 1; + } + + for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) { + ret = exec_sign_all(&new_keys, i); + ASSERT_EQ(0, ret) TH_LOG("failed to run worker"); + + if (nkeys == NKEYS) + sign_all(&old_keys, i); + else + sign_specific(&old_keys, i); + + ret = n_same(&old_keys, &new_keys, nkeys); + if (ret < same) + same = ret; + } + + ASSERT_EQ(0, same) TH_LOG("exec() did not change %d keys", same); +} + +TEST(context_switch_keep_keys) +{ + int ret; + struct signatures trash; + struct signatures before; + struct signatures after; + + ASSERT_PAUTH_ENABLED(); + + sign_specific(&before, ARBITRARY_VALUE); + + /* will context switch with a process with different keys at least once */ + ret = exec_sign_all(&trash, ARBITRARY_VALUE); + ASSERT_EQ(0, ret) TH_LOG("failed to run worker"); + + sign_specific(&after, ARBITRARY_VALUE); + + ASSERT_EQ(before.keyia, after.keyia) TH_LOG("keyia changed after context switching"); + ASSERT_EQ(before.keyib, after.keyib) TH_LOG("keyib changed after context switching"); + ASSERT_EQ(before.keyda, after.keyda) TH_LOG("keyda changed after context switching"); + ASSERT_EQ(before.keydb, after.keydb) TH_LOG("keydb changed after context switching"); +} + +TEST(context_switch_keep_keys_generic) +{ + int ret; + struct signatures trash; + size_t before; + size_t after; + + ASSERT_GENERIC_PAUTH_ENABLED(); + + before = keyg_sign(ARBITRARY_VALUE); + + /* will context switch with a process with different keys at least once */ + ret = exec_sign_all(&trash, ARBITRARY_VALUE); + ASSERT_EQ(0, ret) TH_LOG("failed to run worker"); + + after = keyg_sign(ARBITRARY_VALUE); + + ASSERT_EQ(before, after) TH_LOG("keyg changed after context switching"); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/arm64/pauth/pac_corruptor.S b/tools/testing/selftests/arm64/pauth/pac_corruptor.S new file mode 100644 index 000000000000..aa6588050752 --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/pac_corruptor.S @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020 ARM Limited */ + +.global pac_corruptor + +.text +/* + * Corrupting a single bit of the PAC ensures the authentication will fail. It + * also guarantees no possible collision. TCR_EL1.TBI0 is set by default so no + * top byte PAC is tested + */ + pac_corruptor: + paciasp + + /* corrupt the top bit of the PAC */ + eor lr, lr, #1 << 53 + + autiasp + ret diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index 58c0eab71bca..0517c744b04e 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -78,6 +78,7 @@ #define KPF_ARCH 38 #define KPF_UNCACHED 39 #define KPF_SOFTDIRTY 40 +#define KPF_ARCH_2 41 /* [48-] take some arbitrary free slots for expanding overloaded flags * not part of kernel API @@ -135,6 +136,7 @@ static const char * const page_flag_names[] = { [KPF_ARCH] = "h:arch", [KPF_UNCACHED] = "c:uncached", [KPF_SOFTDIRTY] = "f:softdirty", + [KPF_ARCH_2] = "H:arch_2", [KPF_READAHEAD] = "I:readahead", [KPF_SLOB_FREE] = "P:slob_free", |