summaryrefslogtreecommitdiff
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/s390/boot/Makefile2
-rw-r--r--arch/s390/boot/boot.h40
-rw-r--r--arch/s390/boot/decompressor.c1
-rw-r--r--arch/s390/boot/decompressor.h26
-rw-r--r--arch/s390/boot/kaslr.c20
-rw-r--r--arch/s390/boot/mem_detect.c72
-rw-r--r--arch/s390/boot/startup.c86
-rw-r--r--arch/s390/boot/vmem.c278
-rw-r--r--arch/s390/crypto/aes_s390.c4
-rw-r--r--arch/s390/crypto/arch_random.c1
-rw-r--r--arch/s390/crypto/paes_s390.c2
-rw-r--r--arch/s390/include/asm/abs_lowcore.h16
-rw-r--r--arch/s390/include/asm/ap.h12
-rw-r--r--arch/s390/include/asm/asm-extable.h4
-rw-r--r--arch/s390/include/asm/ccwdev.h2
-rw-r--r--arch/s390/include/asm/cmpxchg.h109
-rw-r--r--arch/s390/include/asm/cpu_mcf.h112
-rw-r--r--arch/s390/include/asm/cpu_mf.h53
-rw-r--r--arch/s390/include/asm/cputime.h19
-rw-r--r--arch/s390/include/asm/diag.h16
-rw-r--r--arch/s390/include/asm/fpu/internal.h4
-rw-r--r--arch/s390/include/asm/idals.h12
-rw-r--r--arch/s390/include/asm/idle.h5
-rw-r--r--arch/s390/include/asm/kasan.h12
-rw-r--r--arch/s390/include/asm/kprobes.h2
-rw-r--r--arch/s390/include/asm/maccess.h2
-rw-r--r--arch/s390/include/asm/mem_detect.h39
-rw-r--r--arch/s390/include/asm/page.h5
-rw-r--r--arch/s390/include/asm/pgtable.h70
-rw-r--r--arch/s390/include/asm/processor.h29
-rw-r--r--arch/s390/include/asm/ptrace.h2
-rw-r--r--arch/s390/include/asm/setup.h6
-rw-r--r--arch/s390/include/asm/syscall_wrapper.h144
-rw-r--r--arch/s390/include/asm/uaccess.h208
-rw-r--r--arch/s390/include/asm/unwind.h10
-rw-r--r--arch/s390/include/uapi/asm/fs3270.h25
-rw-r--r--arch/s390/include/uapi/asm/raw3270.h75
-rw-r--r--arch/s390/include/uapi/asm/types.h15
-rw-r--r--arch/s390/include/uapi/asm/zcrypt.h3
-rw-r--r--arch/s390/kernel/Makefile3
-rw-r--r--arch/s390/kernel/abs_lowcore.c49
-rw-r--r--arch/s390/kernel/cache.c2
-rw-r--r--arch/s390/kernel/compat_signal.c4
-rw-r--r--arch/s390/kernel/crash_dump.c2
-rw-r--r--arch/s390/kernel/diag.c26
-rw-r--r--arch/s390/kernel/early.c8
-rw-r--r--arch/s390/kernel/entry.S6
-rw-r--r--arch/s390/kernel/entry.h1
-rw-r--r--arch/s390/kernel/head64.S1
-rw-r--r--arch/s390/kernel/idle.c94
-rw-r--r--arch/s390/kernel/ipl.c101
-rw-r--r--arch/s390/kernel/irq.c8
-rw-r--r--arch/s390/kernel/kprobes.c30
-rw-r--r--arch/s390/kernel/machine_kexec.c5
-rw-r--r--arch/s390/kernel/mcount.S12
-rw-r--r--arch/s390/kernel/os_info.c5
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c312
-rw-r--r--arch/s390/kernel/perf_cpum_cf_common.c233
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c151
-rw-r--r--arch/s390/kernel/perf_pai_crypto.c4
-rw-r--r--arch/s390/kernel/perf_pai_ext.c6
-rw-r--r--arch/s390/kernel/process.c4
-rw-r--r--arch/s390/kernel/ptrace.c6
-rw-r--r--arch/s390/kernel/rethook.c34
-rw-r--r--arch/s390/kernel/rethook.h7
-rw-r--r--arch/s390/kernel/setup.c96
-rw-r--r--arch/s390/kernel/signal.c4
-rw-r--r--arch/s390/kernel/smp.c14
-rw-r--r--arch/s390/kernel/stacktrace.c6
-rw-r--r--arch/s390/kernel/text_amode31.S13
-rw-r--r--arch/s390/kernel/vdso.c4
-rw-r--r--arch/s390/kernel/vmlinux.lds.S5
-rw-r--r--arch/s390/kernel/vtime.c2
-rw-r--r--arch/s390/lib/test_unwind.c12
-rw-r--r--arch/s390/mm/dump_pagetables.c16
-rw-r--r--arch/s390/mm/extable.c9
-rw-r--r--arch/s390/mm/fault.c63
-rw-r--r--arch/s390/mm/gmap.c11
-rw-r--r--arch/s390/mm/init.c35
-rw-r--r--arch/s390/mm/kasan_init.c246
-rw-r--r--arch/s390/mm/maccess.c28
-rw-r--r--arch/s390/mm/pgtable.c25
-rw-r--r--arch/s390/mm/vmem.c103
-rw-r--r--arch/s390/net/bpf_jit_comp.c715
85 files changed, 2631 insertions, 1434 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 7fd08755a1f9..933771b0b07a 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -187,6 +187,7 @@ config S390
select HAVE_KPROBES
select HAVE_KPROBES_ON_FTRACE
select HAVE_KRETPROBES
+ select HAVE_RETHOOK
select HAVE_KVM
select HAVE_LIVEPATCH
select HAVE_MEMBLOCK_PHYS_MAP
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index d52c3e2e16bc..47a397da0498 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -35,7 +35,7 @@ endif
CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
-obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o
+obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o vmem.o
obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
obj-y += version.o pgm_check_info.o ctype.o ipl_data.o machine_kexec_reloc.o
obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h
index 70418389414d..58ce701d6110 100644
--- a/arch/s390/boot/boot.h
+++ b/arch/s390/boot/boot.h
@@ -8,10 +8,36 @@
#ifndef __ASSEMBLY__
+struct machine_info {
+ unsigned char has_edat1 : 1;
+ unsigned char has_edat2 : 1;
+ unsigned char has_nx : 1;
+};
+
+struct vmlinux_info {
+ unsigned long default_lma;
+ unsigned long entry;
+ unsigned long image_size; /* does not include .bss */
+ unsigned long bss_size; /* uncompressed image .bss size */
+ unsigned long bootdata_off;
+ unsigned long bootdata_size;
+ unsigned long bootdata_preserved_off;
+ unsigned long bootdata_preserved_size;
+ unsigned long dynsym_start;
+ unsigned long rela_dyn_start;
+ unsigned long rela_dyn_end;
+ unsigned long amode31_size;
+ unsigned long init_mm_off;
+ unsigned long swapper_pg_dir_off;
+ unsigned long invalid_pg_dir_off;
+};
+
void startup_kernel(void);
-unsigned long detect_memory(void);
+unsigned long detect_memory(unsigned long *safe_addr);
+void mem_detect_set_usable_limit(unsigned long limit);
bool is_ipl_block_dump(void);
void store_ipl_parmblock(void);
+unsigned long read_ipl_report(unsigned long safe_addr);
void setup_boot_command_line(void);
void parse_boot_command_line(void);
void verify_facilities(void);
@@ -19,7 +45,12 @@ void print_missing_facilities(void);
void sclp_early_setup_buffer(void);
void print_pgm_check_info(void);
unsigned long get_random_base(unsigned long safe_addr);
+void setup_vmem(unsigned long asce_limit);
+unsigned long vmem_estimate_memory_needs(unsigned long online_mem_total);
void __printf(1, 2) decompressor_printk(const char *fmt, ...);
+void error(char *m);
+
+extern struct machine_info machine;
/* Symbols defined by linker scripts */
extern const char kernel_version[];
@@ -31,8 +62,13 @@ extern char __boot_data_start[], __boot_data_end[];
extern char __boot_data_preserved_start[], __boot_data_preserved_end[];
extern char _decompressor_syms_start[], _decompressor_syms_end[];
extern char _stack_start[], _stack_end[];
+extern char _end[];
+extern unsigned char _compressed_start[];
+extern unsigned char _compressed_end[];
+extern struct vmlinux_info _vmlinux_info;
+#define vmlinux _vmlinux_info
-unsigned long read_ipl_report(unsigned long safe_offset);
+#define __abs_lowcore_pa(x) (((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore))
#endif /* __ASSEMBLY__ */
#endif /* BOOT_BOOT_H */
diff --git a/arch/s390/boot/decompressor.c b/arch/s390/boot/decompressor.c
index b519a1f045d8..d762733a0753 100644
--- a/arch/s390/boot/decompressor.c
+++ b/arch/s390/boot/decompressor.c
@@ -11,6 +11,7 @@
#include <linux/string.h>
#include <asm/page.h>
#include "decompressor.h"
+#include "boot.h"
/*
* gzip declarations
diff --git a/arch/s390/boot/decompressor.h b/arch/s390/boot/decompressor.h
index f75cc31a77dd..92b81d2ea35d 100644
--- a/arch/s390/boot/decompressor.h
+++ b/arch/s390/boot/decompressor.h
@@ -2,37 +2,11 @@
#ifndef BOOT_COMPRESSED_DECOMPRESSOR_H
#define BOOT_COMPRESSED_DECOMPRESSOR_H
-#include <linux/stddef.h>
-
#ifdef CONFIG_KERNEL_UNCOMPRESSED
static inline void *decompress_kernel(void) { return NULL; }
#else
void *decompress_kernel(void);
#endif
unsigned long mem_safe_offset(void);
-void error(char *m);
-
-struct vmlinux_info {
- unsigned long default_lma;
- void (*entry)(void);
- unsigned long image_size; /* does not include .bss */
- unsigned long bss_size; /* uncompressed image .bss size */
- unsigned long bootdata_off;
- unsigned long bootdata_size;
- unsigned long bootdata_preserved_off;
- unsigned long bootdata_preserved_size;
- unsigned long dynsym_start;
- unsigned long rela_dyn_start;
- unsigned long rela_dyn_end;
- unsigned long amode31_size;
-};
-
-/* Symbols defined by linker scripts */
-extern char _end[];
-extern unsigned char _compressed_start[];
-extern unsigned char _compressed_end[];
-extern char _vmlinux_info[];
-
-#define vmlinux (*(struct vmlinux_info *)_vmlinux_info)
#endif /* BOOT_COMPRESSED_DECOMPRESSOR_H */
diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c
index e8d74d4f62aa..3e3d846400b4 100644
--- a/arch/s390/boot/kaslr.c
+++ b/arch/s390/boot/kaslr.c
@@ -132,7 +132,7 @@ static unsigned long count_valid_kernel_positions(unsigned long kernel_size,
unsigned long start, end, pos = 0;
int i;
- for_each_mem_detect_block(i, &start, &end) {
+ for_each_mem_detect_usable_block(i, &start, &end) {
if (_min >= end)
continue;
if (start >= _max)
@@ -153,7 +153,7 @@ static unsigned long position_to_address(unsigned long pos, unsigned long kernel
unsigned long start, end;
int i;
- for_each_mem_detect_block(i, &start, &end) {
+ for_each_mem_detect_usable_block(i, &start, &end) {
if (_min >= end)
continue;
if (start >= _max)
@@ -172,26 +172,20 @@ static unsigned long position_to_address(unsigned long pos, unsigned long kernel
unsigned long get_random_base(unsigned long safe_addr)
{
+ unsigned long usable_total = get_mem_detect_usable_total();
unsigned long memory_limit = get_mem_detect_end();
unsigned long base_pos, max_pos, kernel_size;
- unsigned long kasan_needs;
int i;
- memory_limit = min(memory_limit, ident_map_size);
-
/*
* Avoid putting kernel in the end of physical memory
- * which kasan will use for shadow memory and early pgtable
- * mapping allocations.
+ * which vmem and kasan code will use for shadow memory and
+ * pgtable mapping allocations.
*/
- memory_limit -= kasan_estimate_memory_needs(memory_limit);
+ memory_limit -= kasan_estimate_memory_needs(usable_total);
+ memory_limit -= vmem_estimate_memory_needs(usable_total);
- if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size) {
- if (safe_addr < initrd_data.start + initrd_data.size)
- safe_addr = initrd_data.start + initrd_data.size;
- }
safe_addr = ALIGN(safe_addr, THREAD_SIZE);
-
kernel_size = vmlinux.image_size + vmlinux.bss_size;
if (safe_addr + kernel_size > memory_limit)
return 0;
diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c
index 7fa1a32ea0f3..35f4ba11f7fd 100644
--- a/arch/s390/boot/mem_detect.c
+++ b/arch/s390/boot/mem_detect.c
@@ -16,29 +16,10 @@ struct mem_detect_info __bootdata(mem_detect);
#define ENTRIES_EXTENDED_MAX \
(256 * (1020 / 2) * sizeof(struct mem_detect_block))
-/*
- * To avoid corrupting old kernel memory during dump, find lowest memory
- * chunk possible either right after the kernel end (decompressed kernel) or
- * after initrd (if it is present and there is no hole between the kernel end
- * and initrd)
- */
-static void *mem_detect_alloc_extended(void)
-{
- unsigned long offset = ALIGN(mem_safe_offset(), sizeof(u64));
-
- if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size &&
- initrd_data.start < offset + ENTRIES_EXTENDED_MAX)
- offset = ALIGN(initrd_data.start + initrd_data.size, sizeof(u64));
-
- return (void *)offset;
-}
-
static struct mem_detect_block *__get_mem_detect_block_ptr(u32 n)
{
if (n < MEM_INLINED_ENTRIES)
return &mem_detect.entries[n];
- if (unlikely(!mem_detect.entries_extended))
- mem_detect.entries_extended = mem_detect_alloc_extended();
return &mem_detect.entries_extended[n - MEM_INLINED_ENTRIES];
}
@@ -147,7 +128,7 @@ static int tprot(unsigned long addr)
return rc;
}
-static void search_mem_end(void)
+static unsigned long search_mem_end(void)
{
unsigned long range = 1 << (MAX_PHYSMEM_BITS - 20); /* in 1MB blocks */
unsigned long offset = 0;
@@ -159,33 +140,52 @@ static void search_mem_end(void)
if (!tprot(pivot << 20))
offset = pivot;
}
-
- add_mem_detect_block(0, (offset + 1) << 20);
+ return (offset + 1) << 20;
}
-unsigned long detect_memory(void)
+unsigned long detect_memory(unsigned long *safe_addr)
{
- unsigned long max_physmem_end;
+ unsigned long max_physmem_end = 0;
sclp_early_get_memsize(&max_physmem_end);
+ mem_detect.entries_extended = (struct mem_detect_block *)ALIGN(*safe_addr, sizeof(u64));
if (!sclp_early_read_storage_info()) {
mem_detect.info_source = MEM_DETECT_SCLP_STOR_INFO;
- return max_physmem_end;
- }
-
- if (!diag260()) {
+ } else if (!diag260()) {
mem_detect.info_source = MEM_DETECT_DIAG260;
- return max_physmem_end;
- }
-
- if (max_physmem_end) {
+ max_physmem_end = max_physmem_end ?: get_mem_detect_end();
+ } else if (max_physmem_end) {
add_mem_detect_block(0, max_physmem_end);
mem_detect.info_source = MEM_DETECT_SCLP_READ_INFO;
- return max_physmem_end;
+ } else {
+ max_physmem_end = search_mem_end();
+ add_mem_detect_block(0, max_physmem_end);
+ mem_detect.info_source = MEM_DETECT_BIN_SEARCH;
}
- search_mem_end();
- mem_detect.info_source = MEM_DETECT_BIN_SEARCH;
- return get_mem_detect_end();
+ if (mem_detect.count > MEM_INLINED_ENTRIES) {
+ *safe_addr += (mem_detect.count - MEM_INLINED_ENTRIES) *
+ sizeof(struct mem_detect_block);
+ }
+
+ return max_physmem_end;
+}
+
+void mem_detect_set_usable_limit(unsigned long limit)
+{
+ struct mem_detect_block *block;
+ int i;
+
+ /* make sure mem_detect.usable ends up within online memory block */
+ for (i = 0; i < mem_detect.count; i++) {
+ block = __get_mem_detect_block_ptr(i);
+ if (block->start >= limit)
+ break;
+ if (block->end >= limit) {
+ mem_detect.usable = limit;
+ break;
+ }
+ mem_detect.usable = block->end;
+ }
}
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 47ca3264c023..11413f0baabc 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -3,6 +3,7 @@
#include <linux/elf.h>
#include <asm/boot_data.h>
#include <asm/sections.h>
+#include <asm/maccess.h>
#include <asm/cpu_mf.h>
#include <asm/setup.h>
#include <asm/kasan.h>
@@ -11,6 +12,7 @@
#include <asm/diag.h>
#include <asm/uv.h>
#include <asm/abs_lowcore.h>
+#include <asm/mem_detect.h>
#include "decompressor.h"
#include "boot.h"
#include "uv.h"
@@ -18,6 +20,7 @@
unsigned long __bootdata_preserved(__kaslr_offset);
unsigned long __bootdata_preserved(__abs_lowcore);
unsigned long __bootdata_preserved(__memcpy_real_area);
+pte_t *__bootdata_preserved(memcpy_real_ptep);
unsigned long __bootdata(__amode31_base);
unsigned long __bootdata_preserved(VMALLOC_START);
unsigned long __bootdata_preserved(VMALLOC_END);
@@ -33,6 +36,8 @@ u64 __bootdata_preserved(stfle_fac_list[16]);
u64 __bootdata_preserved(alt_stfle_fac_list[16]);
struct oldmem_data __bootdata_preserved(oldmem_data);
+struct machine_info machine;
+
void error(char *x)
{
sclp_early_printk("\n\n");
@@ -42,6 +47,20 @@ void error(char *x)
disabled_wait();
}
+static void detect_facilities(void)
+{
+ if (test_facility(8)) {
+ machine.has_edat1 = 1;
+ __ctl_set_bit(0, 23);
+ }
+ if (test_facility(78))
+ machine.has_edat2 = 1;
+ if (!noexec_disabled && test_facility(130)) {
+ machine.has_nx = 1;
+ __ctl_set_bit(0, 20);
+ }
+}
+
static void setup_lpp(void)
{
S390_lowcore.current_pid = 0;
@@ -57,16 +76,17 @@ unsigned long mem_safe_offset(void)
}
#endif
-static void rescue_initrd(unsigned long addr)
+static unsigned long rescue_initrd(unsigned long safe_addr)
{
if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD))
- return;
+ return safe_addr;
if (!initrd_data.start || !initrd_data.size)
- return;
- if (addr <= initrd_data.start)
- return;
- memmove((void *)addr, (void *)initrd_data.start, initrd_data.size);
- initrd_data.start = addr;
+ return safe_addr;
+ if (initrd_data.start < safe_addr) {
+ memmove((void *)safe_addr, (void *)initrd_data.start, initrd_data.size);
+ initrd_data.start = safe_addr;
+ }
+ return initrd_data.start + initrd_data.size;
}
static void copy_bootdata(void)
@@ -150,9 +170,10 @@ static void setup_ident_map_size(unsigned long max_physmem_end)
#endif
}
-static void setup_kernel_memory_layout(void)
+static unsigned long setup_kernel_memory_layout(void)
{
unsigned long vmemmap_start;
+ unsigned long asce_limit;
unsigned long rte_size;
unsigned long pages;
unsigned long vmax;
@@ -167,10 +188,10 @@ static void setup_kernel_memory_layout(void)
vmalloc_size > _REGION2_SIZE ||
vmemmap_start + vmemmap_size + vmalloc_size + MODULES_LEN >
_REGION2_SIZE) {
- vmax = _REGION1_SIZE;
+ asce_limit = _REGION1_SIZE;
rte_size = _REGION2_SIZE;
} else {
- vmax = _REGION2_SIZE;
+ asce_limit = _REGION2_SIZE;
rte_size = _REGION3_SIZE;
}
/*
@@ -178,7 +199,7 @@ static void setup_kernel_memory_layout(void)
* secure storage limit, so that any vmalloc allocation
* we do could be used to back secure guest storage.
*/
- vmax = adjust_to_uv_max(vmax);
+ vmax = adjust_to_uv_max(asce_limit);
#ifdef CONFIG_KASAN
/* force vmalloc and modules below kasan shadow */
vmax = min(vmax, KASAN_SHADOW_START);
@@ -207,6 +228,8 @@ static void setup_kernel_memory_layout(void)
/* make sure vmemmap doesn't overlay with vmalloc area */
VMALLOC_START = max(vmemmap_start + vmemmap_size, VMALLOC_START);
vmemmap = (struct page *)vmemmap_start;
+
+ return asce_limit;
}
/*
@@ -240,19 +263,25 @@ static void offset_vmlinux_info(unsigned long offset)
vmlinux.rela_dyn_start += offset;
vmlinux.rela_dyn_end += offset;
vmlinux.dynsym_start += offset;
+ vmlinux.init_mm_off += offset;
+ vmlinux.swapper_pg_dir_off += offset;
+ vmlinux.invalid_pg_dir_off += offset;
}
static unsigned long reserve_amode31(unsigned long safe_addr)
{
__amode31_base = PAGE_ALIGN(safe_addr);
- return safe_addr + vmlinux.amode31_size;
+ return __amode31_base + vmlinux.amode31_size;
}
void startup_kernel(void)
{
+ unsigned long max_physmem_end;
unsigned long random_lma;
unsigned long safe_addr;
+ unsigned long asce_limit;
void *img;
+ psw_t psw;
initrd_data.start = parmarea.initrd_start;
initrd_data.size = parmarea.initrd_size;
@@ -265,14 +294,17 @@ void startup_kernel(void)
safe_addr = reserve_amode31(safe_addr);
safe_addr = read_ipl_report(safe_addr);
uv_query_info();
- rescue_initrd(safe_addr);
+ safe_addr = rescue_initrd(safe_addr);
sclp_early_read_info();
setup_boot_command_line();
parse_boot_command_line();
+ detect_facilities();
sanitize_prot_virt_host();
- setup_ident_map_size(detect_memory());
+ max_physmem_end = detect_memory(&safe_addr);
+ setup_ident_map_size(max_physmem_end);
setup_vmalloc_size();
- setup_kernel_memory_layout();
+ asce_limit = setup_kernel_memory_layout();
+ mem_detect_set_usable_limit(ident_map_size);
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) {
random_lma = get_random_base(safe_addr);
@@ -289,9 +321,23 @@ void startup_kernel(void)
} else if (__kaslr_offset)
memcpy((void *)vmlinux.default_lma, img, vmlinux.image_size);
+ /*
+ * The order of the following operations is important:
+ *
+ * - handle_relocs() must follow clear_bss_section() to establish static
+ * memory references to data in .bss to be used by setup_vmem()
+ * (i.e init_mm.pgd)
+ *
+ * - setup_vmem() must follow handle_relocs() to be able using
+ * static memory references to data in .bss (i.e init_mm.pgd)
+ *
+ * - copy_bootdata() must follow setup_vmem() to propagate changes to
+ * bootdata made by setup_vmem()
+ */
clear_bss_section();
- copy_bootdata();
handle_relocs(__kaslr_offset);
+ setup_vmem(asce_limit);
+ copy_bootdata();
if (__kaslr_offset) {
/*
@@ -303,5 +349,11 @@ void startup_kernel(void)
if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED))
memset(img, 0, vmlinux.image_size);
}
- vmlinux.entry();
+
+ /*
+ * Jump to the decompressed kernel entry point and switch DAT mode on.
+ */
+ psw.addr = vmlinux.entry;
+ psw.mask = PSW_KERNEL_BITS;
+ __load_psw(psw);
}
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
new file mode 100644
index 000000000000..4d1d0d8e99cb
--- /dev/null
+++ b/arch/s390/boot/vmem.c
@@ -0,0 +1,278 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/sched/task.h>
+#include <linux/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/facility.h>
+#include <asm/sections.h>
+#include <asm/mem_detect.h>
+#include <asm/maccess.h>
+#include <asm/abs_lowcore.h>
+#include "decompressor.h"
+#include "boot.h"
+
+#define init_mm (*(struct mm_struct *)vmlinux.init_mm_off)
+#define swapper_pg_dir vmlinux.swapper_pg_dir_off
+#define invalid_pg_dir vmlinux.invalid_pg_dir_off
+
+/*
+ * Mimic virt_to_kpte() in lack of init_mm symbol. Skip pmd NULL check though.
+ */
+static inline pte_t *__virt_to_kpte(unsigned long va)
+{
+ return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va);
+}
+
+unsigned long __bootdata_preserved(s390_invalid_asce);
+unsigned long __bootdata(pgalloc_pos);
+unsigned long __bootdata(pgalloc_end);
+unsigned long __bootdata(pgalloc_low);
+
+enum populate_mode {
+ POPULATE_NONE,
+ POPULATE_ONE2ONE,
+ POPULATE_ABS_LOWCORE,
+};
+
+static void boot_check_oom(void)
+{
+ if (pgalloc_pos < pgalloc_low)
+ error("out of memory on boot\n");
+}
+
+static void pgtable_populate_init(void)
+{
+ unsigned long initrd_end;
+ unsigned long kernel_end;
+
+ kernel_end = vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size;
+ pgalloc_low = round_up(kernel_end, PAGE_SIZE);
+ if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) {
+ initrd_end = round_up(initrd_data.start + initrd_data.size, _SEGMENT_SIZE);
+ pgalloc_low = max(pgalloc_low, initrd_end);
+ }
+
+ pgalloc_end = round_down(get_mem_detect_end(), PAGE_SIZE);
+ pgalloc_pos = pgalloc_end;
+
+ boot_check_oom();
+}
+
+static void *boot_alloc_pages(unsigned int order)
+{
+ unsigned long size = PAGE_SIZE << order;
+
+ pgalloc_pos -= size;
+ pgalloc_pos = round_down(pgalloc_pos, size);
+
+ boot_check_oom();
+
+ return (void *)pgalloc_pos;
+}
+
+static void *boot_crst_alloc(unsigned long val)
+{
+ unsigned long *table;
+
+ table = boot_alloc_pages(CRST_ALLOC_ORDER);
+ if (table)
+ crst_table_init(table, val);
+ return table;
+}
+
+static pte_t *boot_pte_alloc(void)
+{
+ static void *pte_leftover;
+ pte_t *pte;
+
+ BUILD_BUG_ON(_PAGE_TABLE_SIZE * 2 != PAGE_SIZE);
+
+ if (!pte_leftover) {
+ pte_leftover = boot_alloc_pages(0);
+ pte = pte_leftover + _PAGE_TABLE_SIZE;
+ } else {
+ pte = pte_leftover;
+ pte_leftover = NULL;
+ }
+ memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
+ return pte;
+}
+
+static unsigned long _pa(unsigned long addr, enum populate_mode mode)
+{
+ switch (mode) {
+ case POPULATE_NONE:
+ return -1;
+ case POPULATE_ONE2ONE:
+ return addr;
+ case POPULATE_ABS_LOWCORE:
+ return __abs_lowcore_pa(addr);
+ default:
+ return -1;
+ }
+}
+
+static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end)
+{
+ return machine.has_edat2 &&
+ IS_ALIGNED(addr, PUD_SIZE) && (end - addr) >= PUD_SIZE;
+}
+
+static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end)
+{
+ return machine.has_edat1 &&
+ IS_ALIGNED(addr, PMD_SIZE) && (end - addr) >= PMD_SIZE;
+}
+
+static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
+ enum populate_mode mode)
+{
+ unsigned long next;
+ pte_t *pte, entry;
+
+ pte = pte_offset_kernel(pmd, addr);
+ for (; addr < end; addr += PAGE_SIZE, pte++) {
+ if (pte_none(*pte)) {
+ entry = __pte(_pa(addr, mode));
+ entry = set_pte_bit(entry, PAGE_KERNEL_EXEC);
+ set_pte(pte, entry);
+ }
+ }
+}
+
+static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end,
+ enum populate_mode mode)
+{
+ unsigned long next;
+ pmd_t *pmd, entry;
+ pte_t *pte;
+
+ pmd = pmd_offset(pud, addr);
+ for (; addr < end; addr = next, pmd++) {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none(*pmd)) {
+ if (can_large_pmd(pmd, addr, next)) {
+ entry = __pmd(_pa(addr, mode));
+ entry = set_pmd_bit(entry, SEGMENT_KERNEL_EXEC);
+ set_pmd(pmd, entry);
+ continue;
+ }
+ pte = boot_pte_alloc();
+ pmd_populate(&init_mm, pmd, pte);
+ } else if (pmd_large(*pmd)) {
+ continue;
+ }
+ pgtable_pte_populate(pmd, addr, next, mode);
+ }
+}
+
+static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end,
+ enum populate_mode mode)
+{
+ unsigned long next;
+ pud_t *pud, entry;
+ pmd_t *pmd;
+
+ pud = pud_offset(p4d, addr);
+ for (; addr < end; addr = next, pud++) {
+ next = pud_addr_end(addr, end);
+ if (pud_none(*pud)) {
+ if (can_large_pud(pud, addr, next)) {
+ entry = __pud(_pa(addr, mode));
+ entry = set_pud_bit(entry, REGION3_KERNEL_EXEC);
+ set_pud(pud, entry);
+ continue;
+ }
+ pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+ pud_populate(&init_mm, pud, pmd);
+ } else if (pud_large(*pud)) {
+ continue;
+ }
+ pgtable_pmd_populate(pud, addr, next, mode);
+ }
+}
+
+static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end,
+ enum populate_mode mode)
+{
+ unsigned long next;
+ p4d_t *p4d;
+ pud_t *pud;
+
+ p4d = p4d_offset(pgd, addr);
+ for (; addr < end; addr = next, p4d++) {
+ next = p4d_addr_end(addr, end);
+ if (p4d_none(*p4d)) {
+ pud = boot_crst_alloc(_REGION3_ENTRY_EMPTY);
+ p4d_populate(&init_mm, p4d, pud);
+ }
+ pgtable_pud_populate(p4d, addr, next, mode);
+ }
+}
+
+static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode)
+{
+ unsigned long next;
+ pgd_t *pgd;
+ p4d_t *p4d;
+
+ pgd = pgd_offset(&init_mm, addr);
+ for (; addr < end; addr = next, pgd++) {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none(*pgd)) {
+ p4d = boot_crst_alloc(_REGION2_ENTRY_EMPTY);
+ pgd_populate(&init_mm, pgd, p4d);
+ }
+ pgtable_p4d_populate(pgd, addr, next, mode);
+ }
+}
+
+void setup_vmem(unsigned long asce_limit)
+{
+ unsigned long start, end;
+ unsigned long asce_type;
+ unsigned long asce_bits;
+ int i;
+
+ if (asce_limit == _REGION1_SIZE) {
+ asce_type = _REGION2_ENTRY_EMPTY;
+ asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
+ } else {
+ asce_type = _REGION3_ENTRY_EMPTY;
+ asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
+ }
+ s390_invalid_asce = invalid_pg_dir | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
+
+ crst_table_init((unsigned long *)swapper_pg_dir, asce_type);
+ crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
+
+ /*
+ * To allow prefixing the lowcore must be mapped with 4KB pages.
+ * To prevent creation of a large page at address 0 first map
+ * the lowcore and create the identity mapping only afterwards.
+ */
+ pgtable_populate_init();
+ pgtable_populate(0, sizeof(struct lowcore), POPULATE_ONE2ONE);
+ for_each_mem_detect_usable_block(i, &start, &end)
+ pgtable_populate(start, end, POPULATE_ONE2ONE);
+ pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore),
+ POPULATE_ABS_LOWCORE);
+ pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE,
+ POPULATE_NONE);
+ memcpy_real_ptep = __virt_to_kpte(__memcpy_real_area);
+
+ S390_lowcore.kernel_asce = swapper_pg_dir | asce_bits;
+ S390_lowcore.user_asce = s390_invalid_asce;
+
+ __ctl_load(S390_lowcore.kernel_asce, 1, 1);
+ __ctl_load(S390_lowcore.user_asce, 7, 7);
+ __ctl_load(S390_lowcore.kernel_asce, 13, 13);
+
+ init_mm.context.asce = S390_lowcore.kernel_asce;
+}
+
+unsigned long vmem_estimate_memory_needs(unsigned long online_mem_total)
+{
+ unsigned long pages = DIV_ROUND_UP(online_mem_total, PAGE_SIZE);
+
+ return DIV_ROUND_UP(pages, _PAGE_ENTRIES) * _PAGE_TABLE_SIZE * 2;
+}
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 526c3f40f6a2..c773820e4af9 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -398,10 +398,6 @@ static int xts_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
if (err)
return err;
- /* In fips mode only 128 bit or 256 bit keys are valid */
- if (fips_enabled && key_len != 32 && key_len != 64)
- return -EINVAL;
-
/* Pick the correct function code based on the key length */
fc = (key_len == 32) ? CPACF_KM_XTS_128 :
(key_len == 64) ? CPACF_KM_XTS_256 : 0;
diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c
index 1f2d40993c4d..a8a2407381af 100644
--- a/arch/s390/crypto/arch_random.c
+++ b/arch/s390/crypto/arch_random.c
@@ -10,6 +10,7 @@
#include <linux/atomic.h>
#include <linux/random.h>
#include <linux/static_key.h>
+#include <asm/archrandom.h>
#include <asm/cpacf.h>
DEFINE_STATIC_KEY_FALSE(s390_arch_random_available);
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index a279b7d23a5e..29dc827e0fe8 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -474,7 +474,7 @@ static int xts_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
return rc;
/*
- * xts_check_key verifies the key length is not odd and makes
+ * xts_verify_key verifies the key length is not odd and makes
* sure that the two keys are not the same. This can be done
* on the two protected keys as well
*/
diff --git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h
index 4c61b14ee928..6f264b79e377 100644
--- a/arch/s390/include/asm/abs_lowcore.h
+++ b/arch/s390/include/asm/abs_lowcore.h
@@ -7,11 +7,21 @@
#define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore))
extern unsigned long __abs_lowcore;
-extern bool abs_lowcore_mapped;
-struct lowcore *get_abs_lowcore(unsigned long *flags);
-void put_abs_lowcore(struct lowcore *lc, unsigned long flags);
int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc);
void abs_lowcore_unmap(int cpu);
+static inline struct lowcore *get_abs_lowcore(void)
+{
+ int cpu;
+
+ cpu = get_cpu();
+ return ((struct lowcore *)__abs_lowcore) + cpu;
+}
+
+static inline void put_abs_lowcore(struct lowcore *lc)
+{
+ put_cpu();
+}
+
#endif /* _ASM_S390_ABS_LOWCORE_H */
diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h
index f508f5025e38..57a2d6518d27 100644
--- a/arch/s390/include/asm/ap.h
+++ b/arch/s390/include/asm/ap.h
@@ -239,7 +239,10 @@ static inline struct ap_queue_status ap_aqic(ap_qid_t qid,
union {
unsigned long value;
struct ap_qirq_ctrl qirqctrl;
- struct ap_queue_status status;
+ struct {
+ u32 _pad;
+ struct ap_queue_status status;
+ };
} reg1;
unsigned long reg2 = pa_ind;
@@ -253,7 +256,7 @@ static inline struct ap_queue_status ap_aqic(ap_qid_t qid,
" lgr %[reg1],1\n" /* gr1 (status) into reg1 */
: [reg1] "+&d" (reg1)
: [reg0] "d" (reg0), [reg2] "d" (reg2)
- : "cc", "0", "1", "2");
+ : "cc", "memory", "0", "1", "2");
return reg1.status;
}
@@ -290,7 +293,10 @@ static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit,
unsigned long reg0 = qid | (5UL << 24) | ((ifbit & 0x01) << 22);
union {
unsigned long value;
- struct ap_queue_status status;
+ struct {
+ u32 _pad;
+ struct ap_queue_status status;
+ };
} reg1;
unsigned long reg2;
diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h
index b74f1070ddb2..55a02a153dfc 100644
--- a/arch/s390/include/asm/asm-extable.h
+++ b/arch/s390/include/asm/asm-extable.h
@@ -12,6 +12,7 @@
#define EX_TYPE_UA_STORE 3
#define EX_TYPE_UA_LOAD_MEM 4
#define EX_TYPE_UA_LOAD_REG 5
+#define EX_TYPE_UA_LOAD_REGPAIR 6
#define EX_DATA_REG_ERR_SHIFT 0
#define EX_DATA_REG_ERR GENMASK(3, 0)
@@ -85,4 +86,7 @@
#define EX_TABLE_UA_LOAD_REG(_fault, _target, _regerr, _regzero) \
__EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REG, _regerr, _regzero, 0)
+#define EX_TABLE_UA_LOAD_REGPAIR(_fault, _target, _regerr, _regzero) \
+ __EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REGPAIR, _regerr, _regzero, 0)
+
#endif /* __ASM_EXTABLE_H */
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index bd1596810cc1..91d261751d25 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -15,6 +15,7 @@
#include <asm/fcx.h>
#include <asm/irq.h>
#include <asm/schid.h>
+#include <linux/mutex.h>
/* structs from asm/cio.h */
struct irb;
@@ -87,6 +88,7 @@ struct ccw_device {
spinlock_t *ccwlock;
/* private: */
struct ccw_device_private *private; /* cio private information */
+ struct mutex reg_mutex;
/* public: */
struct ccw_device_id id;
struct ccw_driver *drv;
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index 84c3f0d576c5..3f26416c2ad8 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -88,67 +88,90 @@ static __always_inline unsigned long __cmpxchg(unsigned long address,
unsigned long old,
unsigned long new, int size)
{
- unsigned long prev, tmp;
- int shift;
-
switch (size) {
- case 1:
+ case 1: {
+ unsigned int prev, shift, mask;
+
shift = (3 ^ (address & 3)) << 3;
address ^= address & 3;
+ old = (old & 0xff) << shift;
+ new = (new & 0xff) << shift;
+ mask = ~(0xff << shift);
asm volatile(
- " l %0,%2\n"
- "0: nr %0,%5\n"
- " lr %1,%0\n"
- " or %0,%3\n"
- " or %1,%4\n"
- " cs %0,%1,%2\n"
- " jnl 1f\n"
- " xr %1,%0\n"
- " nr %1,%5\n"
- " jnz 0b\n"
+ " l %[prev],%[address]\n"
+ " nr %[prev],%[mask]\n"
+ " xilf %[mask],0xffffffff\n"
+ " or %[new],%[prev]\n"
+ " or %[prev],%[tmp]\n"
+ "0: lr %[tmp],%[prev]\n"
+ " cs %[prev],%[new],%[address]\n"
+ " jnl 1f\n"
+ " xr %[tmp],%[prev]\n"
+ " xr %[new],%[tmp]\n"
+ " nr %[tmp],%[mask]\n"
+ " jz 0b\n"
"1:"
- : "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) address)
- : "d" ((old & 0xff) << shift),
- "d" ((new & 0xff) << shift),
- "d" (~(0xff << shift))
- : "memory", "cc");
+ : [prev] "=&d" (prev),
+ [address] "+Q" (*(int *)address),
+ [tmp] "+&d" (old),
+ [new] "+&d" (new),
+ [mask] "+&d" (mask)
+ :: "memory", "cc");
return prev >> shift;
- case 2:
+ }
+ case 2: {
+ unsigned int prev, shift, mask;
+
shift = (2 ^ (address & 2)) << 3;
address ^= address & 2;
+ old = (old & 0xffff) << shift;
+ new = (new & 0xffff) << shift;
+ mask = ~(0xffff << shift);
asm volatile(
- " l %0,%2\n"
- "0: nr %0,%5\n"
- " lr %1,%0\n"
- " or %0,%3\n"
- " or %1,%4\n"
- " cs %0,%1,%2\n"
- " jnl 1f\n"
- " xr %1,%0\n"
- " nr %1,%5\n"
- " jnz 0b\n"
+ " l %[prev],%[address]\n"
+ " nr %[prev],%[mask]\n"
+ " xilf %[mask],0xffffffff\n"
+ " or %[new],%[prev]\n"
+ " or %[prev],%[tmp]\n"
+ "0: lr %[tmp],%[prev]\n"
+ " cs %[prev],%[new],%[address]\n"
+ " jnl 1f\n"
+ " xr %[tmp],%[prev]\n"
+ " xr %[new],%[tmp]\n"
+ " nr %[tmp],%[mask]\n"
+ " jz 0b\n"
"1:"
- : "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) address)
- : "d" ((old & 0xffff) << shift),
- "d" ((new & 0xffff) << shift),
- "d" (~(0xffff << shift))
- : "memory", "cc");
+ : [prev] "=&d" (prev),
+ [address] "+Q" (*(int *)address),
+ [tmp] "+&d" (old),
+ [new] "+&d" (new),
+ [mask] "+&d" (mask)
+ :: "memory", "cc");
return prev >> shift;
- case 4:
+ }
+ case 4: {
+ unsigned int prev = old;
+
asm volatile(
- " cs %0,%3,%1\n"
- : "=&d" (prev), "+Q" (*(int *) address)
- : "0" (old), "d" (new)
+ " cs %[prev],%[new],%[address]\n"
+ : [prev] "+&d" (prev),
+ [address] "+Q" (*(int *)address)
+ : [new] "d" (new)
: "memory", "cc");
return prev;
- case 8:
+ }
+ case 8: {
+ unsigned long prev = old;
+
asm volatile(
- " csg %0,%3,%1\n"
- : "=&d" (prev), "+QS" (*(long *) address)
- : "0" (old), "d" (new)
+ " csg %[prev],%[new],%[address]\n"
+ : [prev] "+&d" (prev),
+ [address] "+QS" (*(long *)address)
+ : [new] "d" (new)
: "memory", "cc");
return prev;
}
+ }
__cmpxchg_called_with_bad_pointer();
return old;
}
diff --git a/arch/s390/include/asm/cpu_mcf.h b/arch/s390/include/asm/cpu_mcf.h
deleted file mode 100644
index f87a4788c19c..000000000000
--- a/arch/s390/include/asm/cpu_mcf.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Counter facility support definitions for the Linux perf
- *
- * Copyright IBM Corp. 2019
- * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
- */
-#ifndef _ASM_S390_CPU_MCF_H
-#define _ASM_S390_CPU_MCF_H
-
-#include <linux/perf_event.h>
-#include <asm/cpu_mf.h>
-
-enum cpumf_ctr_set {
- CPUMF_CTR_SET_BASIC = 0, /* Basic Counter Set */
- CPUMF_CTR_SET_USER = 1, /* Problem-State Counter Set */
- CPUMF_CTR_SET_CRYPTO = 2, /* Crypto-Activity Counter Set */
- CPUMF_CTR_SET_EXT = 3, /* Extended Counter Set */
- CPUMF_CTR_SET_MT_DIAG = 4, /* MT-diagnostic Counter Set */
-
- /* Maximum number of counter sets */
- CPUMF_CTR_SET_MAX,
-};
-
-#define CPUMF_LCCTL_ENABLE_SHIFT 16
-#define CPUMF_LCCTL_ACTCTL_SHIFT 0
-
-static inline void ctr_set_enable(u64 *state, u64 ctrsets)
-{
- *state |= ctrsets << CPUMF_LCCTL_ENABLE_SHIFT;
-}
-
-static inline void ctr_set_disable(u64 *state, u64 ctrsets)
-{
- *state &= ~(ctrsets << CPUMF_LCCTL_ENABLE_SHIFT);
-}
-
-static inline void ctr_set_start(u64 *state, u64 ctrsets)
-{
- *state |= ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT;
-}
-
-static inline void ctr_set_stop(u64 *state, u64 ctrsets)
-{
- *state &= ~(ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT);
-}
-
-static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest)
-{
- switch (set) {
- case CPUMF_CTR_SET_BASIC:
- return stcctm(BASIC, range, dest);
- case CPUMF_CTR_SET_USER:
- return stcctm(PROBLEM_STATE, range, dest);
- case CPUMF_CTR_SET_CRYPTO:
- return stcctm(CRYPTO_ACTIVITY, range, dest);
- case CPUMF_CTR_SET_EXT:
- return stcctm(EXTENDED, range, dest);
- case CPUMF_CTR_SET_MT_DIAG:
- return stcctm(MT_DIAG_CLEARING, range, dest);
- case CPUMF_CTR_SET_MAX:
- return 3;
- }
- return 3;
-}
-
-struct cpu_cf_events {
- struct cpumf_ctr_info info;
- atomic_t ctr_set[CPUMF_CTR_SET_MAX];
- atomic64_t alert;
- u64 state; /* For perf_event_open SVC */
- u64 dev_state; /* For /dev/hwctr */
- unsigned int flags;
- size_t used; /* Bytes used in data */
- size_t usedss; /* Bytes used in start/stop */
- unsigned char start[PAGE_SIZE]; /* Counter set at event add */
- unsigned char stop[PAGE_SIZE]; /* Counter set at event delete */
- unsigned char data[PAGE_SIZE]; /* Counter set at /dev/hwctr */
- unsigned int sets; /* # Counter set saved in memory */
-};
-DECLARE_PER_CPU(struct cpu_cf_events, cpu_cf_events);
-
-bool kernel_cpumcf_avail(void);
-int __kernel_cpumcf_begin(void);
-unsigned long kernel_cpumcf_alert(int clear);
-void __kernel_cpumcf_end(void);
-
-static inline int kernel_cpumcf_begin(void)
-{
- if (!cpum_cf_avail())
- return -ENODEV;
-
- preempt_disable();
- return __kernel_cpumcf_begin();
-}
-static inline void kernel_cpumcf_end(void)
-{
- __kernel_cpumcf_end();
- preempt_enable();
-}
-
-/* Return true if store counter set multiple instruction is available */
-static inline int stccm_avail(void)
-{
- return test_facility(142);
-}
-
-size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset,
- struct cpumf_ctr_info *info);
-int cfset_online_cpu(unsigned int cpu);
-int cfset_offline_cpu(unsigned int cpu);
-#endif /* _ASM_S390_CPU_MCF_H */
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index efa103b52a1a..7e417d7de568 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -42,7 +42,6 @@ static inline int cpum_sf_avail(void)
return test_facility(40) && test_facility(68);
}
-
struct cpumf_ctr_info {
u16 cfvn;
u16 auth_ctl;
@@ -275,56 +274,4 @@ static inline int lsctl(struct hws_lsctl_request_block *req)
return cc ? -EINVAL : 0;
}
-
-/* Sampling control helper functions */
-
-#include <linux/time.h>
-
-static inline unsigned long freq_to_sample_rate(struct hws_qsi_info_block *qsi,
- unsigned long freq)
-{
- return (USEC_PER_SEC / freq) * qsi->cpu_speed;
-}
-
-static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
- unsigned long rate)
-{
- return USEC_PER_SEC * qsi->cpu_speed / rate;
-}
-
-/* Return TOD timestamp contained in an trailer entry */
-static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
-{
- /* TOD in STCKE format */
- if (te->header.t)
- return *((unsigned long long *) &te->timestamp[1]);
-
- /* TOD in STCK format */
- return *((unsigned long long *) &te->timestamp[0]);
-}
-
-/* Return pointer to trailer entry of an sample data block */
-static inline unsigned long *trailer_entry_ptr(unsigned long v)
-{
- void *ret;
-
- ret = (void *) v;
- ret += PAGE_SIZE;
- ret -= sizeof(struct hws_trailer_entry);
-
- return (unsigned long *) ret;
-}
-
-/* Return true if the entry in the sample data block table (sdbt)
- * is a link to the next sdbt */
-static inline int is_link_entry(unsigned long *s)
-{
- return *s & 0x1ul ? 1 : 0;
-}
-
-/* Return pointer to the linked sdbt */
-static inline unsigned long *get_next_sdbt(unsigned long *s)
-{
- return (unsigned long *) (*s & ~0x1ul);
-}
#endif /* _ASM_S390_CPU_MF_H */
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index 1d389847b588..30bb3ec4e5fc 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -11,30 +11,11 @@
#include <linux/types.h>
#include <asm/timex.h>
-#define CPUTIME_PER_USEC 4096ULL
-#define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC)
-
-/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
-
-#define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new)
-
-/*
- * Convert cputime to microseconds.
- */
-static inline u64 cputime_to_usecs(const u64 cputime)
-{
- return cputime >> 12;
-}
-
/*
* Convert cputime to nanoseconds.
*/
#define cputime_to_nsecs(cputime) tod_to_ns(cputime)
-u64 arch_cpu_idle_time(int cpu);
-
-#define arch_idle_time(cpu) arch_cpu_idle_time(cpu)
-
void account_idle_time_irq(void);
#endif /* _S390_CPUTIME_H */
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index 56e99c286d12..674a939f16ee 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -12,6 +12,7 @@
#include <linux/if_ether.h>
#include <linux/percpu.h>
#include <asm/asm-extable.h>
+#include <asm/cio.h>
enum diag_stat_enum {
DIAG_STAT_X008,
@@ -20,6 +21,7 @@ enum diag_stat_enum {
DIAG_STAT_X014,
DIAG_STAT_X044,
DIAG_STAT_X064,
+ DIAG_STAT_X08C,
DIAG_STAT_X09C,
DIAG_STAT_X0DC,
DIAG_STAT_X204,
@@ -79,10 +81,20 @@ struct diag210 {
u8 vrdccrty; /* real device type (output) */
u8 vrdccrmd; /* real device model (output) */
u8 vrdccrft; /* real device feature (output) */
-} __attribute__((packed, aligned(4)));
+} __packed __aligned(4);
extern int diag210(struct diag210 *addr);
+struct diag8c {
+ u8 flags;
+ u8 num_partitions;
+ u16 width;
+ u16 height;
+ u8 data[0];
+} __packed __aligned(4);
+
+extern int diag8c(struct diag8c *out, struct ccw_dev_id *devno);
+
/* bit is set in flags, when physical cpu info is included in diag 204 data */
#define DIAG204_LPAR_PHYS_FLG 0x80
#define DIAG204_LPAR_NAME_LEN 8 /* lpar name len in diag 204 data */
@@ -318,6 +330,7 @@ struct diag_ops {
int (*diag210)(struct diag210 *addr);
int (*diag26c)(void *req, void *resp, enum diag26c_sc subcode);
int (*diag14)(unsigned long rx, unsigned long ry1, unsigned long subcode);
+ int (*diag8c)(struct diag8c *addr, struct ccw_dev_id *devno, size_t len);
void (*diag0c)(struct hypfs_diag0c_entry *entry);
void (*diag308_reset)(void);
};
@@ -330,5 +343,6 @@ int _diag26c_amode31(void *req, void *resp, enum diag26c_sc subcode);
int _diag14_amode31(unsigned long rx, unsigned long ry1, unsigned long subcode);
void _diag0c_amode31(struct hypfs_diag0c_entry *entry);
void _diag308_reset_amode31(void);
+int _diag8c_amode31(struct diag8c *addr, struct ccw_dev_id *devno, size_t len);
#endif /* _ASM_S390_DIAG_H */
diff --git a/arch/s390/include/asm/fpu/internal.h b/arch/s390/include/asm/fpu/internal.h
index 4a71dbbf76fb..bbdadb1c9efc 100644
--- a/arch/s390/include/asm/fpu/internal.h
+++ b/arch/s390/include/asm/fpu/internal.h
@@ -27,7 +27,7 @@ static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
int i;
for (i = 0; i < __NUM_FPRS; i++)
- fprs[i] = *(freg_t *)(vxrs + i);
+ fprs[i].ui = vxrs[i].high;
}
static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
@@ -35,7 +35,7 @@ static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
int i;
for (i = 0; i < __NUM_FPRS; i++)
- *(freg_t *)(vxrs + i) = fprs[i];
+ vxrs[i].high = fprs[i].ui;
}
static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h
index 40eae2c08d61..59fcc3c72edf 100644
--- a/arch/s390/include/asm/idals.h
+++ b/arch/s390/include/asm/idals.h
@@ -23,6 +23,9 @@
#define IDA_SIZE_LOG 12 /* 11 for 2k , 12 for 4k */
#define IDA_BLOCK_SIZE (1L<<IDA_SIZE_LOG)
+#define IDA_2K_SIZE_LOG 11
+#define IDA_2K_BLOCK_SIZE (1L << IDA_2K_SIZE_LOG)
+
/*
* Test if an address/length pair needs an idal list.
*/
@@ -43,6 +46,15 @@ static inline unsigned int idal_nr_words(void *vaddr, unsigned int length)
}
/*
+ * Return the number of 2K IDA words needed for an address/length pair.
+ */
+static inline unsigned int idal_2k_nr_words(void *vaddr, unsigned int length)
+{
+ return ((__pa(vaddr) & (IDA_2K_BLOCK_SIZE - 1)) + length +
+ (IDA_2K_BLOCK_SIZE - 1)) >> IDA_2K_SIZE_LOG;
+}
+
+/*
* Create the list of idal words for an address/length pair.
*/
static inline unsigned long *idal_create_words(unsigned long *idaws,
diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h
index 5cea629c548e..09f763b9eb40 100644
--- a/arch/s390/include/asm/idle.h
+++ b/arch/s390/include/asm/idle.h
@@ -10,16 +10,12 @@
#include <linux/types.h>
#include <linux/device.h>
-#include <linux/seqlock.h>
struct s390_idle_data {
- seqcount_t seqcount;
unsigned long idle_count;
unsigned long idle_time;
unsigned long clock_idle_enter;
- unsigned long clock_idle_exit;
unsigned long timer_idle_enter;
- unsigned long timer_idle_exit;
unsigned long mt_cycles_enter[8];
};
@@ -27,6 +23,5 @@ extern struct device_attribute dev_attr_idle_count;
extern struct device_attribute dev_attr_idle_time_us;
void psw_idle(struct s390_idle_data *data, unsigned long psw_mask);
-void psw_idle_exit(void);
#endif /* _S390_IDLE_H */
diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h
index 2768d5db181f..e5cfc81d5b61 100644
--- a/arch/s390/include/asm/kasan.h
+++ b/arch/s390/include/asm/kasan.h
@@ -14,17 +14,15 @@
#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
extern void kasan_early_init(void);
-extern void kasan_copy_shadow_mapping(void);
-extern void kasan_free_early_identity(void);
/*
* Estimate kasan memory requirements, which it will reserve
* at the very end of available physical memory. To estimate
* that, we take into account that kasan would require
* 1/8 of available physical memory (for shadow memory) +
- * creating page tables for the whole memory + shadow memory
- * region (1 + 1/8). To keep page tables estimates simple take
- * the double of combined ptes size.
+ * creating page tables for the shadow memory region.
+ * To keep page tables estimates simple take the double of
+ * combined ptes size.
*
* physmem parameter has to be already adjusted if not entire physical memory
* would be used (e.g. due to effect of "mem=" option).
@@ -36,15 +34,13 @@ static inline unsigned long kasan_estimate_memory_needs(unsigned long physmem)
/* for shadow memory */
kasan_needs = round_up(physmem / 8, PAGE_SIZE);
/* for paging structures */
- pages = DIV_ROUND_UP(physmem + kasan_needs, PAGE_SIZE);
+ pages = DIV_ROUND_UP(kasan_needs, PAGE_SIZE);
kasan_needs += DIV_ROUND_UP(pages, _PAGE_ENTRIES) * _PAGE_TABLE_SIZE * 2;
return kasan_needs;
}
#else
static inline void kasan_early_init(void) { }
-static inline void kasan_copy_shadow_mapping(void) { }
-static inline void kasan_free_early_identity(void) { }
static inline unsigned long kasan_estimate_memory_needs(unsigned long physmem) { return 0; }
#endif
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
index 598095f4b924..83f732ca3af4 100644
--- a/arch/s390/include/asm/kprobes.h
+++ b/arch/s390/include/asm/kprobes.h
@@ -70,8 +70,6 @@ struct kprobe_ctlblk {
};
void arch_remove_kprobe(struct kprobe *p);
-void __kretprobe_trampoline(void);
-void trampoline_probe_handler(struct pt_regs *regs);
int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
int kprobe_exceptions_notify(struct notifier_block *self,
diff --git a/arch/s390/include/asm/maccess.h b/arch/s390/include/asm/maccess.h
index c7fa838cf6b9..cfec3141fdba 100644
--- a/arch/s390/include/asm/maccess.h
+++ b/arch/s390/include/asm/maccess.h
@@ -7,7 +7,7 @@
struct iov_iter;
extern unsigned long __memcpy_real_area;
-void memcpy_real_init(void);
+extern pte_t *memcpy_real_ptep;
size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count);
int memcpy_real(void *dest, unsigned long src, size_t count);
#ifdef CONFIG_CRASH_DUMP
diff --git a/arch/s390/include/asm/mem_detect.h b/arch/s390/include/asm/mem_detect.h
index a7c922a69050..f9e7354036d2 100644
--- a/arch/s390/include/asm/mem_detect.h
+++ b/arch/s390/include/asm/mem_detect.h
@@ -30,6 +30,7 @@ struct mem_detect_block {
struct mem_detect_info {
u32 count;
u8 info_source;
+ unsigned long usable;
struct mem_detect_block entries[MEM_INLINED_ENTRIES];
struct mem_detect_block *entries_extended;
};
@@ -38,7 +39,7 @@ extern struct mem_detect_info mem_detect;
void add_mem_detect_block(u64 start, u64 end);
static inline int __get_mem_detect_block(u32 n, unsigned long *start,
- unsigned long *end)
+ unsigned long *end, bool respect_usable_limit)
{
if (n >= mem_detect.count) {
*start = 0;
@@ -53,21 +54,41 @@ static inline int __get_mem_detect_block(u32 n, unsigned long *start,
*start = (unsigned long)mem_detect.entries_extended[n - MEM_INLINED_ENTRIES].start;
*end = (unsigned long)mem_detect.entries_extended[n - MEM_INLINED_ENTRIES].end;
}
+
+ if (respect_usable_limit && mem_detect.usable) {
+ if (*start >= mem_detect.usable)
+ return -1;
+ if (*end > mem_detect.usable)
+ *end = mem_detect.usable;
+ }
return 0;
}
/**
- * for_each_mem_detect_block - early online memory range iterator
+ * for_each_mem_detect_usable_block - early online memory range iterator
* @i: an integer used as loop variable
* @p_start: ptr to unsigned long for start address of the range
* @p_end: ptr to unsigned long for end address of the range
*
- * Walks over detected online memory ranges.
+ * Walks over detected online memory ranges below usable limit.
*/
-#define for_each_mem_detect_block(i, p_start, p_end) \
- for (i = 0, __get_mem_detect_block(i, p_start, p_end); \
- i < mem_detect.count; \
- i++, __get_mem_detect_block(i, p_start, p_end))
+#define for_each_mem_detect_usable_block(i, p_start, p_end) \
+ for (i = 0; !__get_mem_detect_block(i, p_start, p_end, true); i++)
+
+/* Walks over all detected online memory ranges disregarding usable limit. */
+#define for_each_mem_detect_block(i, p_start, p_end) \
+ for (i = 0; !__get_mem_detect_block(i, p_start, p_end, false); i++)
+
+static inline unsigned long get_mem_detect_usable_total(void)
+{
+ unsigned long start, end, total = 0;
+ int i;
+
+ for_each_mem_detect_usable_block(i, &start, &end)
+ total += end - start;
+
+ return total;
+}
static inline void get_mem_detect_reserved(unsigned long *start,
unsigned long *size)
@@ -84,8 +105,10 @@ static inline unsigned long get_mem_detect_end(void)
unsigned long start;
unsigned long end;
+ if (mem_detect.usable)
+ return mem_detect.usable;
if (mem_detect.count) {
- __get_mem_detect_block(mem_detect.count - 1, &start, &end);
+ __get_mem_detect_block(mem_detect.count - 1, &start, &end, false);
return end;
}
return 0;
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 61dea67bb9c7..8a2a3b5d1e29 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -73,9 +73,8 @@ static inline void copy_page(void *to, void *from)
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-#define alloc_zeroed_user_highpage_movable(vma, vaddr) \
- alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
+#define vma_alloc_zeroed_movable_folio(vma, vaddr) \
+ vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr, false)
/*
* These are used to make use of C type-checking..
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index b26cbf1c533c..2c70b4d1263d 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -23,6 +23,7 @@
#include <asm/uv.h>
extern pgd_t swapper_pg_dir[];
+extern pgd_t invalid_pg_dir[];
extern void paging_init(void);
extern unsigned long s390_invalid_asce;
@@ -181,6 +182,8 @@ static inline int is_module_addr(void *addr)
#define _PAGE_SOFT_DIRTY 0x000
#endif
+#define _PAGE_SW_BITS 0xffUL /* All SW bits */
+
#define _PAGE_SWP_EXCLUSIVE _PAGE_LARGE /* SW pte exclusive swap bit */
/* Set of bits not changed in pte_modify */
@@ -188,6 +191,12 @@ static inline int is_module_addr(void *addr)
_PAGE_YOUNG | _PAGE_SOFT_DIRTY)
/*
+ * Mask of bits that must not be changed with RDP. Allow only _PAGE_PROTECT
+ * HW bit and all SW bits.
+ */
+#define _PAGE_RDP_MASK ~(_PAGE_PROTECT | _PAGE_SW_BITS)
+
+/*
* handle_pte_fault uses pte_present and pte_none to find out the pte type
* WITHOUT holding the page table lock. The _PAGE_PRESENT bit is used to
* distinguish present from not-present ptes. It is changed only with the page
@@ -477,6 +486,12 @@ static inline int is_module_addr(void *addr)
_REGION3_ENTRY_YOUNG | \
_REGION_ENTRY_PROTECT | \
_REGION_ENTRY_NOEXEC)
+#define REGION3_KERNEL_EXEC __pgprot(_REGION_ENTRY_TYPE_R3 | \
+ _REGION3_ENTRY_LARGE | \
+ _REGION3_ENTRY_READ | \
+ _REGION3_ENTRY_WRITE | \
+ _REGION3_ENTRY_YOUNG | \
+ _REGION3_ENTRY_DIRTY)
static inline bool mm_p4d_folded(struct mm_struct *mm)
{
@@ -812,7 +827,6 @@ static inline int pmd_protnone(pmd_t pmd)
}
#endif
-#define __HAVE_ARCH_PTE_SWP_EXCLUSIVE
static inline int pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
@@ -1045,6 +1059,19 @@ static inline pte_t pte_mkhuge(pte_t pte)
#define IPTE_NODAT 0x400
#define IPTE_GUEST_ASCE 0x800
+static __always_inline void __ptep_rdp(unsigned long addr, pte_t *ptep,
+ unsigned long opt, unsigned long asce,
+ int local)
+{
+ unsigned long pto;
+
+ pto = __pa(ptep) & ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
+ asm volatile(".insn rrf,0xb98b0000,%[r1],%[r2],%[asce],%[m4]"
+ : "+m" (*ptep)
+ : [r1] "a" (pto), [r2] "a" ((addr & PAGE_MASK) | opt),
+ [asce] "a" (asce), [m4] "i" (local));
+}
+
static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep,
unsigned long opt, unsigned long asce,
int local)
@@ -1195,6 +1222,42 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
ptep_xchg_lazy(mm, addr, ptep, pte_wrprotect(pte));
}
+/*
+ * Check if PTEs only differ in _PAGE_PROTECT HW bit, but also allow SW PTE
+ * bits in the comparison. Those might change e.g. because of dirty and young
+ * tracking.
+ */
+static inline int pte_allow_rdp(pte_t old, pte_t new)
+{
+ /*
+ * Only allow changes from RO to RW
+ */
+ if (!(pte_val(old) & _PAGE_PROTECT) || pte_val(new) & _PAGE_PROTECT)
+ return 0;
+
+ return (pte_val(old) & _PAGE_RDP_MASK) == (pte_val(new) & _PAGE_RDP_MASK);
+}
+
+static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
+ unsigned long address)
+{
+ /*
+ * RDP might not have propagated the PTE protection reset to all CPUs,
+ * so there could be spurious TLB protection faults.
+ * NOTE: This will also be called when a racing pagetable update on
+ * another thread already installed the correct PTE. Both cases cannot
+ * really be distinguished.
+ * Therefore, only do the local TLB flush when RDP can be used, to avoid
+ * unnecessary overhead.
+ */
+ if (MACHINE_HAS_RDP)
+ asm volatile("ptlb" : : : "memory");
+}
+#define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault
+
+void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+ pte_t new);
+
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
static inline int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
@@ -1202,7 +1265,10 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
{
if (pte_same(*ptep, entry))
return 0;
- ptep_xchg_direct(vma->vm_mm, addr, ptep, entry);
+ if (MACHINE_HAS_RDP && !mm_has_pgste(vma->vm_mm) && pte_allow_rdp(*ptep, entry))
+ ptep_reset_dat_prot(vma->vm_mm, addr, ptep, entry);
+ else
+ ptep_xchg_direct(vma->vm_mm, addr, ptep, entry);
return 1;
}
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index c907f747d2a0..e98d9650764b 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -44,29 +44,46 @@
typedef long (*sys_call_ptr_t)(struct pt_regs *regs);
-static inline void set_cpu_flag(int flag)
+static __always_inline void set_cpu_flag(int flag)
{
S390_lowcore.cpu_flags |= (1UL << flag);
}
-static inline void clear_cpu_flag(int flag)
+static __always_inline void clear_cpu_flag(int flag)
{
S390_lowcore.cpu_flags &= ~(1UL << flag);
}
-static inline int test_cpu_flag(int flag)
+static __always_inline bool test_cpu_flag(int flag)
{
- return !!(S390_lowcore.cpu_flags & (1UL << flag));
+ return S390_lowcore.cpu_flags & (1UL << flag);
+}
+
+static __always_inline bool test_and_set_cpu_flag(int flag)
+{
+ if (test_cpu_flag(flag))
+ return true;
+ set_cpu_flag(flag);
+ return false;
+}
+
+static __always_inline bool test_and_clear_cpu_flag(int flag)
+{
+ if (!test_cpu_flag(flag))
+ return false;
+ clear_cpu_flag(flag);
+ return true;
}
/*
* Test CIF flag of another CPU. The caller needs to ensure that
* CPU hotplug can not happen, e.g. by disabling preemption.
*/
-static inline int test_cpu_flag_of(int flag, int cpu)
+static __always_inline bool test_cpu_flag_of(int flag, int cpu)
{
struct lowcore *lc = lowcore_ptr[cpu];
- return !!(lc->cpu_flags & (1UL << flag));
+
+ return lc->cpu_flags & (1UL << flag);
}
#define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY)
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 8bae33ab320a..bfb8c3cb8aee 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -26,7 +26,7 @@
#ifndef __ASSEMBLY__
#define PSW_KERNEL_BITS (PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_HOME | \
- PSW_MASK_EA | PSW_MASK_BA)
+ PSW_MASK_EA | PSW_MASK_BA | PSW_MASK_DAT)
#define PSW_USER_BITS (PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | \
PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | \
PSW_MASK_PSTATE | PSW_ASC_PRIMARY)
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 77e6506898f5..3a1f8825bc7d 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -34,6 +34,7 @@
#define MACHINE_FLAG_GS BIT(16)
#define MACHINE_FLAG_SCC BIT(17)
#define MACHINE_FLAG_PCI_MIO BIT(18)
+#define MACHINE_FLAG_RDP BIT(19)
#define LPP_MAGIC BIT(31)
#define LPP_PID_MASK _AC(0xffffffff, UL)
@@ -73,6 +74,10 @@ extern unsigned int zlib_dfltcc_support;
extern int noexec_disabled;
extern unsigned long ident_map_size;
+extern unsigned long pgalloc_pos;
+extern unsigned long pgalloc_end;
+extern unsigned long pgalloc_low;
+extern unsigned long __amode31_base;
/* The Write Back bit position in the physaddr is given by the SLPC PCI */
extern unsigned long mio_wb_bit_mask;
@@ -95,6 +100,7 @@ extern unsigned long mio_wb_bit_mask;
#define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS)
#define MACHINE_HAS_SCC (S390_lowcore.machine_flags & MACHINE_FLAG_SCC)
#define MACHINE_HAS_PCI_MIO (S390_lowcore.machine_flags & MACHINE_FLAG_PCI_MIO)
+#define MACHINE_HAS_RDP (S390_lowcore.machine_flags & MACHINE_FLAG_RDP)
/*
* Console mode. Override with conmode=
diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h
index fde7e6b1df48..9286430fe729 100644
--- a/arch/s390/include/asm/syscall_wrapper.h
+++ b/arch/s390/include/asm/syscall_wrapper.h
@@ -7,36 +7,13 @@
#ifndef _ASM_S390_SYSCALL_WRAPPER_H
#define _ASM_S390_SYSCALL_WRAPPER_H
-#define __SC_TYPE(t, a) t
-
-#define SYSCALL_PT_ARG6(regs, m, t1, t2, t3, t4, t5, t6)\
- SYSCALL_PT_ARG5(regs, m, t1, t2, t3, t4, t5), \
- m(t6, (regs->gprs[7]))
-
-#define SYSCALL_PT_ARG5(regs, m, t1, t2, t3, t4, t5) \
- SYSCALL_PT_ARG4(regs, m, t1, t2, t3, t4), \
- m(t5, (regs->gprs[6]))
-
-#define SYSCALL_PT_ARG4(regs, m, t1, t2, t3, t4) \
- SYSCALL_PT_ARG3(regs, m, t1, t2, t3), \
- m(t4, (regs->gprs[5]))
-
-#define SYSCALL_PT_ARG3(regs, m, t1, t2, t3) \
- SYSCALL_PT_ARG2(regs, m, t1, t2), \
- m(t3, (regs->gprs[4]))
-
-#define SYSCALL_PT_ARG2(regs, m, t1, t2) \
- SYSCALL_PT_ARG1(regs, m, t1), \
- m(t2, (regs->gprs[3]))
-
-#define SYSCALL_PT_ARG1(regs, m, t1) \
- m(t1, (regs->orig_gpr2))
-
-#define SYSCALL_PT_ARGS(x, ...) SYSCALL_PT_ARG##x(__VA_ARGS__)
+/* Mapping of registers to parameters for syscalls */
+#define SC_S390_REGS_TO_ARGS(x, ...) \
+ __MAP(x, __SC_ARGS \
+ ,, regs->orig_gpr2,, regs->gprs[3],, regs->gprs[4] \
+ ,, regs->gprs[5],, regs->gprs[6],, regs->gprs[7])
#ifdef CONFIG_COMPAT
-#define __SC_COMPAT_TYPE(t, a) \
- __typeof(__builtin_choose_expr(sizeof(t) > 4, 0L, (t)0)) a
#define __SC_COMPAT_CAST(t, a) \
({ \
@@ -56,34 +33,31 @@
(t)__ReS; \
})
-#define __S390_SYS_STUBx(x, name, ...) \
- long __s390_sys##name(struct pt_regs *regs); \
- ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO); \
- long __s390_sys##name(struct pt_regs *regs) \
- { \
- long ret = __do_sys##name(SYSCALL_PT_ARGS(x, regs, \
- __SC_COMPAT_CAST, __MAP(x, __SC_TYPE, __VA_ARGS__))); \
- __MAP(x,__SC_TEST,__VA_ARGS__); \
- return ret; \
- }
-
/*
* To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias
* named __s390x_sys_*()
*/
#define COMPAT_SYSCALL_DEFINE0(sname) \
- SYSCALL_METADATA(_##sname, 0); \
long __s390_compat_sys_##sname(void); \
ALLOW_ERROR_INJECTION(__s390_compat_sys_##sname, ERRNO); \
long __s390_compat_sys_##sname(void)
#define SYSCALL_DEFINE0(sname) \
SYSCALL_METADATA(_##sname, 0); \
+ long __s390_sys_##sname(void); \
+ ALLOW_ERROR_INJECTION(__s390_sys_##sname, ERRNO); \
long __s390x_sys_##sname(void); \
ALLOW_ERROR_INJECTION(__s390x_sys_##sname, ERRNO); \
+ static inline long __do_sys_##sname(void); \
long __s390_sys_##sname(void) \
- __attribute__((alias(__stringify(__s390x_sys_##sname)))); \
- long __s390x_sys_##sname(void)
+ { \
+ return __do_sys_##sname(); \
+ } \
+ long __s390x_sys_##sname(void) \
+ { \
+ return __do_sys_##sname(); \
+ } \
+ static inline long __do_sys_##sname(void)
#define COND_SYSCALL(name) \
cond_syscall(__s390x_sys_##name); \
@@ -94,24 +68,20 @@
SYSCALL_ALIAS(__s390_sys_##name, sys_ni_posix_timers)
#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \
- __diag_push(); \
- __diag_ignore(GCC, 8, "-Wattribute-alias", \
- "Type aliasing is used to sanitize syscall arguments"); \
long __s390_compat_sys##name(struct pt_regs *regs); \
- long __s390_compat_sys##name(struct pt_regs *regs) \
- __attribute__((alias(__stringify(__se_compat_sys##name)))); \
ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO); \
- static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
- long __se_compat_sys##name(struct pt_regs *regs); \
- long __se_compat_sys##name(struct pt_regs *regs) \
+ static inline long __se_compat_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__)); \
+ static inline long __do_compat_sys##name(__MAP(x, __SC_DECL, __VA_ARGS__)); \
+ long __s390_compat_sys##name(struct pt_regs *regs) \
{ \
- long ret = __do_compat_sys##name(SYSCALL_PT_ARGS(x, regs, __SC_DELOUSE, \
- __MAP(x, __SC_TYPE, __VA_ARGS__))); \
- __MAP(x,__SC_TEST,__VA_ARGS__); \
- return ret; \
+ return __se_compat_sys##name(SC_S390_REGS_TO_ARGS(x, __VA_ARGS__)); \
} \
- __diag_pop(); \
- static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+ static inline long __se_compat_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__)) \
+ { \
+ __MAP(x, __SC_TEST, __VA_ARGS__); \
+ return __do_compat_sys##name(__MAP(x, __SC_DELOUSE, __VA_ARGS__)); \
+ } \
+ static inline long __do_compat_sys##name(__MAP(x, __SC_DECL, __VA_ARGS__))
/*
* As some compat syscalls may not be implemented, we need to expand
@@ -124,42 +94,58 @@
#define COMPAT_SYS_NI(name) \
SYSCALL_ALIAS(__s390_compat_sys_##name, sys_ni_posix_timers)
-#else /* CONFIG_COMPAT */
+#define __S390_SYS_STUBx(x, name, ...) \
+ long __s390_sys##name(struct pt_regs *regs); \
+ ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO); \
+ static inline long ___se_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__)); \
+ long __s390_sys##name(struct pt_regs *regs) \
+ { \
+ return ___se_sys##name(SC_S390_REGS_TO_ARGS(x, __VA_ARGS__)); \
+ } \
+ static inline long ___se_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__)) \
+ { \
+ __MAP(x, __SC_TEST, __VA_ARGS__); \
+ return __do_sys##name(__MAP(x, __SC_COMPAT_CAST, __VA_ARGS__)); \
+ }
-#define __S390_SYS_STUBx(x, fullname, name, ...)
+#else /* CONFIG_COMPAT */
#define SYSCALL_DEFINE0(sname) \
SYSCALL_METADATA(_##sname, 0); \
long __s390x_sys_##sname(void); \
ALLOW_ERROR_INJECTION(__s390x_sys_##sname, ERRNO); \
- long __s390x_sys_##sname(void)
+ static inline long __do_sys_##sname(void); \
+ long __s390x_sys_##sname(void) \
+ { \
+ return __do_sys_##sname(); \
+ } \
+ static inline long __do_sys_##sname(void)
#define COND_SYSCALL(name) \
cond_syscall(__s390x_sys_##name)
#define SYS_NI(name) \
- SYSCALL_ALIAS(__s390x_sys_##name, sys_ni_posix_timers);
+ SYSCALL_ALIAS(__s390x_sys_##name, sys_ni_posix_timers)
+
+#define __S390_SYS_STUBx(x, fullname, name, ...)
#endif /* CONFIG_COMPAT */
-#define __SYSCALL_DEFINEx(x, name, ...) \
- __diag_push(); \
- __diag_ignore(GCC, 8, "-Wattribute-alias", \
- "Type aliasing is used to sanitize syscall arguments"); \
- long __s390x_sys##name(struct pt_regs *regs) \
- __attribute__((alias(__stringify(__se_sys##name)))); \
- ALLOW_ERROR_INJECTION(__s390x_sys##name, ERRNO); \
- static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
- long __se_sys##name(struct pt_regs *regs); \
- __S390_SYS_STUBx(x, name, __VA_ARGS__) \
- long __se_sys##name(struct pt_regs *regs) \
- { \
- long ret = __do_sys##name(SYSCALL_PT_ARGS(x, regs, \
- __SC_CAST, __MAP(x, __SC_TYPE, __VA_ARGS__))); \
- __MAP(x,__SC_TEST,__VA_ARGS__); \
- return ret; \
- } \
- __diag_pop(); \
- static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+#define __SYSCALL_DEFINEx(x, name, ...) \
+ long __s390x_sys##name(struct pt_regs *regs); \
+ ALLOW_ERROR_INJECTION(__s390x_sys##name, ERRNO); \
+ static inline long __se_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__)); \
+ static inline long __do_sys##name(__MAP(x, __SC_DECL, __VA_ARGS__)); \
+ __S390_SYS_STUBx(x, name, __VA_ARGS__); \
+ long __s390x_sys##name(struct pt_regs *regs) \
+ { \
+ return __se_sys##name(SC_S390_REGS_TO_ARGS(x, __VA_ARGS__)); \
+ } \
+ static inline long __se_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__)) \
+ { \
+ __MAP(x, __SC_TEST, __VA_ARGS__); \
+ return __do_sys##name(__MAP(x, __SC_CAST, __VA_ARGS__)); \
+ } \
+ static inline long __do_sys##name(__MAP(x, __SC_DECL, __VA_ARGS__))
#endif /* _ASM_S390_SYSCALL_WRAPPER_H */
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index f7038b800cc3..8a8c64a678c4 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -390,4 +390,212 @@ do { \
goto err_label; \
} while (0)
+void __cmpxchg_user_key_called_with_bad_pointer(void);
+
+#define CMPXCHG_USER_KEY_MAX_LOOPS 128
+
+static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
+ __uint128_t old, __uint128_t new,
+ unsigned long key, int size)
+{
+ int rc = 0;
+
+ switch (size) {
+ case 1: {
+ unsigned int prev, shift, mask, _old, _new;
+ unsigned long count;
+
+ shift = (3 ^ (address & 3)) << 3;
+ address ^= address & 3;
+ _old = ((unsigned int)old & 0xff) << shift;
+ _new = ((unsigned int)new & 0xff) << shift;
+ mask = ~(0xff << shift);
+ asm volatile(
+ " spka 0(%[key])\n"
+ " sacf 256\n"
+ " llill %[count],%[max_loops]\n"
+ "0: l %[prev],%[address]\n"
+ "1: nr %[prev],%[mask]\n"
+ " xilf %[mask],0xffffffff\n"
+ " or %[new],%[prev]\n"
+ " or %[prev],%[tmp]\n"
+ "2: lr %[tmp],%[prev]\n"
+ "3: cs %[prev],%[new],%[address]\n"
+ "4: jnl 5f\n"
+ " xr %[tmp],%[prev]\n"
+ " xr %[new],%[tmp]\n"
+ " nr %[tmp],%[mask]\n"
+ " jnz 5f\n"
+ " brct %[count],2b\n"
+ "5: sacf 768\n"
+ " spka %[default_key]\n"
+ EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev])
+ : [rc] "+&d" (rc),
+ [prev] "=&d" (prev),
+ [address] "+Q" (*(int *)address),
+ [tmp] "+&d" (_old),
+ [new] "+&d" (_new),
+ [mask] "+&d" (mask),
+ [count] "=a" (count)
+ : [key] "%[count]" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY),
+ [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
+ : "memory", "cc");
+ *(unsigned char *)uval = prev >> shift;
+ if (!count)
+ rc = -EAGAIN;
+ return rc;
+ }
+ case 2: {
+ unsigned int prev, shift, mask, _old, _new;
+ unsigned long count;
+
+ shift = (2 ^ (address & 2)) << 3;
+ address ^= address & 2;
+ _old = ((unsigned int)old & 0xffff) << shift;
+ _new = ((unsigned int)new & 0xffff) << shift;
+ mask = ~(0xffff << shift);
+ asm volatile(
+ " spka 0(%[key])\n"
+ " sacf 256\n"
+ " llill %[count],%[max_loops]\n"
+ "0: l %[prev],%[address]\n"
+ "1: nr %[prev],%[mask]\n"
+ " xilf %[mask],0xffffffff\n"
+ " or %[new],%[prev]\n"
+ " or %[prev],%[tmp]\n"
+ "2: lr %[tmp],%[prev]\n"
+ "3: cs %[prev],%[new],%[address]\n"
+ "4: jnl 5f\n"
+ " xr %[tmp],%[prev]\n"
+ " xr %[new],%[tmp]\n"
+ " nr %[tmp],%[mask]\n"
+ " jnz 5f\n"
+ " brct %[count],2b\n"
+ "5: sacf 768\n"
+ " spka %[default_key]\n"
+ EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev])
+ : [rc] "+&d" (rc),
+ [prev] "=&d" (prev),
+ [address] "+Q" (*(int *)address),
+ [tmp] "+&d" (_old),
+ [new] "+&d" (_new),
+ [mask] "+&d" (mask),
+ [count] "=a" (count)
+ : [key] "%[count]" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY),
+ [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
+ : "memory", "cc");
+ *(unsigned short *)uval = prev >> shift;
+ if (!count)
+ rc = -EAGAIN;
+ return rc;
+ }
+ case 4: {
+ unsigned int prev = old;
+
+ asm volatile(
+ " spka 0(%[key])\n"
+ " sacf 256\n"
+ "0: cs %[prev],%[new],%[address]\n"
+ "1: sacf 768\n"
+ " spka %[default_key]\n"
+ EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
+ : [rc] "+&d" (rc),
+ [prev] "+&d" (prev),
+ [address] "+Q" (*(int *)address)
+ : [new] "d" ((unsigned int)new),
+ [key] "a" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY)
+ : "memory", "cc");
+ *(unsigned int *)uval = prev;
+ return rc;
+ }
+ case 8: {
+ unsigned long prev = old;
+
+ asm volatile(
+ " spka 0(%[key])\n"
+ " sacf 256\n"
+ "0: csg %[prev],%[new],%[address]\n"
+ "1: sacf 768\n"
+ " spka %[default_key]\n"
+ EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
+ : [rc] "+&d" (rc),
+ [prev] "+&d" (prev),
+ [address] "+QS" (*(long *)address)
+ : [new] "d" ((unsigned long)new),
+ [key] "a" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY)
+ : "memory", "cc");
+ *(unsigned long *)uval = prev;
+ return rc;
+ }
+ case 16: {
+ __uint128_t prev = old;
+
+ asm volatile(
+ " spka 0(%[key])\n"
+ " sacf 256\n"
+ "0: cdsg %[prev],%[new],%[address]\n"
+ "1: sacf 768\n"
+ " spka %[default_key]\n"
+ EX_TABLE_UA_LOAD_REGPAIR(0b, 1b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REGPAIR(1b, 1b, %[rc], %[prev])
+ : [rc] "+&d" (rc),
+ [prev] "+&d" (prev),
+ [address] "+QS" (*(__int128_t *)address)
+ : [new] "d" (new),
+ [key] "a" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY)
+ : "memory", "cc");
+ *(__uint128_t *)uval = prev;
+ return rc;
+ }
+ }
+ __cmpxchg_user_key_called_with_bad_pointer();
+ return rc;
+}
+
+/**
+ * cmpxchg_user_key() - cmpxchg with user space target, honoring storage keys
+ * @ptr: User space address of value to compare to @old and exchange with
+ * @new. Must be aligned to sizeof(*@ptr).
+ * @uval: Address where the old value of *@ptr is written to.
+ * @old: Old value. Compared to the content pointed to by @ptr in order to
+ * determine if the exchange occurs. The old value read from *@ptr is
+ * written to *@uval.
+ * @new: New value to place at *@ptr.
+ * @key: Access key to use for checking storage key protection.
+ *
+ * Perform a cmpxchg on a user space target, honoring storage key protection.
+ * @key alone determines how key checking is performed, neither
+ * storage-protection-override nor fetch-protection-override apply.
+ * The caller must compare *@uval and @old to determine if values have been
+ * exchanged. In case of an exception *@uval is set to zero.
+ *
+ * Return: 0: cmpxchg executed
+ * -EFAULT: an exception happened when trying to access *@ptr
+ * -EAGAIN: maxed out number of retries (byte and short only)
+ */
+#define cmpxchg_user_key(ptr, uval, old, new, key) \
+({ \
+ __typeof__(ptr) __ptr = (ptr); \
+ __typeof__(uval) __uval = (uval); \
+ \
+ BUILD_BUG_ON(sizeof(*(__ptr)) != sizeof(*(__uval))); \
+ might_fault(); \
+ __chk_user_ptr(__ptr); \
+ __cmpxchg_user_key((unsigned long)(__ptr), (void *)(__uval), \
+ (old), (new), (key), sizeof(*(__ptr))); \
+})
+
#endif /* __S390_UACCESS_H */
diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h
index 02462e7100c1..b8ecf04e3468 100644
--- a/arch/s390/include/asm/unwind.h
+++ b/arch/s390/include/asm/unwind.h
@@ -4,7 +4,7 @@
#include <linux/sched.h>
#include <linux/ftrace.h>
-#include <linux/kprobes.h>
+#include <linux/rethook.h>
#include <linux/llist.h>
#include <asm/ptrace.h>
#include <asm/stacktrace.h>
@@ -43,13 +43,15 @@ struct unwind_state {
bool error;
};
-/* Recover the return address modified by kretprobe and ftrace_graph. */
+/* Recover the return address modified by rethook and ftrace_graph. */
static inline unsigned long unwind_recover_ret_addr(struct unwind_state *state,
unsigned long ip)
{
ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, (void *)state->sp);
- if (is_kretprobe_trampoline(ip))
- ip = kretprobe_find_ret_addr(state->task, (void *)state->sp, &state->kr_cur);
+#ifdef CONFIG_RETHOOK
+ if (is_rethook_trampoline(ip))
+ ip = rethook_find_ret_addr(state->task, state->sp, &state->kr_cur);
+#endif
return ip;
}
diff --git a/arch/s390/include/uapi/asm/fs3270.h b/arch/s390/include/uapi/asm/fs3270.h
new file mode 100644
index 000000000000..c4bc1108af6a
--- /dev/null
+++ b/arch/s390/include/uapi/asm/fs3270.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_S390_UAPI_FS3270_H
+#define __ASM_S390_UAPI_FS3270_H
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+
+/* ioctls for fullscreen 3270 */
+#define TUBICMD _IO('3', 3) /* set ccw command for fs reads. */
+#define TUBOCMD _IO('3', 4) /* set ccw command for fs writes. */
+#define TUBGETI _IO('3', 7) /* get ccw command for fs reads. */
+#define TUBGETO _IO('3', 8) /* get ccw command for fs writes. */
+#define TUBGETMOD _IO('3', 13) /* get characteristics like model, cols, rows */
+
+/* For TUBGETMOD */
+struct raw3270_iocb {
+ __u16 model;
+ __u16 line_cnt;
+ __u16 col_cnt;
+ __u16 pf_cnt;
+ __u16 re_cnt;
+ __u16 map;
+};
+
+#endif /* __ASM_S390_UAPI_FS3270_H */
diff --git a/arch/s390/include/uapi/asm/raw3270.h b/arch/s390/include/uapi/asm/raw3270.h
new file mode 100644
index 000000000000..6676f102bd50
--- /dev/null
+++ b/arch/s390/include/uapi/asm/raw3270.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_S390_UAPI_RAW3270_H
+#define __ASM_S390_UAPI_RAW3270_H
+
+/* Local Channel Commands */
+#define TC_WRITE 0x01 /* Write */
+#define TC_RDBUF 0x02 /* Read Buffer */
+#define TC_EWRITE 0x05 /* Erase write */
+#define TC_READMOD 0x06 /* Read modified */
+#define TC_EWRITEA 0x0d /* Erase write alternate */
+#define TC_WRITESF 0x11 /* Write structured field */
+
+/* Buffer Control Orders */
+#define TO_GE 0x08 /* Graphics Escape */
+#define TO_SF 0x1d /* Start field */
+#define TO_SBA 0x11 /* Set buffer address */
+#define TO_IC 0x13 /* Insert cursor */
+#define TO_PT 0x05 /* Program tab */
+#define TO_RA 0x3c /* Repeat to address */
+#define TO_SFE 0x29 /* Start field extended */
+#define TO_EUA 0x12 /* Erase unprotected to address */
+#define TO_MF 0x2c /* Modify field */
+#define TO_SA 0x28 /* Set attribute */
+
+/* Field Attribute Bytes */
+#define TF_INPUT 0x40 /* Visible input */
+#define TF_INPUTN 0x4c /* Invisible input */
+#define TF_INMDT 0xc1 /* Visible, Set-MDT */
+#define TF_LOG 0x60
+
+/* Character Attribute Bytes */
+#define TAT_RESET 0x00
+#define TAT_FIELD 0xc0
+#define TAT_EXTHI 0x41
+#define TAT_FGCOLOR 0x42
+#define TAT_CHARS 0x43
+#define TAT_BGCOLOR 0x45
+#define TAT_TRANS 0x46
+
+/* Extended-Highlighting Bytes */
+#define TAX_RESET 0x00
+#define TAX_BLINK 0xf1
+#define TAX_REVER 0xf2
+#define TAX_UNDER 0xf4
+
+/* Reset value */
+#define TAR_RESET 0x00
+
+/* Color values */
+#define TAC_RESET 0x00
+#define TAC_BLUE 0xf1
+#define TAC_RED 0xf2
+#define TAC_PINK 0xf3
+#define TAC_GREEN 0xf4
+#define TAC_TURQ 0xf5
+#define TAC_YELLOW 0xf6
+#define TAC_WHITE 0xf7
+#define TAC_DEFAULT 0x00
+
+/* Write Control Characters */
+#define TW_NONE 0x40 /* No particular action */
+#define TW_KR 0xc2 /* Keyboard restore */
+#define TW_PLUSALARM 0x04 /* Add this bit for alarm */
+
+#define RAW3270_FIRSTMINOR 1 /* First minor number */
+#define RAW3270_MAXDEVS 255 /* Max number of 3270 devices */
+
+#define AID_CLEAR 0x6d
+#define AID_ENTER 0x7d
+#define AID_PF3 0xf3
+#define AID_PF7 0xf7
+#define AID_PF8 0xf8
+#define AID_READ_PARTITION 0x88
+
+#endif /* __ASM_S390_UAPI_RAW3270_H */
diff --git a/arch/s390/include/uapi/asm/types.h b/arch/s390/include/uapi/asm/types.h
index da034c606314..84457dbb26b4 100644
--- a/arch/s390/include/uapi/asm/types.h
+++ b/arch/s390/include/uapi/asm/types.h
@@ -12,15 +12,18 @@
#ifndef __ASSEMBLY__
-/* A address type so that arithmetic can be done on it & it can be upgraded to
- 64 bit when necessary
-*/
-typedef unsigned long addr_t;
+typedef unsigned long addr_t;
typedef __signed__ long saddr_t;
typedef struct {
- __u32 u[4];
-} __vector128;
+ union {
+ struct {
+ __u64 high;
+ __u64 low;
+ };
+ __u32 u[4];
+ };
+} __attribute__((packed, aligned(4))) __vector128;
#endif /* __ASSEMBLY__ */
diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h
index d83713f67530..f4785abe1b9f 100644
--- a/arch/s390/include/uapi/asm/zcrypt.h
+++ b/arch/s390/include/uapi/asm/zcrypt.h
@@ -85,7 +85,8 @@ struct ica_rsa_modexpo_crt {
struct CPRBX {
__u16 cprb_len; /* CPRB length 220 */
__u8 cprb_ver_id; /* CPRB version id. 0x02 */
- __u8 _pad_000[3]; /* Alignment pad bytes */
+ __u8 ctfm; /* Command Type Filtering Mask */
+ __u8 pad_000[2]; /* Alignment pad bytes */
__u8 func_id[2]; /* function id 0x5432 */
__u8 cprb_flags[4]; /* Flags */
__u32 req_parml; /* request parameter buffer len */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 5e6a23299790..8983837b3565 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -58,6 +58,7 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_KPROBES) += kprobes_insn_page.o
obj-$(CONFIG_KPROBES) += mcount.o
+obj-$(CONFIG_RETHOOK) += rethook.o
obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o
obj-$(CONFIG_FUNCTION_TRACER) += mcount.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
@@ -69,7 +70,7 @@ obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o
obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o
-obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf_common.o
+obj-$(CONFIG_PERF_EVENTS) += perf_event.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o
obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o perf_pai_ext.o
diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c
index fb92e8ed0525..f9efc54ec4b7 100644
--- a/arch/s390/kernel/abs_lowcore.c
+++ b/arch/s390/kernel/abs_lowcore.c
@@ -3,12 +3,7 @@
#include <linux/pgtable.h>
#include <asm/abs_lowcore.h>
-#define ABS_LOWCORE_UNMAPPED 1
-#define ABS_LOWCORE_LAP_ON 2
-#define ABS_LOWCORE_IRQS_ON 4
-
unsigned long __bootdata_preserved(__abs_lowcore);
-bool __ro_after_init abs_lowcore_mapped;
int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc)
{
@@ -49,47 +44,3 @@ void abs_lowcore_unmap(int cpu)
addr += PAGE_SIZE;
}
}
-
-struct lowcore *get_abs_lowcore(unsigned long *flags)
-{
- unsigned long irq_flags;
- union ctlreg0 cr0;
- int cpu;
-
- *flags = 0;
- cpu = get_cpu();
- if (abs_lowcore_mapped) {
- return ((struct lowcore *)__abs_lowcore) + cpu;
- } else {
- if (cpu != 0)
- panic("Invalid unmapped absolute lowcore access\n");
- local_irq_save(irq_flags);
- if (!irqs_disabled_flags(irq_flags))
- *flags |= ABS_LOWCORE_IRQS_ON;
- __ctl_store(cr0.val, 0, 0);
- if (cr0.lap) {
- *flags |= ABS_LOWCORE_LAP_ON;
- __ctl_clear_bit(0, 28);
- }
- *flags |= ABS_LOWCORE_UNMAPPED;
- return lowcore_ptr[0];
- }
-}
-
-void put_abs_lowcore(struct lowcore *lc, unsigned long flags)
-{
- if (abs_lowcore_mapped) {
- if (flags)
- panic("Invalid mapped absolute lowcore release\n");
- } else {
- if (smp_processor_id() != 0)
- panic("Invalid mapped absolute lowcore access\n");
- if (!(flags & ABS_LOWCORE_UNMAPPED))
- panic("Invalid unmapped absolute lowcore release\n");
- if (flags & ABS_LOWCORE_LAP_ON)
- __ctl_set_bit(0, 28);
- if (flags & ABS_LOWCORE_IRQS_ON)
- local_irq_enable();
- }
- put_cpu();
-}
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
index 7ee3651d00ab..56254fa06f99 100644
--- a/arch/s390/kernel/cache.c
+++ b/arch/s390/kernel/cache.c
@@ -46,7 +46,7 @@ struct cache_info {
#define CACHE_MAX_LEVEL 8
union cache_topology {
struct cache_info ci[CACHE_MAX_LEVEL];
- unsigned long long raw;
+ unsigned long raw;
};
static const char * const cache_type_string[] = {
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index eee1ad3e1b29..cecedd01d4ec 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -139,7 +139,7 @@ static int save_sigregs_ext32(struct pt_regs *regs,
/* Save vector registers to signal stack */
if (MACHINE_HAS_VX) {
for (i = 0; i < __NUM_VXRS_LOW; i++)
- vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
+ vxrs[i] = current->thread.fpu.vxrs[i].low;
if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
sizeof(sregs_ext->vxrs_low)) ||
__copy_to_user(&sregs_ext->vxrs_high,
@@ -173,7 +173,7 @@ static int restore_sigregs_ext32(struct pt_regs *regs,
sizeof(sregs_ext->vxrs_high)))
return -EFAULT;
for (i = 0; i < __NUM_VXRS_LOW; i++)
- *((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i];
+ current->thread.fpu.vxrs[i].low = vxrs[i];
}
return 0;
}
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index c13b1455ec8c..8a617be28bb4 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -110,7 +110,7 @@ void __init save_area_add_vxrs(struct save_area *sa, __vector128 *vxrs)
/* Copy lower halves of vector registers 0-15 */
for (i = 0; i < 16; i++)
- memcpy(&sa->vxrs_low[i], &vxrs[i].u[2], 8);
+ sa->vxrs_low[i] = vxrs[i].low;
/* Copy vector registers 16-31 */
memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128));
}
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index a778714e4d8b..82079f2d8583 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -35,6 +35,7 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = {
[DIAG_STAT_X014] = { .code = 0x014, .name = "Spool File Services" },
[DIAG_STAT_X044] = { .code = 0x044, .name = "Voluntary Timeslice End" },
[DIAG_STAT_X064] = { .code = 0x064, .name = "NSS Manipulation" },
+ [DIAG_STAT_X08C] = { .code = 0x08c, .name = "Access 3270 Display Device Information" },
[DIAG_STAT_X09C] = { .code = 0x09c, .name = "Relinquish Timeslice" },
[DIAG_STAT_X0DC] = { .code = 0x0dc, .name = "Appldata Control" },
[DIAG_STAT_X204] = { .code = 0x204, .name = "Logical-CPU Utilization" },
@@ -57,12 +58,16 @@ struct diag_ops __amode31_ref diag_amode31_ops = {
.diag26c = _diag26c_amode31,
.diag14 = _diag14_amode31,
.diag0c = _diag0c_amode31,
+ .diag8c = _diag8c_amode31,
.diag308_reset = _diag308_reset_amode31
};
static struct diag210 _diag210_tmp_amode31 __section(".amode31.data");
struct diag210 __amode31_ref *__diag210_tmp_amode31 = &_diag210_tmp_amode31;
+static struct diag8c _diag8c_tmp_amode31 __section(".amode31.data");
+static struct diag8c __amode31_ref *__diag8c_tmp_amode31 = &_diag8c_tmp_amode31;
+
static int show_diag_stat(struct seq_file *m, void *v)
{
struct diag_stat *stat;
@@ -194,6 +199,27 @@ int diag210(struct diag210 *addr)
}
EXPORT_SYMBOL(diag210);
+/*
+ * Diagnose 210: Get information about a virtual device
+ */
+int diag8c(struct diag8c *addr, struct ccw_dev_id *devno)
+{
+ static DEFINE_SPINLOCK(diag8c_lock);
+ unsigned long flags;
+ int ccode;
+
+ spin_lock_irqsave(&diag8c_lock, flags);
+
+ diag_stat_inc(DIAG_STAT_X08C);
+ ccode = diag_amode31_ops.diag8c(__diag8c_tmp_amode31, devno, sizeof(*addr));
+
+ *addr = *__diag8c_tmp_amode31;
+ spin_unlock_irqrestore(&diag8c_lock, flags);
+
+ return ccode;
+}
+EXPORT_SYMBOL(diag8c);
+
int diag224(void *ptr)
{
int rc = -EOPNOTSUPP;
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 6030fdd6997b..59eba19ae0f2 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -18,6 +18,7 @@
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <asm/asm-extable.h>
+#include <linux/memblock.h>
#include <asm/diag.h>
#include <asm/ebcdic.h>
#include <asm/ipl.h>
@@ -160,9 +161,7 @@ static noinline __init void setup_lowcore_early(void)
psw_t psw;
psw.addr = (unsigned long)early_pgm_check_handler;
- psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA;
- if (IS_ENABLED(CONFIG_KASAN))
- psw.mask |= PSW_MASK_DAT;
+ psw.mask = PSW_KERNEL_BITS;
S390_lowcore.program_new_psw = psw;
S390_lowcore.preempt_count = INIT_PREEMPT_COUNT;
}
@@ -227,6 +226,8 @@ static __init void detect_machine_facilities(void)
S390_lowcore.machine_flags |= MACHINE_FLAG_PCI_MIO;
/* the control bit is set during PCI initialization */
}
+ if (test_facility(194))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_RDP;
}
static inline void save_vector_registers(void)
@@ -288,7 +289,6 @@ static void __init sort_amode31_extable(void)
void __init startup_init(void)
{
- sclp_early_adjust_va();
reset_tod_clock();
check_image_bootable();
time_early_init();
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 0f423e9df095..c8d8c9960936 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -137,19 +137,13 @@ _LPP_OFFSET = __LC_LPP
lgr %r14,\reg
larl %r13,\start
slgr %r14,%r13
-#ifdef CONFIG_AS_IS_LLVM
clgfrl %r14,.Lrange_size\@
-#else
- clgfi %r14,\end - \start
-#endif
jhe \outside_label
-#ifdef CONFIG_AS_IS_LLVM
.section .rodata, "a"
.align 4
.Lrange_size\@:
.long \end - \start
.previous
-#endif
.endm
.macro SIEEXIT
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 995ec7449feb..34674e38826b 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -73,6 +73,5 @@ extern struct exception_table_entry _stop_amode31_ex_table[];
#define __amode31_data __section(".amode31.data")
#define __amode31_ref __section(".amode31.refs")
extern long _start_amode31_refs[], _end_amode31_refs[];
-extern unsigned long __amode31_base;
#endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index d7b8b6ad574d..3b3bf8329e6c 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -25,6 +25,7 @@ ENTRY(startup_continue)
larl %r14,init_task
stg %r14,__LC_CURRENT
larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD-__PT_SIZE
+ brasl %r14,sclp_early_adjust_va # allow sclp_early_printk
#ifdef CONFIG_KASAN
brasl %r14,kasan_early_init
#endif
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index 4bf1ee293f2b..38e267c7bff7 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -12,9 +12,9 @@
#include <linux/notifier.h>
#include <linux/init.h>
#include <linux/cpu.h>
-#include <linux/sched/cputime.h>
#include <trace/events/power.h>
#include <asm/cpu_mf.h>
+#include <asm/cputime.h>
#include <asm/nmi.h>
#include <asm/smp.h>
#include "entry.h"
@@ -24,117 +24,61 @@ static DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
void account_idle_time_irq(void)
{
struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
+ unsigned long idle_time;
u64 cycles_new[8];
int i;
- clear_cpu_flag(CIF_ENABLED_WAIT);
if (smp_cpu_mtid) {
stcctm(MT_DIAG, smp_cpu_mtid, cycles_new);
for (i = 0; i < smp_cpu_mtid; i++)
this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]);
}
- idle->clock_idle_exit = S390_lowcore.int_clock;
- idle->timer_idle_exit = S390_lowcore.sys_enter_timer;
+ idle_time = S390_lowcore.int_clock - idle->clock_idle_enter;
S390_lowcore.steal_timer += idle->clock_idle_enter - S390_lowcore.last_update_clock;
- S390_lowcore.last_update_clock = idle->clock_idle_exit;
+ S390_lowcore.last_update_clock = S390_lowcore.int_clock;
S390_lowcore.system_timer += S390_lowcore.last_update_timer - idle->timer_idle_enter;
- S390_lowcore.last_update_timer = idle->timer_idle_exit;
+ S390_lowcore.last_update_timer = S390_lowcore.sys_enter_timer;
+
+ /* Account time spent with enabled wait psw loaded as idle time. */
+ WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);
+ WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1);
+ account_idle_time(cputime_to_nsecs(idle_time));
}
-void arch_cpu_idle(void)
+void noinstr arch_cpu_idle(void)
{
struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
- unsigned long idle_time;
unsigned long psw_mask;
/* Wait for external, I/O or machine check interrupt. */
- psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT |
- PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+ psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT |
+ PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
clear_cpu_flag(CIF_NOHZ_DELAY);
/* psw_idle() returns with interrupts disabled. */
psw_idle(idle, psw_mask);
-
- /* Account time spent with enabled wait psw loaded as idle time. */
- raw_write_seqcount_begin(&idle->seqcount);
- idle_time = idle->clock_idle_exit - idle->clock_idle_enter;
- idle->clock_idle_enter = idle->clock_idle_exit = 0ULL;
- idle->idle_time += idle_time;
- idle->idle_count++;
- account_idle_time(cputime_to_nsecs(idle_time));
- raw_write_seqcount_end(&idle->seqcount);
- raw_local_irq_enable();
}
static ssize_t show_idle_count(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr, char *buf)
{
struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
- unsigned long idle_count;
- unsigned int seq;
-
- do {
- seq = read_seqcount_begin(&idle->seqcount);
- idle_count = READ_ONCE(idle->idle_count);
- if (READ_ONCE(idle->clock_idle_enter))
- idle_count++;
- } while (read_seqcount_retry(&idle->seqcount, seq));
- return sprintf(buf, "%lu\n", idle_count);
+
+ return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_count));
}
DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
static ssize_t show_idle_time(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr, char *buf)
{
- unsigned long now, idle_time, idle_enter, idle_exit, in_idle;
struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
- unsigned int seq;
-
- do {
- seq = read_seqcount_begin(&idle->seqcount);
- idle_time = READ_ONCE(idle->idle_time);
- idle_enter = READ_ONCE(idle->clock_idle_enter);
- idle_exit = READ_ONCE(idle->clock_idle_exit);
- } while (read_seqcount_retry(&idle->seqcount, seq));
- in_idle = 0;
- now = get_tod_clock();
- if (idle_enter) {
- if (idle_exit) {
- in_idle = idle_exit - idle_enter;
- } else if (now > idle_enter) {
- in_idle = now - idle_enter;
- }
- }
- idle_time += in_idle;
- return sprintf(buf, "%lu\n", idle_time >> 12);
-}
-DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
-u64 arch_cpu_idle_time(int cpu)
-{
- struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
- unsigned long now, idle_enter, idle_exit, in_idle;
- unsigned int seq;
-
- do {
- seq = read_seqcount_begin(&idle->seqcount);
- idle_enter = READ_ONCE(idle->clock_idle_enter);
- idle_exit = READ_ONCE(idle->clock_idle_exit);
- } while (read_seqcount_retry(&idle->seqcount, seq));
- in_idle = 0;
- now = get_tod_clock();
- if (idle_enter) {
- if (idle_exit) {
- in_idle = idle_exit - idle_enter;
- } else if (now > idle_enter) {
- in_idle = now - idle_enter;
- }
- }
- return cputime_to_nsecs(in_idle);
+ return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_time) >> 12);
}
+DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
void arch_cpu_idle_enter(void)
{
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index fbd646dbf440..5f0f5c86963a 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -593,6 +593,7 @@ static struct attribute *ipl_eckd_attrs[] = {
&sys_ipl_type_attr.attr,
&sys_ipl_eckd_bootprog_attr.attr,
&sys_ipl_eckd_br_chr_attr.attr,
+ &sys_ipl_ccw_loadparm_attr.attr,
&sys_ipl_device_attr.attr,
&sys_ipl_secure_attr.attr,
&sys_ipl_has_secure_attr.attr,
@@ -888,23 +889,27 @@ static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb,
return len;
}
-/* FCP wrapper */
-static ssize_t reipl_fcp_loadparm_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
-{
- return reipl_generic_loadparm_show(reipl_block_fcp, page);
-}
-
-static ssize_t reipl_fcp_loadparm_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t len)
-{
- return reipl_generic_loadparm_store(reipl_block_fcp, buf, len);
-}
-
-static struct kobj_attribute sys_reipl_fcp_loadparm_attr =
- __ATTR(loadparm, 0644, reipl_fcp_loadparm_show,
- reipl_fcp_loadparm_store);
+#define DEFINE_GENERIC_LOADPARM(name) \
+static ssize_t reipl_##name##_loadparm_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, char *page) \
+{ \
+ return reipl_generic_loadparm_show(reipl_block_##name, page); \
+} \
+static ssize_t reipl_##name##_loadparm_store(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ return reipl_generic_loadparm_store(reipl_block_##name, buf, len); \
+} \
+static struct kobj_attribute sys_reipl_##name##_loadparm_attr = \
+ __ATTR(loadparm, 0644, reipl_##name##_loadparm_show, \
+ reipl_##name##_loadparm_store)
+
+DEFINE_GENERIC_LOADPARM(fcp);
+DEFINE_GENERIC_LOADPARM(nvme);
+DEFINE_GENERIC_LOADPARM(ccw);
+DEFINE_GENERIC_LOADPARM(nss);
+DEFINE_GENERIC_LOADPARM(eckd);
static ssize_t reipl_fcp_clear_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
@@ -994,24 +999,6 @@ DEFINE_IPL_ATTR_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n",
DEFINE_IPL_ATTR_RW(reipl_nvme, br_lba, "%lld\n", "%lld\n",
reipl_block_nvme->nvme.br_lba);
-/* nvme wrapper */
-static ssize_t reipl_nvme_loadparm_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
-{
- return reipl_generic_loadparm_show(reipl_block_nvme, page);
-}
-
-static ssize_t reipl_nvme_loadparm_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t len)
-{
- return reipl_generic_loadparm_store(reipl_block_nvme, buf, len);
-}
-
-static struct kobj_attribute sys_reipl_nvme_loadparm_attr =
- __ATTR(loadparm, 0644, reipl_nvme_loadparm_show,
- reipl_nvme_loadparm_store);
-
static struct attribute *reipl_nvme_attrs[] = {
&sys_reipl_nvme_fid_attr.attr,
&sys_reipl_nvme_nsid_attr.attr,
@@ -1047,38 +1034,6 @@ static struct kobj_attribute sys_reipl_nvme_clear_attr =
/* CCW reipl device attributes */
DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw);
-/* NSS wrapper */
-static ssize_t reipl_nss_loadparm_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
-{
- return reipl_generic_loadparm_show(reipl_block_nss, page);
-}
-
-static ssize_t reipl_nss_loadparm_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t len)
-{
- return reipl_generic_loadparm_store(reipl_block_nss, buf, len);
-}
-
-/* CCW wrapper */
-static ssize_t reipl_ccw_loadparm_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
-{
- return reipl_generic_loadparm_show(reipl_block_ccw, page);
-}
-
-static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t len)
-{
- return reipl_generic_loadparm_store(reipl_block_ccw, buf, len);
-}
-
-static struct kobj_attribute sys_reipl_ccw_loadparm_attr =
- __ATTR(loadparm, 0644, reipl_ccw_loadparm_show,
- reipl_ccw_loadparm_store);
-
static ssize_t reipl_ccw_clear_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
@@ -1176,6 +1131,7 @@ static struct attribute *reipl_eckd_attrs[] = {
&sys_reipl_eckd_device_attr.attr,
&sys_reipl_eckd_bootprog_attr.attr,
&sys_reipl_eckd_br_chr_attr.attr,
+ &sys_reipl_eckd_loadparm_attr.attr,
NULL,
};
@@ -1194,7 +1150,7 @@ static ssize_t reipl_eckd_clear_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t len)
{
- if (strtobool(buf, &reipl_eckd_clear) < 0)
+ if (kstrtobool(buf, &reipl_eckd_clear) < 0)
return -EINVAL;
return len;
}
@@ -1251,10 +1207,6 @@ static struct kobj_attribute sys_reipl_nss_name_attr =
__ATTR(name, 0644, reipl_nss_name_show,
reipl_nss_name_store);
-static struct kobj_attribute sys_reipl_nss_loadparm_attr =
- __ATTR(loadparm, 0644, reipl_nss_loadparm_show,
- reipl_nss_loadparm_store);
-
static struct attribute *reipl_nss_attrs[] = {
&sys_reipl_nss_name_attr.attr,
&sys_reipl_nss_loadparm_attr.attr,
@@ -1986,15 +1938,14 @@ static void dump_reipl_run(struct shutdown_trigger *trigger)
{
unsigned long ipib = (unsigned long) reipl_block_actual;
struct lowcore *abs_lc;
- unsigned long flags;
unsigned int csum;
csum = (__force unsigned int)
csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
- abs_lc = get_abs_lowcore(&flags);
+ abs_lc = get_abs_lowcore();
abs_lc->ipib = ipib;
abs_lc->ipib_checksum = csum;
- put_abs_lowcore(abs_lc, flags);
+ put_abs_lowcore(abs_lc);
dump_run(trigger);
}
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 45393919fe61..b020ff17d206 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -136,7 +136,7 @@ void noinstr do_io_irq(struct pt_regs *regs)
{
irqentry_state_t state = irqentry_enter(regs);
struct pt_regs *old_regs = set_irq_regs(regs);
- int from_idle;
+ bool from_idle;
irq_enter_rcu();
@@ -146,7 +146,7 @@ void noinstr do_io_irq(struct pt_regs *regs)
current->thread.last_break = regs->last_break;
}
- from_idle = !user_mode(regs) && regs->psw.addr == (unsigned long)psw_idle_exit;
+ from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
if (from_idle)
account_idle_time_irq();
@@ -171,7 +171,7 @@ void noinstr do_ext_irq(struct pt_regs *regs)
{
irqentry_state_t state = irqentry_enter(regs);
struct pt_regs *old_regs = set_irq_regs(regs);
- int from_idle;
+ bool from_idle;
irq_enter_rcu();
@@ -185,7 +185,7 @@ void noinstr do_ext_irq(struct pt_regs *regs)
regs->int_parm = S390_lowcore.ext_params;
regs->int_parm_long = S390_lowcore.ext_params2;
- from_idle = !user_mode(regs) && regs->psw.addr == (unsigned long)psw_idle_exit;
+ from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
if (from_idle)
account_idle_time_irq();
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 401f9c933ff9..5e713f318de3 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -281,16 +281,6 @@ static void pop_kprobe(struct kprobe_ctlblk *kcb)
}
NOKPROBE_SYMBOL(pop_kprobe);
-void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
-{
- ri->ret_addr = (kprobe_opcode_t *)regs->gprs[14];
- ri->fp = (void *)regs->gprs[15];
-
- /* Replace the return addr with trampoline addr */
- regs->gprs[14] = (unsigned long)&__kretprobe_trampoline;
-}
-NOKPROBE_SYMBOL(arch_prepare_kretprobe);
-
static void kprobe_reenter_check(struct kprobe_ctlblk *kcb, struct kprobe *p)
{
switch (kcb->kprobe_status) {
@@ -371,26 +361,6 @@ static int kprobe_handler(struct pt_regs *regs)
}
NOKPROBE_SYMBOL(kprobe_handler);
-void arch_kretprobe_fixup_return(struct pt_regs *regs,
- kprobe_opcode_t *correct_ret_addr)
-{
- /* Replace fake return address with real one. */
- regs->gprs[14] = (unsigned long)correct_ret_addr;
-}
-NOKPROBE_SYMBOL(arch_kretprobe_fixup_return);
-
-/*
- * Called from __kretprobe_trampoline
- */
-void trampoline_probe_handler(struct pt_regs *regs)
-{
- kretprobe_trampoline_handler(regs, (void *)regs->gprs[15]);
-}
-NOKPROBE_SYMBOL(trampoline_probe_handler);
-
-/* assembler function that handles the kretprobes must not be probed itself */
-NOKPROBE_SYMBOL(__kretprobe_trampoline);
-
/*
* Called after single-stepping. p->addr is the address of the
* instruction whose first byte has been replaced by the "breakpoint"
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 4579b42286d5..2a8e73266428 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -224,7 +224,6 @@ void machine_kexec_cleanup(struct kimage *image)
void arch_crash_save_vmcoreinfo(void)
{
struct lowcore *abs_lc;
- unsigned long flags;
VMCOREINFO_SYMBOL(lowcore_ptr);
VMCOREINFO_SYMBOL(high_memory);
@@ -232,9 +231,9 @@ void arch_crash_save_vmcoreinfo(void)
vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31);
vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31);
vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
- abs_lc = get_abs_lowcore(&flags);
+ abs_lc = get_abs_lowcore();
abs_lc->vmcore_info = paddr_vmcoreinfo_note();
- put_abs_lowcore(abs_lc, flags);
+ put_abs_lowcore(abs_lc);
}
void machine_shutdown(void)
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 4786bfe02144..43ff91073d2a 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -135,9 +135,9 @@ SYM_FUNC_END(return_to_handler)
#endif
#endif /* CONFIG_FUNCTION_TRACER */
-#ifdef CONFIG_KPROBES
+#ifdef CONFIG_RETHOOK
-SYM_FUNC_START(__kretprobe_trampoline)
+SYM_FUNC_START(arch_rethook_trampoline)
stg %r14,(__SF_GPRS+8*8)(%r15)
lay %r15,-STACK_FRAME_SIZE(%r15)
@@ -152,16 +152,16 @@ SYM_FUNC_START(__kretprobe_trampoline)
epsw %r2,%r3
risbg %r3,%r2,0,31,32
stg %r3,STACK_PTREGS_PSW(%r15)
- larl %r1,__kretprobe_trampoline
+ larl %r1,arch_rethook_trampoline
stg %r1,STACK_PTREGS_PSW+8(%r15)
lay %r2,STACK_PTREGS(%r15)
- brasl %r14,trampoline_probe_handler
+ brasl %r14,arch_rethook_trampoline_callback
mvc __SF_EMPTY(16,%r7),STACK_PTREGS_PSW(%r15)
lmg %r0,%r15,STACK_PTREGS_GPRS(%r15)
lpswe __SF_EMPTY(%r15)
-SYM_FUNC_END(__kretprobe_trampoline)
+SYM_FUNC_END(arch_rethook_trampoline)
-#endif /* CONFIG_KPROBES */
+#endif /* CONFIG_RETHOOK */
diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
index ec0bd9457e90..6e1824141b29 100644
--- a/arch/s390/kernel/os_info.c
+++ b/arch/s390/kernel/os_info.c
@@ -59,15 +59,14 @@ void os_info_entry_add(int nr, void *ptr, u64 size)
void __init os_info_init(void)
{
struct lowcore *abs_lc;
- unsigned long flags;
os_info.version_major = OS_INFO_VERSION_MAJOR;
os_info.version_minor = OS_INFO_VERSION_MINOR;
os_info.magic = OS_INFO_MAGIC;
os_info.csum = os_info_csum(&os_info);
- abs_lc = get_abs_lowcore(&flags);
+ abs_lc = get_abs_lowcore();
abs_lc->os_info = __pa(&os_info);
- put_abs_lowcore(abs_lc, flags);
+ put_abs_lowcore(abs_lc);
}
#ifdef CONFIG_CRASH_DUMP
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index f043a7ff220b..c9ab971498d6 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -2,7 +2,7 @@
/*
* Performance event support for s390x - CPU-measurement Counter Facility
*
- * Copyright IBM Corp. 2012, 2021
+ * Copyright IBM Corp. 2012, 2023
* Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
* Thomas Richter <tmricht@linux.ibm.com>
*/
@@ -16,11 +16,82 @@
#include <linux/init.h>
#include <linux/export.h>
#include <linux/miscdevice.h>
+#include <linux/perf_event.h>
-#include <asm/cpu_mcf.h>
+#include <asm/cpu_mf.h>
#include <asm/hwctrset.h>
#include <asm/debug.h>
+enum cpumf_ctr_set {
+ CPUMF_CTR_SET_BASIC = 0, /* Basic Counter Set */
+ CPUMF_CTR_SET_USER = 1, /* Problem-State Counter Set */
+ CPUMF_CTR_SET_CRYPTO = 2, /* Crypto-Activity Counter Set */
+ CPUMF_CTR_SET_EXT = 3, /* Extended Counter Set */
+ CPUMF_CTR_SET_MT_DIAG = 4, /* MT-diagnostic Counter Set */
+
+ /* Maximum number of counter sets */
+ CPUMF_CTR_SET_MAX,
+};
+
+#define CPUMF_LCCTL_ENABLE_SHIFT 16
+#define CPUMF_LCCTL_ACTCTL_SHIFT 0
+
+static inline void ctr_set_enable(u64 *state, u64 ctrsets)
+{
+ *state |= ctrsets << CPUMF_LCCTL_ENABLE_SHIFT;
+}
+
+static inline void ctr_set_disable(u64 *state, u64 ctrsets)
+{
+ *state &= ~(ctrsets << CPUMF_LCCTL_ENABLE_SHIFT);
+}
+
+static inline void ctr_set_start(u64 *state, u64 ctrsets)
+{
+ *state |= ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT;
+}
+
+static inline void ctr_set_stop(u64 *state, u64 ctrsets)
+{
+ *state &= ~(ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT);
+}
+
+static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest)
+{
+ switch (set) {
+ case CPUMF_CTR_SET_BASIC:
+ return stcctm(BASIC, range, dest);
+ case CPUMF_CTR_SET_USER:
+ return stcctm(PROBLEM_STATE, range, dest);
+ case CPUMF_CTR_SET_CRYPTO:
+ return stcctm(CRYPTO_ACTIVITY, range, dest);
+ case CPUMF_CTR_SET_EXT:
+ return stcctm(EXTENDED, range, dest);
+ case CPUMF_CTR_SET_MT_DIAG:
+ return stcctm(MT_DIAG_CLEARING, range, dest);
+ case CPUMF_CTR_SET_MAX:
+ return 3;
+ }
+ return 3;
+}
+
+struct cpu_cf_events {
+ struct cpumf_ctr_info info;
+ atomic_t ctr_set[CPUMF_CTR_SET_MAX];
+ u64 state; /* For perf_event_open SVC */
+ u64 dev_state; /* For /dev/hwctr */
+ unsigned int flags;
+ size_t used; /* Bytes used in data */
+ size_t usedss; /* Bytes used in start/stop */
+ unsigned char start[PAGE_SIZE]; /* Counter set at event add */
+ unsigned char stop[PAGE_SIZE]; /* Counter set at event delete */
+ unsigned char data[PAGE_SIZE]; /* Counter set at /dev/hwctr */
+ unsigned int sets; /* # Counter set saved in memory */
+};
+
+/* Per-CPU event structure for the counter facility */
+static DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events);
+
static unsigned int cfdiag_cpu_speed; /* CPU speed for CF_DIAG trailer */
static debug_info_t *cf_dbg;
@@ -112,6 +183,53 @@ static void cfdiag_trailer(struct cf_trailer_entry *te)
te->timestamp = get_tod_clock_fast();
}
+/*
+ * Return the maximum possible counter set size (in number of 8 byte counters)
+ * depending on type and model number.
+ */
+static size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset,
+ struct cpumf_ctr_info *info)
+{
+ size_t ctrset_size = 0;
+
+ switch (ctrset) {
+ case CPUMF_CTR_SET_BASIC:
+ if (info->cfvn >= 1)
+ ctrset_size = 6;
+ break;
+ case CPUMF_CTR_SET_USER:
+ if (info->cfvn == 1)
+ ctrset_size = 6;
+ else if (info->cfvn >= 3)
+ ctrset_size = 2;
+ break;
+ case CPUMF_CTR_SET_CRYPTO:
+ if (info->csvn >= 1 && info->csvn <= 5)
+ ctrset_size = 16;
+ else if (info->csvn == 6 || info->csvn == 7)
+ ctrset_size = 20;
+ break;
+ case CPUMF_CTR_SET_EXT:
+ if (info->csvn == 1)
+ ctrset_size = 32;
+ else if (info->csvn == 2)
+ ctrset_size = 48;
+ else if (info->csvn >= 3 && info->csvn <= 5)
+ ctrset_size = 128;
+ else if (info->csvn == 6 || info->csvn == 7)
+ ctrset_size = 160;
+ break;
+ case CPUMF_CTR_SET_MT_DIAG:
+ if (info->csvn > 3)
+ ctrset_size = 48;
+ break;
+ case CPUMF_CTR_SET_MAX:
+ break;
+ }
+
+ return ctrset_size;
+}
+
/* Read a counter set. The counter set number determines the counter set and
* the CPUM-CF first and second version number determine the number of
* available counters in each counter set.
@@ -388,6 +506,47 @@ static void cpumf_pmu_disable(struct pmu *pmu)
cpuhw->flags &= ~PMU_F_ENABLED;
}
+#define PMC_INIT 0UL
+#define PMC_RELEASE 1UL
+
+static void cpum_cf_setup_cpu(void *flags)
+{
+ struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+
+ switch ((unsigned long)flags) {
+ case PMC_INIT:
+ memset(&cpuhw->info, 0, sizeof(cpuhw->info));
+ qctri(&cpuhw->info);
+ cpuhw->flags |= PMU_F_RESERVED;
+ break;
+
+ case PMC_RELEASE:
+ cpuhw->flags &= ~PMU_F_RESERVED;
+ break;
+ }
+
+ /* Disable CPU counter sets */
+ lcctl(0);
+ debug_sprintf_event(cf_dbg, 5, "%s flags %#x flags %#x state %#llx\n",
+ __func__, *(int *)flags, cpuhw->flags,
+ cpuhw->state);
+}
+
+/* Initialize the CPU-measurement counter facility */
+static int __kernel_cpumcf_begin(void)
+{
+ on_each_cpu(cpum_cf_setup_cpu, (void *)PMC_INIT, 1);
+ irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+
+ return 0;
+}
+
+/* Release the CPU-measurement counter facility */
+static void __kernel_cpumcf_end(void)
+{
+ on_each_cpu(cpum_cf_setup_cpu, (void *)PMC_RELEASE, 1);
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+}
/* Number of perf events counting hardware events */
static atomic_t num_events = ATOMIC_INIT(0);
@@ -397,12 +556,10 @@ static DEFINE_MUTEX(pmc_reserve_mutex);
/* Release the PMU if event is the last perf event */
static void hw_perf_event_destroy(struct perf_event *event)
{
- if (!atomic_add_unless(&num_events, -1, 1)) {
- mutex_lock(&pmc_reserve_mutex);
- if (atomic_dec_return(&num_events) == 0)
- __kernel_cpumcf_end();
- mutex_unlock(&pmc_reserve_mutex);
- }
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_dec_return(&num_events) == 0)
+ __kernel_cpumcf_end();
+ mutex_unlock(&pmc_reserve_mutex);
}
/* CPUMF <-> perf event mappings for kernel+userspace (basic set) */
@@ -434,6 +591,12 @@ static void cpumf_hw_inuse(void)
mutex_unlock(&pmc_reserve_mutex);
}
+static int is_userspace_event(u64 ev)
+{
+ return cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev ||
+ cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev;
+}
+
static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
{
struct perf_event_attr *attr = &event->attr;
@@ -456,19 +619,26 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
if (is_sampling_event(event)) /* No sampling support */
return -ENOENT;
ev = attr->config;
- /* Count user space (problem-state) only */
if (!attr->exclude_user && attr->exclude_kernel) {
- if (ev >= ARRAY_SIZE(cpumf_generic_events_user))
- return -EOPNOTSUPP;
- ev = cpumf_generic_events_user[ev];
-
- /* No support for kernel space counters only */
+ /*
+ * Count user space (problem-state) only
+ * Handle events 32 and 33 as 0:u and 1:u
+ */
+ if (!is_userspace_event(ev)) {
+ if (ev >= ARRAY_SIZE(cpumf_generic_events_user))
+ return -EOPNOTSUPP;
+ ev = cpumf_generic_events_user[ev];
+ }
} else if (!attr->exclude_kernel && attr->exclude_user) {
+ /* No support for kernel space counters only */
return -EOPNOTSUPP;
- } else { /* Count user and kernel space */
- if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
- return -EOPNOTSUPP;
- ev = cpumf_generic_events_basic[ev];
+ } else {
+ /* Count user and kernel space, incl. events 32 + 33 */
+ if (!is_userspace_event(ev)) {
+ if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
+ return -EOPNOTSUPP;
+ ev = cpumf_generic_events_basic[ev];
+ }
}
break;
@@ -662,9 +832,7 @@ static int cfdiag_push_sample(struct perf_event *event,
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw.frag.size = cpuhw->usedss;
raw.frag.data = cpuhw->stop;
- raw.size = raw.frag.size;
- data.raw = &raw;
- data.sample_flags |= PERF_SAMPLE_RAW;
+ perf_sample_save_raw_data(&data, &raw);
}
overflow = perf_event_overflow(event, &data, &regs);
@@ -763,31 +931,120 @@ static struct pmu cpumf_pmu = {
.read = cpumf_pmu_read,
};
+static int cpum_cf_setup(unsigned int cpu, unsigned long flags)
+{
+ local_irq_disable();
+ cpum_cf_setup_cpu((void *)flags);
+ local_irq_enable();
+ return 0;
+}
+
+static int cfset_online_cpu(unsigned int cpu);
+static int cpum_cf_online_cpu(unsigned int cpu)
+{
+ debug_sprintf_event(cf_dbg, 4, "%s cpu %d in_irq %ld\n", __func__,
+ cpu, in_interrupt());
+ cpum_cf_setup(cpu, PMC_INIT);
+ return cfset_online_cpu(cpu);
+}
+
+static int cfset_offline_cpu(unsigned int cpu);
+static int cpum_cf_offline_cpu(unsigned int cpu)
+{
+ debug_sprintf_event(cf_dbg, 4, "%s cpu %d\n", __func__, cpu);
+ cfset_offline_cpu(cpu);
+ return cpum_cf_setup(cpu, PMC_RELEASE);
+}
+
+/* Return true if store counter set multiple instruction is available */
+static inline int stccm_avail(void)
+{
+ return test_facility(142);
+}
+
+/* CPU-measurement alerts for the counter facility */
+static void cpumf_measurement_alert(struct ext_code ext_code,
+ unsigned int alert, unsigned long unused)
+{
+ struct cpu_cf_events *cpuhw;
+
+ if (!(alert & CPU_MF_INT_CF_MASK))
+ return;
+
+ inc_irq_stat(IRQEXT_CMC);
+ cpuhw = this_cpu_ptr(&cpu_cf_events);
+
+ /*
+ * Measurement alerts are shared and might happen when the PMU
+ * is not reserved. Ignore these alerts in this case.
+ */
+ if (!(cpuhw->flags & PMU_F_RESERVED))
+ return;
+
+ /* counter authorization change alert */
+ if (alert & CPU_MF_INT_CF_CACA)
+ qctri(&cpuhw->info);
+
+ /* loss of counter data alert */
+ if (alert & CPU_MF_INT_CF_LCDA)
+ pr_err("CPU[%i] Counter data was lost\n", smp_processor_id());
+
+ /* loss of MT counter data alert */
+ if (alert & CPU_MF_INT_CF_MTDA)
+ pr_warn("CPU[%i] MT counter data was lost\n",
+ smp_processor_id());
+}
+
static int cfset_init(void);
static int __init cpumf_pmu_init(void)
{
int rc;
- if (!kernel_cpumcf_avail())
+ if (!cpum_cf_avail())
return -ENODEV;
+ /*
+ * Clear bit 15 of cr0 to unauthorize problem-state to
+ * extract measurement counters
+ */
+ ctl_clear_bit(0, 48);
+
+ /* register handler for measurement-alert interruptions */
+ rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+ cpumf_measurement_alert);
+ if (rc) {
+ pr_err("Registering for CPU-measurement alerts failed with rc=%i\n", rc);
+ return rc;
+ }
+
/* Setup s390dbf facility */
cf_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128);
if (!cf_dbg) {
pr_err("Registration of s390dbf(cpum_cf) failed\n");
- return -ENOMEM;
+ rc = -ENOMEM;
+ goto out1;
}
debug_register_view(cf_dbg, &debug_sprintf_view);
cpumf_pmu.attr_groups = cpumf_cf_event_group();
rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1);
if (rc) {
- debug_unregister_view(cf_dbg, &debug_sprintf_view);
- debug_unregister(cf_dbg);
pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
+ goto out2;
} else if (stccm_avail()) { /* Setup counter set device */
cfset_init();
}
+
+ rc = cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE,
+ "perf/s390/cf:online",
+ cpum_cf_online_cpu, cpum_cf_offline_cpu);
+ return rc;
+
+out2:
+ debug_unregister_view(cf_dbg, &debug_sprintf_view);
+ debug_unregister(cf_dbg);
+out1:
+ unregister_external_irq(EXT_IRQ_MEASURE_ALERT, cpumf_measurement_alert);
return rc;
}
@@ -1005,7 +1262,6 @@ static int cfset_all_start(struct cfset_request *req)
return rc;
}
-
/* Return the maximum required space for all possible CPUs in case one
* CPU will be onlined during the START, READ, STOP cycles.
* To find out the size of the counter sets, any one CPU will do. They
@@ -1268,7 +1524,7 @@ static struct miscdevice cfset_dev = {
/* Hotplug add of a CPU. Scan through all active processes and add
* that CPU to the list of CPUs supplied with ioctl(..., START, ...).
*/
-int cfset_online_cpu(unsigned int cpu)
+static int cfset_online_cpu(unsigned int cpu)
{
struct cfset_call_on_cpu_parm p;
struct cfset_request *rp;
@@ -1288,7 +1544,7 @@ int cfset_online_cpu(unsigned int cpu)
/* Hotplug remove of a CPU. Scan through all active processes and clear
* that CPU from the list of CPUs supplied with ioctl(..., START, ...).
*/
-int cfset_offline_cpu(unsigned int cpu)
+static int cfset_offline_cpu(unsigned int cpu)
{
struct cfset_call_on_cpu_parm p;
struct cfset_request *rp;
diff --git a/arch/s390/kernel/perf_cpum_cf_common.c b/arch/s390/kernel/perf_cpum_cf_common.c
deleted file mode 100644
index 8ee48672233f..000000000000
--- a/arch/s390/kernel/perf_cpum_cf_common.c
+++ /dev/null
@@ -1,233 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * CPU-Measurement Counter Facility Support - Common Layer
- *
- * Copyright IBM Corp. 2019
- * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
- */
-#define KMSG_COMPONENT "cpum_cf_common"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/kernel_stat.h>
-#include <linux/percpu.h>
-#include <linux/notifier.h>
-#include <linux/init.h>
-#include <linux/export.h>
-#include <asm/ctl_reg.h>
-#include <asm/irq.h>
-#include <asm/cpu_mcf.h>
-
-/* Per-CPU event structure for the counter facility */
-DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events) = {
- .ctr_set = {
- [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0),
- [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0),
- [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0),
- [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0),
- [CPUMF_CTR_SET_MT_DIAG] = ATOMIC_INIT(0),
- },
- .alert = ATOMIC64_INIT(0),
- .state = 0,
- .dev_state = 0,
- .flags = 0,
- .used = 0,
- .usedss = 0,
- .sets = 0
-};
-/* Indicator whether the CPU-Measurement Counter Facility Support is ready */
-static bool cpum_cf_initalized;
-
-/* CPU-measurement alerts for the counter facility */
-static void cpumf_measurement_alert(struct ext_code ext_code,
- unsigned int alert, unsigned long unused)
-{
- struct cpu_cf_events *cpuhw;
-
- if (!(alert & CPU_MF_INT_CF_MASK))
- return;
-
- inc_irq_stat(IRQEXT_CMC);
- cpuhw = this_cpu_ptr(&cpu_cf_events);
-
- /* Measurement alerts are shared and might happen when the PMU
- * is not reserved. Ignore these alerts in this case. */
- if (!(cpuhw->flags & PMU_F_RESERVED))
- return;
-
- /* counter authorization change alert */
- if (alert & CPU_MF_INT_CF_CACA)
- qctri(&cpuhw->info);
-
- /* loss of counter data alert */
- if (alert & CPU_MF_INT_CF_LCDA)
- pr_err("CPU[%i] Counter data was lost\n", smp_processor_id());
-
- /* loss of MT counter data alert */
- if (alert & CPU_MF_INT_CF_MTDA)
- pr_warn("CPU[%i] MT counter data was lost\n",
- smp_processor_id());
-
- /* store alert for special handling by in-kernel users */
- atomic64_or(alert, &cpuhw->alert);
-}
-
-#define PMC_INIT 0
-#define PMC_RELEASE 1
-static void cpum_cf_setup_cpu(void *flags)
-{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
-
- switch (*((int *) flags)) {
- case PMC_INIT:
- memset(&cpuhw->info, 0, sizeof(cpuhw->info));
- qctri(&cpuhw->info);
- cpuhw->flags |= PMU_F_RESERVED;
- break;
-
- case PMC_RELEASE:
- cpuhw->flags &= ~PMU_F_RESERVED;
- break;
- }
-
- /* Disable CPU counter sets */
- lcctl(0);
-}
-
-bool kernel_cpumcf_avail(void)
-{
- return cpum_cf_initalized;
-}
-EXPORT_SYMBOL(kernel_cpumcf_avail);
-
-/* Initialize the CPU-measurement counter facility */
-int __kernel_cpumcf_begin(void)
-{
- int flags = PMC_INIT;
-
- on_each_cpu(cpum_cf_setup_cpu, &flags, 1);
- irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-
- return 0;
-}
-EXPORT_SYMBOL(__kernel_cpumcf_begin);
-
-/* Obtain the CPU-measurement alerts for the counter facility */
-unsigned long kernel_cpumcf_alert(int clear)
-{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
- unsigned long alert;
-
- alert = atomic64_read(&cpuhw->alert);
- if (clear)
- atomic64_set(&cpuhw->alert, 0);
-
- return alert;
-}
-EXPORT_SYMBOL(kernel_cpumcf_alert);
-
-/* Release the CPU-measurement counter facility */
-void __kernel_cpumcf_end(void)
-{
- int flags = PMC_RELEASE;
-
- on_each_cpu(cpum_cf_setup_cpu, &flags, 1);
- irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-}
-EXPORT_SYMBOL(__kernel_cpumcf_end);
-
-static int cpum_cf_setup(unsigned int cpu, int flags)
-{
- local_irq_disable();
- cpum_cf_setup_cpu(&flags);
- local_irq_enable();
- return 0;
-}
-
-static int cpum_cf_online_cpu(unsigned int cpu)
-{
- cpum_cf_setup(cpu, PMC_INIT);
- return cfset_online_cpu(cpu);
-}
-
-static int cpum_cf_offline_cpu(unsigned int cpu)
-{
- cfset_offline_cpu(cpu);
- return cpum_cf_setup(cpu, PMC_RELEASE);
-}
-
-/* Return the maximum possible counter set size (in number of 8 byte counters)
- * depending on type and model number.
- */
-size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset,
- struct cpumf_ctr_info *info)
-{
- size_t ctrset_size = 0;
-
- switch (ctrset) {
- case CPUMF_CTR_SET_BASIC:
- if (info->cfvn >= 1)
- ctrset_size = 6;
- break;
- case CPUMF_CTR_SET_USER:
- if (info->cfvn == 1)
- ctrset_size = 6;
- else if (info->cfvn >= 3)
- ctrset_size = 2;
- break;
- case CPUMF_CTR_SET_CRYPTO:
- if (info->csvn >= 1 && info->csvn <= 5)
- ctrset_size = 16;
- else if (info->csvn == 6 || info->csvn == 7)
- ctrset_size = 20;
- break;
- case CPUMF_CTR_SET_EXT:
- if (info->csvn == 1)
- ctrset_size = 32;
- else if (info->csvn == 2)
- ctrset_size = 48;
- else if (info->csvn >= 3 && info->csvn <= 5)
- ctrset_size = 128;
- else if (info->csvn == 6 || info->csvn == 7)
- ctrset_size = 160;
- break;
- case CPUMF_CTR_SET_MT_DIAG:
- if (info->csvn > 3)
- ctrset_size = 48;
- break;
- case CPUMF_CTR_SET_MAX:
- break;
- }
-
- return ctrset_size;
-}
-
-static int __init cpum_cf_init(void)
-{
- int rc;
-
- if (!cpum_cf_avail())
- return -ENODEV;
-
- /* clear bit 15 of cr0 to unauthorize problem-state to
- * extract measurement counters */
- ctl_clear_bit(0, 48);
-
- /* register handler for measurement-alert interruptions */
- rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
- cpumf_measurement_alert);
- if (rc) {
- pr_err("Registering for CPU-measurement alerts "
- "failed with rc=%i\n", rc);
- return rc;
- }
-
- rc = cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE,
- "perf/s390/cf:online",
- cpum_cf_online_cpu, cpum_cf_offline_cpu);
- if (!rc)
- cpum_cf_initalized = true;
-
- return rc;
-}
-early_initcall(cpum_cf_init);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index ce886a03545a..79904a839fb9 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -22,6 +22,7 @@
#include <asm/irq.h>
#include <asm/debug.h>
#include <asm/timex.h>
+#include <asm-generic/io.h>
/* Minimum number of sample-data-block-tables:
* At least one table is required for the sampling buffer structure.
@@ -99,6 +100,57 @@ static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);
/* Debug feature */
static debug_info_t *sfdbg;
+/* Sampling control helper functions */
+static inline unsigned long freq_to_sample_rate(struct hws_qsi_info_block *qsi,
+ unsigned long freq)
+{
+ return (USEC_PER_SEC / freq) * qsi->cpu_speed;
+}
+
+static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
+ unsigned long rate)
+{
+ return USEC_PER_SEC * qsi->cpu_speed / rate;
+}
+
+/* Return TOD timestamp contained in an trailer entry */
+static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
+{
+ /* TOD in STCKE format */
+ if (te->header.t)
+ return *((unsigned long long *)&te->timestamp[1]);
+
+ /* TOD in STCK format */
+ return *((unsigned long long *)&te->timestamp[0]);
+}
+
+/* Return pointer to trailer entry of an sample data block */
+static inline struct hws_trailer_entry *trailer_entry_ptr(unsigned long v)
+{
+ void *ret;
+
+ ret = (void *)v;
+ ret += PAGE_SIZE;
+ ret -= sizeof(struct hws_trailer_entry);
+
+ return ret;
+}
+
+/*
+ * Return true if the entry in the sample data block table (sdbt)
+ * is a link to the next sdbt
+ */
+static inline int is_link_entry(unsigned long *s)
+{
+ return *s & 0x1UL ? 1 : 0;
+}
+
+/* Return pointer to the linked sdbt */
+static inline unsigned long *get_next_sdbt(unsigned long *s)
+{
+ return phys_to_virt(*s & ~0x1UL);
+}
+
/*
* sf_disable() - Switch off sampling facility
*/
@@ -150,7 +202,7 @@ static void free_sampling_buffer(struct sf_buffer *sfb)
} else {
/* Process SDB pointer */
if (*curr) {
- free_page(*curr);
+ free_page((unsigned long)phys_to_virt(*curr));
curr++;
}
}
@@ -170,11 +222,11 @@ static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
sdb = get_zeroed_page(gfp_flags);
if (!sdb)
return -ENOMEM;
- te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb);
+ te = trailer_entry_ptr(sdb);
te->header.a = 1;
/* Link SDB into the sample-data-block-table */
- *sdbt = sdb;
+ *sdbt = virt_to_phys((void *)sdb);
return 0;
}
@@ -233,7 +285,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
}
sfb->num_sdbt++;
/* Link current page to tail of chain */
- *tail = (unsigned long)(void *) new + 1;
+ *tail = virt_to_phys((void *)new) + 1;
tail_prev = tail;
tail = new;
}
@@ -263,7 +315,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
}
/* Link sampling buffer to its origin */
- *tail = (unsigned long) sfb->sdbt + 1;
+ *tail = virt_to_phys(sfb->sdbt) + 1;
sfb->tail = tail;
debug_sprintf_event(sfdbg, 4, "%s: new buffer"
@@ -301,7 +353,7 @@ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
* realloc_sampling_buffer() invocation.
*/
sfb->tail = sfb->sdbt;
- *sfb->tail = (unsigned long)(void *) sfb->sdbt + 1;
+ *sfb->tail = virt_to_phys((void *)sfb->sdbt) + 1;
/* Allocate requested number of sample-data-blocks */
rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
@@ -557,9 +609,6 @@ static void setup_pmc_cpu(void *flags)
if (err)
pr_err("Switching off the sampling facility failed "
"with rc %i\n", err);
- debug_sprintf_event(sfdbg, 5,
- "%s: initialized: cpuhw %p\n", __func__,
- cpusf);
break;
case PMC_RELEASE:
cpusf->flags &= ~PMU_F_RESERVED;
@@ -569,9 +618,6 @@ static void setup_pmc_cpu(void *flags)
"with rc %i\n", err);
} else
deallocate_buffers(cpusf);
- debug_sprintf_event(sfdbg, 5,
- "%s: released: cpuhw %p\n", __func__,
- cpusf);
break;
}
if (err)
@@ -672,7 +718,8 @@ static void cpumsf_output_event_pid(struct perf_event *event,
/* Protect callchain buffers, tasks */
rcu_read_lock();
- perf_prepare_sample(&header, data, event, regs);
+ perf_prepare_sample(data, event, regs);
+ perf_prepare_header(&header, data, event, regs);
if (perf_output_begin(&handle, data, event, header.size))
goto out;
@@ -1176,8 +1223,8 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
struct hws_trailer_entry *te;
struct hws_basic_entry *sample;
- te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
- sample = (struct hws_basic_entry *) *sdbt;
+ te = trailer_entry_ptr((unsigned long)sdbt);
+ sample = (struct hws_basic_entry *)sdbt;
while ((unsigned long *) sample < (unsigned long *) te) {
/* Check for an empty sample */
if (!sample->def || sample->LS)
@@ -1258,7 +1305,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
union hws_trailer_header old, prev, new;
struct hw_perf_event *hwc = &event->hw;
struct hws_trailer_entry *te;
- unsigned long *sdbt;
+ unsigned long *sdbt, sdb;
int done;
/*
@@ -1275,7 +1322,8 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
done = event_overflow = sampl_overflow = num_sdb = 0;
while (!done) {
/* Get the trailer entry of the sample-data-block */
- te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
+ sdb = (unsigned long)phys_to_virt(*sdbt);
+ te = trailer_entry_ptr(sdb);
/* Leave loop if no more work to do (block full indicator) */
if (!te->header.f) {
@@ -1293,16 +1341,17 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
sampl_overflow += te->header.overflow;
/* Timestamps are valid for full sample-data-blocks only */
- debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx "
+ debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx/%#lx "
"overflow %llu timestamp %#llx\n",
- __func__, (unsigned long)sdbt, te->header.overflow,
+ __func__, sdb, (unsigned long)sdbt,
+ te->header.overflow,
(te->header.f) ? trailer_timestamp(te) : 0ULL);
/* Collect all samples from a single sample-data-block and
* flag if an (perf) event overflow happened. If so, the PMU
* is stopped and remaining samples will be discarded.
*/
- hw_collect_samples(event, sdbt, &event_overflow);
+ hw_collect_samples(event, (unsigned long *)sdb, &event_overflow);
num_sdb++;
/* Reset trailer (using compare-double-and-swap) */
@@ -1360,10 +1409,26 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
OVERFLOW_REG(hwc), num_sdb);
}
-#define AUX_SDB_INDEX(aux, i) ((i) % aux->sfb.num_sdb)
-#define AUX_SDB_NUM(aux, start, end) (end >= start ? end - start + 1 : 0)
-#define AUX_SDB_NUM_ALERT(aux) AUX_SDB_NUM(aux, aux->head, aux->alert_mark)
-#define AUX_SDB_NUM_EMPTY(aux) AUX_SDB_NUM(aux, aux->head, aux->empty_mark)
+static inline unsigned long aux_sdb_index(struct aux_buffer *aux,
+ unsigned long i)
+{
+ return i % aux->sfb.num_sdb;
+}
+
+static inline unsigned long aux_sdb_num(unsigned long start, unsigned long end)
+{
+ return end >= start ? end - start + 1 : 0;
+}
+
+static inline unsigned long aux_sdb_num_alert(struct aux_buffer *aux)
+{
+ return aux_sdb_num(aux->head, aux->alert_mark);
+}
+
+static inline unsigned long aux_sdb_num_empty(struct aux_buffer *aux)
+{
+ return aux_sdb_num(aux->head, aux->empty_mark);
+}
/*
* Get trailer entry by index of SDB.
@@ -1373,9 +1438,9 @@ static struct hws_trailer_entry *aux_sdb_trailer(struct aux_buffer *aux,
{
unsigned long sdb;
- index = AUX_SDB_INDEX(aux, index);
+ index = aux_sdb_index(aux, index);
sdb = aux->sdb_index[index];
- return (struct hws_trailer_entry *)trailer_entry_ptr(sdb);
+ return trailer_entry_ptr(sdb);
}
/*
@@ -1397,7 +1462,7 @@ static void aux_output_end(struct perf_output_handle *handle)
if (!aux)
return;
- range_scan = AUX_SDB_NUM_ALERT(aux);
+ range_scan = aux_sdb_num_alert(aux);
for (i = 0, idx = aux->head; i < range_scan; i++, idx++) {
te = aux_sdb_trailer(aux, idx);
if (!te->header.f)
@@ -1427,9 +1492,7 @@ static int aux_output_begin(struct perf_output_handle *handle,
struct aux_buffer *aux,
struct cpu_hw_sf *cpuhw)
{
- unsigned long range;
- unsigned long i, range_scan, idx;
- unsigned long head, base, offset;
+ unsigned long range, i, range_scan, idx, head, base, offset;
struct hws_trailer_entry *te;
if (WARN_ON_ONCE(handle->head & ~PAGE_MASK))
@@ -1448,8 +1511,8 @@ static int aux_output_begin(struct perf_output_handle *handle,
"%s: range %ld head %ld alert %ld empty %ld\n",
__func__, range, aux->head, aux->alert_mark,
aux->empty_mark);
- if (range > AUX_SDB_NUM_EMPTY(aux)) {
- range_scan = range - AUX_SDB_NUM_EMPTY(aux);
+ if (range > aux_sdb_num_empty(aux)) {
+ range_scan = range - aux_sdb_num_empty(aux);
idx = aux->empty_mark + 1;
for (i = 0; i < range_scan; i++, idx++) {
te = aux_sdb_trailer(aux, idx);
@@ -1467,11 +1530,11 @@ static int aux_output_begin(struct perf_output_handle *handle,
te->header.a = 1;
/* Reset hardware buffer head */
- head = AUX_SDB_INDEX(aux, aux->head);
+ head = aux_sdb_index(aux, aux->head);
base = aux->sdbt_index[head / CPUM_SF_SDB_PER_TABLE];
offset = head % CPUM_SF_SDB_PER_TABLE;
- cpuhw->lsctl.tear = base + offset * sizeof(unsigned long);
- cpuhw->lsctl.dear = aux->sdb_index[head];
+ cpuhw->lsctl.tear = virt_to_phys((void *)base) + offset * sizeof(unsigned long);
+ cpuhw->lsctl.dear = virt_to_phys((void *)aux->sdb_index[head]);
debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld empty %ld "
"index %ld tear %#lx dear %#lx\n", __func__,
@@ -1549,7 +1612,7 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld "
"empty %ld\n", __func__, range, aux->head,
aux->alert_mark, aux->empty_mark);
- if (range <= AUX_SDB_NUM_EMPTY(aux))
+ if (range <= aux_sdb_num_empty(aux))
/*
* No need to scan. All SDBs in range are marked as empty.
* Just set alert indicator. Should check race with hardware
@@ -1570,7 +1633,7 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
* Start scanning from one SDB behind empty_mark. If the new alert
* indicator fall into this range, set it.
*/
- range_scan = range - AUX_SDB_NUM_EMPTY(aux);
+ range_scan = range - aux_sdb_num_empty(aux);
idx_old = idx = aux->empty_mark + 1;
for (i = 0; i < range_scan; i++, idx++) {
te = aux_sdb_trailer(aux, idx);
@@ -1617,7 +1680,7 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
return;
/* Inform user space new data arrived */
- size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
+ size = aux_sdb_num_alert(aux) << PAGE_SHIFT;
debug_sprintf_event(sfdbg, 6, "%s: #alert %ld\n", __func__,
size >> PAGE_SHIFT);
perf_aux_output_end(handle, size);
@@ -1659,7 +1722,7 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
"overflow %lld\n", __func__,
aux->head, range, overflow);
} else {
- size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
+ size = aux_sdb_num_alert(aux) << PAGE_SHIFT;
perf_aux_output_end(&cpuhw->handle, size);
debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
"already full, try another\n",
@@ -1701,7 +1764,7 @@ static void aux_sdb_init(unsigned long sdb)
{
struct hws_trailer_entry *te;
- te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb);
+ te = trailer_entry_ptr(sdb);
/* Save clock base */
te->clock_base = 1;
@@ -1781,18 +1844,18 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages,
goto no_sdbt;
aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)new;
/* Link current page to tail of chain */
- *tail = (unsigned long)(void *) new + 1;
+ *tail = virt_to_phys(new) + 1;
tail = new;
}
/* Tail is the entry in a SDBT */
- *tail = (unsigned long)pages[i];
+ *tail = virt_to_phys(pages[i]);
aux->sdb_index[i] = (unsigned long)pages[i];
aux_sdb_init((unsigned long)pages[i]);
}
sfb->num_sdb = nr_pages;
/* Link the last entry in the SDBT to the first SDBT */
- *tail = (unsigned long) sfb->sdbt + 1;
+ *tail = virt_to_phys(sfb->sdbt) + 1;
sfb->tail = tail;
/*
@@ -1932,7 +1995,7 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
cpuhw->lsctl.h = 1;
cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
if (!SAMPL_DIAG_MODE(&event->hw)) {
- cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
+ cpuhw->lsctl.tear = virt_to_phys(cpuhw->sfb.sdbt);
cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
TEAR_REG(&event->hw) = (unsigned long) cpuhw->sfb.sdbt;
}
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index 985e243a2ed8..a7b339c4fd7c 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -362,9 +362,7 @@ static int paicrypt_push_sample(void)
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw.frag.size = rawsize;
raw.frag.data = cpump->save;
- raw.size = raw.frag.size;
- data.raw = &raw;
- data.sample_flags |= PERF_SAMPLE_RAW;
+ perf_sample_save_raw_data(&data, &raw);
}
overflow = perf_event_overflow(event, &data, &regs);
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
index 1138f57baae3..fcea307d7529 100644
--- a/arch/s390/kernel/perf_pai_ext.c
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -16,8 +16,8 @@
#include <linux/init.h>
#include <linux/export.h>
#include <linux/io.h>
+#include <linux/perf_event.h>
-#include <asm/cpu_mcf.h>
#include <asm/ctl_reg.h>
#include <asm/pai.h>
#include <asm/debug.h>
@@ -451,9 +451,7 @@ static int paiext_push_sample(void)
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw.frag.size = rawsize;
raw.frag.data = cpump->save;
- raw.size = raw.frag.size;
- data.raw = &raw;
- data.sample_flags |= PERF_SAMPLE_RAW;
+ perf_sample_save_raw_data(&data, &raw);
}
overflow = perf_event_overflow(event, &data, &regs);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 3f5d2db0b854..67df64ef4839 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -147,8 +147,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
if (unlikely(args->fn)) {
/* kernel thread */
memset(&frame->childregs, 0, sizeof(struct pt_regs));
- frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
- PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+ frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO |
+ PSW_MASK_EXT | PSW_MASK_MCHECK;
frame->childregs.psw.addr =
(unsigned long)__ret_from_fork;
frame->childregs.gprs[9] = (unsigned long)args->fn;
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 53e0209229f8..cf9659e13f03 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -990,7 +990,7 @@ static int s390_vxrs_low_get(struct task_struct *target,
if (target == current)
save_fpu_regs();
for (i = 0; i < __NUM_VXRS_LOW; i++)
- vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
+ vxrs[i] = target->thread.fpu.vxrs[i].low;
return membuf_write(&to, vxrs, sizeof(vxrs));
}
@@ -1008,12 +1008,12 @@ static int s390_vxrs_low_set(struct task_struct *target,
save_fpu_regs();
for (i = 0; i < __NUM_VXRS_LOW; i++)
- vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
+ vxrs[i] = target->thread.fpu.vxrs[i].low;
rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
if (rc == 0)
for (i = 0; i < __NUM_VXRS_LOW; i++)
- *((__u64 *)(target->thread.fpu.vxrs + i) + 1) = vxrs[i];
+ target->thread.fpu.vxrs[i].low = vxrs[i];
return rc;
}
diff --git a/arch/s390/kernel/rethook.c b/arch/s390/kernel/rethook.c
new file mode 100644
index 000000000000..af10e6bdd34e
--- /dev/null
+++ b/arch/s390/kernel/rethook.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/rethook.h>
+#include <linux/kprobes.h>
+#include "rethook.h"
+
+void arch_rethook_prepare(struct rethook_node *rh, struct pt_regs *regs, bool mcount)
+{
+ rh->ret_addr = regs->gprs[14];
+ rh->frame = regs->gprs[15];
+
+ /* Replace the return addr with trampoline addr */
+ regs->gprs[14] = (unsigned long)&arch_rethook_trampoline;
+}
+NOKPROBE_SYMBOL(arch_rethook_prepare);
+
+void arch_rethook_fixup_return(struct pt_regs *regs,
+ unsigned long correct_ret_addr)
+{
+ /* Replace fake return address with real one. */
+ regs->gprs[14] = correct_ret_addr;
+}
+NOKPROBE_SYMBOL(arch_rethook_fixup_return);
+
+/*
+ * Called from arch_rethook_trampoline
+ */
+unsigned long arch_rethook_trampoline_callback(struct pt_regs *regs)
+{
+ return rethook_trampoline_handler(regs, regs->gprs[15]);
+}
+NOKPROBE_SYMBOL(arch_rethook_trampoline_callback);
+
+/* assembler function that handles the rethook must not be probed itself */
+NOKPROBE_SYMBOL(arch_rethook_trampoline);
diff --git a/arch/s390/kernel/rethook.h b/arch/s390/kernel/rethook.h
new file mode 100644
index 000000000000..32f069eed3f3
--- /dev/null
+++ b/arch/s390/kernel/rethook.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __S390_RETHOOK_H
+#define __S390_RETHOOK_H
+
+unsigned long arch_rethook_trampoline_callback(struct pt_regs *regs);
+
+#endif
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 696c9e007a36..8ec5cdf9dadc 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -149,6 +149,9 @@ int __bootdata(noexec_disabled);
unsigned long __bootdata(ident_map_size);
struct mem_detect_info __bootdata(mem_detect);
struct initrd_data __bootdata(initrd_data);
+unsigned long __bootdata(pgalloc_pos);
+unsigned long __bootdata(pgalloc_end);
+unsigned long __bootdata(pgalloc_low);
unsigned long __bootdata_preserved(__kaslr_offset);
unsigned long __bootdata(__amode31_base);
@@ -411,15 +414,10 @@ void __init arch_call_rest_init(void)
call_on_stack_noreturn(rest_init, stack);
}
-static void __init setup_lowcore_dat_off(void)
+static void __init setup_lowcore(void)
{
- unsigned long int_psw_mask = PSW_KERNEL_BITS;
- struct lowcore *abs_lc, *lc;
+ struct lowcore *lc, *abs_lc;
unsigned long mcck_stack;
- unsigned long flags;
-
- if (IS_ENABLED(CONFIG_KASAN))
- int_psw_mask |= PSW_MASK_DAT;
/*
* Setup lowcore for boot cpu
@@ -430,17 +428,17 @@ static void __init setup_lowcore_dat_off(void)
panic("%s: Failed to allocate %zu bytes align=%zx\n",
__func__, sizeof(*lc), sizeof(*lc));
- lc->restart_psw.mask = PSW_KERNEL_BITS;
- lc->restart_psw.addr = (unsigned long) restart_int_handler;
- lc->external_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
+ lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT;
+ lc->restart_psw.addr = __pa(restart_int_handler);
+ lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
lc->external_new_psw.addr = (unsigned long) ext_int_handler;
- lc->svc_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
+ lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
lc->svc_new_psw.addr = (unsigned long) system_call;
- lc->program_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
+ lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
- lc->mcck_new_psw.mask = int_psw_mask;
+ lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
- lc->io_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
+ lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
lc->io_new_psw.addr = (unsigned long) io_int_handler;
lc->clock_comparator = clock_comparator_max;
lc->nodat_stack = ((unsigned long) &init_thread_union)
@@ -477,15 +475,7 @@ static void __init setup_lowcore_dat_off(void)
lc->restart_fn = (unsigned long) do_restart;
lc->restart_data = 0;
lc->restart_source = -1U;
-
- abs_lc = get_abs_lowcore(&flags);
- abs_lc->restart_stack = lc->restart_stack;
- abs_lc->restart_fn = lc->restart_fn;
- abs_lc->restart_data = lc->restart_data;
- abs_lc->restart_source = lc->restart_source;
- abs_lc->restart_psw = lc->restart_psw;
- abs_lc->mcesad = lc->mcesad;
- put_abs_lowcore(abs_lc, flags);
+ __ctl_store(lc->cregs_save_area, 0, 15);
mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
if (!mcck_stack)
@@ -499,34 +489,25 @@ static void __init setup_lowcore_dat_off(void)
lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
lc->preempt_count = PREEMPT_DISABLED;
+ lc->kernel_asce = S390_lowcore.kernel_asce;
+ lc->user_asce = S390_lowcore.user_asce;
+
+ abs_lc = get_abs_lowcore();
+ abs_lc->restart_stack = lc->restart_stack;
+ abs_lc->restart_fn = lc->restart_fn;
+ abs_lc->restart_data = lc->restart_data;
+ abs_lc->restart_source = lc->restart_source;
+ abs_lc->restart_psw = lc->restart_psw;
+ abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
+ memcpy(abs_lc->cregs_save_area, lc->cregs_save_area, sizeof(abs_lc->cregs_save_area));
+ abs_lc->program_new_psw = lc->program_new_psw;
+ abs_lc->mcesad = lc->mcesad;
+ put_abs_lowcore(abs_lc);
set_prefix(__pa(lc));
lowcore_ptr[0] = lc;
-}
-
-static void __init setup_lowcore_dat_on(void)
-{
- struct lowcore *abs_lc;
- unsigned long flags;
- int i;
-
- __ctl_clear_bit(0, 28);
- S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
- S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT;
- S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT;
- S390_lowcore.mcck_new_psw.mask |= PSW_MASK_DAT;
- S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
- __ctl_set_bit(0, 28);
- __ctl_store(S390_lowcore.cregs_save_area, 0, 15);
- if (abs_lowcore_map(0, lowcore_ptr[0], true))
+ if (abs_lowcore_map(0, lowcore_ptr[0], false))
panic("Couldn't setup absolute lowcore");
- abs_lowcore_mapped = true;
- abs_lc = get_abs_lowcore(&flags);
- abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
- abs_lc->program_new_psw = S390_lowcore.program_new_psw;
- for (i = 0; i < 16; i++)
- abs_lc->cregs_save_area[i] = S390_lowcore.cregs_save_area[i];
- put_abs_lowcore(abs_lc, flags);
}
static struct resource code_resource = {
@@ -619,7 +600,6 @@ static void __init setup_resources(void)
static void __init setup_memory_end(void)
{
- memblock_remove(ident_map_size, PHYS_ADDR_MAX - ident_map_size);
max_pfn = max_low_pfn = PFN_DOWN(ident_map_size);
pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20);
}
@@ -651,6 +631,14 @@ static struct notifier_block kdump_mem_nb = {
#endif
/*
+ * Reserve page tables created by decompressor
+ */
+static void __init reserve_pgtables(void)
+{
+ memblock_reserve(pgalloc_pos, pgalloc_end - pgalloc_pos);
+}
+
+/*
* Reserve memory for kdump kernel to be loaded with kexec
*/
static void __init reserve_crashkernel(void)
@@ -784,10 +772,10 @@ static void __init memblock_add_mem_detect_info(void)
get_mem_info_source(), mem_detect.info_source);
/* keep memblock lists close to the kernel */
memblock_set_bottom_up(true);
- for_each_mem_detect_block(i, &start, &end) {
+ for_each_mem_detect_usable_block(i, &start, &end)
memblock_add(start, end - start);
+ for_each_mem_detect_block(i, &start, &end)
memblock_physmem_add(start, end - start);
- }
memblock_set_bottom_up(false);
memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
}
@@ -1005,6 +993,7 @@ void __init setup_arch(char **cmdline_p)
setup_control_program_code();
/* Do some memory reservations *before* memory is added to memblock */
+ reserve_pgtables();
reserve_kernel();
reserve_initrd();
reserve_certificate_list();
@@ -1039,7 +1028,7 @@ void __init setup_arch(char **cmdline_p)
#endif
setup_resources();
- setup_lowcore_dat_off();
+ setup_lowcore();
smp_fill_possible_mask();
cpu_detect_mhz_feature();
cpu_init();
@@ -1051,15 +1040,14 @@ void __init setup_arch(char **cmdline_p)
static_branch_enable(&cpu_has_bear);
/*
- * Create kernel page tables and switch to virtual addressing.
+ * Create kernel page tables.
*/
paging_init();
- memcpy_real_init();
+
/*
* After paging_init created the kernel page table, the new PSWs
* in lowcore can now run with DAT enabled.
*/
- setup_lowcore_dat_on();
#ifdef CONFIG_CRASH_DUMP
smp_save_dump_ipl_cpu();
#endif
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 38258f817048..d63557d3868c 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -184,7 +184,7 @@ static int save_sigregs_ext(struct pt_regs *regs,
/* Save vector registers to signal stack */
if (MACHINE_HAS_VX) {
for (i = 0; i < __NUM_VXRS_LOW; i++)
- vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
+ vxrs[i] = current->thread.fpu.vxrs[i].low;
if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
sizeof(sregs_ext->vxrs_low)) ||
__copy_to_user(&sregs_ext->vxrs_high,
@@ -210,7 +210,7 @@ static int restore_sigregs_ext(struct pt_regs *regs,
sizeof(sregs_ext->vxrs_high)))
return -EFAULT;
for (i = 0; i < __NUM_VXRS_LOW; i++)
- *((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i];
+ current->thread.fpu.vxrs[i].low = vxrs[i];
}
return 0;
}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 0031325ce4bc..23c427284773 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -323,11 +323,10 @@ static void pcpu_delegate(struct pcpu *pcpu,
{
struct lowcore *lc, *abs_lc;
unsigned int source_cpu;
- unsigned long flags;
lc = lowcore_ptr[pcpu - pcpu_devices];
source_cpu = stap();
- __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
+
if (pcpu->address == source_cpu) {
call_on_stack(2, stack, void, __pcpu_delegate,
pcpu_delegate_fn *, func, void *, data);
@@ -341,12 +340,12 @@ static void pcpu_delegate(struct pcpu *pcpu,
lc->restart_data = (unsigned long)data;
lc->restart_source = source_cpu;
} else {
- abs_lc = get_abs_lowcore(&flags);
+ abs_lc = get_abs_lowcore();
abs_lc->restart_stack = stack;
abs_lc->restart_fn = (unsigned long)func;
abs_lc->restart_data = (unsigned long)data;
abs_lc->restart_source = source_cpu;
- put_abs_lowcore(abs_lc, flags);
+ put_abs_lowcore(abs_lc);
}
__bpon();
asm volatile(
@@ -488,7 +487,7 @@ void smp_send_stop(void)
int cpu;
/* Disable all interrupts/machine checks */
- __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
+ __load_psw_mask(PSW_KERNEL_BITS);
trace_hardirqs_off();
debug_set_critical();
@@ -593,7 +592,6 @@ void smp_ctl_set_clear_bit(int cr, int bit, bool set)
{
struct ec_creg_mask_parms parms = { .cr = cr, };
struct lowcore *abs_lc;
- unsigned long flags;
u64 ctlreg;
if (set) {
@@ -604,11 +602,11 @@ void smp_ctl_set_clear_bit(int cr, int bit, bool set)
parms.andval = ~(1UL << bit);
}
spin_lock(&ctl_lock);
- abs_lc = get_abs_lowcore(&flags);
+ abs_lc = get_abs_lowcore();
ctlreg = abs_lc->cregs_save_area[cr];
ctlreg = (ctlreg & parms.andval) | parms.orval;
abs_lc->cregs_save_area[cr] = ctlreg;
- put_abs_lowcore(abs_lc, flags);
+ put_abs_lowcore(abs_lc);
spin_unlock(&ctl_lock);
on_each_cpu(smp_ctl_bit_callback, &parms, 1);
}
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 7ee455e8e3d5..0787010139f7 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -40,12 +40,12 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
if (!addr)
return -EINVAL;
-#ifdef CONFIG_KPROBES
+#ifdef CONFIG_RETHOOK
/*
- * Mark stacktraces with kretprobed functions on them
+ * Mark stacktraces with krethook functions on them
* as unreliable.
*/
- if (state.ip == (unsigned long)__kretprobe_trampoline)
+ if (state.ip == (unsigned long)arch_rethook_trampoline)
return -EINVAL;
#endif
diff --git a/arch/s390/kernel/text_amode31.S b/arch/s390/kernel/text_amode31.S
index 2c8b14cc5556..e0f01ce251f5 100644
--- a/arch/s390/kernel/text_amode31.S
+++ b/arch/s390/kernel/text_amode31.S
@@ -63,6 +63,19 @@ ENTRY(_diag210_amode31)
ENDPROC(_diag210_amode31)
/*
+ * int diag8c(struct diag8c *addr, struct ccw_dev_id *devno, size_t len)
+*/
+ENTRY(_diag8c_amode31)
+ llgf %r3,0(%r3)
+ sam31
+ diag %r2,%r4,0x8c
+.Ldiag8c_ex:
+ sam64
+ lgfr %r2,%r3
+ BR_EX_AMODE31_r14
+ EX_TABLE_AMODE31(.Ldiag8c_ex, .Ldiag8c_ex)
+ENDPROC(_diag8c_amode31)
+/*
* int _diag26c_amode31(void *req, void *resp, enum diag26c_sc subcode)
*/
ENTRY(_diag26c_amode31)
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index ff7bf4432229..bbaefd84f15e 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -59,11 +59,9 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
mmap_read_lock(mm);
for_each_vma(vmi, vma) {
- unsigned long size = vma->vm_end - vma->vm_start;
-
if (!vma_is_special_mapping(vma, &vvar_mapping))
continue;
- zap_page_range(vma, vma->vm_start, size);
+ zap_vma_pages(vma);
break;
}
mmap_read_unlock(mm);
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index cbf9c1b0beda..b653ba8d51e6 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -44,7 +44,6 @@ SECTIONS
HEAD_TEXT
TEXT_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
@@ -217,6 +216,9 @@ SECTIONS
QUAD(__rela_dyn_start) /* rela_dyn_start */
QUAD(__rela_dyn_end) /* rela_dyn_end */
QUAD(_eamode31 - _samode31) /* amode31_size */
+ QUAD(init_mm)
+ QUAD(swapper_pg_dir)
+ QUAD(invalid_pg_dir)
} :NONE
/* Debugging sections. */
@@ -228,5 +230,6 @@ SECTIONS
DISCARDS
/DISCARD/ : {
*(.eh_frame)
+ *(.interp)
}
}
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 9436f3053b88..e0a88dcaf5cb 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -7,13 +7,13 @@
*/
#include <linux/kernel_stat.h>
-#include <linux/sched/cputime.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/timex.h>
#include <linux/types.h>
#include <linux/time.h>
#include <asm/alternative.h>
+#include <asm/cputime.h>
#include <asm/vtimer.h>
#include <asm/vtime.h>
#include <asm/cpu_mf.h>
diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c
index 5a053b393d5c..7231bf97b93a 100644
--- a/arch/s390/lib/test_unwind.c
+++ b/arch/s390/lib/test_unwind.c
@@ -47,7 +47,7 @@ static void print_backtrace(char *bt)
static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
unsigned long sp)
{
- int frame_count, prev_is_func2, seen_func2_func1, seen_kretprobe_trampoline;
+ int frame_count, prev_is_func2, seen_func2_func1, seen_arch_rethook_trampoline;
const int max_frames = 128;
struct unwind_state state;
size_t bt_pos = 0;
@@ -63,7 +63,7 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
frame_count = 0;
prev_is_func2 = 0;
seen_func2_func1 = 0;
- seen_kretprobe_trampoline = 0;
+ seen_arch_rethook_trampoline = 0;
unwind_for_each_frame(&state, task, regs, sp) {
unsigned long addr = unwind_get_return_address(&state);
char sym[KSYM_SYMBOL_LEN];
@@ -89,8 +89,8 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
if (prev_is_func2 && str_has_prefix(sym, "unwindme_func1"))
seen_func2_func1 = 1;
prev_is_func2 = str_has_prefix(sym, "unwindme_func2");
- if (str_has_prefix(sym, "__kretprobe_trampoline+0x0/"))
- seen_kretprobe_trampoline = 1;
+ if (str_has_prefix(sym, "arch_rethook_trampoline+0x0/"))
+ seen_arch_rethook_trampoline = 1;
}
/* Check the results. */
@@ -106,8 +106,8 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
kunit_err(current_test, "Maximum number of frames exceeded\n");
ret = -EINVAL;
}
- if (seen_kretprobe_trampoline) {
- kunit_err(current_test, "__kretprobe_trampoline+0x0 in unwinding results\n");
+ if (seen_arch_rethook_trampoline) {
+ kunit_err(current_test, "arch_rethook_trampoline+0x0 in unwinding results\n");
ret = -EINVAL;
}
if (ret || force_bt)
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 9953819d7959..ba5f80268878 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -33,10 +33,6 @@ enum address_markers_idx {
#endif
IDENTITY_AFTER_NR,
IDENTITY_AFTER_END_NR,
-#ifdef CONFIG_KASAN
- KASAN_SHADOW_START_NR,
- KASAN_SHADOW_END_NR,
-#endif
VMEMMAP_NR,
VMEMMAP_END_NR,
VMALLOC_NR,
@@ -47,6 +43,10 @@ enum address_markers_idx {
ABS_LOWCORE_END_NR,
MEMCPY_REAL_NR,
MEMCPY_REAL_END_NR,
+#ifdef CONFIG_KASAN
+ KASAN_SHADOW_START_NR,
+ KASAN_SHADOW_END_NR,
+#endif
};
static struct addr_marker address_markers[] = {
@@ -62,10 +62,6 @@ static struct addr_marker address_markers[] = {
#endif
[IDENTITY_AFTER_NR] = {(unsigned long)_end, "Identity Mapping Start"},
[IDENTITY_AFTER_END_NR] = {0, "Identity Mapping End"},
-#ifdef CONFIG_KASAN
- [KASAN_SHADOW_START_NR] = {KASAN_SHADOW_START, "Kasan Shadow Start"},
- [KASAN_SHADOW_END_NR] = {KASAN_SHADOW_END, "Kasan Shadow End"},
-#endif
[VMEMMAP_NR] = {0, "vmemmap Area Start"},
[VMEMMAP_END_NR] = {0, "vmemmap Area End"},
[VMALLOC_NR] = {0, "vmalloc Area Start"},
@@ -76,6 +72,10 @@ static struct addr_marker address_markers[] = {
[ABS_LOWCORE_END_NR] = {0, "Lowcore Area End"},
[MEMCPY_REAL_NR] = {0, "Real Memory Copy Area Start"},
[MEMCPY_REAL_END_NR] = {0, "Real Memory Copy Area End"},
+#ifdef CONFIG_KASAN
+ [KASAN_SHADOW_START_NR] = {KASAN_SHADOW_START, "Kasan Shadow Start"},
+ [KASAN_SHADOW_END_NR] = {KASAN_SHADOW_END, "Kasan Shadow End"},
+#endif
{ -1, NULL }
};
diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c
index 1e4d2187541a..fe87291df95d 100644
--- a/arch/s390/mm/extable.c
+++ b/arch/s390/mm/extable.c
@@ -47,13 +47,16 @@ static bool ex_handler_ua_load_mem(const struct exception_table_entry *ex, struc
return true;
}
-static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex, struct pt_regs *regs)
+static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex,
+ bool pair, struct pt_regs *regs)
{
unsigned int reg_zero = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
regs->gprs[reg_err] = -EFAULT;
regs->gprs[reg_zero] = 0;
+ if (pair)
+ regs->gprs[reg_zero + 1] = 0;
regs->psw.addr = extable_fixup(ex);
return true;
}
@@ -75,7 +78,9 @@ bool fixup_exception(struct pt_regs *regs)
case EX_TYPE_UA_LOAD_MEM:
return ex_handler_ua_load_mem(ex, regs);
case EX_TYPE_UA_LOAD_REG:
- return ex_handler_ua_load_reg(ex, regs);
+ return ex_handler_ua_load_reg(ex, false, regs);
+ case EX_TYPE_UA_LOAD_REGPAIR:
+ return ex_handler_ua_load_reg(ex, true, regs);
}
panic("invalid exception table entry");
}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 9649d9382e0a..a2632fd97d00 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -46,11 +46,15 @@
#define __SUBCODE_MASK 0x0600
#define __PF_RES_FIELD 0x8000000000000000ULL
-#define VM_FAULT_BADCONTEXT ((__force vm_fault_t) 0x010000)
-#define VM_FAULT_BADMAP ((__force vm_fault_t) 0x020000)
-#define VM_FAULT_BADACCESS ((__force vm_fault_t) 0x040000)
-#define VM_FAULT_SIGNAL ((__force vm_fault_t) 0x080000)
-#define VM_FAULT_PFAULT ((__force vm_fault_t) 0x100000)
+/*
+ * Allocate private vm_fault_reason from top. Please make sure it won't
+ * collide with vm_fault_reason.
+ */
+#define VM_FAULT_BADCONTEXT ((__force vm_fault_t)0x80000000)
+#define VM_FAULT_BADMAP ((__force vm_fault_t)0x40000000)
+#define VM_FAULT_BADACCESS ((__force vm_fault_t)0x20000000)
+#define VM_FAULT_SIGNAL ((__force vm_fault_t)0x10000000)
+#define VM_FAULT_PFAULT ((__force vm_fault_t)0x8000000)
enum fault_type {
KERNEL_FAULT,
@@ -96,6 +100,20 @@ static enum fault_type get_fault_type(struct pt_regs *regs)
return KERNEL_FAULT;
}
+static unsigned long get_fault_address(struct pt_regs *regs)
+{
+ unsigned long trans_exc_code = regs->int_parm_long;
+
+ return trans_exc_code & __FAIL_ADDR_MASK;
+}
+
+static bool fault_is_write(struct pt_regs *regs)
+{
+ unsigned long trans_exc_code = regs->int_parm_long;
+
+ return (trans_exc_code & store_indication) == 0x400;
+}
+
static int bad_address(void *p)
{
unsigned long dummy;
@@ -228,15 +246,26 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
(void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK));
}
-static noinline void do_no_context(struct pt_regs *regs)
+static noinline void do_no_context(struct pt_regs *regs, vm_fault_t fault)
{
+ enum fault_type fault_type;
+ unsigned long address;
+ bool is_write;
+
if (fixup_exception(regs))
return;
+ fault_type = get_fault_type(regs);
+ if ((fault_type == KERNEL_FAULT) && (fault == VM_FAULT_BADCONTEXT)) {
+ address = get_fault_address(regs);
+ is_write = fault_is_write(regs);
+ if (kfence_handle_page_fault(address, is_write, regs))
+ return;
+ }
/*
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
*/
- if (get_fault_type(regs) == KERNEL_FAULT)
+ if (fault_type == KERNEL_FAULT)
printk(KERN_ALERT "Unable to handle kernel pointer dereference"
" in virtual kernel address space\n");
else
@@ -255,7 +284,7 @@ static noinline void do_low_address(struct pt_regs *regs)
die (regs, "Low-address protection");
}
- do_no_context(regs);
+ do_no_context(regs, VM_FAULT_BADACCESS);
}
static noinline void do_sigbus(struct pt_regs *regs)
@@ -286,28 +315,28 @@ static noinline void do_fault_error(struct pt_regs *regs, vm_fault_t fault)
fallthrough;
case VM_FAULT_BADCONTEXT:
case VM_FAULT_PFAULT:
- do_no_context(regs);
+ do_no_context(regs, fault);
break;
case VM_FAULT_SIGNAL:
if (!user_mode(regs))
- do_no_context(regs);
+ do_no_context(regs, fault);
break;
default: /* fault & VM_FAULT_ERROR */
if (fault & VM_FAULT_OOM) {
if (!user_mode(regs))
- do_no_context(regs);
+ do_no_context(regs, fault);
else
pagefault_out_of_memory();
} else if (fault & VM_FAULT_SIGSEGV) {
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs))
- do_no_context(regs);
+ do_no_context(regs, fault);
else
do_sigsegv(regs, SEGV_MAPERR);
} else if (fault & VM_FAULT_SIGBUS) {
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs))
- do_no_context(regs);
+ do_no_context(regs, fault);
else
do_sigbus(regs);
} else
@@ -334,7 +363,6 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
struct mm_struct *mm;
struct vm_area_struct *vma;
enum fault_type type;
- unsigned long trans_exc_code;
unsigned long address;
unsigned int flags;
vm_fault_t fault;
@@ -351,9 +379,8 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
return 0;
mm = tsk->mm;
- trans_exc_code = regs->int_parm_long;
- address = trans_exc_code & __FAIL_ADDR_MASK;
- is_write = (trans_exc_code & store_indication) == 0x400;
+ address = get_fault_address(regs);
+ is_write = fault_is_write(regs);
/*
* Verify that the fault happened in user space, that
@@ -364,8 +391,6 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
type = get_fault_type(regs);
switch (type) {
case KERNEL_FAULT:
- if (kfence_handle_page_fault(address, is_write, regs))
- return 0;
goto out;
case USER_FAULT:
case GMAP_FAULT:
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 74e1d873dce0..5a716bdcba05 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -722,7 +722,7 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
if (is_vm_hugetlb_page(vma))
continue;
size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
- zap_page_range(vma, vmaddr, size);
+ zap_page_range_single(vma, vmaddr, size, NULL);
}
mmap_read_unlock(gmap->mm);
}
@@ -2522,8 +2522,7 @@ static inline void thp_split_mm(struct mm_struct *mm)
VMA_ITERATOR(vmi, mm, 0);
for_each_vma(vmi, vma) {
- vma->vm_flags &= ~VM_HUGEPAGE;
- vma->vm_flags |= VM_NOHUGEPAGE;
+ vm_flags_mod(vma, VM_NOHUGEPAGE, VM_HUGEPAGE);
walk_page_vma(vma, &thp_split_walk_ops, NULL);
}
mm->def_flags |= VM_NOHUGEPAGE;
@@ -2588,14 +2587,18 @@ int gmap_mark_unmergeable(void)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
+ unsigned long vm_flags;
int ret;
VMA_ITERATOR(vmi, mm, 0);
for_each_vma(vmi, vma) {
+ /* Copy vm_flags to avoid partial modifications in ksm_madvise */
+ vm_flags = vma->vm_flags;
ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
- MADV_UNMERGEABLE, &vma->vm_flags);
+ MADV_UNMERGEABLE, &vm_flags);
if (ret)
return ret;
+ vm_flags_reset(vma, vm_flags);
}
mm->def_flags &= ~VM_MERGEABLE;
return 0;
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 30ab55f868f6..144447d5cb4c 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -52,9 +52,9 @@
#include <linux/virtio_config.h>
pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir");
-static pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir");
+pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir");
-unsigned long s390_invalid_asce;
+unsigned long __bootdata_preserved(s390_invalid_asce);
unsigned long empty_zero_page, zero_page_mask;
EXPORT_SYMBOL(empty_zero_page);
@@ -93,37 +93,8 @@ static void __init setup_zero_pages(void)
void __init paging_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
- unsigned long pgd_type, asce_bits;
- psw_t psw;
-
- s390_invalid_asce = (unsigned long)invalid_pg_dir;
- s390_invalid_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
- crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
- init_mm.pgd = swapper_pg_dir;
- if (VMALLOC_END > _REGION2_SIZE) {
- asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
- pgd_type = _REGION2_ENTRY_EMPTY;
- } else {
- asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
- pgd_type = _REGION3_ENTRY_EMPTY;
- }
- init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits;
- S390_lowcore.kernel_asce = init_mm.context.asce;
- S390_lowcore.user_asce = s390_invalid_asce;
- crst_table_init((unsigned long *) init_mm.pgd, pgd_type);
- vmem_map_init();
- kasan_copy_shadow_mapping();
-
- /* enable virtual mapping in kernel mode */
- __ctl_load(S390_lowcore.kernel_asce, 1, 1);
- __ctl_load(S390_lowcore.user_asce, 7, 7);
- __ctl_load(S390_lowcore.kernel_asce, 13, 13);
- psw.mask = __extract_psw();
- psw_bits(psw).dat = 1;
- psw_bits(psw).as = PSW_BITS_AS_HOME;
- __load_psw_mask(psw.mask);
- kasan_free_early_identity();
+ vmem_map_init();
sparse_init();
zone_dma_bits = 31;
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c
index 9f988d4582ed..ef89a5f26853 100644
--- a/arch/s390/mm/kasan_init.c
+++ b/arch/s390/mm/kasan_init.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/kasan.h>
#include <linux/sched/task.h>
-#include <linux/memblock.h>
#include <linux/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/kasan.h>
@@ -15,16 +14,11 @@
static unsigned long segment_pos __initdata;
static unsigned long segment_low __initdata;
-static unsigned long pgalloc_pos __initdata;
-static unsigned long pgalloc_low __initdata;
-static unsigned long pgalloc_freeable __initdata;
static bool has_edat __initdata;
static bool has_nx __initdata;
#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x))
-static pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
-
static void __init kasan_early_panic(const char *reason)
{
sclp_early_printk("The Linux kernel failed to boot with the KernelAddressSanitizer:\n");
@@ -39,7 +33,7 @@ static void * __init kasan_early_alloc_segment(void)
if (segment_pos < segment_low)
kasan_early_panic("out of memory during initialisation\n");
- return (void *)segment_pos;
+ return __va(segment_pos);
}
static void * __init kasan_early_alloc_pages(unsigned int order)
@@ -49,7 +43,7 @@ static void * __init kasan_early_alloc_pages(unsigned int order)
if (pgalloc_pos < pgalloc_low)
kasan_early_panic("out of memory during initialisation\n");
- return (void *)pgalloc_pos;
+ return __va(pgalloc_pos);
}
static void * __init kasan_early_crst_alloc(unsigned long val)
@@ -81,35 +75,37 @@ static pte_t * __init kasan_early_pte_alloc(void)
}
enum populate_mode {
- POPULATE_ONE2ONE,
POPULATE_MAP,
POPULATE_ZERO_SHADOW,
POPULATE_SHALLOW
};
+
+static inline pgprot_t pgprot_clear_bit(pgprot_t pgprot, unsigned long bit)
+{
+ return __pgprot(pgprot_val(pgprot) & ~bit);
+}
+
static void __init kasan_early_pgtable_populate(unsigned long address,
unsigned long end,
enum populate_mode mode)
{
- unsigned long pgt_prot_zero, pgt_prot, sgt_prot;
+ pgprot_t pgt_prot_zero = PAGE_KERNEL_RO;
+ pgprot_t pgt_prot = PAGE_KERNEL;
+ pgprot_t sgt_prot = SEGMENT_KERNEL;
pgd_t *pg_dir;
p4d_t *p4_dir;
pud_t *pu_dir;
pmd_t *pm_dir;
pte_t *pt_dir;
+ pmd_t pmd;
+ pte_t pte;
- pgt_prot_zero = pgprot_val(PAGE_KERNEL_RO);
- if (!has_nx)
- pgt_prot_zero &= ~_PAGE_NOEXEC;
- pgt_prot = pgprot_val(PAGE_KERNEL);
- sgt_prot = pgprot_val(SEGMENT_KERNEL);
- if (!has_nx || mode == POPULATE_ONE2ONE) {
- pgt_prot &= ~_PAGE_NOEXEC;
- sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
+ if (!has_nx) {
+ pgt_prot_zero = pgprot_clear_bit(pgt_prot_zero, _PAGE_NOEXEC);
+ pgt_prot = pgprot_clear_bit(pgt_prot, _PAGE_NOEXEC);
+ sgt_prot = pgprot_clear_bit(sgt_prot, _SEGMENT_ENTRY_NOEXEC);
}
- /*
- * The first 1MB of 1:1 mapping is mapped with 4KB pages
- */
while (address < end) {
pg_dir = pgd_offset_k(address);
if (pgd_none(*pg_dir)) {
@@ -166,16 +162,13 @@ static void __init kasan_early_pgtable_populate(unsigned long address,
pmd_populate(&init_mm, pm_dir, kasan_early_shadow_pte);
address = (address + PMD_SIZE) & PMD_MASK;
continue;
- } else if (has_edat && address) {
- void *page;
-
- if (mode == POPULATE_ONE2ONE) {
- page = (void *)address;
- } else {
- page = kasan_early_alloc_segment();
- memset(page, 0, _SEGMENT_SIZE);
- }
- set_pmd(pm_dir, __pmd(__pa(page) | sgt_prot));
+ } else if (has_edat) {
+ void *page = kasan_early_alloc_segment();
+
+ memset(page, 0, _SEGMENT_SIZE);
+ pmd = __pmd(__pa(page));
+ pmd = set_pmd_bit(pmd, sgt_prot);
+ set_pmd(pm_dir, pmd);
address = (address + PMD_SIZE) & PMD_MASK;
continue;
}
@@ -192,18 +185,18 @@ static void __init kasan_early_pgtable_populate(unsigned long address,
void *page;
switch (mode) {
- case POPULATE_ONE2ONE:
- page = (void *)address;
- set_pte(pt_dir, __pte(__pa(page) | pgt_prot));
- break;
case POPULATE_MAP:
page = kasan_early_alloc_pages(0);
memset(page, 0, PAGE_SIZE);
- set_pte(pt_dir, __pte(__pa(page) | pgt_prot));
+ pte = __pte(__pa(page));
+ pte = set_pte_bit(pte, pgt_prot);
+ set_pte(pt_dir, pte);
break;
case POPULATE_ZERO_SHADOW:
page = kasan_early_shadow_page;
- set_pte(pt_dir, __pte(__pa(page) | pgt_prot_zero));
+ pte = __pte(__pa(page));
+ pte = set_pte_bit(pte, pgt_prot_zero);
+ set_pte(pt_dir, pte);
break;
case POPULATE_SHALLOW:
/* should never happen */
@@ -214,29 +207,6 @@ static void __init kasan_early_pgtable_populate(unsigned long address,
}
}
-static void __init kasan_set_pgd(pgd_t *pgd, unsigned long asce_type)
-{
- unsigned long asce_bits;
-
- asce_bits = asce_type | _ASCE_TABLE_LENGTH;
- S390_lowcore.kernel_asce = (__pa(pgd) & PAGE_MASK) | asce_bits;
- S390_lowcore.user_asce = S390_lowcore.kernel_asce;
-
- __ctl_load(S390_lowcore.kernel_asce, 1, 1);
- __ctl_load(S390_lowcore.kernel_asce, 7, 7);
- __ctl_load(S390_lowcore.kernel_asce, 13, 13);
-}
-
-static void __init kasan_enable_dat(void)
-{
- psw_t psw;
-
- psw.mask = __extract_psw();
- psw_bits(psw).dat = 1;
- psw_bits(psw).as = PSW_BITS_AS_HOME;
- __load_psw_mask(psw.mask);
-}
-
static void __init kasan_early_detect_facilities(void)
{
if (test_facility(8)) {
@@ -251,153 +221,81 @@ static void __init kasan_early_detect_facilities(void)
void __init kasan_early_init(void)
{
- unsigned long shadow_alloc_size;
- unsigned long initrd_end;
- unsigned long memsize;
- unsigned long pgt_prot = pgprot_val(PAGE_KERNEL_RO);
- pte_t pte_z;
+ pte_t pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO));
pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY);
p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY);
+ unsigned long untracked_end = MODULES_VADDR;
+ unsigned long shadow_alloc_size;
+ unsigned long start, end;
+ int i;
kasan_early_detect_facilities();
if (!has_nx)
- pgt_prot &= ~_PAGE_NOEXEC;
- pte_z = __pte(__pa(kasan_early_shadow_page) | pgt_prot);
-
- memsize = get_mem_detect_end();
- if (!memsize)
- kasan_early_panic("cannot detect physical memory size\n");
- /*
- * Kasan currently supports standby memory but only if it follows
- * online memory (default allocation), i.e. no memory holes.
- * - memsize represents end of online memory
- * - ident_map_size represents online + standby and memory limits
- * accounted.
- * Kasan maps "memsize" right away.
- * [0, memsize] - as identity mapping
- * [__sha(0), __sha(memsize)] - shadow memory for identity mapping
- * The rest [memsize, ident_map_size] if memsize < ident_map_size
- * could be mapped/unmapped dynamically later during memory hotplug.
- */
- memsize = min(memsize, ident_map_size);
+ pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC));
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, P4D_SIZE));
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE));
- crst_table_init((unsigned long *)early_pg_dir, _REGION2_ENTRY_EMPTY);
/* init kasan zero shadow */
- crst_table_init((unsigned long *)kasan_early_shadow_p4d,
- p4d_val(p4d_z));
- crst_table_init((unsigned long *)kasan_early_shadow_pud,
- pud_val(pud_z));
- crst_table_init((unsigned long *)kasan_early_shadow_pmd,
- pmd_val(pmd_z));
+ crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z));
+ crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z));
+ crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE);
- shadow_alloc_size = memsize >> KASAN_SHADOW_SCALE_SHIFT;
- pgalloc_low = round_up((unsigned long)_end, _SEGMENT_SIZE);
- if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) {
- initrd_end =
- round_up(initrd_data.start + initrd_data.size, _SEGMENT_SIZE);
- pgalloc_low = max(pgalloc_low, initrd_end);
- }
-
- if (pgalloc_low + shadow_alloc_size > memsize)
- kasan_early_panic("out of memory during initialisation\n");
-
if (has_edat) {
- segment_pos = round_down(memsize, _SEGMENT_SIZE);
+ shadow_alloc_size = get_mem_detect_usable_total() >> KASAN_SHADOW_SCALE_SHIFT;
+ segment_pos = round_down(pgalloc_pos, _SEGMENT_SIZE);
segment_low = segment_pos - shadow_alloc_size;
+ segment_low = round_down(segment_low, _SEGMENT_SIZE);
pgalloc_pos = segment_low;
- } else {
- pgalloc_pos = memsize;
}
- init_mm.pgd = early_pg_dir;
/*
* Current memory layout:
- * +- 0 -------------+ +- shadow start -+
- * | 1:1 ram mapping | /| 1/8 ram |
- * | | / | |
- * +- end of ram ----+ / +----------------+
- * | ... gap ... | / | |
- * | |/ | kasan |
- * +- shadow start --+ | zero |
- * | 1/8 addr space | | page |
- * +- shadow end -+ | mapping |
- * | ... gap ... |\ | (untracked) |
- * +- vmalloc area -+ \ | |
- * | vmalloc_size | \ | |
- * +- modules vaddr -+ \ +----------------+
- * | 2Gb | \| unmapped | allocated per module
- * +-----------------+ +- shadow end ---+
+ * +- 0 -------------+ +- shadow start -+
+ * |1:1 ident mapping| /|1/8 of ident map|
+ * | | / | |
+ * +-end of ident map+ / +----------------+
+ * | ... gap ... | / | kasan |
+ * | | / | zero page |
+ * +- vmalloc area -+ / | mapping |
+ * | vmalloc_size | / | (untracked) |
+ * +- modules vaddr -+ / +----------------+
+ * | 2Gb |/ | unmapped | allocated per module
+ * +- shadow start -+ +----------------+
+ * | 1/8 addr space | | zero pg mapping| (untracked)
+ * +- shadow end ----+---------+- shadow end ---+
*
* Current memory layout (KASAN_VMALLOC):
- * +- 0 -------------+ +- shadow start -+
- * | 1:1 ram mapping | /| 1/8 ram |
- * | | / | |
- * +- end of ram ----+ / +----------------+
- * | ... gap ... | / | kasan |
- * | |/ | zero |
- * +- shadow start --+ | page |
- * | 1/8 addr space | | mapping |
- * +- shadow end -+ | (untracked) |
- * | ... gap ... |\ | |
- * +- vmalloc area -+ \ +- vmalloc area -+
- * | vmalloc_size | \ |shallow populate|
- * +- modules vaddr -+ \ +- modules area -+
- * | 2Gb | \|shallow populate|
- * +-----------------+ +- shadow end ---+
+ * +- 0 -------------+ +- shadow start -+
+ * |1:1 ident mapping| /|1/8 of ident map|
+ * | | / | |
+ * +-end of ident map+ / +----------------+
+ * | ... gap ... | / | kasan zero page| (untracked)
+ * | | / | mapping |
+ * +- vmalloc area -+ / +----------------+
+ * | vmalloc_size | / |shallow populate|
+ * +- modules vaddr -+ / +----------------+
+ * | 2Gb |/ |shallow populate|
+ * +- shadow start -+ +----------------+
+ * | 1/8 addr space | | zero pg mapping| (untracked)
+ * +- shadow end ----+---------+- shadow end ---+
*/
/* populate kasan shadow (for identity mapping and zero page mapping) */
- kasan_early_pgtable_populate(__sha(0), __sha(memsize), POPULATE_MAP);
+ for_each_mem_detect_usable_block(i, &start, &end)
+ kasan_early_pgtable_populate(__sha(start), __sha(end), POPULATE_MAP);
if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
+ untracked_end = VMALLOC_START;
/* shallowly populate kasan shadow for vmalloc and modules */
kasan_early_pgtable_populate(__sha(VMALLOC_START), __sha(MODULES_END),
POPULATE_SHALLOW);
}
/* populate kasan shadow for untracked memory */
- kasan_early_pgtable_populate(__sha(ident_map_size),
- IS_ENABLED(CONFIG_KASAN_VMALLOC) ?
- __sha(VMALLOC_START) :
- __sha(MODULES_VADDR),
+ kasan_early_pgtable_populate(__sha(ident_map_size), __sha(untracked_end),
POPULATE_ZERO_SHADOW);
kasan_early_pgtable_populate(__sha(MODULES_END), __sha(_REGION1_SIZE),
POPULATE_ZERO_SHADOW);
- /* memory allocated for identity mapping structs will be freed later */
- pgalloc_freeable = pgalloc_pos;
- /* populate identity mapping */
- kasan_early_pgtable_populate(0, memsize, POPULATE_ONE2ONE);
- kasan_set_pgd(early_pg_dir, _ASCE_TYPE_REGION2);
- kasan_enable_dat();
/* enable kasan */
init_task.kasan_depth = 0;
- memblock_reserve(pgalloc_pos, memsize - pgalloc_pos);
sclp_early_printk("KernelAddressSanitizer initialized\n");
}
-
-void __init kasan_copy_shadow_mapping(void)
-{
- /*
- * At this point we are still running on early pages setup early_pg_dir,
- * while swapper_pg_dir has just been initialized with identity mapping.
- * Carry over shadow memory region from early_pg_dir to swapper_pg_dir.
- */
-
- pgd_t *pg_dir_src;
- pgd_t *pg_dir_dst;
- p4d_t *p4_dir_src;
- p4d_t *p4_dir_dst;
-
- pg_dir_src = pgd_offset_raw(early_pg_dir, KASAN_SHADOW_START);
- pg_dir_dst = pgd_offset_raw(init_mm.pgd, KASAN_SHADOW_START);
- p4_dir_src = p4d_offset(pg_dir_src, KASAN_SHADOW_START);
- p4_dir_dst = p4d_offset(pg_dir_dst, KASAN_SHADOW_START);
- memcpy(p4_dir_dst, p4_dir_src,
- (KASAN_SHADOW_SIZE >> P4D_SHIFT) * sizeof(p4d_t));
-}
-
-void __init kasan_free_early_identity(void)
-{
- memblock_phys_free(pgalloc_pos, pgalloc_freeable - pgalloc_pos);
-}
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 4824d1cd33d8..d02a61620cfa 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -21,7 +21,7 @@
#include <asm/maccess.h>
unsigned long __bootdata_preserved(__memcpy_real_area);
-static __ro_after_init pte_t *memcpy_real_ptep;
+pte_t *__bootdata_preserved(memcpy_real_ptep);
static DEFINE_MUTEX(memcpy_real_mutex);
static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size)
@@ -68,28 +68,17 @@ notrace void *s390_kernel_write(void *dst, const void *src, size_t size)
long copied;
spin_lock_irqsave(&s390_kernel_write_lock, flags);
- if (!(flags & PSW_MASK_DAT)) {
- memcpy(dst, src, size);
- } else {
- while (size) {
- copied = s390_kernel_write_odd(tmp, src, size);
- tmp += copied;
- src += copied;
- size -= copied;
- }
+ while (size) {
+ copied = s390_kernel_write_odd(tmp, src, size);
+ tmp += copied;
+ src += copied;
+ size -= copied;
}
spin_unlock_irqrestore(&s390_kernel_write_lock, flags);
return dst;
}
-void __init memcpy_real_init(void)
-{
- memcpy_real_ptep = vmem_get_alloc_pte(__memcpy_real_area, true);
- if (!memcpy_real_ptep)
- panic("Couldn't setup memcpy real area");
-}
-
size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count)
{
size_t len, copied, res = 0;
@@ -162,7 +151,6 @@ void *xlate_dev_mem_ptr(phys_addr_t addr)
void *ptr = phys_to_virt(addr);
void *bounce = ptr;
struct lowcore *abs_lc;
- unsigned long flags;
unsigned long size;
int this_cpu, cpu;
@@ -178,10 +166,10 @@ void *xlate_dev_mem_ptr(phys_addr_t addr)
goto out;
size = PAGE_SIZE - (addr & ~PAGE_MASK);
if (addr < sizeof(struct lowcore)) {
- abs_lc = get_abs_lowcore(&flags);
+ abs_lc = get_abs_lowcore();
ptr = (void *)abs_lc + addr;
memcpy(bounce, ptr, size);
- put_abs_lowcore(abs_lc, flags);
+ put_abs_lowcore(abs_lc);
} else if (cpu == this_cpu) {
ptr = (void *)(addr - virt_to_phys(lowcore_ptr[cpu]));
memcpy(bounce, ptr, size);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 4909dcd762e8..6effb24de6d9 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -302,6 +302,31 @@ pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
}
EXPORT_SYMBOL(ptep_xchg_direct);
+/*
+ * Caller must check that new PTE only differs in _PAGE_PROTECT HW bit, so that
+ * RDP can be used instead of IPTE. See also comments at pte_allow_rdp().
+ */
+void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+ pte_t new)
+{
+ preempt_disable();
+ atomic_inc(&mm->context.flush_count);
+ if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+ __ptep_rdp(addr, ptep, 0, 0, 1);
+ else
+ __ptep_rdp(addr, ptep, 0, 0, 0);
+ /*
+ * PTE is not invalidated by RDP, only _PAGE_PROTECT is cleared. That
+ * means it is still valid and active, and must not be changed according
+ * to the architecture. But writing a new value that only differs in SW
+ * bits is allowed.
+ */
+ set_pte(ptep, new);
+ atomic_dec(&mm->context.flush_count);
+ preempt_enable();
+}
+EXPORT_SYMBOL(ptep_reset_dat_prot);
+
pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t new)
{
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index ee1a97078527..4113a7ffa149 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -11,6 +11,7 @@
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
+#include <linux/sort.h>
#include <asm/cacheflush.h>
#include <asm/nospec-branch.h>
#include <asm/pgalloc.h>
@@ -296,10 +297,7 @@ static void try_free_pmd_table(pud_t *pud, unsigned long start)
/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
if (end > VMALLOC_START)
return;
-#ifdef CONFIG_KASAN
- if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
- return;
-#endif
+
pmd = pmd_offset(pud, start);
for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
if (!pmd_none(*pmd))
@@ -371,10 +369,6 @@ static void try_free_pud_table(p4d_t *p4d, unsigned long start)
/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
if (end > VMALLOC_START)
return;
-#ifdef CONFIG_KASAN
- if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
- return;
-#endif
pud = pud_offset(p4d, start);
for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
@@ -425,10 +419,6 @@ static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
if (end > VMALLOC_START)
return;
-#ifdef CONFIG_KASAN
- if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
- return;
-#endif
p4d = p4d_offset(pgd, start);
for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
@@ -657,6 +647,23 @@ void vmem_unmap_4k_page(unsigned long addr)
mutex_unlock(&vmem_mutex);
}
+static int __init memblock_region_cmp(const void *a, const void *b)
+{
+ const struct memblock_region *r1 = a;
+ const struct memblock_region *r2 = b;
+
+ if (r1->base < r2->base)
+ return -1;
+ if (r1->base > r2->base)
+ return 1;
+ return 0;
+}
+
+static void __init memblock_region_swap(void *a, void *b, int size)
+{
+ swap(*(struct memblock_region *)a, *(struct memblock_region *)b);
+}
+
/*
* map whole physical memory to virtual memory (identity mapping)
* we reserve enough space in the vmalloc area for vmemmap to hotplug
@@ -664,11 +671,68 @@ void vmem_unmap_4k_page(unsigned long addr)
*/
void __init vmem_map_init(void)
{
+ struct memblock_region memory_rwx_regions[] = {
+ {
+ .base = 0,
+ .size = sizeof(struct lowcore),
+ .flags = MEMBLOCK_NONE,
+#ifdef CONFIG_NUMA
+ .nid = NUMA_NO_NODE,
+#endif
+ },
+ {
+ .base = __pa(_stext),
+ .size = _etext - _stext,
+ .flags = MEMBLOCK_NONE,
+#ifdef CONFIG_NUMA
+ .nid = NUMA_NO_NODE,
+#endif
+ },
+ {
+ .base = __pa(_sinittext),
+ .size = _einittext - _sinittext,
+ .flags = MEMBLOCK_NONE,
+#ifdef CONFIG_NUMA
+ .nid = NUMA_NO_NODE,
+#endif
+ },
+ {
+ .base = __stext_amode31,
+ .size = __etext_amode31 - __stext_amode31,
+ .flags = MEMBLOCK_NONE,
+#ifdef CONFIG_NUMA
+ .nid = NUMA_NO_NODE,
+#endif
+ },
+ };
+ struct memblock_type memory_rwx = {
+ .regions = memory_rwx_regions,
+ .cnt = ARRAY_SIZE(memory_rwx_regions),
+ .max = ARRAY_SIZE(memory_rwx_regions),
+ };
phys_addr_t base, end;
u64 i;
- for_each_mem_range(i, &base, &end)
- vmem_add_range(base, end - base);
+ /*
+ * Set RW+NX attribute on all memory, except regions enumerated with
+ * memory_rwx exclude type. These regions need different attributes,
+ * which are enforced afterwards.
+ *
+ * __for_each_mem_range() iterate and exclude types should be sorted.
+ * The relative location of _stext and _sinittext is hardcoded in the
+ * linker script. However a location of __stext_amode31 and the kernel
+ * image itself are chosen dynamically. Thus, sort the exclude type.
+ */
+ sort(&memory_rwx_regions,
+ ARRAY_SIZE(memory_rwx_regions), sizeof(memory_rwx_regions[0]),
+ memblock_region_cmp, memblock_region_swap);
+ __for_each_mem_range(i, &memblock.memory, &memory_rwx,
+ NUMA_NO_NODE, MEMBLOCK_NONE, &base, &end, NULL) {
+ __set_memory((unsigned long)__va(base),
+ (end - base) >> PAGE_SHIFT,
+ SET_MEMORY_RW | SET_MEMORY_NX);
+ }
+
__set_memory((unsigned long)_stext,
(unsigned long)(_etext - _stext) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
@@ -678,15 +742,14 @@ void __init vmem_map_init(void)
__set_memory((unsigned long)_sinittext,
(unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
- __set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT,
+ __set_memory(__stext_amode31,
+ (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
- /* lowcore requires 4k mapping for real addresses / prefixing */
- set_memory_4k(0, LC_PAGES);
-
/* lowcore must be executable for LPSWE */
- if (!static_key_enabled(&cpu_has_bear))
- set_memory_x(0, 1);
+ if (static_key_enabled(&cpu_has_bear))
+ set_memory_nx(0, 1);
+ set_memory_nx(PAGE_SIZE, 1);
pr_info("Write protected kernel read-only data: %luk\n",
(unsigned long)(__end_rodata - _stext) >> 10);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index af35052d06ed..d0846ba818ee 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -30,6 +30,7 @@
#include <asm/facility.h>
#include <asm/nospec-branch.h>
#include <asm/set_memory.h>
+#include <asm/text-patching.h>
#include "bpf_jit.h"
struct bpf_jit {
@@ -50,12 +51,13 @@ struct bpf_jit {
int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */
int tail_call_start; /* Tail call start offset */
int excnt; /* Number of exception table entries */
+ int prologue_plt_ret; /* Return address for prologue hotpatch PLT */
+ int prologue_plt; /* Start of prologue hotpatch PLT */
};
#define SEEN_MEM BIT(0) /* use mem[] for temporary storage */
#define SEEN_LITERAL BIT(1) /* code uses literals */
#define SEEN_FUNC BIT(2) /* calls C functions */
-#define SEEN_TAIL_CALL BIT(3) /* code uses tail calls */
#define SEEN_STACK (SEEN_FUNC | SEEN_MEM)
/*
@@ -68,6 +70,10 @@ struct bpf_jit {
#define REG_0 REG_W0 /* Register 0 */
#define REG_1 REG_W1 /* Register 1 */
#define REG_2 BPF_REG_1 /* Register 2 */
+#define REG_3 BPF_REG_2 /* Register 3 */
+#define REG_4 BPF_REG_3 /* Register 4 */
+#define REG_7 BPF_REG_6 /* Register 7 */
+#define REG_8 BPF_REG_7 /* Register 8 */
#define REG_14 BPF_REG_0 /* Register 14 */
/*
@@ -507,20 +513,58 @@ static void bpf_skip(struct bpf_jit *jit, int size)
}
/*
+ * PLT for hotpatchable calls. The calling convention is the same as for the
+ * ftrace hotpatch trampolines: %r0 is return address, %r1 is clobbered.
+ */
+extern const char bpf_plt[];
+extern const char bpf_plt_ret[];
+extern const char bpf_plt_target[];
+extern const char bpf_plt_end[];
+#define BPF_PLT_SIZE 32
+asm(
+ ".pushsection .rodata\n"
+ " .align 8\n"
+ "bpf_plt:\n"
+ " lgrl %r0,bpf_plt_ret\n"
+ " lgrl %r1,bpf_plt_target\n"
+ " br %r1\n"
+ " .align 8\n"
+ "bpf_plt_ret: .quad 0\n"
+ "bpf_plt_target: .quad 0\n"
+ "bpf_plt_end:\n"
+ " .popsection\n"
+);
+
+static void bpf_jit_plt(void *plt, void *ret, void *target)
+{
+ memcpy(plt, bpf_plt, BPF_PLT_SIZE);
+ *(void **)((char *)plt + (bpf_plt_ret - bpf_plt)) = ret;
+ *(void **)((char *)plt + (bpf_plt_target - bpf_plt)) = target;
+}
+
+/*
* Emit function prologue
*
* Save registers and create stack frame if necessary.
- * See stack frame layout desription in "bpf_jit.h"!
+ * See stack frame layout description in "bpf_jit.h"!
*/
-static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
+static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
+ u32 stack_depth)
{
- if (jit->seen & SEEN_TAIL_CALL) {
+ /* No-op for hotpatching */
+ /* brcl 0,prologue_plt */
+ EMIT6_PCREL_RILC(0xc0040000, 0, jit->prologue_plt);
+ jit->prologue_plt_ret = jit->prg;
+
+ if (fp->aux->func_idx == 0) {
+ /* Initialize the tail call counter in the main program. */
/* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
_EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
} else {
/*
- * There are no tail calls. Insert nops in order to have
- * tail_call_start at a predictable offset.
+ * Skip the tail call counter initialization in subprograms.
+ * Insert nops in order to have tail_call_start at a
+ * predictable offset.
*/
bpf_skip(jit, 6);
}
@@ -558,6 +602,43 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
}
/*
+ * Emit an expoline for a jump that follows
+ */
+static void emit_expoline(struct bpf_jit *jit)
+{
+ /* exrl %r0,.+10 */
+ EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
+ /* j . */
+ EMIT4_PCREL(0xa7f40000, 0);
+}
+
+/*
+ * Emit __s390_indirect_jump_r1 thunk if necessary
+ */
+static void emit_r1_thunk(struct bpf_jit *jit)
+{
+ if (nospec_uses_trampoline()) {
+ jit->r1_thunk_ip = jit->prg;
+ emit_expoline(jit);
+ /* br %r1 */
+ _EMIT2(0x07f1);
+ }
+}
+
+/*
+ * Call r1 either directly or via __s390_indirect_jump_r1 thunk
+ */
+static void call_r1(struct bpf_jit *jit)
+{
+ if (nospec_uses_trampoline())
+ /* brasl %r14,__s390_indirect_jump_r1 */
+ EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
+ else
+ /* basr %r14,%r1 */
+ EMIT2(0x0d00, REG_14, REG_1);
+}
+
+/*
* Function epilogue
*/
static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
@@ -570,25 +651,20 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
if (nospec_uses_trampoline()) {
jit->r14_thunk_ip = jit->prg;
/* Generate __s390_indirect_jump_r14 thunk */
- /* exrl %r0,.+10 */
- EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
- /* j . */
- EMIT4_PCREL(0xa7f40000, 0);
+ emit_expoline(jit);
}
/* br %r14 */
_EMIT2(0x07fe);
- if ((nospec_uses_trampoline()) &&
- (is_first_pass(jit) || (jit->seen & SEEN_FUNC))) {
- jit->r1_thunk_ip = jit->prg;
- /* Generate __s390_indirect_jump_r1 thunk */
- /* exrl %r0,.+10 */
- EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
- /* j . */
- EMIT4_PCREL(0xa7f40000, 0);
- /* br %r1 */
- _EMIT2(0x07f1);
- }
+ if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
+ emit_r1_thunk(jit);
+
+ jit->prg = ALIGN(jit->prg, 8);
+ jit->prologue_plt = jit->prg;
+ if (jit->prg_buf)
+ bpf_jit_plt(jit->prg_buf + jit->prg,
+ jit->prg_buf + jit->prologue_plt_ret, NULL);
+ jit->prg += BPF_PLT_SIZE;
}
static int get_probe_mem_regno(const u8 *insn)
@@ -663,6 +739,34 @@ static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp,
}
/*
+ * Sign-extend the register if necessary
+ */
+static int sign_extend(struct bpf_jit *jit, int r, u8 size, u8 flags)
+{
+ if (!(flags & BTF_FMODEL_SIGNED_ARG))
+ return 0;
+
+ switch (size) {
+ case 1:
+ /* lgbr %r,%r */
+ EMIT4(0xb9060000, r, r);
+ return 0;
+ case 2:
+ /* lghr %r,%r */
+ EMIT4(0xb9070000, r, r);
+ return 0;
+ case 4:
+ /* lgfr %r,%r */
+ EMIT4(0xb9140000, r, r);
+ return 0;
+ case 8:
+ return 0;
+ default:
+ return -1;
+ }
+}
+
+/*
* Compile one eBPF instruction into s390x code
*
* NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of
@@ -1297,9 +1401,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
*/
case BPF_JMP | BPF_CALL:
{
- u64 func;
+ const struct btf_func_model *m;
bool func_addr_fixed;
- int ret;
+ int j, ret;
+ u64 func;
ret = bpf_jit_get_func_addr(fp, insn, extra_pass,
&func, &func_addr_fixed);
@@ -1308,15 +1413,38 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
REG_SET_SEEN(BPF_REG_5);
jit->seen |= SEEN_FUNC;
+ /*
+ * Copy the tail call counter to where the callee expects it.
+ *
+ * Note 1: The callee can increment the tail call counter, but
+ * we do not load it back, since the x86 JIT does not do this
+ * either.
+ *
+ * Note 2: We assume that the verifier does not let us call the
+ * main program, which clears the tail call counter on entry.
+ */
+ /* mvc STK_OFF_TCCNT(4,%r15),N(%r15) */
+ _EMIT6(0xd203f000 | STK_OFF_TCCNT,
+ 0xf000 | (STK_OFF_TCCNT + STK_OFF + stack_depth));
+
+ /* Sign-extend the kfunc arguments. */
+ if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+ m = bpf_jit_find_kfunc_model(fp, insn);
+ if (!m)
+ return -1;
+
+ for (j = 0; j < m->nr_args; j++) {
+ if (sign_extend(jit, BPF_REG_1 + j,
+ m->arg_size[j],
+ m->arg_flags[j]))
+ return -1;
+ }
+ }
+
/* lgrl %w1,func */
EMIT6_PCREL_RILB(0xc4080000, REG_W1, _EMIT_CONST_U64(func));
- if (nospec_uses_trampoline()) {
- /* brasl %r14,__s390_indirect_jump_r1 */
- EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
- } else {
- /* basr %r14,%w1 */
- EMIT2(0x0d00, REG_14, REG_W1);
- }
+ /* %r1() */
+ call_r1(jit);
/* lgr %b0,%r2: load return value into %b0 */
EMIT4(0xb9040000, BPF_REG_0, REG_2);
break;
@@ -1329,10 +1457,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
* B1: pointer to ctx
* B2: pointer to bpf_array
* B3: index in bpf_array
- */
- jit->seen |= SEEN_TAIL_CALL;
-
- /*
+ *
* if (index >= array->map.max_entries)
* goto out;
*/
@@ -1393,8 +1518,16 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
/* lg %r1,bpf_func(%r1) */
EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_1, REG_0,
offsetof(struct bpf_prog, bpf_func));
- /* bc 0xf,tail_call_start(%r1) */
- _EMIT4(0x47f01000 + jit->tail_call_start);
+ if (nospec_uses_trampoline()) {
+ jit->seen |= SEEN_FUNC;
+ /* aghi %r1,tail_call_start */
+ EMIT4_IMM(0xa70b0000, REG_1, jit->tail_call_start);
+ /* brcl 0xf,__s390_indirect_jump_r1 */
+ EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->r1_thunk_ip);
+ } else {
+ /* bc 0xf,tail_call_start(%r1) */
+ _EMIT4(0x47f01000 + jit->tail_call_start);
+ }
/* out: */
if (jit->prg_buf) {
*(u16 *)(jit->prg_buf + patch_1_clrj + 2) =
@@ -1688,7 +1821,7 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
jit->prg = 0;
jit->excnt = 0;
- bpf_jit_prologue(jit, stack_depth);
+ bpf_jit_prologue(jit, fp, stack_depth);
if (bpf_set_addr(jit, 0) < 0)
return -1;
for (i = 0; i < fp->len; i += insn_count) {
@@ -1768,6 +1901,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
struct bpf_jit jit;
int pass;
+ if (WARN_ON_ONCE(bpf_plt_end - bpf_plt != BPF_PLT_SIZE))
+ return orig_fp;
+
if (!fp->jit_requested)
return orig_fp;
@@ -1859,3 +1995,508 @@ out:
tmp : orig_fp);
return fp;
}
+
+bool bpf_jit_supports_kfunc_call(void)
+{
+ return true;
+}
+
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+ void *old_addr, void *new_addr)
+{
+ struct {
+ u16 opc;
+ s32 disp;
+ } __packed insn;
+ char expected_plt[BPF_PLT_SIZE];
+ char current_plt[BPF_PLT_SIZE];
+ char *plt;
+ int err;
+
+ /* Verify the branch to be patched. */
+ err = copy_from_kernel_nofault(&insn, ip, sizeof(insn));
+ if (err < 0)
+ return err;
+ if (insn.opc != (0xc004 | (old_addr ? 0xf0 : 0)))
+ return -EINVAL;
+
+ if (t == BPF_MOD_JUMP &&
+ insn.disp == ((char *)new_addr - (char *)ip) >> 1) {
+ /*
+ * The branch already points to the destination,
+ * there is no PLT.
+ */
+ } else {
+ /* Verify the PLT. */
+ plt = (char *)ip + (insn.disp << 1);
+ err = copy_from_kernel_nofault(current_plt, plt, BPF_PLT_SIZE);
+ if (err < 0)
+ return err;
+ bpf_jit_plt(expected_plt, (char *)ip + 6, old_addr);
+ if (memcmp(current_plt, expected_plt, BPF_PLT_SIZE))
+ return -EINVAL;
+ /* Adjust the call address. */
+ s390_kernel_write(plt + (bpf_plt_target - bpf_plt),
+ &new_addr, sizeof(void *));
+ }
+
+ /* Adjust the mask of the branch. */
+ insn.opc = 0xc004 | (new_addr ? 0xf0 : 0);
+ s390_kernel_write((char *)ip + 1, (char *)&insn.opc + 1, 1);
+
+ /* Make the new code visible to the other CPUs. */
+ text_poke_sync_lock();
+
+ return 0;
+}
+
+struct bpf_tramp_jit {
+ struct bpf_jit common;
+ int orig_stack_args_off;/* Offset of arguments placed on stack by the
+ * func_addr's original caller
+ */
+ int stack_size; /* Trampoline stack size */
+ int stack_args_off; /* Offset of stack arguments for calling
+ * func_addr, has to be at the top
+ */
+ int reg_args_off; /* Offset of register arguments for calling
+ * func_addr
+ */
+ int ip_off; /* For bpf_get_func_ip(), has to be at
+ * (ctx - 16)
+ */
+ int arg_cnt_off; /* For bpf_get_func_arg_cnt(), has to be at
+ * (ctx - 8)
+ */
+ int bpf_args_off; /* Offset of BPF_PROG context, which consists
+ * of BPF arguments followed by return value
+ */
+ int retval_off; /* Offset of return value (see above) */
+ int r7_r8_off; /* Offset of saved %r7 and %r8, which are used
+ * for __bpf_prog_enter() return value and
+ * func_addr respectively
+ */
+ int r14_off; /* Offset of saved %r14 */
+ int run_ctx_off; /* Offset of struct bpf_tramp_run_ctx */
+ int do_fexit; /* do_fexit: label */
+};
+
+static void load_imm64(struct bpf_jit *jit, int dst_reg, u64 val)
+{
+ /* llihf %dst_reg,val_hi */
+ EMIT6_IMM(0xc00e0000, dst_reg, (val >> 32));
+ /* oilf %rdst_reg,val_lo */
+ EMIT6_IMM(0xc00d0000, dst_reg, val);
+}
+
+static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
+ const struct btf_func_model *m,
+ struct bpf_tramp_link *tlink, bool save_ret)
+{
+ struct bpf_jit *jit = &tjit->common;
+ int cookie_off = tjit->run_ctx_off +
+ offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
+ struct bpf_prog *p = tlink->link.prog;
+ int patch;
+
+ /*
+ * run_ctx.cookie = tlink->cookie;
+ */
+
+ /* %r0 = tlink->cookie */
+ load_imm64(jit, REG_W0, tlink->cookie);
+ /* stg %r0,cookie_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, REG_0, REG_15, cookie_off);
+
+ /*
+ * if ((start = __bpf_prog_enter(p, &run_ctx)) == 0)
+ * goto skip;
+ */
+
+ /* %r1 = __bpf_prog_enter */
+ load_imm64(jit, REG_1, (u64)bpf_trampoline_enter(p));
+ /* %r2 = p */
+ load_imm64(jit, REG_2, (u64)p);
+ /* la %r3,run_ctx_off(%r15) */
+ EMIT4_DISP(0x41000000, REG_3, REG_15, tjit->run_ctx_off);
+ /* %r1() */
+ call_r1(jit);
+ /* ltgr %r7,%r2 */
+ EMIT4(0xb9020000, REG_7, REG_2);
+ /* brcl 8,skip */
+ patch = jit->prg;
+ EMIT6_PCREL_RILC(0xc0040000, 8, 0);
+
+ /*
+ * retval = bpf_func(args, p->insnsi);
+ */
+
+ /* %r1 = p->bpf_func */
+ load_imm64(jit, REG_1, (u64)p->bpf_func);
+ /* la %r2,bpf_args_off(%r15) */
+ EMIT4_DISP(0x41000000, REG_2, REG_15, tjit->bpf_args_off);
+ /* %r3 = p->insnsi */
+ if (!p->jited)
+ load_imm64(jit, REG_3, (u64)p->insnsi);
+ /* %r1() */
+ call_r1(jit);
+ /* stg %r2,retval_off(%r15) */
+ if (save_ret) {
+ if (sign_extend(jit, REG_2, m->ret_size, m->ret_flags))
+ return -1;
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
+ tjit->retval_off);
+ }
+
+ /* skip: */
+ if (jit->prg_buf)
+ *(u32 *)&jit->prg_buf[patch + 2] = (jit->prg - patch) >> 1;
+
+ /*
+ * __bpf_prog_exit(p, start, &run_ctx);
+ */
+
+ /* %r1 = __bpf_prog_exit */
+ load_imm64(jit, REG_1, (u64)bpf_trampoline_exit(p));
+ /* %r2 = p */
+ load_imm64(jit, REG_2, (u64)p);
+ /* lgr %r3,%r7 */
+ EMIT4(0xb9040000, REG_3, REG_7);
+ /* la %r4,run_ctx_off(%r15) */
+ EMIT4_DISP(0x41000000, REG_4, REG_15, tjit->run_ctx_off);
+ /* %r1() */
+ call_r1(jit);
+
+ return 0;
+}
+
+static int alloc_stack(struct bpf_tramp_jit *tjit, size_t size)
+{
+ int stack_offset = tjit->stack_size;
+
+ tjit->stack_size += size;
+ return stack_offset;
+}
+
+/* ABI uses %r2 - %r6 for parameter passing. */
+#define MAX_NR_REG_ARGS 5
+
+/* The "L" field of the "mvc" instruction is 8 bits. */
+#define MAX_MVC_SIZE 256
+#define MAX_NR_STACK_ARGS (MAX_MVC_SIZE / sizeof(u64))
+
+/* -mfentry generates a 6-byte nop on s390x. */
+#define S390X_PATCH_SIZE 6
+
+static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
+ struct bpf_tramp_jit *tjit,
+ const struct btf_func_model *m,
+ u32 flags,
+ struct bpf_tramp_links *tlinks,
+ void *func_addr)
+{
+ struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
+ struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
+ int nr_bpf_args, nr_reg_args, nr_stack_args;
+ struct bpf_jit *jit = &tjit->common;
+ int arg, bpf_arg_off;
+ int i, j;
+
+ /* Support as many stack arguments as "mvc" instruction can handle. */
+ nr_reg_args = min_t(int, m->nr_args, MAX_NR_REG_ARGS);
+ nr_stack_args = m->nr_args - nr_reg_args;
+ if (nr_stack_args > MAX_NR_STACK_ARGS)
+ return -ENOTSUPP;
+
+ /* Return to %r14, since func_addr and %r0 are not available. */
+ if (!func_addr && !(flags & BPF_TRAMP_F_ORIG_STACK))
+ flags |= BPF_TRAMP_F_SKIP_FRAME;
+
+ /*
+ * Compute how many arguments we need to pass to BPF programs.
+ * BPF ABI mirrors that of x86_64: arguments that are 16 bytes or
+ * smaller are packed into 1 or 2 registers; larger arguments are
+ * passed via pointers.
+ * In s390x ABI, arguments that are 8 bytes or smaller are packed into
+ * a register; larger arguments are passed via pointers.
+ * We need to deal with this difference.
+ */
+ nr_bpf_args = 0;
+ for (i = 0; i < m->nr_args; i++) {
+ if (m->arg_size[i] <= 8)
+ nr_bpf_args += 1;
+ else if (m->arg_size[i] <= 16)
+ nr_bpf_args += 2;
+ else
+ return -ENOTSUPP;
+ }
+
+ /*
+ * Calculate the stack layout.
+ */
+
+ /* Reserve STACK_FRAME_OVERHEAD bytes for the callees. */
+ tjit->stack_size = STACK_FRAME_OVERHEAD;
+ tjit->stack_args_off = alloc_stack(tjit, nr_stack_args * sizeof(u64));
+ tjit->reg_args_off = alloc_stack(tjit, nr_reg_args * sizeof(u64));
+ tjit->ip_off = alloc_stack(tjit, sizeof(u64));
+ tjit->arg_cnt_off = alloc_stack(tjit, sizeof(u64));
+ tjit->bpf_args_off = alloc_stack(tjit, nr_bpf_args * sizeof(u64));
+ tjit->retval_off = alloc_stack(tjit, sizeof(u64));
+ tjit->r7_r8_off = alloc_stack(tjit, 2 * sizeof(u64));
+ tjit->r14_off = alloc_stack(tjit, sizeof(u64));
+ tjit->run_ctx_off = alloc_stack(tjit,
+ sizeof(struct bpf_tramp_run_ctx));
+ /* The caller has already reserved STACK_FRAME_OVERHEAD bytes. */
+ tjit->stack_size -= STACK_FRAME_OVERHEAD;
+ tjit->orig_stack_args_off = tjit->stack_size + STACK_FRAME_OVERHEAD;
+
+ /* aghi %r15,-stack_size */
+ EMIT4_IMM(0xa70b0000, REG_15, -tjit->stack_size);
+ /* stmg %r2,%rN,fwd_reg_args_off(%r15) */
+ if (nr_reg_args)
+ EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2,
+ REG_2 + (nr_reg_args - 1), REG_15,
+ tjit->reg_args_off);
+ for (i = 0, j = 0; i < m->nr_args; i++) {
+ if (i < MAX_NR_REG_ARGS)
+ arg = REG_2 + i;
+ else
+ arg = tjit->orig_stack_args_off +
+ (i - MAX_NR_REG_ARGS) * sizeof(u64);
+ bpf_arg_off = tjit->bpf_args_off + j * sizeof(u64);
+ if (m->arg_size[i] <= 8) {
+ if (i < MAX_NR_REG_ARGS)
+ /* stg %arg,bpf_arg_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, arg,
+ REG_0, REG_15, bpf_arg_off);
+ else
+ /* mvc bpf_arg_off(8,%r15),arg(%r15) */
+ _EMIT6(0xd207f000 | bpf_arg_off,
+ 0xf000 | arg);
+ j += 1;
+ } else {
+ if (i < MAX_NR_REG_ARGS) {
+ /* mvc bpf_arg_off(16,%r15),0(%arg) */
+ _EMIT6(0xd20ff000 | bpf_arg_off,
+ reg2hex[arg] << 12);
+ } else {
+ /* lg %r1,arg(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_0,
+ REG_15, arg);
+ /* mvc bpf_arg_off(16,%r15),0(%r1) */
+ _EMIT6(0xd20ff000 | bpf_arg_off, 0x1000);
+ }
+ j += 2;
+ }
+ }
+ /* stmg %r7,%r8,r7_r8_off(%r15) */
+ EMIT6_DISP_LH(0xeb000000, 0x0024, REG_7, REG_8, REG_15,
+ tjit->r7_r8_off);
+ /* stg %r14,r14_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_14, REG_0, REG_15, tjit->r14_off);
+
+ if (flags & BPF_TRAMP_F_ORIG_STACK) {
+ /*
+ * The ftrace trampoline puts the return address (which is the
+ * address of the original function + S390X_PATCH_SIZE) into
+ * %r0; see ftrace_shared_hotpatch_trampoline_br and
+ * ftrace_init_nop() for details.
+ */
+
+ /* lgr %r8,%r0 */
+ EMIT4(0xb9040000, REG_8, REG_0);
+ } else {
+ /* %r8 = func_addr + S390X_PATCH_SIZE */
+ load_imm64(jit, REG_8, (u64)func_addr + S390X_PATCH_SIZE);
+ }
+
+ /*
+ * ip = func_addr;
+ * arg_cnt = m->nr_args;
+ */
+
+ if (flags & BPF_TRAMP_F_IP_ARG) {
+ /* %r0 = func_addr */
+ load_imm64(jit, REG_0, (u64)func_addr);
+ /* stg %r0,ip_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_0, REG_0, REG_15,
+ tjit->ip_off);
+ }
+ /* lghi %r0,nr_bpf_args */
+ EMIT4_IMM(0xa7090000, REG_0, nr_bpf_args);
+ /* stg %r0,arg_cnt_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_0, REG_0, REG_15,
+ tjit->arg_cnt_off);
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ /*
+ * __bpf_tramp_enter(im);
+ */
+
+ /* %r1 = __bpf_tramp_enter */
+ load_imm64(jit, REG_1, (u64)__bpf_tramp_enter);
+ /* %r2 = im */
+ load_imm64(jit, REG_2, (u64)im);
+ /* %r1() */
+ call_r1(jit);
+ }
+
+ for (i = 0; i < fentry->nr_links; i++)
+ if (invoke_bpf_prog(tjit, m, fentry->links[i],
+ flags & BPF_TRAMP_F_RET_FENTRY_RET))
+ return -EINVAL;
+
+ if (fmod_ret->nr_links) {
+ /*
+ * retval = 0;
+ */
+
+ /* xc retval_off(8,%r15),retval_off(%r15) */
+ _EMIT6(0xd707f000 | tjit->retval_off,
+ 0xf000 | tjit->retval_off);
+
+ for (i = 0; i < fmod_ret->nr_links; i++) {
+ if (invoke_bpf_prog(tjit, m, fmod_ret->links[i], true))
+ return -EINVAL;
+
+ /*
+ * if (retval)
+ * goto do_fexit;
+ */
+
+ /* ltg %r0,retval_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0002, REG_0, REG_0, REG_15,
+ tjit->retval_off);
+ /* brcl 7,do_fexit */
+ EMIT6_PCREL_RILC(0xc0040000, 7, tjit->do_fexit);
+ }
+ }
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ /*
+ * retval = func_addr(args);
+ */
+
+ /* lmg %r2,%rN,reg_args_off(%r15) */
+ if (nr_reg_args)
+ EMIT6_DISP_LH(0xeb000000, 0x0004, REG_2,
+ REG_2 + (nr_reg_args - 1), REG_15,
+ tjit->reg_args_off);
+ /* mvc stack_args_off(N,%r15),orig_stack_args_off(%r15) */
+ if (nr_stack_args)
+ _EMIT6(0xd200f000 |
+ (nr_stack_args * sizeof(u64) - 1) << 16 |
+ tjit->stack_args_off,
+ 0xf000 | tjit->orig_stack_args_off);
+ /* lgr %r1,%r8 */
+ EMIT4(0xb9040000, REG_1, REG_8);
+ /* %r1() */
+ call_r1(jit);
+ /* stg %r2,retval_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
+ tjit->retval_off);
+
+ im->ip_after_call = jit->prg_buf + jit->prg;
+
+ /*
+ * The following nop will be patched by bpf_tramp_image_put().
+ */
+
+ /* brcl 0,im->ip_epilogue */
+ EMIT6_PCREL_RILC(0xc0040000, 0, (u64)im->ip_epilogue);
+ }
+
+ /* do_fexit: */
+ tjit->do_fexit = jit->prg;
+ for (i = 0; i < fexit->nr_links; i++)
+ if (invoke_bpf_prog(tjit, m, fexit->links[i], false))
+ return -EINVAL;
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ im->ip_epilogue = jit->prg_buf + jit->prg;
+
+ /*
+ * __bpf_tramp_exit(im);
+ */
+
+ /* %r1 = __bpf_tramp_exit */
+ load_imm64(jit, REG_1, (u64)__bpf_tramp_exit);
+ /* %r2 = im */
+ load_imm64(jit, REG_2, (u64)im);
+ /* %r1() */
+ call_r1(jit);
+ }
+
+ /* lmg %r2,%rN,reg_args_off(%r15) */
+ if ((flags & BPF_TRAMP_F_RESTORE_REGS) && nr_reg_args)
+ EMIT6_DISP_LH(0xeb000000, 0x0004, REG_2,
+ REG_2 + (nr_reg_args - 1), REG_15,
+ tjit->reg_args_off);
+ /* lgr %r1,%r8 */
+ if (!(flags & BPF_TRAMP_F_SKIP_FRAME))
+ EMIT4(0xb9040000, REG_1, REG_8);
+ /* lmg %r7,%r8,r7_r8_off(%r15) */
+ EMIT6_DISP_LH(0xeb000000, 0x0004, REG_7, REG_8, REG_15,
+ tjit->r7_r8_off);
+ /* lg %r14,r14_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_14, REG_0, REG_15, tjit->r14_off);
+ /* lg %r2,retval_off(%r15) */
+ if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET))
+ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15,
+ tjit->retval_off);
+ /* aghi %r15,stack_size */
+ EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
+ /* Emit an expoline for the following indirect jump. */
+ if (nospec_uses_trampoline())
+ emit_expoline(jit);
+ if (flags & BPF_TRAMP_F_SKIP_FRAME)
+ /* br %r14 */
+ _EMIT2(0x07fe);
+ else
+ /* br %r1 */
+ _EMIT2(0x07f1);
+
+ emit_r1_thunk(jit);
+
+ return 0;
+}
+
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
+ void *image_end, const struct btf_func_model *m,
+ u32 flags, struct bpf_tramp_links *tlinks,
+ void *func_addr)
+{
+ struct bpf_tramp_jit tjit;
+ int ret;
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ if (i == 0) {
+ /* Compute offsets, check whether the code fits. */
+ memset(&tjit, 0, sizeof(tjit));
+ } else {
+ /* Generate the code. */
+ tjit.common.prg = 0;
+ tjit.common.prg_buf = image;
+ }
+ ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
+ tlinks, func_addr);
+ if (ret < 0)
+ return ret;
+ if (tjit.common.prg > (char *)image_end - (char *)image)
+ /*
+ * Use the same error code as for exceeding
+ * BPF_MAX_TRAMP_LINKS.
+ */
+ return -E2BIG;
+ }
+
+ return ret;
+}
+
+bool bpf_jit_supports_subprog_tailcalls(void)
+{
+ return true;
+}