diff options
Diffstat (limited to 'arch')
73 files changed, 919 insertions, 374 deletions
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 63ed39cbd3bd..165f268beafc 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -463,3 +463,7 @@ 532 common getppid sys_getppid # all other architectures have common numbers for new syscall, alpha # is the exception. +534 common pidfd_send_signal sys_pidfd_send_signal +535 common io_uring_setup sys_io_uring_setup +536 common io_uring_enter sys_io_uring_enter +537 common io_uring_register sys_io_uring_register diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 850b4805e2d1..9aed25a6019b 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -73,7 +73,7 @@ config ARM select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU select HAVE_EXIT_THREAD select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL - select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL + select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG select HAVE_FUNCTION_TRACER if !XIP_KERNEL select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7) diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 6d6e0330930b..e388af4594a6 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -47,8 +47,8 @@ config DEBUG_WX choice prompt "Choose kernel unwinder" - default UNWINDER_ARM if AEABI && !FUNCTION_GRAPH_TRACER - default UNWINDER_FRAME_POINTER if !AEABI || FUNCTION_GRAPH_TRACER + default UNWINDER_ARM if AEABI + default UNWINDER_FRAME_POINTER if !AEABI help This determines which method will be used for unwinding kernel stack traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack, @@ -65,7 +65,7 @@ config UNWINDER_FRAME_POINTER config UNWINDER_ARM bool "ARM EABI stack unwinder" - depends on AEABI + depends on AEABI && !FUNCTION_GRAPH_TRACER select ARM_UNWIND help This option enables stack unwinding support in the kernel diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 6c7ccb428c07..7135820f76d4 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -1438,7 +1438,21 @@ ENTRY(efi_stub_entry) @ Preserve return value of efi_entry() in r4 mov r4, r0 - bl cache_clean_flush + + @ our cache maintenance code relies on CP15 barrier instructions + @ but since we arrived here with the MMU and caches configured + @ by UEFI, we must check that the CP15BEN bit is set in SCTLR. + @ Note that this bit is RAO/WI on v6 and earlier, so the ISB in + @ the enable path will be executed on v7+ only. + mrc p15, 0, r1, c1, c0, 0 @ read SCTLR + tst r1, #(1 << 5) @ CP15BEN bit set? + bne 0f + orr r1, r1, #(1 << 5) @ CP15 barrier instructions + mcr p15, 0, r1, c1, c0, 0 @ write SCTLR + ARM( .inst 0xf57ff06f @ v7+ isb ) + THUMB( isb ) + +0: bl cache_clean_flush bl cache_off @ Set parameters for booting zImage according to boot protocol diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index c08d2d890f7b..b38bbd011b35 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -133,9 +133,9 @@ __secondary_data: */ .text __after_proc_init: -#ifdef CONFIG_ARM_MPU M_CLASS(movw r12, #:lower16:BASEADDR_V7M_SCB) M_CLASS(movt r12, #:upper16:BASEADDR_V7M_SCB) +#ifdef CONFIG_ARM_MPU M_CLASS(ldr r3, [r12, 0x50]) AR_CLASS(mrc p15, 0, r3, c0, c1, 4) @ Read ID_MMFR0 and r3, r3, #(MMFR0_PMSA) @ PMSA field diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 9016f4081bb9..0393917eaa57 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -437,3 +437,7 @@ 421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait 422 common futex_time64 sys_futex 423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index e1d95f08f8e1..c7e1a7837706 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -50,7 +50,7 @@ do { \ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr) { - int oldval, ret, tmp; + int oldval = 0, ret, tmp; u32 __user *uaddr = __uaccess_mask_ptr(_uaddr); pagefault_disable(); diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index d1dd93436e1e..f2a83ff6b73c 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -44,7 +44,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 424 +#define __NR_compat_syscalls 428 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 5590f2623690..23f1a44acada 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -866,6 +866,14 @@ __SYSCALL(__NR_rt_sigtimedwait_time64, compat_sys_rt_sigtimedwait_time64) __SYSCALL(__NR_futex_time64, sys_futex) #define __NR_sched_rr_get_interval_time64 423 __SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval) +#define __NR_pidfd_send_signal 424 +__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal) +#define __NR_io_uring_setup 425 +__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup) +#define __NR_io_uring_enter 426 +__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter) +#define __NR_io_uring_register 427 +__SYSCALL(__NR_io_uring_register, sys_io_uring_register) /* * Please add new compat syscalls above this comment and update diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 07b298120182..65a51331088e 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -103,10 +103,15 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) * to be revisited if support for multiple ftrace entry points * is added in the future, but for now, the pr_err() below * deals with a theoretical issue only. + * + * Note that PLTs are place relative, and plt_entries_equal() + * checks whether they point to the same target. Here, we need + * to check if the actual opcodes are in fact identical, + * regardless of the offset in memory so use memcmp() instead. */ trampoline = get_plt_entry(addr, mod->arch.ftrace_trampoline); - if (!plt_entries_equal(mod->arch.ftrace_trampoline, - &trampoline)) { + if (memcmp(mod->arch.ftrace_trampoline, &trampoline, + sizeof(trampoline))) { if (plt_entry_is_initialized(mod->arch.ftrace_trampoline)) { pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); return -EINVAL; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 6bc135042f5e..7cae155e81a5 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -363,7 +363,7 @@ void __init arm64_memblock_init(void) * Otherwise, this is a no-op */ u64 base = phys_initrd_start & PAGE_MASK; - u64 size = PAGE_ALIGN(phys_initrd_size); + u64 size = PAGE_ALIGN(phys_initrd_start + phys_initrd_size) - base; /* * We can only add back the initrd memory if we don't end up diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index ab9cda5f6136..56e3d0b685e1 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -344,3 +344,7 @@ 332 common pkey_free sys_pkey_free 333 common rseq sys_rseq # 334 through 423 are reserved to sync up with other architectures +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 125c14178979..df4ec3ec71d1 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -423,3 +423,7 @@ 421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait 422 common futex_time64 sys_futex 423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 8ee3a8c18498..4964947732af 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -429,3 +429,7 @@ 421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait 422 common futex_time64 sys_futex 423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index 4a70c5de8c92..25a57895a3a3 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -210,12 +210,6 @@ const char *get_system_type(void) return ath79_sys_type; } -int get_c0_perfcount_int(void) -{ - return ATH79_MISC_IRQ(5); -} -EXPORT_SYMBOL_GPL(get_c0_perfcount_int); - unsigned int get_c0_compare_int(void) { return CP0_LEGACY_COMPARE_IRQ; diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index f158c5894a9a..feb2653490df 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -125,7 +125,7 @@ trace_a_syscall: subu t1, v0, __NR_O32_Linux move a1, v0 bnez t1, 1f /* __NR_syscall at offset 0 */ - lw a1, PT_R4(sp) /* Arg1 for __NR_syscall case */ + ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */ .set pop 1: jal syscall_trace_enter diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 15f4117900ee..9392dfe33f97 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -362,3 +362,7 @@ 421 n32 rt_sigtimedwait_time64 compat_sys_rt_sigtimedwait_time64 422 n32 futex_time64 sys_futex 423 n32 sched_rr_get_interval_time64 sys_sched_rr_get_interval +424 n32 pidfd_send_signal sys_pidfd_send_signal +425 n32 io_uring_setup sys_io_uring_setup +426 n32 io_uring_enter sys_io_uring_enter +427 n32 io_uring_register sys_io_uring_register diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index c85502e67b44..cd0c8aa21fba 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -338,3 +338,7 @@ 327 n64 rseq sys_rseq 328 n64 io_pgetevents sys_io_pgetevents # 329 through 423 are reserved to sync up with other architectures +424 n64 pidfd_send_signal sys_pidfd_send_signal +425 n64 io_uring_setup sys_io_uring_setup +426 n64 io_uring_enter sys_io_uring_enter +427 n64 io_uring_register sys_io_uring_register diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 2e063d0f837e..e849e8ffe4a2 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -411,3 +411,7 @@ 421 o32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64 422 o32 futex_time64 sys_futex sys_futex 423 o32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval +424 o32 pidfd_send_signal sys_pidfd_send_signal +425 o32 io_uring_setup sys_io_uring_setup +426 o32 io_uring_enter sys_io_uring_enter +427 o32 io_uring_register sys_io_uring_register diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index b26766c6647d..fe8ca623add8 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -420,3 +420,7 @@ 421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64 422 32 futex_time64 sys_futex sys_futex 423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 5ba131c30f6b..1bcd468ab422 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -266,6 +266,7 @@ CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y +CONFIG_HUGETLBFS=y # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_NLS=y diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index b18abb0c3dae..00f5a63c8d9a 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -505,3 +505,7 @@ 421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64 422 32 futex_time64 sys_futex sys_futex 423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index e7a9c4f6bfca..8330f135294f 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -95,28 +95,15 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, unsigned long entries, unsigned long dev_hpa, struct mm_iommu_table_group_mem_t **pmem) { - struct mm_iommu_table_group_mem_t *mem; - long i, ret, locked_entries = 0; + struct mm_iommu_table_group_mem_t *mem, *mem2; + long i, ret, locked_entries = 0, pinned = 0; unsigned int pageshift; - - mutex_lock(&mem_list_mutex); - - list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, - next) { - /* Overlap? */ - if ((mem->ua < (ua + (entries << PAGE_SHIFT))) && - (ua < (mem->ua + - (mem->entries << PAGE_SHIFT)))) { - ret = -EINVAL; - goto unlock_exit; - } - - } + unsigned long entry, chunk; if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) { ret = mm_iommu_adjust_locked_vm(mm, entries, true); if (ret) - goto unlock_exit; + return ret; locked_entries = entries; } @@ -148,17 +135,27 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, } down_read(&mm->mmap_sem); - ret = get_user_pages_longterm(ua, entries, FOLL_WRITE, mem->hpages, NULL); + chunk = (1UL << (PAGE_SHIFT + MAX_ORDER - 1)) / + sizeof(struct vm_area_struct *); + chunk = min(chunk, entries); + for (entry = 0; entry < entries; entry += chunk) { + unsigned long n = min(entries - entry, chunk); + + ret = get_user_pages_longterm(ua + (entry << PAGE_SHIFT), n, + FOLL_WRITE, mem->hpages + entry, NULL); + if (ret == n) { + pinned += n; + continue; + } + if (ret > 0) + pinned += ret; + break; + } up_read(&mm->mmap_sem); - if (ret != entries) { - /* free the reference taken */ - for (i = 0; i < ret; i++) - put_page(mem->hpages[i]); - - vfree(mem->hpas); - kfree(mem); - ret = -EFAULT; - goto unlock_exit; + if (pinned != entries) { + if (!ret) + ret = -EFAULT; + goto free_exit; } pageshift = PAGE_SHIFT; @@ -183,21 +180,43 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, } good_exit: - ret = 0; atomic64_set(&mem->mapped, 1); mem->used = 1; mem->ua = ua; mem->entries = entries; - *pmem = mem; - list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list); + mutex_lock(&mem_list_mutex); -unlock_exit: - if (locked_entries && ret) - mm_iommu_adjust_locked_vm(mm, locked_entries, false); + list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next) { + /* Overlap? */ + if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) && + (ua < (mem2->ua + + (mem2->entries << PAGE_SHIFT)))) { + ret = -EINVAL; + mutex_unlock(&mem_list_mutex); + goto free_exit; + } + } + + list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list); mutex_unlock(&mem_list_mutex); + *pmem = mem; + + return 0; + +free_exit: + /* free the reference taken */ + for (i = 0; i < pinned; i++) + put_page(mem->hpages[i]); + + vfree(mem->hpas); + kfree(mem); + +unlock_exit: + mm_iommu_adjust_locked_vm(mm, locked_entries, false); + return ret; } @@ -266,7 +285,7 @@ static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem) long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem) { long ret = 0; - unsigned long entries, dev_hpa; + unsigned long unlock_entries = 0; mutex_lock(&mem_list_mutex); @@ -287,17 +306,17 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem) goto unlock_exit; } + if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) + unlock_entries = mem->entries; + /* @mapped became 0 so now mappings are disabled, release the region */ - entries = mem->entries; - dev_hpa = mem->dev_hpa; mm_iommu_release(mem); - if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) - mm_iommu_adjust_locked_vm(mm, entries, false); - unlock_exit: mutex_unlock(&mem_list_mutex); + mm_iommu_adjust_locked_vm(mm, unlock_entries, false); + return ret; } EXPORT_SYMBOL_GPL(mm_iommu_put); diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 842b2c7e156a..50cd09b4e05d 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -324,7 +324,7 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK config PPC_RADIX_MMU bool "Radix MMU Support" - depends on PPC_BOOK3S_64 + depends on PPC_BOOK3S_64 && HUGETLB_PAGE select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA default y help diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig new file mode 100644 index 000000000000..1a911ed8e772 --- /dev/null +++ b/arch/riscv/configs/rv32_defconfig @@ -0,0 +1,84 @@ +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_CGROUPS=y +CONFIG_CGROUP_SCHED=y +CONFIG_CFS_BANDWIDTH=y +CONFIG_CGROUP_BPF=y +CONFIG_NAMESPACES=y +CONFIG_USER_NS=y +CONFIG_CHECKPOINT_RESTORE=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_EXPERT=y +CONFIG_BPF_SYSCALL=y +CONFIG_ARCH_RV32I=y +CONFIG_SMP=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_NETLINK_DIAG=y +CONFIG_PCI=y +CONFIG_PCIEPORTBUS=y +CONFIG_PCI_HOST_GENERIC=y +CONFIG_PCIE_XILINX=y +CONFIG_DEVTMPFS=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_VIRTIO_BLK=y +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +CONFIG_SATA_AHCI_PLATFORM=y +CONFIG_NETDEVICES=y +CONFIG_VIRTIO_NET=y +CONFIG_MACB=y +CONFIG_E1000E=y +CONFIG_R8169=y +CONFIG_MICROSEMI_PHY=y +CONFIG_INPUT_MOUSEDEV=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_SERIAL_EARLYCON_RISCV_SBI=y +CONFIG_HVC_RISCV_SBI=y +# CONFIG_PTP_1588_CLOCK is not set +CONFIG_DRM=y +CONFIG_DRM_RADEON=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_USB=y +CONFIG_USB_XHCI_HCD=y +CONFIG_USB_XHCI_PLATFORM=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_HCD_PLATFORM=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_HCD_PLATFORM=y +CONFIG_USB_STORAGE=y +CONFIG_USB_UAS=y +CONFIG_VIRTIO_MMIO=y +CONFIG_SIFIVE_PLIC=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_AUTOFS4_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_NFS_FS=y +CONFIG_NFS_V4=y +CONFIG_NFS_V4_1=y +CONFIG_NFS_V4_2=y +CONFIG_ROOT_NFS=y +CONFIG_CRYPTO_USER_API_HASH=y +CONFIG_CRYPTO_DEV_VIRTIO=y +CONFIG_PRINTK_TIME=y +# CONFIG_RCU_TRACE is not set diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 5fd8c922e1c2..bc7b77e34d09 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -121,6 +121,14 @@ void __init setup_bootmem(void) */ memblock_reserve(reg->base, vmlinux_end - reg->base); mem_size = min(reg->size, (phys_addr_t)-PAGE_OFFSET); + + /* + * Remove memblock from the end of usable area to the + * end of region + */ + if (reg->base + mem_size < end) + memblock_remove(reg->base + mem_size, + end - reg->base - mem_size); } } BUG_ON(mem_size == 0); diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c index 4cb771ba13fa..5d316fe40480 100644 --- a/arch/s390/boot/mem_detect.c +++ b/arch/s390/boot/mem_detect.c @@ -25,7 +25,7 @@ static void *mem_detect_alloc_extended(void) { unsigned long offset = ALIGN(mem_safe_offset(), sizeof(u64)); - if (IS_ENABLED(BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE && + if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE && INITRD_START < offset + ENTRIES_EXTENDED_MAX) offset = ALIGN(INITRD_START + INITRD_SIZE, sizeof(u64)); diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c index 594464f2129d..0da378e2eb25 100644 --- a/arch/s390/kernel/fpu.c +++ b/arch/s390/kernel/fpu.c @@ -23,7 +23,7 @@ void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags) if (flags & KERNEL_FPC) /* Save floating point control */ - asm volatile("stfpc %0" : "=m" (state->fpc)); + asm volatile("stfpc %0" : "=Q" (state->fpc)); if (!MACHINE_HAS_VX) { if (flags & KERNEL_VXR_V0V7) { diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 02579f95f391..061418f787c3 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -426,3 +426,7 @@ 421 32 rt_sigtimedwait_time64 - compat_sys_rt_sigtimedwait_time64 422 32 futex_time64 - sys_futex 423 32 sched_rr_get_interval_time64 - sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register sys_io_uring_register diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index a69a0911ed0e..c475ca49cfc6 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -37,7 +37,7 @@ static inline u64 get_vtimer(void) { u64 timer; - asm volatile("stpt %0" : "=m" (timer)); + asm volatile("stpt %0" : "=Q" (timer)); return timer; } @@ -48,7 +48,7 @@ static inline void set_vtimer(u64 expires) asm volatile( " stpt %0\n" /* Store current cpu timer value */ " spt %1" /* Set new value imm. afterwards */ - : "=m" (timer) : "m" (expires)); + : "=Q" (timer) : "Q" (expires)); S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer; S390_lowcore.last_update_timer = expires; } @@ -135,8 +135,8 @@ static int do_account_vtime(struct task_struct *tsk) #else " stck %1" /* Store current tod clock value */ #endif - : "=m" (S390_lowcore.last_update_timer), - "=m" (S390_lowcore.last_update_clock)); + : "=Q" (S390_lowcore.last_update_timer), + "=Q" (S390_lowcore.last_update_clock)); clock = S390_lowcore.last_update_clock - clock; timer -= S390_lowcore.last_update_timer; diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index bfda678576e4..480b057556ee 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -426,3 +426,7 @@ 421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait 422 common futex_time64 sys_futex 423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index b9a5a04b2d2c..a1dd24307b00 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -469,3 +469,7 @@ 421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64 422 32 futex_time64 sys_futex sys_futex 423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5ad92419be19..62fc3fda1a05 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1499,7 +1499,7 @@ config X86_CPA_STATISTICS depends on DEBUG_FS ---help--- Expose statistics about the Change Page Attribute mechanims, which - helps to determine the effectivness of preserving large and huge + helps to determine the effectiveness of preserving large and huge page mappings when mapping protections are changed. config ARCH_HAS_MEM_ENCRYPT diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index c0d6c560df69..5a237e8dbf8d 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -352,7 +352,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, boot_params->hdr.loadflags &= ~KASLR_FLAG; /* Save RSDP address for later use. */ - boot_params->acpi_rsdp_addr = get_rsdp_addr(); + /* boot_params->acpi_rsdp_addr = get_rsdp_addr(); */ sanitize_boot_params(boot_params); diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S index 3b6e70d085da..8457cdd47f75 100644 --- a/arch/x86/crypto/poly1305-avx2-x86_64.S +++ b/arch/x86/crypto/poly1305-avx2-x86_64.S @@ -323,6 +323,12 @@ ENTRY(poly1305_4block_avx2) vpaddq t2,t1,t1 vmovq t1x,d4 + # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 -> + # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small + # amount. Careful: we must not assume the carry bits 'd0 >> 26', + # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit + # integers. It's true in a single-block implementation, but not here. + # d1 += d0 >> 26 mov d0,%rax shr $26,%rax @@ -361,16 +367,16 @@ ENTRY(poly1305_4block_avx2) # h0 += (d4 >> 26) * 5 mov d4,%rax shr $26,%rax - lea (%eax,%eax,4),%eax - add %eax,%ebx + lea (%rax,%rax,4),%rax + add %rax,%rbx # h4 = d4 & 0x3ffffff mov d4,%rax and $0x3ffffff,%eax mov %eax,h4 # h1 += h0 >> 26 - mov %ebx,%eax - shr $26,%eax + mov %rbx,%rax + shr $26,%rax add %eax,h1 # h0 = h0 & 0x3ffffff andl $0x3ffffff,%ebx diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S index e6add74d78a5..6f0be7a86964 100644 --- a/arch/x86/crypto/poly1305-sse2-x86_64.S +++ b/arch/x86/crypto/poly1305-sse2-x86_64.S @@ -253,16 +253,16 @@ ENTRY(poly1305_block_sse2) # h0 += (d4 >> 26) * 5 mov d4,%rax shr $26,%rax - lea (%eax,%eax,4),%eax - add %eax,%ebx + lea (%rax,%rax,4),%rax + add %rax,%rbx # h4 = d4 & 0x3ffffff mov d4,%rax and $0x3ffffff,%eax mov %eax,h4 # h1 += h0 >> 26 - mov %ebx,%eax - shr $26,%eax + mov %rbx,%rax + shr $26,%rax add %eax,h1 # h0 = h0 & 0x3ffffff andl $0x3ffffff,%ebx @@ -524,6 +524,12 @@ ENTRY(poly1305_2block_sse2) paddq t2,t1 movq t1,d4 + # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 -> + # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small + # amount. Careful: we must not assume the carry bits 'd0 >> 26', + # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit + # integers. It's true in a single-block implementation, but not here. + # d1 += d0 >> 26 mov d0,%rax shr $26,%rax @@ -562,16 +568,16 @@ ENTRY(poly1305_2block_sse2) # h0 += (d4 >> 26) * 5 mov d4,%rax shr $26,%rax - lea (%eax,%eax,4),%eax - add %eax,%ebx + lea (%rax,%rax,4),%rax + add %rax,%rbx # h4 = d4 & 0x3ffffff mov d4,%rax and $0x3ffffff,%eax mov %eax,h4 # h1 += h0 >> 26 - mov %ebx,%eax - shr $26,%eax + mov %rbx,%rax + shr $26,%rax add %eax,h1 # h0 = h0 & 0x3ffffff andl $0x3ffffff,%ebx diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 0ecfac84ba91..d45f3fbd232e 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -117,22 +117,39 @@ static __initconst const u64 amd_hw_cache_event_ids }; /* - * AMD Performance Monitor K7 and later. + * AMD Performance Monitor K7 and later, up to and including Family 16h: */ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d, - [PERF_COUNT_HW_CACHE_MISSES] = 0x077e, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */ - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d, + [PERF_COUNT_HW_CACHE_MISSES] = 0x077e, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */ + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */ +}; + +/* + * AMD Performance Monitor Family 17h and later: + */ +static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187, }; static u64 amd_pmu_event_map(int hw_event) { + if (boot_cpu_data.x86 >= 0x17) + return amd_f17h_perfmon_event_map[hw_event]; + return amd_perfmon_event_map[hw_event]; } diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index f61dcbef20ff..f9451566cd9b 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3131,7 +3131,7 @@ static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event) flags &= ~PERF_SAMPLE_TIME; if (!event->attr.exclude_kernel) flags &= ~PERF_SAMPLE_REGS_USER; - if (event->attr.sample_regs_user & ~PEBS_REGS) + if (event->attr.sample_regs_user & ~PEBS_GP_REGS) flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR); return flags; } diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 94a4b7fc75d0..d41de9af7a39 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -76,15 +76,15 @@ * Scope: Package (physical package) * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter. * perf code: 0x04 - * Available model: HSW ULT,CNL + * Available model: HSW ULT,KBL,CNL * Scope: Package (physical package) * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter. * perf code: 0x05 - * Available model: HSW ULT,CNL + * Available model: HSW ULT,KBL,CNL * Scope: Package (physical package) * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. * perf code: 0x06 - * Available model: HSW ULT,GLM,CNL + * Available model: HSW ULT,KBL,GLM,CNL * Scope: Package (physical package) * */ @@ -566,8 +566,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates), X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_X, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, snb_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, hswult_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, hswult_cstates), X86_CSTATES_MODEL(INTEL_FAM6_CANNONLAKE_MOBILE, cnl_cstates), diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index a75955741c50..1e98a42b560a 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -96,25 +96,25 @@ struct amd_nb { PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | \ PERF_SAMPLE_PERIOD) -#define PEBS_REGS \ - (PERF_REG_X86_AX | \ - PERF_REG_X86_BX | \ - PERF_REG_X86_CX | \ - PERF_REG_X86_DX | \ - PERF_REG_X86_DI | \ - PERF_REG_X86_SI | \ - PERF_REG_X86_SP | \ - PERF_REG_X86_BP | \ - PERF_REG_X86_IP | \ - PERF_REG_X86_FLAGS | \ - PERF_REG_X86_R8 | \ - PERF_REG_X86_R9 | \ - PERF_REG_X86_R10 | \ - PERF_REG_X86_R11 | \ - PERF_REG_X86_R12 | \ - PERF_REG_X86_R13 | \ - PERF_REG_X86_R14 | \ - PERF_REG_X86_R15) +#define PEBS_GP_REGS \ + ((1ULL << PERF_REG_X86_AX) | \ + (1ULL << PERF_REG_X86_BX) | \ + (1ULL << PERF_REG_X86_CX) | \ + (1ULL << PERF_REG_X86_DX) | \ + (1ULL << PERF_REG_X86_DI) | \ + (1ULL << PERF_REG_X86_SI) | \ + (1ULL << PERF_REG_X86_SP) | \ + (1ULL << PERF_REG_X86_BP) | \ + (1ULL << PERF_REG_X86_IP) | \ + (1ULL << PERF_REG_X86_FLAGS) | \ + (1ULL << PERF_REG_X86_R8) | \ + (1ULL << PERF_REG_X86_R9) | \ + (1ULL << PERF_REG_X86_R10) | \ + (1ULL << PERF_REG_X86_R11) | \ + (1ULL << PERF_REG_X86_R12) | \ + (1ULL << PERF_REG_X86_R13) | \ + (1ULL << PERF_REG_X86_R14) | \ + (1ULL << PERF_REG_X86_R15)) /* * Per register state. diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 93c4bf598fb0..feab24cac610 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -226,7 +226,9 @@ struct x86_emulate_ops { unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt); void (*set_hflags)(struct x86_emulate_ctxt *ctxt, unsigned hflags); - int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt, u64 smbase); + int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt, + const char *smstate); + void (*post_leave_smm)(struct x86_emulate_ctxt *ctxt); }; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 159b5988292f..a9d03af34030 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -126,7 +126,7 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) } #define KVM_PERMILLE_MMU_PAGES 20 -#define KVM_MIN_ALLOC_MMU_PAGES 64 +#define KVM_MIN_ALLOC_MMU_PAGES 64UL #define KVM_MMU_HASH_SHIFT 12 #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) #define KVM_MIN_FREE_MMU_PAGES 5 @@ -844,9 +844,9 @@ enum kvm_irqchip_mode { }; struct kvm_arch { - unsigned int n_used_mmu_pages; - unsigned int n_requested_mmu_pages; - unsigned int n_max_mmu_pages; + unsigned long n_used_mmu_pages; + unsigned long n_requested_mmu_pages; + unsigned long n_max_mmu_pages; unsigned int indirect_shadow_pages; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; /* @@ -1182,7 +1182,7 @@ struct kvm_x86_ops { int (*smi_allowed)(struct kvm_vcpu *vcpu); int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate); - int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase); + int (*pre_leave_smm)(struct kvm_vcpu *vcpu, const char *smstate); int (*enable_smi_window)(struct kvm_vcpu *vcpu); int (*mem_enc_op)(struct kvm *kvm, void __user *argp); @@ -1256,8 +1256,8 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, gfn_t gfn_offset, unsigned long mask); void kvm_mmu_zap_all(struct kvm *kvm); void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen); -unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm); -void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); +unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm); +void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages); int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3); bool pdptrs_changed(struct kvm_vcpu *vcpu); @@ -1592,4 +1592,7 @@ static inline int kvm_cpu_get_apicid(int mps_cpu) #define put_smstate(type, buf, offset, val) \ *(type *)((buf) + (offset) - 0x7e00) = val +#define GET_SMSTATE(type, buf, offset) \ + (*(type *)((buf) + (offset) - 0x7e00)) + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index f0b0c90dd398..d213ec5c3766 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -146,6 +146,7 @@ #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2 +#define VMX_ABORT_VMCS_CORRUPTED 3 #define VMX_ABORT_LOAD_HOST_MSR_FAIL 4 #endif /* _UAPIVMX_H */ diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index cfd24f9f7614..1796d2bdcaaa 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -28,7 +28,7 @@ obj-y += cpuid-deps.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o -obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o intel_epb.o obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_HYGON) += hygon.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 2da82eff0eb4..b91b3bfa5cfb 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -275,7 +275,7 @@ static const struct { const char *option; enum spectre_v2_user_cmd cmd; bool secure; -} v2_user_options[] __initdata = { +} v2_user_options[] __initconst = { { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, { "off", SPECTRE_V2_USER_CMD_NONE, false }, { "on", SPECTRE_V2_USER_CMD_FORCE, true }, @@ -419,7 +419,7 @@ static const struct { const char *option; enum spectre_v2_mitigation_cmd cmd; bool secure; -} mitigation_options[] __initdata = { +} mitigation_options[] __initconst = { { "off", SPECTRE_V2_CMD_NONE, false }, { "on", SPECTRE_V2_CMD_FORCE, true }, { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, @@ -658,7 +658,7 @@ static const char * const ssb_strings[] = { static const struct { const char *option; enum ssb_mitigation_cmd cmd; -} ssb_mitigation_options[] __initdata = { +} ssb_mitigation_options[] __initconst = { { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cb28e98a0659..5e37dfa4d9df 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1864,23 +1864,6 @@ void cpu_init(void) } #endif -static void bsp_resume(void) -{ - if (this_cpu->c_bsp_resume) - this_cpu->c_bsp_resume(&boot_cpu_data); -} - -static struct syscore_ops cpu_syscore_ops = { - .resume = bsp_resume, -}; - -static int __init init_cpu_syscore(void) -{ - register_syscore_ops(&cpu_syscore_ops); - return 0; -} -core_initcall(init_cpu_syscore); - /* * The microcode loader calls this upon late microcode load to recheck features, * only when microcode has been updated. Caller holds microcode_mutex and CPU diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 5eb946b9a9f3..c0e2407abdd6 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -14,7 +14,6 @@ struct cpu_dev { void (*c_init)(struct cpuinfo_x86 *); void (*c_identify)(struct cpuinfo_x86 *); void (*c_detect_tlb)(struct cpuinfo_x86 *); - void (*c_bsp_resume)(struct cpuinfo_x86 *); int c_x86_vendor; #ifdef CONFIG_X86_32 /* Optional vendor specific routine to obtain the cache size. */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fc3c07fe7df5..f17c1a714779 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -596,36 +596,6 @@ detect_keyid_bits: c->x86_phys_bits -= keyid_bits; } -static void init_intel_energy_perf(struct cpuinfo_x86 *c) -{ - u64 epb; - - /* - * Initialize MSR_IA32_ENERGY_PERF_BIAS if not already initialized. - * (x86_energy_perf_policy(8) is available to change it at run-time.) - */ - if (!cpu_has(c, X86_FEATURE_EPB)) - return; - - rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); - if ((epb & 0xF) != ENERGY_PERF_BIAS_PERFORMANCE) - return; - - pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); - pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n"); - epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL; - wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); -} - -static void intel_bsp_resume(struct cpuinfo_x86 *c) -{ - /* - * MSR_IA32_ENERGY_PERF_BIAS is lost across suspend/resume, - * so reinitialize it properly like during bootup: - */ - init_intel_energy_perf(c); -} - static void init_cpuid_fault(struct cpuinfo_x86 *c) { u64 msr; @@ -763,8 +733,6 @@ static void init_intel(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_TME)) detect_tme(c); - init_intel_energy_perf(c); - init_intel_misc_features(c); } @@ -1023,9 +991,7 @@ static const struct cpu_dev intel_cpu_dev = { .c_detect_tlb = intel_detect_tlb, .c_early_init = early_init_intel, .c_init = init_intel, - .c_bsp_resume = intel_bsp_resume, .c_x86_vendor = X86_VENDOR_INTEL, }; cpu_dev_register(intel_cpu_dev); - diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c new file mode 100644 index 000000000000..f4dd73396f28 --- /dev/null +++ b/arch/x86/kernel/cpu/intel_epb.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Performance and Energy Bias Hint support. + * + * Copyright (C) 2019 Intel Corporation + * + * Author: + * Rafael J. Wysocki <rafael.j.wysocki@intel.com> + */ + +#include <linux/cpuhotplug.h> +#include <linux/cpu.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/syscore_ops.h> +#include <linux/pm.h> + +#include <asm/cpufeature.h> +#include <asm/msr.h> + +/** + * DOC: overview + * + * The Performance and Energy Bias Hint (EPB) allows software to specify its + * preference with respect to the power-performance tradeoffs present in the + * processor. Generally, the EPB is expected to be set by user space (directly + * via sysfs or with the help of the x86_energy_perf_policy tool), but there are + * two reasons for the kernel to update it. + * + * First, there are systems where the platform firmware resets the EPB during + * system-wide transitions from sleep states back into the working state + * effectively causing the previous EPB updates by user space to be lost. + * Thus the kernel needs to save the current EPB values for all CPUs during + * system-wide transitions to sleep states and restore them on the way back to + * the working state. That can be achieved by saving EPB for secondary CPUs + * when they are taken offline during transitions into system sleep states and + * for the boot CPU in a syscore suspend operation, so that it can be restored + * for the boot CPU in a syscore resume operation and for the other CPUs when + * they are brought back online. However, CPUs that are already offline when + * a system-wide PM transition is started are not taken offline again, but their + * EPB values may still be reset by the platform firmware during the transition, + * so in fact it is necessary to save the EPB of any CPU taken offline and to + * restore it when the given CPU goes back online at all times. + * + * Second, on many systems the initial EPB value coming from the platform + * firmware is 0 ('performance') and at least on some of them that is because + * the platform firmware does not initialize EPB at all with the assumption that + * the OS will do that anyway. That sometimes is problematic, as it may cause + * the system battery to drain too fast, for example, so it is better to adjust + * it on CPU bring-up and if the initial EPB value for a given CPU is 0, the + * kernel changes it to 6 ('normal'). + */ + +static DEFINE_PER_CPU(u8, saved_epb); + +#define EPB_MASK 0x0fULL +#define EPB_SAVED 0x10ULL +#define MAX_EPB EPB_MASK + +static int intel_epb_save(void) +{ + u64 epb; + + rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); + /* + * Ensure that saved_epb will always be nonzero after this write even if + * the EPB value read from the MSR is 0. + */ + this_cpu_write(saved_epb, (epb & EPB_MASK) | EPB_SAVED); + + return 0; +} + +static void intel_epb_restore(void) +{ + u64 val = this_cpu_read(saved_epb); + u64 epb; + + rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); + if (val) { + val &= EPB_MASK; + } else { + /* + * Because intel_epb_save() has not run for the current CPU yet, + * it is going online for the first time, so if its EPB value is + * 0 ('performance') at this point, assume that it has not been + * initialized by the platform firmware and set it to 6 + * ('normal'). + */ + val = epb & EPB_MASK; + if (val == ENERGY_PERF_BIAS_PERFORMANCE) { + val = ENERGY_PERF_BIAS_NORMAL; + pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); + } + } + wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, (epb & ~EPB_MASK) | val); +} + +static struct syscore_ops intel_epb_syscore_ops = { + .suspend = intel_epb_save, + .resume = intel_epb_restore, +}; + +static const char * const energy_perf_strings[] = { + "performance", + "balance-performance", + "normal", + "balance-power", + "power" +}; +static const u8 energ_perf_values[] = { + ENERGY_PERF_BIAS_PERFORMANCE, + ENERGY_PERF_BIAS_BALANCE_PERFORMANCE, + ENERGY_PERF_BIAS_NORMAL, + ENERGY_PERF_BIAS_BALANCE_POWERSAVE, + ENERGY_PERF_BIAS_POWERSAVE +}; + +static ssize_t energy_perf_bias_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned int cpu = dev->id; + u64 epb; + int ret; + + ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); + if (ret < 0) + return ret; + + return sprintf(buf, "%llu\n", epb); +} + +static ssize_t energy_perf_bias_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned int cpu = dev->id; + u64 epb, val; + int ret; + + ret = __sysfs_match_string(energy_perf_strings, + ARRAY_SIZE(energy_perf_strings), buf); + if (ret >= 0) + val = energ_perf_values[ret]; + else if (kstrtou64(buf, 0, &val) || val > MAX_EPB) + return -EINVAL; + + ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb); + if (ret < 0) + return ret; + + ret = wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, + (epb & ~EPB_MASK) | val); + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR_RW(energy_perf_bias); + +static struct attribute *intel_epb_attrs[] = { + &dev_attr_energy_perf_bias.attr, + NULL +}; + +static const struct attribute_group intel_epb_attr_group = { + .name = power_group_name, + .attrs = intel_epb_attrs +}; + +static int intel_epb_online(unsigned int cpu) +{ + struct device *cpu_dev = get_cpu_device(cpu); + + intel_epb_restore(); + if (!cpuhp_tasks_frozen) + sysfs_merge_group(&cpu_dev->kobj, &intel_epb_attr_group); + + return 0; +} + +static int intel_epb_offline(unsigned int cpu) +{ + struct device *cpu_dev = get_cpu_device(cpu); + + if (!cpuhp_tasks_frozen) + sysfs_unmerge_group(&cpu_dev->kobj, &intel_epb_attr_group); + + intel_epb_save(); + return 0; +} + +static __init int intel_epb_init(void) +{ + int ret; + + if (!boot_cpu_has(X86_FEATURE_EPB)) + return -ENODEV; + + ret = cpuhp_setup_state(CPUHP_AP_X86_INTEL_EPB_ONLINE, + "x86/intel/epb:online", intel_epb_online, + intel_epb_offline); + if (ret < 0) + goto err_out_online; + + register_syscore_ops(&intel_epb_syscore_ops); + return 0; + +err_out_online: + cpuhp_remove_state(CPUHP_AP_X86_INTEL_EPB_ONLINE); + return ret; +} +subsys_initcall(intel_epb_init); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 54b9eef3eea9..85212a32b54d 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2610,9 +2610,10 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) rdt_last_cmd_puts("Failed to initialize allocations\n"); return ret; } - rdtgrp->mode = RDT_MODE_SHAREABLE; } + rdtgrp->mode = RDT_MODE_SHAREABLE; + return 0; } diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index a034cb808e7e..fed46ddb1eef 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -569,6 +569,7 @@ void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) unsigned long *sara = stack_addr(regs); ri->ret_addr = (kprobe_opcode_t *) *sara; + ri->fp = sara; /* Replace the return addr with trampoline addr */ *sara = (unsigned long) &kretprobe_trampoline; @@ -748,26 +749,48 @@ asm( NOKPROBE_SYMBOL(kretprobe_trampoline); STACK_FRAME_NON_STANDARD(kretprobe_trampoline); +static struct kprobe kretprobe_kprobe = { + .addr = (void *)kretprobe_trampoline, +}; + /* * Called from kretprobe_trampoline */ static __used void *trampoline_handler(struct pt_regs *regs) { + struct kprobe_ctlblk *kcb; struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; kprobe_opcode_t *correct_ret_addr = NULL; + void *frame_pointer; + bool skipped = false; + + preempt_disable(); + + /* + * Set a dummy kprobe for avoiding kretprobe recursion. + * Since kretprobe never run in kprobe handler, kprobe must not + * be running at this point. + */ + kcb = get_kprobe_ctlblk(); + __this_cpu_write(current_kprobe, &kretprobe_kprobe); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); /* fixup registers */ #ifdef CONFIG_X86_64 regs->cs = __KERNEL_CS; + /* On x86-64, we use pt_regs->sp for return address holder. */ + frame_pointer = ®s->sp; #else regs->cs = __KERNEL_CS | get_kernel_rpl(); regs->gs = 0; + /* On x86-32, we use pt_regs->flags for return address holder. */ + frame_pointer = ®s->flags; #endif regs->ip = trampoline_address; regs->orig_ax = ~0UL; @@ -789,8 +812,25 @@ static __used void *trampoline_handler(struct pt_regs *regs) if (ri->task != current) /* another task is sharing our hash bucket */ continue; + /* + * Return probes must be pushed on this hash list correct + * order (same as return order) so that it can be poped + * correctly. However, if we find it is pushed it incorrect + * order, this means we find a function which should not be + * probed, because the wrong order entry is pushed on the + * path of processing other kretprobe itself. + */ + if (ri->fp != frame_pointer) { + if (!skipped) + pr_warn("kretprobe is stacked incorrectly. Trying to fixup.\n"); + skipped = true; + continue; + } orig_ret_address = (unsigned long)ri->ret_addr; + if (skipped) + pr_warn("%ps must be blacklisted because of incorrect kretprobe order\n", + ri->rp->kp.addr); if (orig_ret_address != trampoline_address) /* @@ -808,14 +848,15 @@ static __used void *trampoline_handler(struct pt_regs *regs) if (ri->task != current) /* another task is sharing our hash bucket */ continue; + if (ri->fp != frame_pointer) + continue; orig_ret_address = (unsigned long)ri->ret_addr; if (ri->rp && ri->rp->handler) { __this_cpu_write(current_kprobe, &ri->rp->kp); - get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; ri->ret_addr = correct_ret_addr; ri->rp->handler(ri, regs); - __this_cpu_write(current_kprobe, NULL); + __this_cpu_write(current_kprobe, &kretprobe_kprobe); } recycle_rp_inst(ri, &empty_rp); @@ -831,6 +872,9 @@ static __used void *trampoline_handler(struct pt_regs *regs) kretprobe_hash_unlock(current, &flags); + __this_cpu_write(current_kprobe, NULL); + preempt_enable(); + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 58ac7be52c7a..957eae13b370 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -426,6 +426,8 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, u64 msr = x86_spec_ctrl_base; bool updmsr = false; + lockdep_assert_irqs_disabled(); + /* * If TIF_SSBD is different, select the proper mitigation * method. Note that if SSBD mitigation is disabled or permanentely @@ -477,10 +479,12 @@ static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) void speculation_ctrl_update(unsigned long tif) { + unsigned long flags; + /* Forced update. Make sure all relevant TIF flags are different */ - preempt_disable(); + local_irq_save(flags); __speculation_ctrl_update(~tif, tif); - preempt_enable(); + local_irq_restore(flags); } /* Called from seccomp/prctl update */ diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 725624b6c0c0..8fd3cedd9acc 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -81,6 +81,19 @@ static int __init set_bios_reboot(const struct dmi_system_id *d) return 0; } +/* + * Some machines don't handle the default ACPI reboot method and + * require the EFI reboot method: + */ +static int __init set_efi_reboot(const struct dmi_system_id *d) +{ + if (reboot_type != BOOT_EFI && !efi_runtime_disabled()) { + reboot_type = BOOT_EFI; + pr_info("%s series board detected. Selecting EFI-method for reboot.\n", d->ident); + } + return 0; +} + void __noreturn machine_real_restart(unsigned int type) { local_irq_disable(); @@ -166,6 +179,14 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"), }, }, + { /* Handle reboot issue on Acer TravelMate X514-51T */ + .callback = set_efi_reboot, + .ident = "Acer TravelMate X514-51T", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate X514-51T"), + }, + }, /* Apple */ { /* Handle problems with rebooting on Apple MacBook5 */ diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 3fae23834069..cc6df5c6d7b3 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -185,8 +185,7 @@ static void __init cyc2ns_init_boot_cpu(void) /* * Secondary CPUs do not run through tsc_init(), so set up * all the scale factors for all CPUs, assuming the same - * speed as the bootup CPU. (cpufreq notifiers will fix this - * up if their speed diverges) + * speed as the bootup CPU. */ static void __init cyc2ns_init_secondary_cpus(void) { @@ -937,12 +936,12 @@ void tsc_restore_sched_clock_state(void) } #ifdef CONFIG_CPU_FREQ -/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency +/* + * Frequency scaling support. Adjust the TSC based timer when the CPU frequency * changes. * - * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's - * not that important because current Opteron setups do not support - * scaling on SMP anyroads. + * NOTE: On SMP the situation is not fixable in general, so simply mark the TSC + * as unstable and give up in those cases. * * Should fix up last_tsc too. Currently gettimeofday in the * first tick after the change will be slightly wrong. @@ -956,22 +955,22 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) { struct cpufreq_freqs *freq = data; - unsigned long *lpj; - lpj = &boot_cpu_data.loops_per_jiffy; -#ifdef CONFIG_SMP - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) - lpj = &cpu_data(freq->cpu).loops_per_jiffy; -#endif + if (num_online_cpus() > 1) { + mark_tsc_unstable("cpufreq changes on SMP"); + return 0; + } if (!ref_freq) { ref_freq = freq->old; - loops_per_jiffy_ref = *lpj; + loops_per_jiffy_ref = boot_cpu_data.loops_per_jiffy; tsc_khz_ref = tsc_khz; } + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || - (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { - *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { + boot_cpu_data.loops_per_jiffy = + cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); if (!(freq->flags & CPUFREQ_CONST_LOOPS)) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index bad8c51fee6e..a5127b2c195f 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -362,7 +362,7 @@ SECTIONS .bss : AT(ADDR(.bss) - LOAD_OFFSET) { __bss_start = .; *(.bss..page_aligned) - *(.bss) + *(BSS_MAIN) BSS_DECRYPTED . = ALIGN(PAGE_SIZE); __bss_stop = .; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index c338984c850d..d0d5dd44b4f4 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2331,24 +2331,18 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt) static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt) { +#ifdef CONFIG_X86_64 u32 eax, ebx, ecx, edx; eax = 0x80000001; ecx = 0; ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); return edx & bit(X86_FEATURE_LM); +#else + return false; +#endif } -#define GET_SMSTATE(type, smbase, offset) \ - ({ \ - type __val; \ - int r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val, \ - sizeof(__val)); \ - if (r != X86EMUL_CONTINUE) \ - return X86EMUL_UNHANDLEABLE; \ - __val; \ - }) - static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags) { desc->g = (flags >> 23) & 1; @@ -2361,27 +2355,30 @@ static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags) desc->type = (flags >> 8) & 15; } -static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, u64 smbase, int n) +static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate, + int n) { struct desc_struct desc; int offset; u16 selector; - selector = GET_SMSTATE(u32, smbase, 0x7fa8 + n * 4); + selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4); if (n < 3) offset = 0x7f84 + n * 12; else offset = 0x7f2c + (n - 3) * 12; - set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8)); - set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4)); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, offset)); + set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8)); + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4)); + rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset)); ctxt->ops->set_segment(ctxt, selector, &desc, 0, n); return X86EMUL_CONTINUE; } -static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n) +#ifdef CONFIG_X86_64 +static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate, + int n) { struct desc_struct desc; int offset; @@ -2390,15 +2387,16 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n) offset = 0x7e00 + n * 16; - selector = GET_SMSTATE(u16, smbase, offset); - rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smbase, offset + 2) << 8); - set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4)); - set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8)); - base3 = GET_SMSTATE(u32, smbase, offset + 12); + selector = GET_SMSTATE(u16, smstate, offset); + rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8); + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4)); + set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8)); + base3 = GET_SMSTATE(u32, smstate, offset + 12); ctxt->ops->set_segment(ctxt, selector, &desc, base3, n); return X86EMUL_CONTINUE; } +#endif static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, u64 cr0, u64 cr3, u64 cr4) @@ -2445,7 +2443,8 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; } -static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase) +static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, + const char *smstate) { struct desc_struct desc; struct desc_ptr dt; @@ -2453,53 +2452,55 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase) u32 val, cr0, cr3, cr4; int i; - cr0 = GET_SMSTATE(u32, smbase, 0x7ffc); - cr3 = GET_SMSTATE(u32, smbase, 0x7ff8); - ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED; - ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0); + cr0 = GET_SMSTATE(u32, smstate, 0x7ffc); + cr3 = GET_SMSTATE(u32, smstate, 0x7ff8); + ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED; + ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0); for (i = 0; i < 8; i++) - *reg_write(ctxt, i) = GET_SMSTATE(u32, smbase, 0x7fd0 + i * 4); + *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4); - val = GET_SMSTATE(u32, smbase, 0x7fcc); + val = GET_SMSTATE(u32, smstate, 0x7fcc); ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1); - val = GET_SMSTATE(u32, smbase, 0x7fc8); + val = GET_SMSTATE(u32, smstate, 0x7fc8); ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); - selector = GET_SMSTATE(u32, smbase, 0x7fc4); - set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f64)); - set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f60)); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f5c)); + selector = GET_SMSTATE(u32, smstate, 0x7fc4); + set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64)); + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60)); + rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c)); ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR); - selector = GET_SMSTATE(u32, smbase, 0x7fc0); - set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f80)); - set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f7c)); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f78)); + selector = GET_SMSTATE(u32, smstate, 0x7fc0); + set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80)); + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c)); + rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78)); ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR); - dt.address = GET_SMSTATE(u32, smbase, 0x7f74); - dt.size = GET_SMSTATE(u32, smbase, 0x7f70); + dt.address = GET_SMSTATE(u32, smstate, 0x7f74); + dt.size = GET_SMSTATE(u32, smstate, 0x7f70); ctxt->ops->set_gdt(ctxt, &dt); - dt.address = GET_SMSTATE(u32, smbase, 0x7f58); - dt.size = GET_SMSTATE(u32, smbase, 0x7f54); + dt.address = GET_SMSTATE(u32, smstate, 0x7f58); + dt.size = GET_SMSTATE(u32, smstate, 0x7f54); ctxt->ops->set_idt(ctxt, &dt); for (i = 0; i < 6; i++) { - int r = rsm_load_seg_32(ctxt, smbase, i); + int r = rsm_load_seg_32(ctxt, smstate, i); if (r != X86EMUL_CONTINUE) return r; } - cr4 = GET_SMSTATE(u32, smbase, 0x7f14); + cr4 = GET_SMSTATE(u32, smstate, 0x7f14); - ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8)); + ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8)); return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); } -static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) +#ifdef CONFIG_X86_64 +static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, + const char *smstate) { struct desc_struct desc; struct desc_ptr dt; @@ -2509,43 +2510,43 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) int i, r; for (i = 0; i < 16; i++) - *reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8); + *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8); - ctxt->_eip = GET_SMSTATE(u64, smbase, 0x7f78); - ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7f70) | X86_EFLAGS_FIXED; + ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78); + ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED; - val = GET_SMSTATE(u32, smbase, 0x7f68); + val = GET_SMSTATE(u32, smstate, 0x7f68); ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1); - val = GET_SMSTATE(u32, smbase, 0x7f60); + val = GET_SMSTATE(u32, smstate, 0x7f60); ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); - cr0 = GET_SMSTATE(u64, smbase, 0x7f58); - cr3 = GET_SMSTATE(u64, smbase, 0x7f50); - cr4 = GET_SMSTATE(u64, smbase, 0x7f48); - ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00)); - val = GET_SMSTATE(u64, smbase, 0x7ed0); + cr0 = GET_SMSTATE(u64, smstate, 0x7f58); + cr3 = GET_SMSTATE(u64, smstate, 0x7f50); + cr4 = GET_SMSTATE(u64, smstate, 0x7f48); + ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00)); + val = GET_SMSTATE(u64, smstate, 0x7ed0); ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA); - selector = GET_SMSTATE(u32, smbase, 0x7e90); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e92) << 8); - set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e94)); - set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e98)); - base3 = GET_SMSTATE(u32, smbase, 0x7e9c); + selector = GET_SMSTATE(u32, smstate, 0x7e90); + rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8); + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94)); + set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98)); + base3 = GET_SMSTATE(u32, smstate, 0x7e9c); ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR); - dt.size = GET_SMSTATE(u32, smbase, 0x7e84); - dt.address = GET_SMSTATE(u64, smbase, 0x7e88); + dt.size = GET_SMSTATE(u32, smstate, 0x7e84); + dt.address = GET_SMSTATE(u64, smstate, 0x7e88); ctxt->ops->set_idt(ctxt, &dt); - selector = GET_SMSTATE(u32, smbase, 0x7e70); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e72) << 8); - set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e74)); - set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e78)); - base3 = GET_SMSTATE(u32, smbase, 0x7e7c); + selector = GET_SMSTATE(u32, smstate, 0x7e70); + rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8); + set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74)); + set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78)); + base3 = GET_SMSTATE(u32, smstate, 0x7e7c); ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR); - dt.size = GET_SMSTATE(u32, smbase, 0x7e64); - dt.address = GET_SMSTATE(u64, smbase, 0x7e68); + dt.size = GET_SMSTATE(u32, smstate, 0x7e64); + dt.address = GET_SMSTATE(u64, smstate, 0x7e68); ctxt->ops->set_gdt(ctxt, &dt); r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); @@ -2553,37 +2554,49 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) return r; for (i = 0; i < 6; i++) { - r = rsm_load_seg_64(ctxt, smbase, i); + r = rsm_load_seg_64(ctxt, smstate, i); if (r != X86EMUL_CONTINUE) return r; } return X86EMUL_CONTINUE; } +#endif static int em_rsm(struct x86_emulate_ctxt *ctxt) { unsigned long cr0, cr4, efer; + char buf[512]; u64 smbase; int ret; if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0) return emulate_ud(ctxt); + smbase = ctxt->ops->get_smbase(ctxt); + + ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf)); + if (ret != X86EMUL_CONTINUE) + return X86EMUL_UNHANDLEABLE; + + if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0) + ctxt->ops->set_nmi_mask(ctxt, false); + + ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) & + ~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK)); + /* * Get back to real mode, to prepare a safe state in which to load * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU * supports long mode. */ - cr4 = ctxt->ops->get_cr(ctxt, 4); if (emulator_has_longmode(ctxt)) { struct desc_struct cs_desc; /* Zero CR4.PCIDE before CR0.PG. */ - if (cr4 & X86_CR4_PCIDE) { + cr4 = ctxt->ops->get_cr(ctxt, 4); + if (cr4 & X86_CR4_PCIDE) ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE); - cr4 &= ~X86_CR4_PCIDE; - } /* A 32-bit code segment is required to clear EFER.LMA. */ memset(&cs_desc, 0, sizeof(cs_desc)); @@ -2597,39 +2610,39 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) if (cr0 & X86_CR0_PE) ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE)); - /* Now clear CR4.PAE (which must be done before clearing EFER.LME). */ - if (cr4 & X86_CR4_PAE) - ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE); - - /* And finally go back to 32-bit mode. */ - efer = 0; - ctxt->ops->set_msr(ctxt, MSR_EFER, efer); + if (emulator_has_longmode(ctxt)) { + /* Clear CR4.PAE before clearing EFER.LME. */ + cr4 = ctxt->ops->get_cr(ctxt, 4); + if (cr4 & X86_CR4_PAE) + ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE); - smbase = ctxt->ops->get_smbase(ctxt); + /* And finally go back to 32-bit mode. */ + efer = 0; + ctxt->ops->set_msr(ctxt, MSR_EFER, efer); + } /* * Give pre_leave_smm() a chance to make ISA-specific changes to the * vCPU state (e.g. enter guest mode) before loading state from the SMM * state-save area. */ - if (ctxt->ops->pre_leave_smm(ctxt, smbase)) + if (ctxt->ops->pre_leave_smm(ctxt, buf)) return X86EMUL_UNHANDLEABLE; +#ifdef CONFIG_X86_64 if (emulator_has_longmode(ctxt)) - ret = rsm_load_state_64(ctxt, smbase + 0x8000); + ret = rsm_load_state_64(ctxt, buf); else - ret = rsm_load_state_32(ctxt, smbase + 0x8000); +#endif + ret = rsm_load_state_32(ctxt, buf); if (ret != X86EMUL_CONTINUE) { /* FIXME: should triple fault */ return X86EMUL_UNHANDLEABLE; } - if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0) - ctxt->ops->set_nmi_mask(ctxt, false); + ctxt->ops->post_leave_smm(ctxt); - ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) & - ~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK)); return X86EMUL_CONTINUE; } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 991fdf7fc17f..9bf70cf84564 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -138,6 +138,7 @@ static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map, if (offset <= max_apic_id) { u8 cluster_size = min(max_apic_id - offset + 1, 16U); + offset = array_index_nospec(offset, map->max_apic_id + 1); *cluster = &map->phys_map[offset]; *mask = dest_id & (0xffff >> (16 - cluster_size)); } else { @@ -901,7 +902,8 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, if (irq->dest_id > map->max_apic_id) { *bitmap = 0; } else { - *dst = &map->phys_map[irq->dest_id]; + u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1); + *dst = &map->phys_map[dest_id]; *bitmap = 1; } return true; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index eee455a8a612..e10962dfc203 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2007,7 +2007,7 @@ static int is_empty_shadow_page(u64 *spt) * aggregate version in order to make the slab shrinker * faster */ -static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr) +static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr) { kvm->arch.n_used_mmu_pages += nr; percpu_counter_add(&kvm_total_used_mmu_pages, nr); @@ -2238,7 +2238,7 @@ static bool kvm_mmu_remote_flush_or_zap(struct kvm *kvm, struct list_head *invalid_list, bool remote_flush) { - if (!remote_flush && !list_empty(invalid_list)) + if (!remote_flush && list_empty(invalid_list)) return false; if (!list_empty(invalid_list)) @@ -2763,7 +2763,7 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm, * Changing the number of mmu pages allocated to the vm * Note: if goal_nr_mmu_pages is too small, you will get dead lock */ -void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) +void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages) { LIST_HEAD(invalid_list); @@ -6031,10 +6031,10 @@ out: /* * Calculate mmu pages needed for kvm. */ -unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm) +unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm) { - unsigned int nr_mmu_pages; - unsigned int nr_pages = 0; + unsigned long nr_mmu_pages; + unsigned long nr_pages = 0; struct kvm_memslots *slots; struct kvm_memory_slot *memslot; int i; @@ -6047,8 +6047,7 @@ unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm) } nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; - nr_mmu_pages = max(nr_mmu_pages, - (unsigned int) KVM_MIN_ALLOC_MMU_PAGES); + nr_mmu_pages = max(nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES); return nr_mmu_pages; } diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index bbdc60f2fae8..54c2a377795b 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -64,7 +64,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu); int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, u64 fault_address, char *insn, int insn_len); -static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) +static inline unsigned long kvm_mmu_available_pages(struct kvm *kvm) { if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages) return kvm->arch.n_max_mmu_pages - diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 58ead7db71a3..e39741997893 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -281,9 +281,13 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) { bool fast_mode = idx & (1u << 31); + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct kvm_pmc *pmc; u64 ctr_val; + if (!pmu->version) + return 1; + if (is_vmware_backdoor_pmc(idx)) return kvm_pmu_rdpmc_vmware(vcpu, idx, data); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e0a791c3d4fc..406b558abfef 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -262,6 +262,7 @@ struct amd_svm_iommu_ir { }; #define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF) +#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31 #define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31) #define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL) @@ -2692,6 +2693,7 @@ static int npf_interception(struct vcpu_svm *svm) static int db_interception(struct vcpu_svm *svm) { struct kvm_run *kvm_run = svm->vcpu.run; + struct kvm_vcpu *vcpu = &svm->vcpu; if (!(svm->vcpu.guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && @@ -2702,6 +2704,8 @@ static int db_interception(struct vcpu_svm *svm) if (svm->nmi_singlestep) { disable_nmi_singlestep(svm); + /* Make sure we check for pending NMIs upon entry */ + kvm_make_request(KVM_REQ_EVENT, vcpu); } if (svm->vcpu.guest_debug & @@ -4517,14 +4521,25 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm) kvm_lapic_reg_write(apic, APIC_ICR, icrl); break; case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: { + int i; + struct kvm_vcpu *vcpu; + struct kvm *kvm = svm->vcpu.kvm; struct kvm_lapic *apic = svm->vcpu.arch.apic; /* - * Update ICR high and low, then emulate sending IPI, - * which is handled when writing APIC_ICR. + * At this point, we expect that the AVIC HW has already + * set the appropriate IRR bits on the valid target + * vcpus. So, we just need to kick the appropriate vcpu. */ - kvm_lapic_reg_write(apic, APIC_ICR2, icrh); - kvm_lapic_reg_write(apic, APIC_ICR, icrl); + kvm_for_each_vcpu(i, vcpu, kvm) { + bool m = kvm_apic_match_dest(vcpu, apic, + icrl & KVM_APIC_SHORT_MASK, + GET_APIC_DEST_FIELD(icrh), + icrl & KVM_APIC_DEST_MASK); + + if (m && !avic_vcpu_is_running(vcpu)) + kvm_vcpu_wake_up(vcpu); + } break; } case AVIC_IPI_FAILURE_INVALID_TARGET: @@ -4596,7 +4611,7 @@ static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu) u32 *entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat); if (entry) - WRITE_ONCE(*entry, (u32) ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK); + clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry); } static int avic_handle_ldr_update(struct kvm_vcpu *vcpu) @@ -5621,6 +5636,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) svm->vmcb->save.cr2 = vcpu->arch.cr2; clgi(); + kvm_load_guest_xcr0(vcpu); /* * If this vCPU has touched SPEC_CTRL, restore the guest's value if @@ -5766,6 +5782,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) kvm_before_interrupt(&svm->vcpu); + kvm_put_guest_xcr0(vcpu); stgi(); /* Any pending NMI will happen here */ @@ -6215,32 +6232,24 @@ static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) return 0; } -static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase) +static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *nested_vmcb; struct page *page; - struct { - u64 guest; - u64 vmcb; - } svm_state_save; - int ret; + u64 guest; + u64 vmcb; - ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfed8, &svm_state_save, - sizeof(svm_state_save)); - if (ret) - return ret; + guest = GET_SMSTATE(u64, smstate, 0x7ed8); + vmcb = GET_SMSTATE(u64, smstate, 0x7ee0); - if (svm_state_save.guest) { - vcpu->arch.hflags &= ~HF_SMM_MASK; - nested_vmcb = nested_svm_map(svm, svm_state_save.vmcb, &page); - if (nested_vmcb) - enter_svm_guest_mode(svm, svm_state_save.vmcb, nested_vmcb, page); - else - ret = 1; - vcpu->arch.hflags |= HF_SMM_MASK; + if (guest) { + nested_vmcb = nested_svm_map(svm, vmcb, &page); + if (!nested_vmcb) + return 1; + enter_svm_guest_mode(svm, vmcb, nested_vmcb, page); } - return ret; + return 0; } static int enable_smi_window(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 6432d08c7de7..4d47a2631d1f 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -438,13 +438,13 @@ TRACE_EVENT(kvm_apic_ipi, ); TRACE_EVENT(kvm_apic_accept_irq, - TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec), + TP_PROTO(__u32 apicid, __u16 dm, __u16 tm, __u8 vec), TP_ARGS(apicid, dm, tm, vec), TP_STRUCT__entry( __field( __u32, apicid ) __field( __u16, dm ) - __field( __u8, tm ) + __field( __u16, tm ) __field( __u8, vec ) ), diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 7ec9bb1dd723..6401eb7ef19c 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2873,20 +2873,27 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) /* * If translation failed, VM entry will fail because * prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull. - * Failing the vm entry is _not_ what the processor - * does but it's basically the only possibility we - * have. We could still enter the guest if CR8 load - * exits are enabled, CR8 store exits are enabled, and - * virtualize APIC access is disabled; in this case - * the processor would never use the TPR shadow and we - * could simply clear the bit from the execution - * control. But such a configuration is useless, so - * let's keep the code simple. */ if (!is_error_page(page)) { vmx->nested.virtual_apic_page = page; hpa = page_to_phys(vmx->nested.virtual_apic_page); vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa); + } else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) && + nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) && + !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { + /* + * The processor will never use the TPR shadow, simply + * clear the bit from the execution control. Such a + * configuration is useless, but it happens in tests. + * For any other configuration, failing the vm entry is + * _not_ what the processor does but it's basically the + * only possibility we have. + */ + vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, + CPU_BASED_TPR_SHADOW); + } else { + printk("bad virtual-APIC page address\n"); + dump_vmcs(); } } @@ -3789,8 +3796,18 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW)); nested_ept_uninit_mmu_context(vcpu); - vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); - __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); + + /* + * This is only valid if EPT is in use, otherwise the vmcs01 GUEST_CR3 + * points to shadow pages! Fortunately we only get here after a WARN_ON + * if EPT is disabled, so a VMabort is perfectly fine. + */ + if (enable_ept) { + vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); + __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); + } else { + nested_vmx_abort(vcpu, VMX_ABORT_VMCS_CORRUPTED); + } /* * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs @@ -5738,6 +5755,14 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) { int i; + /* + * Without EPT it is not possible to restore L1's CR3 and PDPTR on + * VMfail, because they are not available in vmcs01. Just always + * use hardware checks. + */ + if (!enable_ept) + nested_early_check = 1; + if (!cpu_has_vmx_shadow_vmcs()) enable_shadow_vmcs = 0; if (enable_shadow_vmcs) { diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ab432a930ae8..b4e7d645275a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5603,7 +5603,7 @@ static void vmx_dump_dtsel(char *name, uint32_t limit) vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT)); } -static void dump_vmcs(void) +void dump_vmcs(void) { u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS); u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS); @@ -6410,6 +6410,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) vmx_set_interrupt_shadow(vcpu, 0); + kvm_load_guest_xcr0(vcpu); + if (static_cpu_has(X86_FEATURE_PKU) && kvm_read_cr4_bits(vcpu, X86_CR4_PKE) && vcpu->arch.pkru != vmx->host_pkru) @@ -6506,6 +6508,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) __write_pkru(vmx->host_pkru); } + kvm_put_guest_xcr0(vcpu); + vmx->nested.nested_run_pending = 0; vmx->idt_vectoring_info = 0; @@ -6852,6 +6856,30 @@ static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu) } } +static bool guest_cpuid_has_pmu(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *entry; + union cpuid10_eax eax; + + entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); + if (!entry) + return false; + + eax.full = entry->eax; + return (eax.split.version_id > 0); +} + +static void nested_vmx_procbased_ctls_update(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + bool pmu_enabled = guest_cpuid_has_pmu(vcpu); + + if (pmu_enabled) + vmx->nested.msrs.procbased_ctls_high |= CPU_BASED_RDPMC_EXITING; + else + vmx->nested.msrs.procbased_ctls_high &= ~CPU_BASED_RDPMC_EXITING; +} + static void update_intel_pt_cfg(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6940,6 +6968,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) if (nested_vmx_allowed(vcpu)) { nested_vmx_cr_fixed1_bits_update(vcpu); nested_vmx_entry_exit_ctls_update(vcpu); + nested_vmx_procbased_ctls_update(vcpu); } if (boot_cpu_has(X86_FEATURE_INTEL_PT) && @@ -7369,7 +7398,7 @@ static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) return 0; } -static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase) +static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) { struct vcpu_vmx *vmx = to_vmx(vcpu); int ret; @@ -7380,9 +7409,7 @@ static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase) } if (vmx->nested.smm.guest_mode) { - vcpu->arch.hflags &= ~HF_SMM_MASK; ret = nested_vmx_enter_non_root_mode(vcpu, false); - vcpu->arch.hflags |= HF_SMM_MASK; if (ret) return ret; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index a1e00d0a2482..f879529906b4 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -517,4 +517,6 @@ static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx) vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio); } +void dump_vmcs(void); + #endif /* __KVM_X86_VMX_H */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 099b851dabaf..a0d1fc80ac5a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -800,7 +800,7 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) } EXPORT_SYMBOL_GPL(kvm_lmsw); -static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) +void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) { if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && !vcpu->guest_xcr0_loaded) { @@ -810,8 +810,9 @@ static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) vcpu->guest_xcr0_loaded = 1; } } +EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0); -static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) +void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) { if (vcpu->guest_xcr0_loaded) { if (vcpu->arch.xcr0 != host_xcr0) @@ -819,6 +820,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) vcpu->guest_xcr0_loaded = 0; } } +EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0); static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) { @@ -3093,7 +3095,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_NESTED_STATE: r = kvm_x86_ops->get_nested_state ? - kvm_x86_ops->get_nested_state(NULL, 0, 0) : 0; + kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0; break; default: break; @@ -3528,7 +3530,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, memset(&events->reserved, 0, sizeof(events->reserved)); } -static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags); +static void kvm_smm_changed(struct kvm_vcpu *vcpu); static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) @@ -3588,12 +3590,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.apic->sipi_vector = events->sipi_vector; if (events->flags & KVM_VCPUEVENT_VALID_SMM) { - u32 hflags = vcpu->arch.hflags; - if (events->smi.smm) - hflags |= HF_SMM_MASK; - else - hflags &= ~HF_SMM_MASK; - kvm_set_hflags(vcpu, hflags); + if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) { + if (events->smi.smm) + vcpu->arch.hflags |= HF_SMM_MASK; + else + vcpu->arch.hflags &= ~HF_SMM_MASK; + kvm_smm_changed(vcpu); + } vcpu->arch.smi_pending = events->smi.pending; @@ -4270,7 +4273,7 @@ static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm, } static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, - u32 kvm_nr_mmu_pages) + unsigned long kvm_nr_mmu_pages) { if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) return -EINVAL; @@ -4284,7 +4287,7 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, return 0; } -static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) +static unsigned long kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) { return kvm->arch.n_max_mmu_pages; } @@ -5958,12 +5961,18 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt) static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags) { - kvm_set_hflags(emul_to_vcpu(ctxt), emul_flags); + emul_to_vcpu(ctxt)->arch.hflags = emul_flags; +} + +static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, + const char *smstate) +{ + return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smstate); } -static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, u64 smbase) +static void emulator_post_leave_smm(struct x86_emulate_ctxt *ctxt) { - return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smbase); + kvm_smm_changed(emul_to_vcpu(ctxt)); } static const struct x86_emulate_ops emulate_ops = { @@ -6006,6 +6015,7 @@ static const struct x86_emulate_ops emulate_ops = { .get_hflags = emulator_get_hflags, .set_hflags = emulator_set_hflags, .pre_leave_smm = emulator_pre_leave_smm, + .post_leave_smm = emulator_post_leave_smm, }; static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) @@ -6247,16 +6257,6 @@ static void kvm_smm_changed(struct kvm_vcpu *vcpu) kvm_mmu_reset_context(vcpu); } -static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags) -{ - unsigned changed = vcpu->arch.hflags ^ emul_flags; - - vcpu->arch.hflags = emul_flags; - - if (changed & HF_SMM_MASK) - kvm_smm_changed(vcpu); -} - static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, unsigned long *db) { @@ -7441,9 +7441,9 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf) put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase); } +#ifdef CONFIG_X86_64 static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf) { -#ifdef CONFIG_X86_64 struct desc_ptr dt; struct kvm_segment seg; unsigned long val; @@ -7493,10 +7493,8 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf) for (i = 0; i < 6; i++) enter_smm_save_seg_64(vcpu, buf, i); -#else - WARN_ON_ONCE(1); -#endif } +#endif static void enter_smm(struct kvm_vcpu *vcpu) { @@ -7507,9 +7505,11 @@ static void enter_smm(struct kvm_vcpu *vcpu) trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true); memset(buf, 0, 512); +#ifdef CONFIG_X86_64 if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) enter_smm_save_state_64(vcpu, buf); else +#endif enter_smm_save_state_32(vcpu, buf); /* @@ -7567,8 +7567,10 @@ static void enter_smm(struct kvm_vcpu *vcpu) kvm_set_segment(vcpu, &ds, VCPU_SREG_GS); kvm_set_segment(vcpu, &ds, VCPU_SREG_SS); +#ifdef CONFIG_X86_64 if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) kvm_x86_ops->set_efer(vcpu, 0); +#endif kvm_update_cpuid(vcpu); kvm_mmu_reset_context(vcpu); @@ -7865,8 +7867,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) goto cancel_injection; } - kvm_load_guest_xcr0(vcpu); - if (req_immediate_exit) { kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_x86_ops->request_immediate_exit(vcpu); @@ -7919,8 +7919,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); - kvm_put_guest_xcr0(vcpu); - kvm_before_interrupt(vcpu); kvm_x86_ops->handle_external_intr(vcpu); kvm_after_interrupt(vcpu); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 28406aa1136d..aedc5d0d4989 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -347,4 +347,6 @@ static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu) __this_cpu_write(current_vcpu, NULL); } +void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu); +void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu); #endif diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index ee8f8ab46941..c0309ea9abee 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -259,7 +259,8 @@ static void note_wx(struct pg_state *st) #endif /* Account the WX pages */ st->wx_pages += npages; - WARN_ONCE(1, "x86/mm: Found insecure W+X mapping at address %pS\n", + WARN_ONCE(__supported_pte_mask & _PAGE_NX, + "x86/mm: Found insecure W+X mapping at address %pS\n", (void *)st->start_address); } diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index f905a2371080..8dacdb96899e 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -5,6 +5,7 @@ #include <linux/memblock.h> #include <linux/swapfile.h> #include <linux/swapops.h> +#include <linux/kmemleak.h> #include <asm/set_memory.h> #include <asm/e820/api.h> @@ -766,6 +767,11 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end) if (debug_pagealloc_enabled()) { pr_info("debug: unmapping init [mem %#010lx-%#010lx]\n", begin, end - 1); + /* + * Inform kmemleak about the hole in the memory since the + * corresponding pages will be unmapped. + */ + kmemleak_free_part((void *)begin, end - begin); set_memory_np(begin, (end - begin) >> PAGE_SHIFT); } else { /* diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 0029604af8a4..dd73d5d74393 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -825,7 +825,7 @@ void __init __early_set_fixmap(enum fixed_addresses idx, pte = early_ioremap_pte(addr); /* Sanitize 'prot' against any unsupported bits: */ - pgprot_val(flags) &= __default_kernel_pte_mask; + pgprot_val(flags) &= __supported_pte_mask; if (pgprot_val(flags)) set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 3f452ffed7e9..d669c5e797e0 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -94,7 +94,7 @@ void __init kernel_randomize_memory(void) if (!kaslr_memory_enabled()) return; - kaslr_regions[0].size_tb = 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT); + kaslr_regions[0].size_tb = 1 << (MAX_PHYSMEM_BITS - TB_SHIFT); kaslr_regions[1].size_tb = VMALLOC_SIZE_TB; /* diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index bc4bc7b2f075..487b8474c01c 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -728,7 +728,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, { int cpu; - struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = { + struct flush_tlb_info info = { .mm = mm, .stride_shift = stride_shift, .freed_tables = freed_tables, diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 6af49929de85..30084eaf8422 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -394,3 +394,7 @@ 421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait 422 common futex_time64 sys_futex 423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register |