Diffstat (limited to 'arch')
-rw-r--r--  arch/Kconfig | 41
-rw-r--r--  arch/alpha/include/uapi/asm/socket.h | 4
-rw-r--r--  arch/alpha/kernel/osf_sys.c | 53
-rw-r--r--  arch/arm/configs/lpc32xx_defconfig | 2
-rw-r--r--  arch/arm/crypto/aes-ce-glue.c | 6
-rw-r--r--  arch/arm/crypto/crc32-ce-glue.c | 6
-rw-r--r--  arch/arm/crypto/ghash-ce-glue.c | 6
-rw-r--r--  arch/arm/crypto/sha1-ce-glue.c | 5
-rw-r--r--  arch/arm/crypto/sha2-ce-glue.c | 5
-rw-r--r--  arch/arm/include/asm/assembler.h | 2
-rw-r--r--  arch/arm/include/asm/kvm_arm.h | 10
-rw-r--r--  arch/arm/include/asm/kvm_host.h | 28
-rw-r--r--  arch/arm/include/asm/system_misc.h | 5
-rw-r--r--  arch/arm/include/asm/xen/events.h | 2
-rw-r--r--  arch/arm/include/uapi/asm/kvm.h | 8
-rw-r--r--  arch/arm/kernel/entry-armv.S | 5
-rw-r--r--  arch/arm/kernel/perf_event_v6.c | 2
-rw-r--r--  arch/arm/kvm/guest.c | 51
-rw-r--r--  arch/arm/kvm/handle_exit.c | 1
-rw-r--r--  arch/arm/kvm/hyp/switch.c | 2
-rw-r--r--  arch/arm/kvm/reset.c | 16
-rw-r--r--  arch/arm/mm/proc-macros.S | 10
-rw-r--r--  arch/arm/xen/p2m.c | 10
-rw-r--r--  arch/arm64/Kconfig | 20
-rw-r--r--  arch/arm64/Makefile | 7
-rw-r--r--  arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts | 2
-rw-r--r--  arch/arm64/crypto/sha1-ce-core.S | 6
-rw-r--r--  arch/arm64/crypto/sha1-ce-glue.c | 11
-rw-r--r--  arch/arm64/crypto/sha2-ce-core.S | 6
-rw-r--r--  arch/arm64/crypto/sha2-ce-glue.c | 13
-rw-r--r--  arch/arm64/include/asm/arch_gicv3.h | 2
-rw-r--r--  arch/arm64/include/asm/checksum.h | 2
-rw-r--r--  arch/arm64/include/asm/cpucaps.h | 3
-rw-r--r--  arch/arm64/include/asm/cputype.h | 2
-rw-r--r--  arch/arm64/include/asm/dma-mapping.h | 2
-rw-r--r--  arch/arm64/include/asm/elf.h | 6
-rw-r--r--  arch/arm64/include/asm/esr.h | 25
-rw-r--r--  arch/arm64/include/asm/futex.h | 8
-rw-r--r--  arch/arm64/include/asm/kvm_arm.h | 10
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 6
-rw-r--r--  arch/arm64/include/asm/kvm_hyp.h | 1
-rw-r--r--  arch/arm64/include/asm/module.h | 3
-rw-r--r--  arch/arm64/include/asm/pgtable.h | 2
-rw-r--r--  arch/arm64/include/asm/processor.h | 3
-rw-r--r--  arch/arm64/include/asm/stacktrace.h | 1
-rw-r--r--  arch/arm64/include/asm/sysreg.h | 23
-rw-r--r--  arch/arm64/include/asm/system_misc.h | 4
-rw-r--r--  arch/arm64/include/uapi/asm/kvm.h | 3
-rw-r--r--  arch/arm64/include/uapi/asm/sigcontext.h | 55
-rw-r--r--  arch/arm64/kernel/Makefile | 3
-rw-r--r--  arch/arm64/kernel/acpi_parking_protocol.c | 6
-rw-r--r--  arch/arm64/kernel/alternative.c | 8
-rw-r--r--  arch/arm64/kernel/cpu_errata.c | 21
-rw-r--r--  arch/arm64/kernel/cpufeature.c | 25
-rw-r--r--  arch/arm64/kernel/cpuinfo.c | 2
-rw-r--r--  arch/arm64/kernel/debug-monitors.c | 14
-rw-r--r--  arch/arm64/kernel/ftrace-mod.S | 18
-rw-r--r--  arch/arm64/kernel/ftrace.c | 103
-rw-r--r--  arch/arm64/kernel/insn.c | 7
-rw-r--r--  arch/arm64/kernel/kaslr.c | 2
-rw-r--r--  arch/arm64/kernel/module.c | 20
-rw-r--r--  arch/arm64/kernel/pci.c | 3
-rw-r--r--  arch/arm64/kernel/perf_event.c | 2
-rw-r--r--  arch/arm64/kernel/probes/kprobes.c | 4
-rw-r--r--  arch/arm64/kernel/process.c | 9
-rw-r--r--  arch/arm64/kernel/ptrace.c | 39
-rw-r--r--  arch/arm64/kernel/setup.c | 3
-rw-r--r--  arch/arm64/kernel/signal.c | 413
-rw-r--r--  arch/arm64/kernel/stacktrace.c | 1
-rw-r--r--  arch/arm64/kernel/traps.c | 18
-rw-r--r--  arch/arm64/kernel/vdso.c | 10
-rw-r--r--  arch/arm64/kvm/guest.c | 9
-rw-r--r--  arch/arm64/kvm/handle_exit.c | 1
-rw-r--r--  arch/arm64/kvm/hyp/switch.c | 15
-rw-r--r--  arch/arm64/kvm/reset.c | 16
-rw-r--r--  arch/arm64/kvm/sys_regs.c | 27
-rw-r--r--  arch/arm64/kvm/vgic-sys-reg-v3.c | 45
-rw-r--r--  arch/arm64/mm/dma-mapping.c | 9
-rw-r--r--  arch/arm64/mm/fault.c | 215
-rw-r--r--  arch/arm64/mm/hugetlbpage.c | 29
-rw-r--r--  arch/arm64/mm/mmap.c | 19
-rw-r--r--  arch/arm64/mm/mmu.c | 1
-rw-r--r--  arch/arm64/net/bpf_jit_comp.c | 31
-rw-r--r--  arch/blackfin/configs/BF609-EZKIT_defconfig | 2
-rw-r--r--  arch/blackfin/mach-bf527/boards/tll6527m.c | 8
-rw-r--r--  arch/blackfin/mach-bf609/boards/ezkit.c | 4
-rw-r--r--  arch/frv/include/uapi/asm/socket.h | 4
-rw-r--r--  arch/ia64/include/uapi/asm/socket.h | 4
-rw-r--r--  arch/m32r/include/uapi/asm/socket.h | 4
-rw-r--r--  arch/mips/Kconfig | 1
-rw-r--r--  arch/mips/boot/dts/ingenic/ci20.dts | 60
-rw-r--r--  arch/mips/boot/dts/ingenic/jz4740.dtsi | 68
-rw-r--r--  arch/mips/boot/dts/ingenic/jz4780.dtsi | 98
-rw-r--r--  arch/mips/boot/dts/ingenic/qi_lb60.dts | 13
-rw-r--r--  arch/mips/include/asm/mach-jz4740/gpio.h | 371
-rw-r--r--  arch/mips/include/uapi/asm/socket.h | 4
-rw-r--r--  arch/mips/jz4740/Makefile | 2
-rw-r--r--  arch/mips/jz4740/board-qi_lb60.c | 48
-rw-r--r--  arch/mips/jz4740/gpio.c | 519
-rw-r--r--  arch/mips/kvm/trap_emul.c | 2
-rw-r--r--  arch/mips/kvm/vz.c | 2
-rw-r--r--  arch/mn10300/include/uapi/asm/socket.h | 4
-rw-r--r--  arch/parisc/include/asm/dma-mapping.h | 2
-rw-r--r--  arch/parisc/include/uapi/asm/socket.h | 4
-rw-r--r--  arch/powerpc/boot/dts/fsl/kmcent2.dts | 4
-rw-r--r--  arch/powerpc/include/asm/hvcall.h | 2
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s.h | 1
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_asm.h | 2
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h | 13
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h | 2
-rw-r--r--  arch/powerpc/include/asm/ppc-opcode.h | 2
-rw-r--r--  arch/powerpc/include/uapi/asm/kvm.h | 6
-rw-r--r--  arch/powerpc/include/uapi/asm/socket.h | 90
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 3
-rw-r--r--  arch/powerpc/kernel/mce.c | 1
-rw-r--r--  arch/powerpc/kernel/nvram_64.c | 14
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 511
-rw-r--r--  arch/powerpc/kvm/book3s_hv_builtin.c | 2
-rw-r--r--  arch/powerpc/kvm/book3s_hv_interrupts.S | 8
-rw-r--r--  arch/powerpc/kvm/book3s_hv_ras.c | 18
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 165
-rw-r--r--  arch/powerpc/kvm/book3s_xive.c | 4
-rw-r--r--  arch/powerpc/kvm/booke.c | 2
-rw-r--r--  arch/powerpc/kvm/emulate.c | 4
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 45
-rw-r--r--  arch/powerpc/net/bpf_jit_comp64.c | 3
-rw-r--r--  arch/s390/include/asm/ctl_reg.h | 4
-rw-r--r--  arch/s390/include/asm/diag.h | 26
-rw-r--r--  arch/s390/include/asm/kvm_host.h | 33
-rw-r--r--  arch/s390/include/asm/nmi.h | 6
-rw-r--r--  arch/s390/include/uapi/asm/kvm.h | 12
-rw-r--r--  arch/s390/include/uapi/asm/socket.h | 4
-rw-r--r--  arch/s390/kernel/diag.c | 29
-rw-r--r--  arch/s390/kvm/gaccess.c | 43
-rw-r--r--  arch/s390/kvm/interrupt.c | 91
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 373
-rw-r--r--  arch/s390/kvm/kvm-s390.h | 2
-rw-r--r--  arch/s390/kvm/priv.c | 103
-rw-r--r--  arch/s390/kvm/vsie.c | 25
-rw-r--r--  arch/s390/net/bpf_jit_comp.c | 3
-rw-r--r--  arch/sparc/include/uapi/asm/socket.h | 4
-rw-r--r--  arch/sparc/net/bpf_jit_comp_64.c | 12
-rw-r--r--  arch/x86/crypto/aes-x86_64-asm_64.S | 47
-rw-r--r--  arch/x86/crypto/aesni-intel_asm.S | 231
-rw-r--r--  arch/x86/crypto/aesni-intel_avx-x86_64.S | 283
-rw-r--r--  arch/x86/crypto/aesni-intel_glue.c | 208
-rw-r--r--  arch/x86/crypto/glue_helper.c | 3
-rw-r--r--  arch/x86/crypto/sha512-mb/sha512_mb.c | 7
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 50
-rw-r--r--  arch/x86/include/asm/msr-index.h | 2
-rw-r--r--  arch/x86/include/asm/xen/hypercall.h | 13
-rw-r--r--  arch/x86/kvm/cpuid.h | 8
-rw-r--r--  arch/x86/kvm/emulate.c | 84
-rw-r--r--  arch/x86/kvm/lapic.c | 116
-rw-r--r--  arch/x86/kvm/lapic.h | 2
-rw-r--r--  arch/x86/kvm/mmu.c | 155
-rw-r--r--  arch/x86/kvm/mmu.h | 2
-rw-r--r--  arch/x86/kvm/mmutrace.h | 6
-rw-r--r--  arch/x86/kvm/svm.c | 95
-rw-r--r--  arch/x86/kvm/vmx.c | 83
-rw-r--r--  arch/x86/kvm/x86.c | 14
-rw-r--r--  arch/x86/net/bpf_jit.S | 20
-rw-r--r--  arch/x86/net/bpf_jit_comp.c | 66
-rw-r--r--  arch/x86/xen/enlighten.c | 154
-rw-r--r--  arch/x86/xen/enlighten_hvm.c | 64
-rw-r--r--  arch/x86/xen/enlighten_pv.c | 89
-rw-r--r--  arch/x86/xen/setup.c | 7
-rw-r--r--  arch/x86/xen/smp.c | 31
-rw-r--r--  arch/x86/xen/smp.h | 2
-rw-r--r--  arch/x86/xen/smp_hvm.c | 14
-rw-r--r--  arch/x86/xen/smp_pv.c | 6
-rw-r--r--  arch/x86/xen/suspend_hvm.c | 11
-rw-r--r--  arch/x86/xen/xen-ops.h | 3
-rw-r--r--  arch/xtensa/include/uapi/asm/socket.h | 4
174 files changed, 4146 insertions, 2284 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index f76b214cf7ad..dc26b6d9175e 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -425,7 +425,7 @@ config GCC_PLUGIN_STRUCTLEAK
bool "Force initialization of variables containing userspace addresses"
depends on GCC_PLUGINS
help
- This plugin zero-initializes any structures that containing a
+ This plugin zero-initializes any structures containing a
__user attribute. This can prevent some classes of information
exposures.
@@ -443,6 +443,45 @@ config GCC_PLUGIN_STRUCTLEAK_VERBOSE
initialized. Since not all existing initializers are detected
by the plugin, this can produce false positive warnings.
+config GCC_PLUGIN_RANDSTRUCT
+ bool "Randomize layout of sensitive kernel structures"
+ depends on GCC_PLUGINS
+ select MODVERSIONS if MODULES
+ help
+ If you say Y here, the layouts of structures explicitly
+ marked by __randomize_layout will be randomized at
+ compile-time. This can introduce the requirement of an
+ additional information exposure vulnerability for exploits
+ targeting these structure types.
+
+ Enabling this feature will introduce some performance impact,
+ slightly increase memory usage, and prevent the use of forensic
+ tools like Volatility against the system (unless the kernel
+ source tree isn't cleaned after kernel installation).
+
+ The seed used for compilation is located at
+ scripts/gcc-plugins/randomize_layout_seed.h. It remains after
+ a make clean to allow for external modules to be compiled with
+ the existing seed and will be removed by a make mrproper or
+ make distclean.
+
+ Note that the implementation requires gcc 4.7 or newer.
+
+ This plugin was ported from grsecurity/PaX. More information at:
+ * https://grsecurity.net/
+ * https://pax.grsecurity.net/
+
+config GCC_PLUGIN_RANDSTRUCT_PERFORMANCE
+ bool "Use cacheline-aware structure randomization"
+ depends on GCC_PLUGIN_RANDSTRUCT
+ depends on !COMPILE_TEST
+ help
+ If you say Y here, the RANDSTRUCT randomization will make a
+ best effort at restricting randomization to cacheline-sized
+ groups of elements. It will further not randomize bitfields
+ in structures. This reduces the performance hit of RANDSTRUCT
+ at the cost of weakened randomization.
+
config HAVE_CC_STACKPROTECTOR
bool
help
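For context: GCC_PLUGIN_RANDSTRUCT, as described in the help text above, only acts on structures explicitly annotated with __randomize_layout (and __no_randomize_layout keeps a layout fixed). The sketch below is illustrative only and not part of this patch; the demo_* structure names are made up, and the fallback defines merely let the snippet compile outside a kernel tree, where the real annotations come from the kernel's compiler headers.

/* Illustrative sketch, not part of this patch. */
#ifndef __randomize_layout
#define __randomize_layout		/* provided by kernel compiler headers */
#define __no_randomize_layout
#endif

struct demo_private {			/* hypothetical structure */
	unsigned long	flags;
	void		*security;
	int		(*op)(void *arg);
} __randomize_layout;			/* field order shuffled at compile time */

struct demo_hw_desc {			/* layout is ABI: keep it fixed */
	unsigned int	addr;
	unsigned int	len;
} __no_randomize_layout;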
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 148d7a32754e..7b285dd4fe05 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -105,4 +105,8 @@
#define SO_COOKIE 57
+#define SCM_TIMESTAMPING_PKTINFO 58
+
+#define SO_PEERGROUPS 59
+
#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index ce93124a850b..b23d6fbbb225 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1183,48 +1183,23 @@ SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru)
SYSCALL_DEFINE4(osf_wait4, pid_t, pid, int __user *, ustatus, int, options,
struct rusage32 __user *, ur)
{
- struct rusage r;
- long ret, err;
unsigned int status = 0;
- mm_segment_t old_fs;
-
+ struct rusage r;
+ long err = kernel_wait4(pid, &status, options, &r);
+ if (err <= 0)
+ return err;
+ if (put_user(status, ustatus))
+ return -EFAULT;
if (!ur)
- return sys_wait4(pid, ustatus, options, NULL);
-
- old_fs = get_fs();
-
- set_fs (KERNEL_DS);
- ret = sys_wait4(pid, (unsigned int __user *) &status, options,
- (struct rusage __user *) &r);
- set_fs (old_fs);
-
- if (!access_ok(VERIFY_WRITE, ur, sizeof(*ur)))
+ return err;
+ if (put_tv32(&ur->ru_utime, &r.ru_utime))
return -EFAULT;
-
- err = put_user(status, ustatus);
- if (ret < 0)
- return err ? err : ret;
-
- err |= __put_user(r.ru_utime.tv_sec, &ur->ru_utime.tv_sec);
- err |= __put_user(r.ru_utime.tv_usec, &ur->ru_utime.tv_usec);
- err |= __put_user(r.ru_stime.tv_sec, &ur->ru_stime.tv_sec);
- err |= __put_user(r.ru_stime.tv_usec, &ur->ru_stime.tv_usec);
- err |= __put_user(r.ru_maxrss, &ur->ru_maxrss);
- err |= __put_user(r.ru_ixrss, &ur->ru_ixrss);
- err |= __put_user(r.ru_idrss, &ur->ru_idrss);
- err |= __put_user(r.ru_isrss, &ur->ru_isrss);
- err |= __put_user(r.ru_minflt, &ur->ru_minflt);
- err |= __put_user(r.ru_majflt, &ur->ru_majflt);
- err |= __put_user(r.ru_nswap, &ur->ru_nswap);
- err |= __put_user(r.ru_inblock, &ur->ru_inblock);
- err |= __put_user(r.ru_oublock, &ur->ru_oublock);
- err |= __put_user(r.ru_msgsnd, &ur->ru_msgsnd);
- err |= __put_user(r.ru_msgrcv, &ur->ru_msgrcv);
- err |= __put_user(r.ru_nsignals, &ur->ru_nsignals);
- err |= __put_user(r.ru_nvcsw, &ur->ru_nvcsw);
- err |= __put_user(r.ru_nivcsw, &ur->ru_nivcsw);
-
- return err ? err : ret;
+ if (put_tv32(&ur->ru_stime, &r.ru_stime))
+ return -EFAULT;
+ if (copy_to_user(&ur->ru_maxrss, &r.ru_maxrss,
+ sizeof(struct rusage32) - offsetof(struct rusage32, ru_maxrss)))
+ return -EFAULT;
+ return err;
}
/*
diff --git a/arch/arm/configs/lpc32xx_defconfig b/arch/arm/configs/lpc32xx_defconfig
index 6ba430d2b5b2..e15fa5f168bb 100644
--- a/arch/arm/configs/lpc32xx_defconfig
+++ b/arch/arm/configs/lpc32xx_defconfig
@@ -112,7 +112,7 @@ CONFIG_GPIO_SX150X=y
CONFIG_GPIO_74X164=y
CONFIG_GPIO_MAX7301=y
CONFIG_GPIO_MC33880=y
-CONFIG_GPIO_MCP23S08=y
+CONFIG_PINCTRL_MCP23S08=y
CONFIG_SENSORS_DS620=y
CONFIG_SENSORS_MAX6639=y
CONFIG_WATCHDOG=y
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index 883b84d828c5..0f966a8ca1ce 100644
--- a/arch/arm/crypto/aes-ce-glue.c
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -14,6 +14,7 @@
#include <crypto/aes.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
+#include <linux/cpufeature.h>
#include <linux/module.h>
#include <crypto/xts.h>
@@ -425,9 +426,6 @@ static int __init aes_init(void)
int err;
int i;
- if (!(elf_hwcap2 & HWCAP2_AES))
- return -ENODEV;
-
err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
if (err)
return err;
@@ -451,5 +449,5 @@ unregister_simds:
return err;
}
-module_init(aes_init);
+module_cpu_feature_match(AES, aes_init);
module_exit(aes_exit);
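The hunk above drops the open-coded elf_hwcap2 check in favour of module_cpu_feature_match(), which gates the init routine on the CPU feature and also records a CPU-feature module alias so the module can be autoloaded on matching hardware; the same conversion is applied to the ghash and SHA glue code below. A minimal sketch of the resulting pattern (illustrative only; the demo_* names are made up):

#include <linux/cpufeature.h>	/* module_cpu_feature_match() */
#include <linux/module.h>

static int __init demo_ce_init(void)
{
	/*
	 * No explicit elf_hwcap2/HWCAP2_AES test needed: this init only
	 * runs when the CPU advertises the AES feature.
	 */
	return 0;
}

static void __exit demo_ce_exit(void)
{
}

module_cpu_feature_match(AES, demo_ce_init);	/* replaces module_init() */
module_exit(demo_ce_exit);
MODULE_LICENSE("GPL");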
diff --git a/arch/arm/crypto/crc32-ce-glue.c b/arch/arm/crypto/crc32-ce-glue.c
index e1566bec1016..1b0e0e86ee9c 100644
--- a/arch/arm/crypto/crc32-ce-glue.c
+++ b/arch/arm/crypto/crc32-ce-glue.c
@@ -8,6 +8,7 @@
* published by the Free Software Foundation.
*/
+#include <linux/cpufeature.h>
#include <linux/crc32.h>
#include <linux/init.h>
#include <linux/kernel.h>
@@ -233,6 +234,11 @@ static void __exit crc32_pmull_mod_exit(void)
ARRAY_SIZE(crc32_pmull_algs));
}
+static const struct cpu_feature crc32_cpu_feature[] = {
+ { cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { }
+};
+MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature);
+
module_init(crc32_pmull_mod_init);
module_exit(crc32_pmull_mod_exit);
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
index 7546b3c02466..6bac8bea9f1e 100644
--- a/arch/arm/crypto/ghash-ce-glue.c
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -15,6 +15,7 @@
#include <crypto/cryptd.h>
#include <crypto/internal/hash.h>
#include <crypto/gf128mul.h>
+#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
@@ -311,9 +312,6 @@ static int __init ghash_ce_mod_init(void)
{
int err;
- if (!(elf_hwcap2 & HWCAP2_PMULL))
- return -ENODEV;
-
err = crypto_register_shash(&ghash_alg);
if (err)
return err;
@@ -334,5 +332,5 @@ static void __exit ghash_ce_mod_exit(void)
crypto_unregister_shash(&ghash_alg);
}
-module_init(ghash_ce_mod_init);
+module_cpu_feature_match(PMULL, ghash_ce_mod_init);
module_exit(ghash_ce_mod_exit);
diff --git a/arch/arm/crypto/sha1-ce-glue.c b/arch/arm/crypto/sha1-ce-glue.c
index 80bc2fcd241a..555f72b5e659 100644
--- a/arch/arm/crypto/sha1-ce-glue.c
+++ b/arch/arm/crypto/sha1-ce-glue.c
@@ -11,6 +11,7 @@
#include <crypto/internal/hash.h>
#include <crypto/sha.h>
#include <crypto/sha1_base.h>
+#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
@@ -82,8 +83,6 @@ static struct shash_alg alg = {
static int __init sha1_ce_mod_init(void)
{
- if (!(elf_hwcap2 & HWCAP2_SHA1))
- return -ENODEV;
return crypto_register_shash(&alg);
}
@@ -92,5 +91,5 @@ static void __exit sha1_ce_mod_fini(void)
crypto_unregister_shash(&alg);
}
-module_init(sha1_ce_mod_init);
+module_cpu_feature_match(SHA1, sha1_ce_mod_init);
module_exit(sha1_ce_mod_fini);
diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c
index 0755b2d657f3..df4dcef054ae 100644
--- a/arch/arm/crypto/sha2-ce-glue.c
+++ b/arch/arm/crypto/sha2-ce-glue.c
@@ -11,6 +11,7 @@
#include <crypto/internal/hash.h>
#include <crypto/sha.h>
#include <crypto/sha256_base.h>
+#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
@@ -100,8 +101,6 @@ static struct shash_alg algs[] = { {
static int __init sha2_ce_mod_init(void)
{
- if (!(elf_hwcap2 & HWCAP2_SHA2))
- return -ENODEV;
return crypto_register_shashes(algs, ARRAY_SIZE(algs));
}
@@ -110,5 +109,5 @@ static void __exit sha2_ce_mod_fini(void)
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}
-module_init(sha2_ce_mod_init);
+module_cpu_feature_match(SHA2, sha2_ce_mod_init);
module_exit(sha2_ce_mod_fini);
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 68b06f9c65de..ad301f107dd2 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -87,6 +87,8 @@
#define CALGN(code...)
#endif
+#define IMM12_MASK 0xfff
+
/*
* Enable and disable interrupts
*/
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index a3f0b3d50089..ebf020b02bc8 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -187,6 +187,16 @@
#define FSC_FAULT (0x04)
#define FSC_ACCESS (0x08)
#define FSC_PERM (0x0c)
+#define FSC_SEA (0x10)
+#define FSC_SEA_TTW0 (0x14)
+#define FSC_SEA_TTW1 (0x15)
+#define FSC_SEA_TTW2 (0x16)
+#define FSC_SEA_TTW3 (0x17)
+#define FSC_SECC (0x18)
+#define FSC_SECC_TTW0 (0x1c)
+#define FSC_SECC_TTW1 (0x1d)
+#define FSC_SECC_TTW2 (0x1e)
+#define FSC_SECC_TTW3 (0x1f)
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
#define HPFAR_MASK (~0xf)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index f0e66577ce05..127e2dd2e21c 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -44,7 +44,9 @@
#define KVM_MAX_VCPUS VGIC_V2_MAX_CPUS
#endif
-#define KVM_REQ_VCPU_EXIT (8 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_SLEEP \
+ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
int __attribute_const__ kvm_target_cpu(void);
@@ -233,8 +235,6 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
void kvm_arm_halt_guest(struct kvm *kvm);
void kvm_arm_resume_guest(struct kvm *kvm);
-void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu);
-void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu);
int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu);
@@ -291,20 +291,12 @@ static inline void kvm_arm_init_debug(void) {}
static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
-static inline int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
- struct kvm_device_attr *attr)
-{
- return -ENXIO;
-}
-static inline int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
- struct kvm_device_attr *attr)
-{
- return -ENXIO;
-}
-static inline int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
- struct kvm_device_attr *attr)
-{
- return -ENXIO;
-}
+
+int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
+int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
+int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/system_misc.h b/arch/arm/include/asm/system_misc.h
index a3d61ad984af..8c4a89f5ce7d 100644
--- a/arch/arm/include/asm/system_misc.h
+++ b/arch/arm/include/asm/system_misc.h
@@ -22,6 +22,11 @@ extern void (*arm_pm_idle)(void);
extern unsigned int user_debug;
+static inline int handle_guest_sea(phys_addr_t addr, unsigned int esr)
+{
+ return -1;
+}
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_ARM_SYSTEM_MISC_H */
diff --git a/arch/arm/include/asm/xen/events.h b/arch/arm/include/asm/xen/events.h
index 71e473d05fcc..620dc75362e5 100644
--- a/arch/arm/include/asm/xen/events.h
+++ b/arch/arm/include/asm/xen/events.h
@@ -16,7 +16,7 @@ static inline int xen_irqs_disabled(struct pt_regs *regs)
return raw_irqs_disabled_flags(regs->ARM_cpsr);
}
-#define xchg_xen_ulong(ptr, val) atomic64_xchg(container_of((ptr), \
+#define xchg_xen_ulong(ptr, val) atomic64_xchg(container_of((long long*)(ptr),\
atomic64_t, \
counter), (val))
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 5e3c673fa3f4..5db2d4c6a55f 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -203,6 +203,14 @@ struct kvm_arch_memory_slot {
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
#define VGIC_LEVEL_INFO_LINE_LEVEL 0
+/* Device Control API on vcpu fd */
+#define KVM_ARM_VCPU_PMU_V3_CTRL 0
+#define KVM_ARM_VCPU_PMU_V3_IRQ 0
+#define KVM_ARM_VCPU_PMU_V3_INIT 1
+#define KVM_ARM_VCPU_TIMER_CTRL 1
+#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
+#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
+
#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
#define KVM_DEV_ARM_ITS_SAVE_TABLES 1
#define KVM_DEV_ARM_ITS_RESTORE_TABLES 2
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 9f157e7c51e7..c731f0d2b2af 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -797,7 +797,10 @@ ENTRY(__switch_to)
#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
ldr r7, [r2, #TI_TASK]
ldr r8, =__stack_chk_guard
- ldr r7, [r7, #TSK_STACK_CANARY]
+ .if (TSK_STACK_CANARY > IMM12_MASK)
+ add r7, r7, #TSK_STACK_CANARY & ~IMM12_MASK
+ .endif
+ ldr r7, [r7, #TSK_STACK_CANARY & IMM12_MASK]
#endif
#ifdef CONFIG_CPU_USE_DOMAINS
mcr p15, 0, r6, c3, c0, 0 @ Set domain register
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index 96b7a477a8db..8226d0b71fd3 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -552,7 +552,7 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu)
return 0;
}
-static struct of_device_id armv6_pmu_of_device_ids[] = {
+static const struct of_device_id armv6_pmu_of_device_ids[] = {
{.compatible = "arm,arm11mpcore-pmu", .data = armv6mpcore_pmu_init},
{.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init},
{.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init},
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index fa6182a40941..1e0784ebbfd6 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -301,3 +301,54 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
{
return -EINVAL;
}
+
+int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret;
+
+ switch (attr->group) {
+ case KVM_ARM_VCPU_TIMER_CTRL:
+ ret = kvm_arm_timer_set_attr(vcpu, attr);
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+
+ return ret;
+}
+
+int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret;
+
+ switch (attr->group) {
+ case KVM_ARM_VCPU_TIMER_CTRL:
+ ret = kvm_arm_timer_get_attr(vcpu, attr);
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+
+ return ret;
+}
+
+int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret;
+
+ switch (attr->group) {
+ case KVM_ARM_VCPU_TIMER_CTRL:
+ ret = kvm_arm_timer_has_attr(vcpu, attr);
+ break;
+ default:
+ ret = -ENXIO;
+ break;
+ }
+
+ return ret;
+}
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index f86a9aaef462..54442e375354 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -72,6 +72,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
trace_kvm_wfx(*vcpu_pc(vcpu), false);
vcpu->stat.wfi_exit_stat++;
kvm_vcpu_block(vcpu);
+ kvm_clear_request(KVM_REQ_UNHALT, vcpu);
}
kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
index 624a510d31df..ebd2dd46adf7 100644
--- a/arch/arm/kvm/hyp/switch.c
+++ b/arch/arm/kvm/hyp/switch.c
@@ -237,8 +237,10 @@ void __hyp_text __noreturn __hyp_panic(int cause)
vcpu = (struct kvm_vcpu *)read_sysreg(HTPIDR);
host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ __timer_save_state(vcpu);
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
+ __banked_restore_state(host_ctxt);
__sysreg_restore_state(host_ctxt);
}
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index 1da8b2d14550..5ed0c3ee33d6 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -37,16 +37,6 @@ static struct kvm_regs cortexa_regs_reset = {
.usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
};
-static const struct kvm_irq_level cortexa_ptimer_irq = {
- { .irq = 30 },
- .level = 1,
-};
-
-static const struct kvm_irq_level cortexa_vtimer_irq = {
- { .irq = 27 },
- .level = 1,
-};
-
/*******************************************************************************
* Exported reset function
@@ -62,16 +52,12 @@ static const struct kvm_irq_level cortexa_vtimer_irq = {
int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
{
struct kvm_regs *reset_regs;
- const struct kvm_irq_level *cpu_vtimer_irq;
- const struct kvm_irq_level *cpu_ptimer_irq;
switch (vcpu->arch.target) {
case KVM_ARM_TARGET_CORTEX_A7:
case KVM_ARM_TARGET_CORTEX_A15:
reset_regs = &cortexa_regs_reset;
vcpu->arch.midr = read_cpuid_id();
- cpu_vtimer_irq = &cortexa_vtimer_irq;
- cpu_ptimer_irq = &cortexa_ptimer_irq;
break;
default:
return -ENODEV;
@@ -84,5 +70,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
kvm_reset_coprocs(vcpu);
/* Reset arch_timer context */
- return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq, cpu_ptimer_irq);
+ return kvm_timer_vcpu_reset(vcpu);
}
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index 0d40c285bd86..f944836da8a2 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -25,11 +25,6 @@
ldr \rd, [\rn, #VMA_VM_FLAGS]
.endm
- .macro tsk_mm, rd, rn
- ldr \rd, [\rn, #TI_TASK]
- ldr \rd, [\rd, #TSK_ACTIVE_MM]
- .endm
-
/*
* act_mm - get current->active_mm
*/
@@ -37,7 +32,10 @@
bic \rd, sp, #8128
bic \rd, \rd, #63
ldr \rd, [\rd, #TI_TASK]
- ldr \rd, [\rd, #TSK_ACTIVE_MM]
+ .if (TSK_ACTIVE_MM > IMM12_MASK)
+ add \rd, \rd, #TSK_ACTIVE_MM & ~IMM12_MASK
+ .endif
+ ldr \rd, [\rd, #TSK_ACTIVE_MM & IMM12_MASK]
.endm
/*
diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c
index 0ed01f2d5ee4..e71eefa2e427 100644
--- a/arch/arm/xen/p2m.c
+++ b/arch/arm/xen/p2m.c
@@ -144,17 +144,17 @@ bool __set_phys_to_machine_multi(unsigned long pfn,
return true;
}
- p2m_entry = kzalloc(sizeof(struct xen_p2m_entry), GFP_NOWAIT);
- if (!p2m_entry) {
- pr_warn("cannot allocate xen_p2m_entry\n");
+ p2m_entry = kzalloc(sizeof(*p2m_entry), GFP_NOWAIT);
+ if (!p2m_entry)
return false;
- }
+
p2m_entry->pfn = pfn;
p2m_entry->nr_pages = nr_pages;
p2m_entry->mfn = mfn;
write_lock_irqsave(&p2m_lock, irqflags);
- if ((rc = xen_add_phys_to_mach_entry(p2m_entry)) < 0) {
+ rc = xen_add_phys_to_mach_entry(p2m_entry);
+ if (rc < 0) {
write_unlock_irqrestore(&p2m_lock, irqflags);
return false;
}
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 300146dc8433..192208ea2842 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -3,6 +3,7 @@ config ARM64
select ACPI_CCA_REQUIRED if ACPI
select ACPI_GENERIC_GSI if ACPI
select ACPI_GTDT if ACPI
+ select ACPI_IORT if ACPI
select ACPI_REDUCED_HARDWARE_ONLY if ACPI
select ACPI_MCFG if ACPI
select ACPI_SPCR_TABLE if ACPI
@@ -19,7 +20,9 @@ config ARM64
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
+ select ARCH_HAVE_NMI_SAFE_CMPXCHG if ACPI_APEI_SEA
select ARCH_USE_CMPXCHG_LOCKREF
+ select ARCH_SUPPORTS_MEMORY_FAILURE
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
@@ -93,6 +96,7 @@ config ARM64
select HAVE_IRQ_TIME_ACCOUNTING
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP if NUMA
+ select HAVE_NMI if ACPI_APEI_SEA
select HAVE_PATA_PLATFORM
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
@@ -245,6 +249,9 @@ config PGTABLE_LEVELS
config ARCH_SUPPORTS_UPROBES
def_bool y
+config ARCH_PROC_KCORE_TEXT
+ def_bool y
+
source "init/Kconfig"
source "kernel/Kconfig.freezer"
@@ -481,6 +488,17 @@ config CAVIUM_ERRATUM_27456
If unsure, say Y.
+config CAVIUM_ERRATUM_30115
+ bool "Cavium erratum 30115: Guest may disable interrupts in host"
+ default y
+ help
+ On ThunderX T88 pass 1.x through 2.2, T81 pass 1.0 through
+ 1.2, and T83 Pass 1.0, KVM guest execution may disable
+ interrupts in host. Trapping both GICv3 group-0 and group-1
+ accesses sidesteps the issue.
+
+ If unsure, say Y.
+
config QCOM_FALKOR_ERRATUM_1003
bool "Falkor E1003: Incorrect translation due to ASID change"
default y
@@ -983,7 +1001,7 @@ config RANDOMIZE_BASE
config RANDOMIZE_MODULE_REGION_FULL
bool "Randomize the module region independently from the core kernel"
- depends on RANDOMIZE_BASE && !DYNAMIC_FTRACE
+ depends on RANDOMIZE_BASE
default y
help
Randomizes the location of the module region without considering the
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index f839ecd919f9..9b41f1e3b1a0 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -52,17 +52,19 @@ KBUILD_AFLAGS += $(lseinstr) $(brokengasinst)
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS += -mbig-endian
+CHECKFLAGS += -D__AARCH64EB__
AS += -EB
LD += -EB
UTS_MACHINE := aarch64_be
else
KBUILD_CPPFLAGS += -mlittle-endian
+CHECKFLAGS += -D__AARCH64EL__
AS += -EL
LD += -EL
UTS_MACHINE := aarch64
endif
-CHECKFLAGS += -D__aarch64__
+CHECKFLAGS += -D__aarch64__ -m64
ifeq ($(CONFIG_ARM64_MODULE_CMODEL_LARGE), y)
KBUILD_CFLAGS_MODULE += -mcmodel=large
@@ -70,6 +72,9 @@ endif
ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds
+ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
+KBUILD_LDFLAGS_MODULE += $(objtree)/arch/arm64/kernel/ftrace-mod.o
+endif
endif
# Default value
diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
index afae4de6e53f..2b526304ed27 100644
--- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
+++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts
@@ -141,6 +141,8 @@
bluetooth {
compatible = "ti,wl1835-st";
enable-gpios = <&gpio1 7 GPIO_ACTIVE_HIGH>;
+ clocks = <&pmic>;
+ clock-names = "ext_clock";
};
};
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
index c98e7e849f06..8550408735a0 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -82,7 +82,8 @@ ENTRY(sha1_ce_transform)
ldr dgb, [x0, #16]
/* load sha1_ce_state::finalize */
- ldr w4, [x0, #:lo12:sha1_ce_offsetof_finalize]
+ ldr_l w4, sha1_ce_offsetof_finalize, x4
+ ldr w4, [x0, x4]
/* load input */
0: ld1 {v8.4s-v11.4s}, [x1], #64
@@ -132,7 +133,8 @@ CPU_LE( rev32 v11.16b, v11.16b )
* the padding is handled by the C code in that case.
*/
cbz x4, 3f
- ldr x4, [x0, #:lo12:sha1_ce_offsetof_count]
+ ldr_l w4, sha1_ce_offsetof_count, x4
+ ldr x4, [x0, x4]
movi v9.2d, #0
mov x8, #0x80000000
movi v10.2d, #0
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index aefda9868627..ea319c055f5d 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -17,9 +17,6 @@
#include <linux/crypto.h>
#include <linux/module.h>
-#define ASM_EXPORT(sym, val) \
- asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
-
MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
@@ -32,6 +29,9 @@ struct sha1_ce_state {
asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
int blocks);
+const u32 sha1_ce_offsetof_count = offsetof(struct sha1_ce_state, sst.count);
+const u32 sha1_ce_offsetof_finalize = offsetof(struct sha1_ce_state, finalize);
+
static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
@@ -52,11 +52,6 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
struct sha1_ce_state *sctx = shash_desc_ctx(desc);
bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
- ASM_EXPORT(sha1_ce_offsetof_count,
- offsetof(struct sha1_ce_state, sst.count));
- ASM_EXPORT(sha1_ce_offsetof_finalize,
- offsetof(struct sha1_ce_state, finalize));
-
/*
* Allow the asm code to perform the finalization if there is no
* partial data and the input is a round multiple of the block size.
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
index 01cfee066837..679c6c002f4f 100644
--- a/arch/arm64/crypto/sha2-ce-core.S
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -88,7 +88,8 @@ ENTRY(sha2_ce_transform)
ld1 {dgav.4s, dgbv.4s}, [x0]
/* load sha256_ce_state::finalize */
- ldr w4, [x0, #:lo12:sha256_ce_offsetof_finalize]
+ ldr_l w4, sha256_ce_offsetof_finalize, x4
+ ldr w4, [x0, x4]
/* load input */
0: ld1 {v16.4s-v19.4s}, [x1], #64
@@ -136,7 +137,8 @@ CPU_LE( rev32 v19.16b, v19.16b )
* the padding is handled by the C code in that case.
*/
cbz x4, 3f
- ldr x4, [x0, #:lo12:sha256_ce_offsetof_count]
+ ldr_l w4, sha256_ce_offsetof_count, x4
+ ldr x4, [x0, x4]
movi v17.2d, #0
mov x8, #0x80000000
movi v18.2d, #0
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index 7cd587564a41..0ed9486f75dd 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -17,9 +17,6 @@
#include <linux/crypto.h>
#include <linux/module.h>
-#define ASM_EXPORT(sym, val) \
- asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
-
MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
@@ -32,6 +29,11 @@ struct sha256_ce_state {
asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
int blocks);
+const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state,
+ sst.count);
+const u32 sha256_ce_offsetof_finalize = offsetof(struct sha256_ce_state,
+ finalize);
+
static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
@@ -52,11 +54,6 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
struct sha256_ce_state *sctx = shash_desc_ctx(desc);
bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE);
- ASM_EXPORT(sha256_ce_offsetof_count,
- offsetof(struct sha256_ce_state, sst.count));
- ASM_EXPORT(sha256_ce_offsetof_finalize,
- offsetof(struct sha256_ce_state, finalize));
-
/*
* Allow the asm code to perform the finalization if there is no
* partial data and the input is a round multiple of the block size.
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 1a98bc8602a2..8cef47fa2218 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -89,7 +89,7 @@ static inline void gic_write_ctlr(u32 val)
static inline void gic_write_grpen1(u32 val)
{
- write_sysreg_s(val, SYS_ICC_GRPEN1_EL1);
+ write_sysreg_s(val, SYS_ICC_IGRPEN1_EL1);
isb();
}
diff --git a/arch/arm64/include/asm/checksum.h b/arch/arm64/include/asm/checksum.h
index 09f65339d66d..0b6f5a7d4027 100644
--- a/arch/arm64/include/asm/checksum.h
+++ b/arch/arm64/include/asm/checksum.h
@@ -42,7 +42,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
} while (--ihl);
sum += ((sum >> 32) | (sum << 32));
- return csum_fold(sum >> 32);
+ return csum_fold((__force u32)(sum >> 32));
}
#define ip_fast_csum ip_fast_csum
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index b3aab8a17868..8d2272c6822c 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -38,7 +38,8 @@
#define ARM64_WORKAROUND_REPEAT_TLBI 17
#define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18
#define ARM64_WORKAROUND_858921 19
+#define ARM64_WORKAROUND_CAVIUM_30115 20
-#define ARM64_NCAPS 20
+#define ARM64_NCAPS 21
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 0984d1b3a8f2..235e77d98261 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -86,6 +86,7 @@
#define CAVIUM_CPU_PART_THUNDERX 0x0A1
#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2
+#define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3
#define BRCM_CPU_PART_VULCAN 0x516
@@ -96,6 +97,7 @@
#define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73)
#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
+#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
#ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index 5392dbeffa45..f72779aad276 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -48,8 +48,6 @@ void arch_teardown_dma_ops(struct device *dev);
/* do not use this function in a driver */
static inline bool is_device_dma_coherent(struct device *dev)
{
- if (!dev)
- return false;
return dev->archdata.dma_coherent;
}
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 5d1700425efe..ac3fb7441510 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -142,6 +142,7 @@ typedef struct user_fpsimd_state elf_fpregset_t;
({ \
clear_bit(TIF_32BIT, &current->mm->context.flags); \
clear_thread_flag(TIF_32BIT); \
+ current->personality &= ~READ_IMPLIES_EXEC; \
})
/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
@@ -187,6 +188,11 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
((x)->e_flags & EF_ARM_EABI_MASK))
#define compat_start_thread compat_start_thread
+/*
+ * Unlike the native SET_PERSONALITY macro, the compat version inherits
+ * READ_IMPLIES_EXEC across a fork() since this is the behaviour on
+ * arch/arm/.
+ */
#define COMPAT_SET_PERSONALITY(ex) \
({ \
set_bit(TIF_32BIT, &current->mm->context.flags); \
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 85997c0e5443..8cabd57b6348 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -19,6 +19,7 @@
#define __ASM_ESR_H
#include <asm/memory.h>
+#include <asm/sysreg.h>
#define ESR_ELx_EC_UNKNOWN (0x00)
#define ESR_ELx_EC_WFx (0x01)
@@ -83,6 +84,7 @@
#define ESR_ELx_WNR (UL(1) << 6)
/* Shared ISS field definitions for Data/Instruction aborts */
+#define ESR_ELx_FnV (UL(1) << 10)
#define ESR_ELx_EA (UL(1) << 9)
#define ESR_ELx_S1PTW (UL(1) << 7)
@@ -181,6 +183,29 @@
#define ESR_ELx_SYS64_ISS_SYS_CNTFRQ (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 0, 14, 0) | \
ESR_ELx_SYS64_ISS_DIR_READ)
+#define esr_sys64_to_sysreg(e) \
+ sys_reg((((e) & ESR_ELx_SYS64_ISS_OP0_MASK) >> \
+ ESR_ELx_SYS64_ISS_OP0_SHIFT), \
+ (((e) & ESR_ELx_SYS64_ISS_OP1_MASK) >> \
+ ESR_ELx_SYS64_ISS_OP1_SHIFT), \
+ (((e) & ESR_ELx_SYS64_ISS_CRN_MASK) >> \
+ ESR_ELx_SYS64_ISS_CRN_SHIFT), \
+ (((e) & ESR_ELx_SYS64_ISS_CRM_MASK) >> \
+ ESR_ELx_SYS64_ISS_CRM_SHIFT), \
+ (((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >> \
+ ESR_ELx_SYS64_ISS_OP2_SHIFT))
+
+#define esr_cp15_to_sysreg(e) \
+ sys_reg(3, \
+ (((e) & ESR_ELx_SYS64_ISS_OP1_MASK) >> \
+ ESR_ELx_SYS64_ISS_OP1_SHIFT), \
+ (((e) & ESR_ELx_SYS64_ISS_CRN_MASK) >> \
+ ESR_ELx_SYS64_ISS_CRN_SHIFT), \
+ (((e) & ESR_ELx_SYS64_ISS_CRM_MASK) >> \
+ ESR_ELx_SYS64_ISS_CRM_SHIFT), \
+ (((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >> \
+ ESR_ELx_SYS64_ISS_OP2_SHIFT))
+
#ifndef __ASSEMBLY__
#include <asm/types.h>
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index 85c4a8981d47..f32b42e8725d 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -48,16 +48,16 @@ do { \
} while (0)
static inline int
-futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
+futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
- int oparg = (encoded_op << 8) >> 20;
- int cmparg = (encoded_op << 20) >> 20;
+ int oparg = (int)(encoded_op << 8) >> 20;
+ int cmparg = (int)(encoded_op << 20) >> 20;
int oldval = 0, ret, tmp;
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
+ oparg = 1U << (oparg & 0x1f);
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
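For reference, the encoded_op value decoded above follows the generic FUTEX_OP() packing from the futex uapi header. The standalone sketch below mirrors the unsigned type, explicit sign-extending casts and shift masking introduced by the hunk above; it is illustrative only, not the arm64 implementation itself.

#include <stdint.h>
#include <stdio.h>

/*
 * Layout of the 32-bit futex encoded_op value (uapi FUTEX_OP()):
 *   [31:28] op      [27:24] cmp
 *   [23:12] oparg   [11:0]  cmparg   (both 12-bit, sign-extended)
 */
int main(void)
{
	uint32_t encoded_op = (1u << 28) | (3u << 24) | (0x005u << 12) | 0x00a;

	int op     = (encoded_op >> 28) & 7;
	int cmp    = (encoded_op >> 24) & 15;
	/* casts make the sign extension explicit, as in the patched code */
	int oparg  = (int)(encoded_op << 8) >> 20;
	int cmparg = (int)(encoded_op << 20) >> 20;

	if (encoded_op & (8u << 28))		/* FUTEX_OP_OPARG_SHIFT flag */
		oparg = 1u << (oparg & 0x1f);	/* clamp shift count to 0..31 */

	printf("op=%d cmp=%d oparg=%d cmparg=%d\n", op, cmp, oparg, cmparg);
	return 0;
}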
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 6e99978e83bd..61d694c2eae5 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -204,6 +204,16 @@
#define FSC_FAULT ESR_ELx_FSC_FAULT
#define FSC_ACCESS ESR_ELx_FSC_ACCESS
#define FSC_PERM ESR_ELx_FSC_PERM
+#define FSC_SEA ESR_ELx_FSC_EXTABT
+#define FSC_SEA_TTW0 (0x14)
+#define FSC_SEA_TTW1 (0x15)
+#define FSC_SEA_TTW2 (0x16)
+#define FSC_SEA_TTW3 (0x17)
+#define FSC_SECC (0x18)
+#define FSC_SECC_TTW0 (0x1c)
+#define FSC_SECC_TTW1 (0x1d)
+#define FSC_SECC_TTW2 (0x1e)
+#define FSC_SECC_TTW3 (0x1f)
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
#define HPFAR_MASK (~UL(0xf))
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 1f252a95bc02..d68630007b14 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -42,7 +42,9 @@
#define KVM_VCPU_MAX_FEATURES 4
-#define KVM_REQ_VCPU_EXIT (8 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_SLEEP \
+ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
@@ -334,8 +336,6 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
void kvm_arm_halt_guest(struct kvm *kvm);
void kvm_arm_resume_guest(struct kvm *kvm);
-void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu);
-void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu);
u64 __kvm_call_hyp(void *hypfn, ...);
#define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__)
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index b18e852d27e8..4572a9b560fa 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -127,6 +127,7 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
+int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
void __timer_save_state(struct kvm_vcpu *vcpu);
void __timer_restore_state(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index d57693f5d4ec..19bd97671bb8 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -30,6 +30,9 @@ struct mod_plt_sec {
struct mod_arch_specific {
struct mod_plt_sec core;
struct mod_plt_sec init;
+
+ /* for CONFIG_DYNAMIC_FTRACE */
+ void *ftrace_trampoline;
};
#endif
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index c213fdbd056c..6eae342ced6b 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -441,7 +441,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
#define pud_none(pud) (!pud_val(pud))
#define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT))
-#define pud_present(pud) (pud_val(pud))
+#define pud_present(pud) pte_present(pud_pte(pud))
static inline void set_pud(pud_t *pudp, pud_t pud)
{
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 9428b93fefb2..64c9e78f9882 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -104,6 +104,9 @@ struct thread_struct {
#define task_user_tls(t) (&(t)->thread.tp_value)
#endif
+/* Sync TPIDR_EL0 back to thread_struct for current */
+void tls_preserve_current_state(void);
+
#define INIT_THREAD { }
static inline void start_thread_common(struct pt_regs *regs, unsigned long pc)
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 801a16dbbdf6..5b6eafccc5d8 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -30,5 +30,6 @@ struct stackframe {
extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame);
extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
int (*fn)(struct stackframe *, void *), void *data);
+extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk);
#endif /* __ASM_STACKTRACE_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index b4d13d9267ff..16e44fa9b3b6 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -180,14 +180,31 @@
#define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0)
+#define SYS_ICC_IAR0_EL1 sys_reg(3, 0, 12, 8, 0)
+#define SYS_ICC_EOIR0_EL1 sys_reg(3, 0, 12, 8, 1)
+#define SYS_ICC_HPPIR0_EL1 sys_reg(3, 0, 12, 8, 2)
+#define SYS_ICC_BPR0_EL1 sys_reg(3, 0, 12, 8, 3)
+#define SYS_ICC_AP0Rn_EL1(n) sys_reg(3, 0, 12, 8, 4 | n)
+#define SYS_ICC_AP0R0_EL1 SYS_ICC_AP0Rn_EL1(0)
+#define SYS_ICC_AP0R1_EL1 SYS_ICC_AP0Rn_EL1(1)
+#define SYS_ICC_AP0R2_EL1 SYS_ICC_AP0Rn_EL1(2)
+#define SYS_ICC_AP0R3_EL1 SYS_ICC_AP0Rn_EL1(3)
+#define SYS_ICC_AP1Rn_EL1(n) sys_reg(3, 0, 12, 9, n)
+#define SYS_ICC_AP1R0_EL1 SYS_ICC_AP1Rn_EL1(0)
+#define SYS_ICC_AP1R1_EL1 SYS_ICC_AP1Rn_EL1(1)
+#define SYS_ICC_AP1R2_EL1 SYS_ICC_AP1Rn_EL1(2)
+#define SYS_ICC_AP1R3_EL1 SYS_ICC_AP1Rn_EL1(3)
#define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1)
+#define SYS_ICC_RPR_EL1 sys_reg(3, 0, 12, 11, 3)
#define SYS_ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5)
#define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0)
#define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1)
+#define SYS_ICC_HPPIR1_EL1 sys_reg(3, 0, 12, 12, 2)
#define SYS_ICC_BPR1_EL1 sys_reg(3, 0, 12, 12, 3)
#define SYS_ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4)
#define SYS_ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5)
-#define SYS_ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7)
+#define SYS_ICC_IGRPEN0_EL1 sys_reg(3, 0, 12, 12, 6)
+#define SYS_ICC_IGRPEN1_EL1 sys_reg(3, 0, 12, 12, 7)
#define SYS_CONTEXTIDR_EL1 sys_reg(3, 0, 13, 0, 1)
#define SYS_TPIDR_EL1 sys_reg(3, 0, 13, 0, 4)
@@ -287,8 +304,8 @@
#define SCTLR_ELx_M 1
#define SCTLR_EL2_RES1 ((1 << 4) | (1 << 5) | (1 << 11) | (1 << 16) | \
- (1 << 16) | (1 << 18) | (1 << 22) | (1 << 23) | \
- (1 << 28) | (1 << 29))
+ (1 << 18) | (1 << 22) | (1 << 23) | (1 << 28) | \
+ (1 << 29))
#define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
SCTLR_ELx_SA | SCTLR_ELx_I)
diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h
index bc812435bc76..07aa8e3c5630 100644
--- a/arch/arm64/include/asm/system_misc.h
+++ b/arch/arm64/include/asm/system_misc.h
@@ -40,7 +40,7 @@ void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned int,
int sig, int code, const char *name);
struct mm_struct;
-extern void show_pte(struct mm_struct *mm, unsigned long addr);
+extern void show_pte(unsigned long addr);
extern void __show_regs(struct pt_regs *);
extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
@@ -56,6 +56,8 @@ extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
__show_ratelimited; \
})
+int handle_guest_sea(phys_addr_t addr, unsigned int esr);
+
#endif /* __ASSEMBLY__ */
#endif /* __ASM_SYSTEM_MISC_H */
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 70eea2ecc663..9f3ca24bbcc6 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -232,6 +232,9 @@ struct kvm_arch_memory_slot {
#define KVM_ARM_VCPU_PMU_V3_CTRL 0
#define KVM_ARM_VCPU_PMU_V3_IRQ 0
#define KVM_ARM_VCPU_PMU_V3_INIT 1
+#define KVM_ARM_VCPU_TIMER_CTRL 1
+#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
+#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
/* KVM_IRQ_LINE irq field index values */
#define KVM_ARM_IRQ_TYPE_SHIFT 24
diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h
index ee469be1ae1d..f0a76b9fcd6e 100644
--- a/arch/arm64/include/uapi/asm/sigcontext.h
+++ b/arch/arm64/include/uapi/asm/sigcontext.h
@@ -34,6 +34,26 @@ struct sigcontext {
};
/*
+ * Allocation of __reserved[]:
+ * (Note: records do not necessarily occur in the order shown here.)
+ *
+ * size description
+ *
+ * 0x210 fpsimd_context
+ * 0x10 esr_context
+ * 0x20 extra_context (optional)
+ * 0x10 terminator (null _aarch64_ctx)
+ *
+ * 0xdb0 (reserved for future allocation)
+ *
+ * New records that can exceed this space need to be opt-in for userspace, so
+ * that an expanded signal frame is not generated unexpectedly. The mechanism
+ * for opting in will depend on the extension that generates each new record.
+ * The above table documents the maximum set and sizes of records that can be
+ * generated when userspace does not opt in for any such extension.
+ */
+
+/*
* Header to be used at the beginning of structures extending the user
* context. Such structures must be placed after the rt_sigframe on the stack
* and be 16-byte aligned. The last structure must be a dummy one with the
@@ -61,4 +81,39 @@ struct esr_context {
__u64 esr;
};
+/*
+ * extra_context: describes extra space in the signal frame for
+ * additional structures that don't fit in sigcontext.__reserved[].
+ *
+ * Note:
+ *
+ * 1) fpsimd_context, esr_context and extra_context must be placed in
+ * sigcontext.__reserved[] if present. They cannot be placed in the
+ * extra space. Any other record can be placed either in the extra
+ * space or in sigcontext.__reserved[], unless otherwise specified in
+ * this file.
+ *
+ * 2) There must not be more than one extra_context.
+ *
+ * 3) If extra_context is present, it must be followed immediately in
+ * sigcontext.__reserved[] by the terminating null _aarch64_ctx.
+ *
+ * 4) The extra space to which datap points must start at the first
+ * 16-byte aligned address immediately after the terminating null
+ * _aarch64_ctx that follows the extra_context structure in
+ * __reserved[]. The extra space may overrun the end of __reserved[],
+ * as indicated by a sufficiently large value for the size field.
+ *
+ * 5) The extra space must itself be terminated with a null
+ * _aarch64_ctx.
+ */
+#define EXTRA_MAGIC 0x45585401
+
+struct extra_context {
+ struct _aarch64_ctx head;
+ __u64 datap; /* 16-byte aligned pointer to extra space cast to __u64 */
+ __u32 size; /* size in bytes of the extra space */
+ __u32 __reserved[3];
+};
+
#endif /* _UAPI__ASM_SIGCONTEXT_H */
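To make the record layout documented above concrete, here is a minimal userspace sketch that walks sigcontext.__reserved[] from a signal handler's ucontext and reports an extra_context record if one is present. It is illustrative only: it assumes an AArch64 toolchain whose uapi <asm/sigcontext.h> already contains this patch, and the usual aarch64 glibc mcontext_t layout with its __reserved[] byte array.

#include <asm/sigcontext.h>	/* _aarch64_ctx, extra_context, *_MAGIC */
#include <stdio.h>
#include <ucontext.h>

static void walk_records(ucontext_t *uc)
{
	struct _aarch64_ctx *head =
		(struct _aarch64_ctx *)uc->uc_mcontext.__reserved;

	while (head->magic) {			/* terminator has magic == 0 */
		switch (head->magic) {
		case FPSIMD_MAGIC:
			printf("fpsimd_context, %u bytes\n", head->size);
			break;
		case ESR_MAGIC:
			printf("esr_context, %u bytes\n", head->size);
			break;
		case EXTRA_MAGIC: {
			struct extra_context *extra = (struct extra_context *)head;

			/* datap: 16-byte aligned start of the extra space */
			printf("extra space at %#llx, %u bytes\n",
			       (unsigned long long)extra->datap, extra->size);
			break;
		}
		}
		if (!head->size)		/* defend against a malformed frame */
			break;
		head = (struct _aarch64_ctx *)((char *)head + head->size);
	}
}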
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 1dcb69d3d0e5..f2b4e816b6de 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -62,3 +62,6 @@ extra-y += $(head-y) vmlinux.lds
ifeq ($(CONFIG_DEBUG_EFI),y)
AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\""
endif
+
+# will be included by each individual module but not by the core kernel itself
+extra-$(CONFIG_DYNAMIC_FTRACE) += ftrace-mod.o
diff --git a/arch/arm64/kernel/acpi_parking_protocol.c b/arch/arm64/kernel/acpi_parking_protocol.c
index 1f5655cd9cc9..98a20e58758b 100644
--- a/arch/arm64/kernel/acpi_parking_protocol.c
+++ b/arch/arm64/kernel/acpi_parking_protocol.c
@@ -71,7 +71,7 @@ static int acpi_parking_protocol_cpu_boot(unsigned int cpu)
{
struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
struct parking_protocol_mailbox __iomem *mailbox;
- __le32 cpu_id;
+ u32 cpu_id;
/*
* Map mailbox memory with attribute device nGnRE (ie ioremap -
@@ -123,9 +123,9 @@ static void acpi_parking_protocol_cpu_postboot(void)
int cpu = smp_processor_id();
struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
struct parking_protocol_mailbox __iomem *mailbox = cpu_entry->mailbox;
- __le64 entry_point;
+ u64 entry_point;
- entry_point = readl_relaxed(&mailbox->entry_point);
+ entry_point = readq_relaxed(&mailbox->entry_point);
/*
* Check if firmware has cleared the entry_point as expected
* by the protocol specification.
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 8840c109c5d6..6dd0a3a3e5c9 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -28,7 +28,7 @@
#include <asm/sections.h>
#include <linux/stop_machine.h>
-#define __ALT_PTR(a,f) (u32 *)((void *)&(a)->f + (a)->f)
+#define __ALT_PTR(a,f) ((void *)&(a)->f + (a)->f)
#define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset)
#define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset)
@@ -60,7 +60,7 @@ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc)
#define align_down(x, a) ((unsigned long)(x) & ~(((unsigned long)(a)) - 1))
-static u32 get_alt_insn(struct alt_instr *alt, u32 *insnptr, u32 *altinsnptr)
+static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr)
{
u32 insn;
@@ -109,7 +109,7 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)
{
struct alt_instr *alt;
struct alt_region *region = alt_region;
- u32 *origptr, *replptr, *updptr;
+ __le32 *origptr, *replptr, *updptr;
for (alt = region->begin; alt < region->end; alt++) {
u32 insn;
@@ -124,7 +124,7 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)
origptr = ALT_ORIG_PTR(alt);
replptr = ALT_REPL_PTR(alt);
- updptr = use_linear_alias ? (u32 *)lm_alias(origptr) : origptr;
+ updptr = use_linear_alias ? lm_alias(origptr) : origptr;
nr_inst = alt->alt_len / sizeof(insn);
for (i = 0; i < nr_inst; i++) {
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 2ed2a7657711..0e27f86ee709 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -133,6 +133,27 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00),
},
#endif
+#ifdef CONFIG_CAVIUM_ERRATUM_30115
+ {
+ /* Cavium ThunderX, T88 pass 1.x - 2.2 */
+ .desc = "Cavium erratum 30115",
+ .capability = ARM64_WORKAROUND_CAVIUM_30115,
+ MIDR_RANGE(MIDR_THUNDERX, 0x00,
+ (1 << MIDR_VARIANT_SHIFT) | 2),
+ },
+ {
+ /* Cavium ThunderX, T81 pass 1.0 - 1.2 */
+ .desc = "Cavium erratum 30115",
+ .capability = ARM64_WORKAROUND_CAVIUM_30115,
+ MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x02),
+ },
+ {
+ /* Cavium ThunderX, T83 pass 1.0 */
+ .desc = "Cavium erratum 30115",
+ .capability = ARM64_WORKAROUND_CAVIUM_30115,
+ MIDR_RANGE(MIDR_THUNDERX_83XX, 0x00, 0x00),
+ },
+#endif
{
.desc = "Mismatched cache line size",
.capability = ARM64_MISMATCHED_CACHE_LINE_SIZE,
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 817ce3365e20..9f9e0064c8c1 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -51,6 +51,25 @@ unsigned int compat_elf_hwcap2 __read_mostly;
DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
EXPORT_SYMBOL(cpu_hwcaps);
+static int dump_cpu_hwcaps(struct notifier_block *self, unsigned long v, void *p)
+{
+ /* file-wide pr_fmt adds "CPU features: " prefix */
+ pr_emerg("0x%*pb\n", ARM64_NCAPS, &cpu_hwcaps);
+ return 0;
+}
+
+static struct notifier_block cpu_hwcaps_notifier = {
+ .notifier_call = dump_cpu_hwcaps
+};
+
+static int __init register_cpu_hwcaps_dumper(void)
+{
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &cpu_hwcaps_notifier);
+ return 0;
+}
+__initcall(register_cpu_hwcaps_dumper);
+
DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS);
EXPORT_SYMBOL(cpu_hwcap_keys);
@@ -639,8 +658,10 @@ void update_cpu_features(int cpu,
* Mismatched CPU features are a recipe for disaster. Don't even
* pretend to support them.
*/
- WARN_TAINT_ONCE(taint, TAINT_CPU_OUT_OF_SPEC,
- "Unsupported CPU feature variation.\n");
+ if (taint) {
+ pr_warn_once("Unsupported CPU feature variation detected.\n");
+ add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
+ }
}
u64 read_sanitised_ftr_reg(u32 id)
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 68b1f364c515..f495ee5049fd 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -227,7 +227,7 @@ static struct attribute *cpuregs_id_attrs[] = {
NULL
};
-static struct attribute_group cpuregs_attr_group = {
+static const struct attribute_group cpuregs_attr_group = {
.attrs = cpuregs_id_attrs,
.name = "identification"
};
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index d618e25c3de1..c7ef99904934 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -341,20 +341,22 @@ int aarch32_break_handler(struct pt_regs *regs)
if (compat_thumb_mode(regs)) {
/* get 16-bit Thumb instruction */
- get_user(thumb_instr, (u16 __user *)pc);
- thumb_instr = le16_to_cpu(thumb_instr);
+ __le16 instr;
+ get_user(instr, (__le16 __user *)pc);
+ thumb_instr = le16_to_cpu(instr);
if (thumb_instr == AARCH32_BREAK_THUMB2_LO) {
/* get second half of 32-bit Thumb-2 instruction */
- get_user(thumb_instr, (u16 __user *)(pc + 2));
- thumb_instr = le16_to_cpu(thumb_instr);
+ get_user(instr, (__le16 __user *)(pc + 2));
+ thumb_instr = le16_to_cpu(instr);
bp = thumb_instr == AARCH32_BREAK_THUMB2_HI;
} else {
bp = thumb_instr == AARCH32_BREAK_THUMB;
}
} else {
/* 32-bit ARM instruction */
- get_user(arm_instr, (u32 __user *)pc);
- arm_instr = le32_to_cpu(arm_instr);
+ __le32 instr;
+ get_user(instr, (__le32 __user *)pc);
+ arm_instr = le32_to_cpu(instr);
bp = (arm_instr & ~0xf0000000) == AARCH32_BREAK_ARM;
}
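The hunk above reads AArch32 Thumb instructions as explicitly little-endian 16-bit values and
only then converts to CPU byte order; a 32-bit Thumb-2 encoding is two consecutive halfwords
with the first half at the lower address. A small host-side sketch of that two-halfword
assembly; read_le16() is an illustrative stand-in for le16_to_cpu() on a __le16 load, and the
example byte stream is hypothetical.

#include <stdint.h>
#include <stdio.h>

/* Assemble a halfword from explicit little-endian bytes (stand-in for le16_to_cpu). */
static uint16_t read_le16(const uint8_t *p)
{
	return (uint16_t)p[0] | ((uint16_t)p[1] << 8);
}

/* Thumb-2 "wide" encodings have bits [15:11] of the first halfword >= 0b11101. */
static int thumb_insn_is_wide(uint16_t first)
{
	return (first & 0xf800) >= 0xe800;
}

int main(void)
{
	/* Hypothetical instruction stream in memory (little-endian halfwords). */
	const uint8_t pc[4] = { 0xf0, 0xf7, 0x00, 0xa0 };
	uint16_t lo = read_le16(pc);
	uint32_t insn = lo;

	if (thumb_insn_is_wide(lo)) {
		uint16_t hi = read_le16(pc + 2);
		insn = ((uint32_t)lo << 16) | hi;	/* same combination as the handlers above */
	}

	printf("decoded instruction: 0x%08x\n", (unsigned)insn);
	return 0;
}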
diff --git a/arch/arm64/kernel/ftrace-mod.S b/arch/arm64/kernel/ftrace-mod.S
new file mode 100644
index 000000000000..00c4025be4ff
--- /dev/null
+++ b/arch/arm64/kernel/ftrace-mod.S
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .section ".text.ftrace_trampoline", "ax"
+ .align 3
+0: .quad 0
+__ftrace_trampoline:
+ ldr x16, 0b
+ br x16
+ENDPROC(__ftrace_trampoline)
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 40ad08ac569a..c13b1fca0e5b 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -10,10 +10,12 @@
*/
#include <linux/ftrace.h>
+#include <linux/module.h>
#include <linux/swab.h>
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
+#include <asm/debug-monitors.h>
#include <asm/ftrace.h>
#include <asm/insn.h>
@@ -70,6 +72,58 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
unsigned long pc = rec->ip;
u32 old, new;
+ long offset = (long)pc - (long)addr;
+
+ if (offset < -SZ_128M || offset >= SZ_128M) {
+#ifdef CONFIG_ARM64_MODULE_PLTS
+ unsigned long *trampoline;
+ struct module *mod;
+
+ /*
+ * On kernels that support module PLTs, the offset between the
+ * branch instruction and its target may legally exceed the
+ * range of an ordinary relative 'bl' opcode. In this case, we
+ * need to branch via a trampoline in the module.
+ *
+ * NOTE: __module_text_address() must be called with preemption
+ * disabled, but we can rely on ftrace_lock to ensure that 'mod'
+ * retains its validity throughout the remainder of this code.
+ */
+ preempt_disable();
+ mod = __module_text_address(pc);
+ preempt_enable();
+
+ if (WARN_ON(!mod))
+ return -EINVAL;
+
+ /*
+ * There is only one ftrace trampoline per module. For now,
+ * this is not a problem since on arm64, all dynamic ftrace
+ * invocations are routed via ftrace_caller(). This will need
+ * to be revisited if support for multiple ftrace entry points
+ * is added in the future, but for now, the pr_err() below
+ * deals with a theoretical issue only.
+ */
+ trampoline = (unsigned long *)mod->arch.ftrace_trampoline;
+ if (trampoline[0] != addr) {
+ if (trampoline[0] != 0) {
+ pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n");
+ return -EINVAL;
+ }
+
+ /* point the trampoline to our ftrace entry point */
+ module_disable_ro(mod);
+ trampoline[0] = addr;
+ module_enable_ro(mod, true);
+
+ /* update trampoline before patching in the branch */
+ smp_wmb();
+ }
+ addr = (unsigned long)&trampoline[1];
+#else /* CONFIG_ARM64_MODULE_PLTS */
+ return -EINVAL;
+#endif /* CONFIG_ARM64_MODULE_PLTS */
+ }
old = aarch64_insn_gen_nop();
new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
@@ -84,12 +138,55 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
unsigned long addr)
{
unsigned long pc = rec->ip;
- u32 old, new;
+ bool validate = true;
+ u32 old = 0, new;
+ long offset = (long)pc - (long)addr;
+
+ if (offset < -SZ_128M || offset >= SZ_128M) {
+#ifdef CONFIG_ARM64_MODULE_PLTS
+ u32 replaced;
+
+ /*
+ * 'mod' is only set at module load time, but if we end up
+ * dealing with an out-of-range condition, we can assume it
+ * is due to a module being loaded far away from the kernel.
+ */
+ if (!mod) {
+ preempt_disable();
+ mod = __module_text_address(pc);
+ preempt_enable();
+
+ if (WARN_ON(!mod))
+ return -EINVAL;
+ }
+
+ /*
+ * The instruction we are about to patch may be a branch and
+ * link instruction that was redirected via a PLT entry. In
+ * this case, the normal validation will fail, but we can at
+ * least check that we are dealing with a branch and link
+ * instruction that points into the right module.
+ */
+ if (aarch64_insn_read((void *)pc, &replaced))
+ return -EFAULT;
+
+ if (!aarch64_insn_is_bl(replaced) ||
+ !within_module(pc + aarch64_get_branch_offset(replaced),
+ mod))
+ return -EINVAL;
+
+ validate = false;
+#else /* CONFIG_ARM64_MODULE_PLTS */
+ return -EINVAL;
+#endif /* CONFIG_ARM64_MODULE_PLTS */
+ } else {
+ old = aarch64_insn_gen_branch_imm(pc, addr,
+ AARCH64_INSN_BRANCH_LINK);
+ }
- old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
new = aarch64_insn_gen_nop();
- return ftrace_modify_code(pc, old, new, true);
+ return ftrace_modify_code(pc, old, new, validate);
}
void arch_ftrace_update_code(int command)
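The ftrace changes branch through the per-module trampoline from ftrace-mod.S (ldr x16 / br
x16) whenever the +/-128 MiB reach of an AArch64 bl cannot cover the distance to
ftrace_caller(). The sketch below shows just that decision plus the bl immediate encoding;
the addresses and the spelled-out SZ_128M constant are illustrative, and the real code patches
the instruction through aarch64_insn_* helpers rather than computing words by hand.

#include <stdint.h>
#include <stdio.h>

#define SZ_128M			(128UL * 1024 * 1024)
#define AARCH64_BL_OPCODE	0x94000000u	/* bl <imm26>, immediate counted in words */

/* Encode "bl target" at pc; returns 0 and sets *insn, or -1 if out of range. */
static int encode_bl(uint64_t pc, uint64_t target, uint32_t *insn)
{
	int64_t offset = (int64_t)(target - pc);

	if (offset < -(int64_t)SZ_128M || offset >= (int64_t)SZ_128M)
		return -1;

	*insn = AARCH64_BL_OPCODE | (((uint64_t)offset >> 2) & 0x03ffffffu);
	return 0;
}

int main(void)
{
	/* Hypothetical addresses: a module call site far away from the ftrace entry point. */
	uint64_t pc = 0xffff000008081000ULL;
	uint64_t ftrace_entry = 0xffff000018000000ULL;	/* more than 128 MiB away */
	uint64_t trampoline = 0xffff000008090000ULL;	/* in the same module, reachable */
	uint32_t insn = 0;

	if (encode_bl(pc, ftrace_entry, &insn))
		/* Out of range: branch to the module trampoline instead, which loads
		 * the real entry point and does "br x16", as in ftrace-mod.S above. */
		encode_bl(pc, trampoline, &insn);

	printf("patched instruction: 0x%08x\n", (unsigned)insn);
	return 0;
}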
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index cd872133e88e..2718a77da165 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -117,7 +117,7 @@ static void __kprobes patch_unmap(int fixmap)
int __kprobes aarch64_insn_read(void *addr, u32 *insnp)
{
int ret;
- u32 val;
+ __le32 val;
ret = probe_kernel_read(&val, addr, AARCH64_INSN_SIZE);
if (!ret)
@@ -126,7 +126,7 @@ int __kprobes aarch64_insn_read(void *addr, u32 *insnp)
return ret;
}
-static int __kprobes __aarch64_insn_write(void *addr, u32 insn)
+static int __kprobes __aarch64_insn_write(void *addr, __le32 insn)
{
void *waddr = addr;
unsigned long flags = 0;
@@ -145,8 +145,7 @@ static int __kprobes __aarch64_insn_write(void *addr, u32 insn)
int __kprobes aarch64_insn_write(void *addr, u32 insn)
{
- insn = cpu_to_le32(insn);
- return __aarch64_insn_write(addr, insn);
+ return __aarch64_insn_write(addr, cpu_to_le32(insn));
}
static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn)
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index d7e90d97f5c4..a9710efb8c01 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -27,7 +27,7 @@ u16 __initdata memstart_offset_seed;
static __init u64 get_kaslr_seed(void *fdt)
{
int node, len;
- u64 *prop;
+ fdt64_t *prop;
u64 ret;
node = fdt_path_offset(fdt, "/chosen");
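The kaslr.c change types the devicetree property as fdt64_t because FDT cells are stored
big-endian regardless of CPU byte order, so the raw bytes must be converted before use. A
stand-alone sketch of that conversion; read_be64() plays the role of fdt64_to_cpu(), and the
property bytes are invented.

#include <stdint.h>
#include <stdio.h>

/* FDT properties are big-endian byte streams; assemble a u64 explicitly. */
static uint64_t read_be64(const uint8_t *p)
{
	uint64_t v = 0;

	for (int i = 0; i < 8; i++)
		v = (v << 8) | p[i];
	return v;
}

int main(void)
{
	/* A hypothetical /chosen/kaslr-seed property as it appears in the blob. */
	const uint8_t prop[8] = { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef };

	printf("kaslr seed: 0x%016llx\n", (unsigned long long)read_be64(prop));
	return 0;
}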
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index f035ff6fb223..f469e0435903 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -74,7 +74,7 @@ enum aarch64_reloc_op {
RELOC_OP_PAGE,
};
-static u64 do_reloc(enum aarch64_reloc_op reloc_op, void *place, u64 val)
+static u64 do_reloc(enum aarch64_reloc_op reloc_op, __le32 *place, u64 val)
{
switch (reloc_op) {
case RELOC_OP_ABS:
@@ -121,12 +121,12 @@ enum aarch64_insn_movw_imm_type {
AARCH64_INSN_IMM_MOVKZ,
};
-static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,
+static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
int lsb, enum aarch64_insn_movw_imm_type imm_type)
{
u64 imm;
s64 sval;
- u32 insn = le32_to_cpu(*(u32 *)place);
+ u32 insn = le32_to_cpu(*place);
sval = do_reloc(op, place, val);
imm = sval >> lsb;
@@ -154,7 +154,7 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,
/* Update the instruction with the new encoding. */
insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
- *(u32 *)place = cpu_to_le32(insn);
+ *place = cpu_to_le32(insn);
if (imm > U16_MAX)
return -ERANGE;
@@ -162,12 +162,12 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,
return 0;
}
-static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val,
+static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
int lsb, int len, enum aarch64_insn_imm_type imm_type)
{
u64 imm, imm_mask;
s64 sval;
- u32 insn = le32_to_cpu(*(u32 *)place);
+ u32 insn = le32_to_cpu(*place);
/* Calculate the relocation value. */
sval = do_reloc(op, place, val);
@@ -179,7 +179,7 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val,
/* Update the instruction's immediate field. */
insn = aarch64_insn_encode_immediate(imm_type, insn, imm);
- *(u32 *)place = cpu_to_le32(insn);
+ *place = cpu_to_le32(insn);
/*
* Extract the upper value bits (including the sign bit) and
@@ -420,8 +420,12 @@ int module_finalize(const Elf_Ehdr *hdr,
for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
if (strcmp(".altinstructions", secstrs + s->sh_name) == 0) {
apply_alternatives((void *)s->sh_addr, s->sh_size);
- return 0;
}
+#ifdef CONFIG_ARM64_MODULE_PLTS
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) &&
+ !strcmp(".text.ftrace_trampoline", secstrs + s->sh_name))
+ me->arch.ftrace_trampoline = (void *)s->sh_addr;
+#endif
}
return 0;
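The module.c hunks make the relocation helpers operate on __le32 instruction words: load with
le32_to_cpu(), patch the immediate field, store back with cpu_to_le32(). A minimal sketch of
that read-modify-write for a MOVZ/MOVK-style 16-bit immediate (the imm16 field at bits
[20:5]); the byte-order helpers are illustrative stand-ins, and overflow checking is omitted.

#include <stdint.h>
#include <stdio.h>

/* Explicit little-endian accessors (stand-ins for le32_to_cpu/cpu_to_le32). */
static uint32_t load_le32(const uint8_t *p)
{
	return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
	       ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

static void store_le32(uint8_t *p, uint32_t v)
{
	p[0] = (uint8_t)v;
	p[1] = (uint8_t)(v >> 8);
	p[2] = (uint8_t)(v >> 16);
	p[3] = (uint8_t)(v >> 24);
}

/* Insert a 16-bit immediate into the imm16 field (bits [20:5]) of a MOVZ/MOVK word. */
static uint32_t encode_imm16(uint32_t insn, uint16_t imm)
{
	insn &= ~(0xffffu << 5);
	return insn | ((uint32_t)imm << 5);
}

int main(void)
{
	/* "movz x0, #0" as stored in the module image (little-endian). */
	uint8_t place[4] = { 0x00, 0x00, 0x80, 0xd2 };
	uint32_t insn = load_le32(place);

	insn = encode_imm16(insn, 0x1234);	/* relocation value shifted down by lsb */
	store_le32(place, insn);

	printf("patched word: 0x%08x\n", (unsigned)load_le32(place));
	return 0;
}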
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index c7e3e6387a49..a7f6c01c13b9 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -108,7 +108,10 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
if (!acpi_disabled) {
struct pci_config_window *cfg = bridge->bus->sysdata;
struct acpi_device *adev = to_acpi_device(cfg->parent);
+ struct device *bus_dev = &bridge->bus->dev;
+
ACPI_COMPANION_SET(&bridge->dev, adev);
+ set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev)));
}
return 0;
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 83a1b1ad189f..b5798ba21189 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -529,7 +529,7 @@ static struct attribute_group armv8_pmuv3_events_attr_group = {
.is_visible = armv8pmu_event_attr_is_visible,
};
-PMU_FORMAT_ATTR(event, "config:0-9");
+PMU_FORMAT_ATTR(event, "config:0-15");
static struct attribute *armv8_pmuv3_format_attrs[] = {
&format_attr_event.attr,
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index c5c45942fb6e..d849d9804011 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -522,9 +522,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
pr_err("current sp %lx does not match saved sp %lx\n",
orig_sp, stack_addr);
pr_err("Saved registers for jprobe %p\n", jp);
- show_regs(saved_regs);
+ __show_regs(saved_regs);
pr_err("Current registers\n");
- show_regs(regs);
+ __show_regs(regs);
BUG();
}
unpause_graph_tracing();
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index ae2a835898d7..659ae8094ed5 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -210,6 +210,7 @@ void __show_regs(struct pt_regs *regs)
void show_regs(struct pt_regs * regs)
{
__show_regs(regs);
+ dump_backtrace(regs, NULL);
}
static void tls_thread_flush(void)
@@ -297,12 +298,16 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
return 0;
}
+void tls_preserve_current_state(void)
+{
+ *task_user_tls(current) = read_sysreg(tpidr_el0);
+}
+
static void tls_thread_switch(struct task_struct *next)
{
unsigned long tpidr, tpidrro;
- tpidr = read_sysreg(tpidr_el0);
- *task_user_tls(current) = tpidr;
+ tls_preserve_current_state();
tpidr = *task_user_tls(next);
tpidrro = is_compat_thread(task_thread_info(next)) ?
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index c142459a88f3..1b38c0150aec 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -623,6 +623,10 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset,
{
struct user_fpsimd_state *uregs;
uregs = &target->thread.fpsimd_state.user_fpsimd;
+
+ if (target == current)
+ fpsimd_preserve_current_state();
+
return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1);
}
@@ -648,6 +652,10 @@ static int tls_get(struct task_struct *target, const struct user_regset *regset,
void *kbuf, void __user *ubuf)
{
unsigned long *tls = &target->thread.tp_value;
+
+ if (target == current)
+ tls_preserve_current_state();
+
return user_regset_copyout(&pos, &count, &kbuf, &ubuf, tls, 0, -1);
}
@@ -894,21 +902,27 @@ static int compat_vfp_get(struct task_struct *target,
{
struct user_fpsimd_state *uregs;
compat_ulong_t fpscr;
- int ret;
+ int ret, vregs_end_pos;
uregs = &target->thread.fpsimd_state.user_fpsimd;
+ if (target == current)
+ fpsimd_preserve_current_state();
+
/*
* The VFP registers are packed into the fpsimd_state, so they all sit
* nicely together for us. We just need to create the fpscr separately.
*/
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0,
- VFP_STATE_SIZE - sizeof(compat_ulong_t));
+ vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t);
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs,
+ 0, vregs_end_pos);
if (count && !ret) {
fpscr = (uregs->fpsr & VFP_FPSCR_STAT_MASK) |
(uregs->fpcr & VFP_FPSCR_CTRL_MASK);
- ret = put_user(fpscr, (compat_ulong_t *)ubuf);
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &fpscr,
+ vregs_end_pos, VFP_STATE_SIZE);
}
return ret;
@@ -921,20 +935,21 @@ static int compat_vfp_set(struct task_struct *target,
{
struct user_fpsimd_state *uregs;
compat_ulong_t fpscr;
- int ret;
-
- if (pos + count > VFP_STATE_SIZE)
- return -EIO;
+ int ret, vregs_end_pos;
uregs = &target->thread.fpsimd_state.user_fpsimd;
+ vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t);
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, uregs, 0,
- VFP_STATE_SIZE - sizeof(compat_ulong_t));
+ vregs_end_pos);
if (count && !ret) {
- ret = get_user(fpscr, (compat_ulong_t *)ubuf);
- uregs->fpsr = fpscr & VFP_FPSCR_STAT_MASK;
- uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK;
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpscr,
+ vregs_end_pos, VFP_STATE_SIZE);
+ if (!ret) {
+ uregs->fpsr = fpscr & VFP_FPSCR_STAT_MASK;
+ uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK;
+ }
}
fpsimd_flush_task_state(target);
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 2c822ef94f34..d4b740538ad5 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -194,6 +194,9 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys)
}
name = of_flat_dt_get_machine_name();
+ if (!name)
+ return;
+
pr_info("Machine model: %s\n", name);
dump_stack_set_arch_desc("%s (DT)", name);
}
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index c7b6de62f9d3..089c3747995d 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -19,10 +19,14 @@
#include <linux/compat.h>
#include <linux/errno.h>
+#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/personality.h>
#include <linux/freezer.h>
+#include <linux/stddef.h>
#include <linux/uaccess.h>
+#include <linux/sizes.h>
+#include <linux/string.h>
#include <linux/tracehook.h>
#include <linux/ratelimit.h>
@@ -41,10 +45,133 @@
struct rt_sigframe {
struct siginfo info;
struct ucontext uc;
+};
+
+struct frame_record {
u64 fp;
u64 lr;
};
+struct rt_sigframe_user_layout {
+ struct rt_sigframe __user *sigframe;
+ struct frame_record __user *next_frame;
+
+ unsigned long size; /* size of allocated sigframe data */
+ unsigned long limit; /* largest allowed size */
+
+ unsigned long fpsimd_offset;
+ unsigned long esr_offset;
+ unsigned long extra_offset;
+ unsigned long end_offset;
+};
+
+#define BASE_SIGFRAME_SIZE round_up(sizeof(struct rt_sigframe), 16)
+#define TERMINATOR_SIZE round_up(sizeof(struct _aarch64_ctx), 16)
+#define EXTRA_CONTEXT_SIZE round_up(sizeof(struct extra_context), 16)
+
+static void init_user_layout(struct rt_sigframe_user_layout *user)
+{
+ const size_t reserved_size =
+ sizeof(user->sigframe->uc.uc_mcontext.__reserved);
+
+ memset(user, 0, sizeof(*user));
+ user->size = offsetof(struct rt_sigframe, uc.uc_mcontext.__reserved);
+
+ user->limit = user->size + reserved_size;
+
+ user->limit -= TERMINATOR_SIZE;
+ user->limit -= EXTRA_CONTEXT_SIZE;
+ /* Reserve space for extension and terminator ^ */
+}
+
+static size_t sigframe_size(struct rt_sigframe_user_layout const *user)
+{
+ return round_up(max(user->size, sizeof(struct rt_sigframe)), 16);
+}
+
+/*
+ * Sanity limit on the approximate maximum size of signal frame we'll
+ * try to generate. Stack alignment padding and the frame record are
+ * not taken into account. This limit is not a guarantee and is
+ * NOT ABI.
+ */
+#define SIGFRAME_MAXSZ SZ_64K
+
+static int __sigframe_alloc(struct rt_sigframe_user_layout *user,
+ unsigned long *offset, size_t size, bool extend)
+{
+ size_t padded_size = round_up(size, 16);
+
+ if (padded_size > user->limit - user->size &&
+ !user->extra_offset &&
+ extend) {
+ int ret;
+
+ user->limit += EXTRA_CONTEXT_SIZE;
+ ret = __sigframe_alloc(user, &user->extra_offset,
+ sizeof(struct extra_context), false);
+ if (ret) {
+ user->limit -= EXTRA_CONTEXT_SIZE;
+ return ret;
+ }
+
+ /* Reserve space for the __reserved[] terminator */
+ user->size += TERMINATOR_SIZE;
+
+ /*
+ * Allow expansion up to SIGFRAME_MAXSZ, ensuring space for
+ * the terminator:
+ */
+ user->limit = SIGFRAME_MAXSZ - TERMINATOR_SIZE;
+ }
+
+ /* Still not enough space? Bad luck! */
+ if (padded_size > user->limit - user->size)
+ return -ENOMEM;
+
+ *offset = user->size;
+ user->size += padded_size;
+
+ return 0;
+}
+
+/*
+ * Allocate space for an optional record of <size> bytes in the user
+ * signal frame. The offset from the signal frame base address to the
+ * allocated block is assigned to *offset.
+ */
+static int sigframe_alloc(struct rt_sigframe_user_layout *user,
+ unsigned long *offset, size_t size)
+{
+ return __sigframe_alloc(user, offset, size, true);
+}
+
+/* Allocate the null terminator record and prevent further allocations */
+static int sigframe_alloc_end(struct rt_sigframe_user_layout *user)
+{
+ int ret;
+
+ /* Un-reserve the space reserved for the terminator: */
+ user->limit += TERMINATOR_SIZE;
+
+ ret = sigframe_alloc(user, &user->end_offset,
+ sizeof(struct _aarch64_ctx));
+ if (ret)
+ return ret;
+
+ /* Prevent further allocation: */
+ user->limit = user->size;
+ return 0;
+}
+
+static void __user *apply_user_offset(
+ struct rt_sigframe_user_layout const *user, unsigned long offset)
+{
+ char __user *base = (char __user *)user->sigframe;
+
+ return base + offset;
+}
+
static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
{
struct fpsimd_state *fpsimd = &current->thread.fpsimd_state;
@@ -92,12 +219,159 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
return err ? -EFAULT : 0;
}
+struct user_ctxs {
+ struct fpsimd_context __user *fpsimd;
+};
+
+static int parse_user_sigframe(struct user_ctxs *user,
+ struct rt_sigframe __user *sf)
+{
+ struct sigcontext __user *const sc = &sf->uc.uc_mcontext;
+ struct _aarch64_ctx __user *head;
+ char __user *base = (char __user *)&sc->__reserved;
+ size_t offset = 0;
+ size_t limit = sizeof(sc->__reserved);
+ bool have_extra_context = false;
+ char const __user *const sfp = (char const __user *)sf;
+
+ user->fpsimd = NULL;
+
+ if (!IS_ALIGNED((unsigned long)base, 16))
+ goto invalid;
+
+ while (1) {
+ int err = 0;
+ u32 magic, size;
+ char const __user *userp;
+ struct extra_context const __user *extra;
+ u64 extra_datap;
+ u32 extra_size;
+ struct _aarch64_ctx const __user *end;
+ u32 end_magic, end_size;
+
+ if (limit - offset < sizeof(*head))
+ goto invalid;
+
+ if (!IS_ALIGNED(offset, 16))
+ goto invalid;
+
+ head = (struct _aarch64_ctx __user *)(base + offset);
+ __get_user_error(magic, &head->magic, err);
+ __get_user_error(size, &head->size, err);
+ if (err)
+ return err;
+
+ if (limit - offset < size)
+ goto invalid;
+
+ switch (magic) {
+ case 0:
+ if (size)
+ goto invalid;
+
+ goto done;
+
+ case FPSIMD_MAGIC:
+ if (user->fpsimd)
+ goto invalid;
+
+ if (size < sizeof(*user->fpsimd))
+ goto invalid;
+
+ user->fpsimd = (struct fpsimd_context __user *)head;
+ break;
+
+ case ESR_MAGIC:
+ /* ignore */
+ break;
+
+ case EXTRA_MAGIC:
+ if (have_extra_context)
+ goto invalid;
+
+ if (size < sizeof(*extra))
+ goto invalid;
+
+ userp = (char const __user *)head;
+
+ extra = (struct extra_context const __user *)userp;
+ userp += size;
+
+ __get_user_error(extra_datap, &extra->datap, err);
+ __get_user_error(extra_size, &extra->size, err);
+ if (err)
+ return err;
+
+ /* Check for the dummy terminator in __reserved[]: */
+
+ if (limit - offset - size < TERMINATOR_SIZE)
+ goto invalid;
+
+ end = (struct _aarch64_ctx const __user *)userp;
+ userp += TERMINATOR_SIZE;
+
+ __get_user_error(end_magic, &end->magic, err);
+ __get_user_error(end_size, &end->size, err);
+ if (err)
+ return err;
+
+ if (end_magic || end_size)
+ goto invalid;
+
+ /* Prevent looping/repeated parsing of extra_context */
+ have_extra_context = true;
+
+ base = (__force void __user *)extra_datap;
+ if (!IS_ALIGNED((unsigned long)base, 16))
+ goto invalid;
+
+ if (!IS_ALIGNED(extra_size, 16))
+ goto invalid;
+
+ if (base != userp)
+ goto invalid;
+
+ /* Reject "unreasonably large" frames: */
+ if (extra_size > sfp + SIGFRAME_MAXSZ - userp)
+ goto invalid;
+
+ /*
+ * Ignore trailing terminator in __reserved[]
+ * and start parsing extra data:
+ */
+ offset = 0;
+ limit = extra_size;
+ continue;
+
+ default:
+ goto invalid;
+ }
+
+ if (size < sizeof(*head))
+ goto invalid;
+
+ if (limit - offset < size)
+ goto invalid;
+
+ offset += size;
+ }
+
+done:
+ if (!user->fpsimd)
+ goto invalid;
+
+ return 0;
+
+invalid:
+ return -EINVAL;
+}
+
static int restore_sigframe(struct pt_regs *regs,
struct rt_sigframe __user *sf)
{
sigset_t set;
int i, err;
- void *aux = sf->uc.uc_mcontext.__reserved;
+ struct user_ctxs user;
err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set));
if (err == 0)
@@ -116,12 +390,11 @@ static int restore_sigframe(struct pt_regs *regs,
regs->syscallno = ~0UL;
err |= !valid_user_regs(&regs->user_regs, current);
+ if (err == 0)
+ err = parse_user_sigframe(&user, sf);
- if (err == 0) {
- struct fpsimd_context *fpsimd_ctx =
- container_of(aux, struct fpsimd_context, head);
- err |= restore_fpsimd_context(fpsimd_ctx);
- }
+ if (err == 0)
+ err = restore_fpsimd_context(user.fpsimd);
return err;
}
@@ -162,16 +435,37 @@ badframe:
return 0;
}
-static int setup_sigframe(struct rt_sigframe __user *sf,
+/* Determine the layout of optional records in the signal frame */
+static int setup_sigframe_layout(struct rt_sigframe_user_layout *user)
+{
+ int err;
+
+ err = sigframe_alloc(user, &user->fpsimd_offset,
+ sizeof(struct fpsimd_context));
+ if (err)
+ return err;
+
+ /* fault information, if valid */
+ if (current->thread.fault_code) {
+ err = sigframe_alloc(user, &user->esr_offset,
+ sizeof(struct esr_context));
+ if (err)
+ return err;
+ }
+
+ return sigframe_alloc_end(user);
+}
+
+
+static int setup_sigframe(struct rt_sigframe_user_layout *user,
struct pt_regs *regs, sigset_t *set)
{
int i, err = 0;
- void *aux = sf->uc.uc_mcontext.__reserved;
- struct _aarch64_ctx *end;
+ struct rt_sigframe __user *sf = user->sigframe;
/* set up the stack frame for unwinding */
- __put_user_error(regs->regs[29], &sf->fp, err);
- __put_user_error(regs->regs[30], &sf->lr, err);
+ __put_user_error(regs->regs[29], &user->next_frame->fp, err);
+ __put_user_error(regs->regs[30], &user->next_frame->lr, err);
for (i = 0; i < 31; i++)
__put_user_error(regs->regs[i], &sf->uc.uc_mcontext.regs[i],
@@ -185,58 +479,103 @@ static int setup_sigframe(struct rt_sigframe __user *sf,
err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set));
if (err == 0) {
- struct fpsimd_context *fpsimd_ctx =
- container_of(aux, struct fpsimd_context, head);
+ struct fpsimd_context __user *fpsimd_ctx =
+ apply_user_offset(user, user->fpsimd_offset);
err |= preserve_fpsimd_context(fpsimd_ctx);
- aux += sizeof(*fpsimd_ctx);
}
/* fault information, if valid */
- if (current->thread.fault_code) {
- struct esr_context *esr_ctx =
- container_of(aux, struct esr_context, head);
+ if (err == 0 && user->esr_offset) {
+ struct esr_context __user *esr_ctx =
+ apply_user_offset(user, user->esr_offset);
+
__put_user_error(ESR_MAGIC, &esr_ctx->head.magic, err);
__put_user_error(sizeof(*esr_ctx), &esr_ctx->head.size, err);
__put_user_error(current->thread.fault_code, &esr_ctx->esr, err);
- aux += sizeof(*esr_ctx);
+ }
+
+ if (err == 0 && user->extra_offset) {
+ char __user *sfp = (char __user *)user->sigframe;
+ char __user *userp =
+ apply_user_offset(user, user->extra_offset);
+
+ struct extra_context __user *extra;
+ struct _aarch64_ctx __user *end;
+ u64 extra_datap;
+ u32 extra_size;
+
+ extra = (struct extra_context __user *)userp;
+ userp += EXTRA_CONTEXT_SIZE;
+
+ end = (struct _aarch64_ctx __user *)userp;
+ userp += TERMINATOR_SIZE;
+
+ /*
+ * extra_datap is just written to the signal frame.
+ * The value gets cast back to a void __user *
+ * during sigreturn.
+ */
+ extra_datap = (__force u64)userp;
+ extra_size = sfp + round_up(user->size, 16) - userp;
+
+ __put_user_error(EXTRA_MAGIC, &extra->head.magic, err);
+ __put_user_error(EXTRA_CONTEXT_SIZE, &extra->head.size, err);
+ __put_user_error(extra_datap, &extra->datap, err);
+ __put_user_error(extra_size, &extra->size, err);
+
+ /* Add the terminator */
+ __put_user_error(0, &end->magic, err);
+ __put_user_error(0, &end->size, err);
}
/* set the "end" magic */
- end = aux;
- __put_user_error(0, &end->magic, err);
- __put_user_error(0, &end->size, err);
+ if (err == 0) {
+ struct _aarch64_ctx __user *end =
+ apply_user_offset(user, user->end_offset);
+
+ __put_user_error(0, &end->magic, err);
+ __put_user_error(0, &end->size, err);
+ }
return err;
}
-static struct rt_sigframe __user *get_sigframe(struct ksignal *ksig,
- struct pt_regs *regs)
+static int get_sigframe(struct rt_sigframe_user_layout *user,
+ struct ksignal *ksig, struct pt_regs *regs)
{
unsigned long sp, sp_top;
- struct rt_sigframe __user *frame;
+ int err;
+
+ init_user_layout(user);
+ err = setup_sigframe_layout(user);
+ if (err)
+ return err;
sp = sp_top = sigsp(regs->sp, ksig);
- sp = (sp - sizeof(struct rt_sigframe)) & ~15;
- frame = (struct rt_sigframe __user *)sp;
+ sp = round_down(sp - sizeof(struct frame_record), 16);
+ user->next_frame = (struct frame_record __user *)sp;
+
+ sp = round_down(sp, 16) - sigframe_size(user);
+ user->sigframe = (struct rt_sigframe __user *)sp;
/*
* Check that we can actually write to the signal frame.
*/
- if (!access_ok(VERIFY_WRITE, frame, sp_top - sp))
- frame = NULL;
+ if (!access_ok(VERIFY_WRITE, user->sigframe, sp_top - sp))
+ return -EFAULT;
- return frame;
+ return 0;
}
static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
- void __user *frame, int usig)
+ struct rt_sigframe_user_layout *user, int usig)
{
__sigrestore_t sigtramp;
regs->regs[0] = usig;
- regs->sp = (unsigned long)frame;
- regs->regs[29] = regs->sp + offsetof(struct rt_sigframe, fp);
+ regs->sp = (unsigned long)user->sigframe;
+ regs->regs[29] = (unsigned long)&user->next_frame->fp;
regs->pc = (unsigned long)ka->sa.sa_handler;
if (ka->sa.sa_flags & SA_RESTORER)
@@ -250,20 +589,22 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
struct pt_regs *regs)
{
+ struct rt_sigframe_user_layout user;
struct rt_sigframe __user *frame;
int err = 0;
- frame = get_sigframe(ksig, regs);
- if (!frame)
+ if (get_sigframe(&user, ksig, regs))
return 1;
+ frame = user.sigframe;
+
__put_user_error(0, &frame->uc.uc_flags, err);
__put_user_error(NULL, &frame->uc.uc_link, err);
err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
- err |= setup_sigframe(frame, regs, set);
+ err |= setup_sigframe(&user, regs, set);
if (err == 0) {
- setup_return(regs, &ksig->ka, frame, usig);
+ setup_return(regs, &ksig->ka, &user, usig);
if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
err |= copy_siginfo_to_user(&frame->info, &ksig->info);
regs->regs[1] = (unsigned long)&frame->info;
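The new signal-frame code lays records out dynamically: every optional record is padded to a
16-byte boundary, a zero magic/size pair terminates the list in __reserved[], and an
extra_context record can point past the base frame when the records do not fit. The compact
user-space sketch below mirrors that allocate-then-walk pattern; the record type, buffer size,
and helper names are made up, and the extra_context spill-over is not modelled.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ROUND_UP(x, a)	(((x) + (a) - 1) & ~((size_t)(a) - 1))

struct ctx_head {			/* mirrors struct _aarch64_ctx */
	uint32_t magic;
	uint32_t size;
};

#define DEMO_MAGIC	0x44454d4f	/* hypothetical record type */

/* Reserve a 16-byte-padded slot of 'size' bytes; return its offset or -1 if full. */
static long alloc_record(size_t *used, size_t limit, size_t size)
{
	size_t padded = ROUND_UP(size, 16);

	if (padded > limit - *used)
		return -1;
	*used += padded;
	return (long)(*used - padded);
}

int main(void)
{
	uint8_t frame[256];
	size_t used = 0;
	/* Keep room for the zero terminator, as init_user_layout() does. */
	size_t limit = sizeof(frame) - ROUND_UP(sizeof(struct ctx_head), 16);
	struct ctx_head demo = { DEMO_MAGIC, sizeof(struct ctx_head) + 24 };
	long off;

	memset(frame, 0, sizeof(frame));

	/* Lay out one demo record. */
	off = alloc_record(&used, limit, demo.size);
	if (off < 0)
		return 1;
	memcpy(frame + off, &demo, sizeof(demo));

	/* Un-reserve the terminator space and append the terminator (magic/size stay
	 * zero), as sigframe_alloc_end() does. */
	limit += ROUND_UP(sizeof(struct ctx_head), 16);
	if (alloc_record(&used, limit, sizeof(struct ctx_head)) < 0)
		return 1;

	/* Walk the records back, parse_user_sigframe()-style. */
	for (size_t pos = 0; pos + sizeof(struct ctx_head) <= sizeof(frame); ) {
		struct ctx_head h;

		memcpy(&h, frame + pos, sizeof(h));
		if (!h.magic && !h.size) {
			puts("terminator reached");
			break;
		}
		printf("record 0x%08x, %u bytes at offset %zu\n",
		       (unsigned)h.magic, (unsigned)h.size, pos);
		pos += ROUND_UP(h.size, 16);	/* the real parser also checks alignment */
	}
	return 0;
}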
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index feac80c22f61..09d37d66b630 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -210,6 +210,7 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
put_task_stack(tsk);
}
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
void save_stack_trace(struct stack_trace *trace)
{
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 0805b44f986a..c7c7088097be 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -140,7 +140,7 @@ static void dump_instr(const char *lvl, struct pt_regs *regs)
}
}
-static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
+void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
{
struct stackframe frame;
unsigned long irq_stack_ptr;
@@ -344,22 +344,24 @@ static int call_undef_hook(struct pt_regs *regs)
if (compat_thumb_mode(regs)) {
/* 16-bit Thumb instruction */
- if (get_user(instr, (u16 __user *)pc))
+ __le16 instr_le;
+ if (get_user(instr_le, (__le16 __user *)pc))
goto exit;
- instr = le16_to_cpu(instr);
+ instr = le16_to_cpu(instr_le);
if (aarch32_insn_is_wide(instr)) {
u32 instr2;
- if (get_user(instr2, (u16 __user *)(pc + 2)))
+ if (get_user(instr_le, (__le16 __user *)(pc + 2)))
goto exit;
- instr2 = le16_to_cpu(instr2);
+ instr2 = le16_to_cpu(instr_le);
instr = (instr << 16) | instr2;
}
} else {
/* 32-bit ARM instruction */
- if (get_user(instr, (u32 __user *)pc))
+ __le32 instr_le;
+ if (get_user(instr_le, (__le32 __user *)pc))
goto exit;
- instr = le32_to_cpu(instr);
+ instr = le32_to_cpu(instr_le);
}
raw_spin_lock_irqsave(&undef_lock, flags);
@@ -728,8 +730,6 @@ static int bug_handler(struct pt_regs *regs, unsigned int esr)
break;
case BUG_TRAP_TYPE_WARN:
- /* Ideally, report_bug() should backtrace for us... but no. */
- dump_backtrace(regs, NULL);
break;
default:
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 7492d9009610..e8f759f764f2 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -37,7 +37,7 @@
#include <asm/vdso.h>
#include <asm/vdso_datapage.h>
-extern char vdso_start, vdso_end;
+extern char vdso_start[], vdso_end[];
static unsigned long vdso_pages __ro_after_init;
/*
@@ -125,14 +125,14 @@ static int __init vdso_init(void)
struct page **vdso_pagelist;
unsigned long pfn;
- if (memcmp(&vdso_start, "\177ELF", 4)) {
+ if (memcmp(vdso_start, "\177ELF", 4)) {
pr_err("vDSO is not a valid ELF object!\n");
return -EINVAL;
}
- vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT;
+ vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n",
- vdso_pages + 1, vdso_pages, &vdso_start, 1L, vdso_data);
+ vdso_pages + 1, vdso_pages, vdso_start, 1L, vdso_data);
/* Allocate the vDSO pagelist, plus a page for the data. */
vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *),
@@ -145,7 +145,7 @@ static int __init vdso_init(void)
/* Grab the vDSO code pages. */
- pfn = sym_to_pfn(&vdso_start);
+ pfn = sym_to_pfn(vdso_start);
for (i = 0; i < vdso_pages; i++)
vdso_pagelist[i + 1] = pfn_to_page(pfn + i);
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index b37446a8ffdb..5c7f657dd207 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -390,6 +390,9 @@ int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
case KVM_ARM_VCPU_PMU_V3_CTRL:
ret = kvm_arm_pmu_v3_set_attr(vcpu, attr);
break;
+ case KVM_ARM_VCPU_TIMER_CTRL:
+ ret = kvm_arm_timer_set_attr(vcpu, attr);
+ break;
default:
ret = -ENXIO;
break;
@@ -407,6 +410,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
case KVM_ARM_VCPU_PMU_V3_CTRL:
ret = kvm_arm_pmu_v3_get_attr(vcpu, attr);
break;
+ case KVM_ARM_VCPU_TIMER_CTRL:
+ ret = kvm_arm_timer_get_attr(vcpu, attr);
+ break;
default:
ret = -ENXIO;
break;
@@ -424,6 +430,9 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
case KVM_ARM_VCPU_PMU_V3_CTRL:
ret = kvm_arm_pmu_v3_has_attr(vcpu, attr);
break;
+ case KVM_ARM_VCPU_TIMER_CTRL:
+ ret = kvm_arm_timer_has_attr(vcpu, attr);
+ break;
default:
ret = -ENXIO;
break;
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index fa1b18e364fc..17d8a1677a0b 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -89,6 +89,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
vcpu->stat.wfi_exit_stat++;
kvm_vcpu_block(vcpu);
+ kvm_clear_request(KVM_REQ_UNHALT, vcpu);
}
kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index aede1658aeda..945e79c641c4 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -350,6 +350,20 @@ again:
}
}
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
+ exit_code == ARM_EXCEPTION_TRAP &&
+ (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
+ kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
+ int ret = __vgic_v3_perform_cpuif_access(vcpu);
+
+ if (ret == 1) {
+ __skip_instr(vcpu);
+ goto again;
+ }
+
+ /* 0 falls through to be handled out of EL2 */
+ }
+
fp_enabled = __fpsimd_enabled();
__sysreg_save_guest_state(guest_ctxt);
@@ -422,6 +436,7 @@ void __hyp_text __noreturn __hyp_panic(void)
vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2);
host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ __timer_save_state(vcpu);
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
__sysreg_restore_host_state(host_ctxt);
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 561badf93de8..3256b9228e75 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -46,16 +46,6 @@ static const struct kvm_regs default_regs_reset32 = {
COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT),
};
-static const struct kvm_irq_level default_ptimer_irq = {
- .irq = 30,
- .level = 1,
-};
-
-static const struct kvm_irq_level default_vtimer_irq = {
- .irq = 27,
- .level = 1,
-};
-
static bool cpu_has_32bit_el1(void)
{
u64 pfr0;
@@ -108,8 +98,6 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
*/
int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
{
- const struct kvm_irq_level *cpu_vtimer_irq;
- const struct kvm_irq_level *cpu_ptimer_irq;
const struct kvm_regs *cpu_reset;
switch (vcpu->arch.target) {
@@ -122,8 +110,6 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
cpu_reset = &default_regs_reset;
}
- cpu_vtimer_irq = &default_vtimer_irq;
- cpu_ptimer_irq = &default_ptimer_irq;
break;
}
@@ -137,5 +123,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
kvm_pmu_vcpu_reset(vcpu);
/* Reset timer */
- return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq, cpu_ptimer_irq);
+ return kvm_timer_vcpu_reset(vcpu);
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 0fe27024a2e1..77862881ae86 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -56,7 +56,8 @@
*/
static bool read_from_write_only(struct kvm_vcpu *vcpu,
- const struct sys_reg_params *params)
+ struct sys_reg_params *params,
+ const struct sys_reg_desc *r)
{
WARN_ONCE(1, "Unexpected sys_reg read to write-only register\n");
print_sys_reg_instr(params);
@@ -64,6 +65,16 @@ static bool read_from_write_only(struct kvm_vcpu *vcpu,
return false;
}
+static bool write_to_read_only(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *params,
+ const struct sys_reg_desc *r)
+{
+ WARN_ONCE(1, "Unexpected sys_reg write to read-only register\n");
+ print_sys_reg_instr(params);
+ kvm_inject_undefined(vcpu);
+ return false;
+}
+
/* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
static u32 cache_levels;
@@ -93,7 +104,7 @@ static bool access_dcsw(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
if (!p->is_write)
- return read_from_write_only(vcpu, p);
+ return read_from_write_only(vcpu, p, r);
kvm_set_way_flush(vcpu);
return true;
@@ -135,7 +146,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
if (!p->is_write)
- return read_from_write_only(vcpu, p);
+ return read_from_write_only(vcpu, p, r);
vgic_v3_dispatch_sgi(vcpu, p->regval);
@@ -773,7 +784,7 @@ static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return trap_raz_wi(vcpu, p, r);
if (!p->is_write)
- return read_from_write_only(vcpu, p);
+ return read_from_write_only(vcpu, p, r);
if (pmu_write_swinc_el0_disabled(vcpu))
return false;
@@ -953,7 +964,15 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 },
+ { SYS_DESC(SYS_ICC_IAR0_EL1), write_to_read_only },
+ { SYS_DESC(SYS_ICC_EOIR0_EL1), read_from_write_only },
+ { SYS_DESC(SYS_ICC_HPPIR0_EL1), write_to_read_only },
+ { SYS_DESC(SYS_ICC_DIR_EL1), read_from_write_only },
+ { SYS_DESC(SYS_ICC_RPR_EL1), write_to_read_only },
{ SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi },
+ { SYS_DESC(SYS_ICC_IAR1_EL1), write_to_read_only },
+ { SYS_DESC(SYS_ICC_EOIR1_EL1), read_from_write_only },
+ { SYS_DESC(SYS_ICC_HPPIR1_EL1), write_to_read_only },
{ SYS_DESC(SYS_ICC_SRE_EL1), access_gic_sre },
{ SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c
index 6260b69e5622..116786d2e8e8 100644
--- a/arch/arm64/kvm/vgic-sys-reg-v3.c
+++ b/arch/arm64/kvm/vgic-sys-reg-v3.c
@@ -268,36 +268,21 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return true;
}
static const struct sys_reg_desc gic_v3_icc_reg_descs[] = {
- /* ICC_PMR_EL1 */
- { Op0(3), Op1(0), CRn(4), CRm(6), Op2(0), access_gic_pmr },
- /* ICC_BPR0_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(8), Op2(3), access_gic_bpr0 },
- /* ICC_AP0R0_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(8), Op2(4), access_gic_ap0r },
- /* ICC_AP0R1_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(8), Op2(5), access_gic_ap0r },
- /* ICC_AP0R2_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(8), Op2(6), access_gic_ap0r },
- /* ICC_AP0R3_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(8), Op2(7), access_gic_ap0r },
- /* ICC_AP1R0_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(9), Op2(0), access_gic_ap1r },
- /* ICC_AP1R1_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(9), Op2(1), access_gic_ap1r },
- /* ICC_AP1R2_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(9), Op2(2), access_gic_ap1r },
- /* ICC_AP1R3_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(9), Op2(3), access_gic_ap1r },
- /* ICC_BPR1_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(12), Op2(3), access_gic_bpr1 },
- /* ICC_CTLR_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(12), Op2(4), access_gic_ctlr },
- /* ICC_SRE_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(12), Op2(5), access_gic_sre },
- /* ICC_IGRPEN0_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(12), Op2(6), access_gic_grpen0 },
- /* ICC_GRPEN1_EL1 */
- { Op0(3), Op1(0), CRn(12), CRm(12), Op2(7), access_gic_grpen1 },
+ { SYS_DESC(SYS_ICC_PMR_EL1), access_gic_pmr },
+ { SYS_DESC(SYS_ICC_BPR0_EL1), access_gic_bpr0 },
+ { SYS_DESC(SYS_ICC_AP0R0_EL1), access_gic_ap0r },
+ { SYS_DESC(SYS_ICC_AP0R1_EL1), access_gic_ap0r },
+ { SYS_DESC(SYS_ICC_AP0R2_EL1), access_gic_ap0r },
+ { SYS_DESC(SYS_ICC_AP0R3_EL1), access_gic_ap0r },
+ { SYS_DESC(SYS_ICC_AP1R0_EL1), access_gic_ap1r },
+ { SYS_DESC(SYS_ICC_AP1R1_EL1), access_gic_ap1r },
+ { SYS_DESC(SYS_ICC_AP1R2_EL1), access_gic_ap1r },
+ { SYS_DESC(SYS_ICC_AP1R3_EL1), access_gic_ap1r },
+ { SYS_DESC(SYS_ICC_BPR1_EL1), access_gic_bpr1 },
+ { SYS_DESC(SYS_ICC_CTLR_EL1), access_gic_ctlr },
+ { SYS_DESC(SYS_ICC_SRE_EL1), access_gic_sre },
+ { SYS_DESC(SYS_ICC_IGRPEN0_EL1), access_gic_grpen0 },
+ { SYS_DESC(SYS_ICC_IGRPEN1_EL1), access_gic_grpen1 },
};
int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id,
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 3216e098c058..3e340b625436 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -95,11 +95,6 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
unsigned long attrs)
{
- if (dev == NULL) {
- WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
- return NULL;
- }
-
if (IS_ENABLED(CONFIG_ZONE_DMA) &&
dev->coherent_dma_mask <= DMA_BIT_MASK(32))
flags |= GFP_DMA;
@@ -128,10 +123,6 @@ static void __dma_free_coherent(struct device *dev, size_t size,
bool freed;
phys_addr_t paddr = dma_to_phys(dev, dma_handle);
- if (dev == NULL) {
- WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
- return;
- }
freed = dma_release_from_contiguous(dev,
phys_to_page(paddr),
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 37b95dff0b07..c7861c9864e6 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -31,6 +31,7 @@
#include <linux/highmem.h>
#include <linux/perf_event.h>
#include <linux/preempt.h>
+#include <linux/hugetlb.h>
#include <asm/bug.h>
#include <asm/cpufeature.h>
@@ -42,6 +43,8 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
+#include <acpi/ghes.h>
+
struct fault_info {
int (*fn)(unsigned long addr, unsigned int esr,
struct pt_regs *regs);
@@ -80,18 +83,35 @@ static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
#endif
/*
- * Dump out the page tables associated with 'addr' in mm 'mm'.
+ * Dump out the page tables associated with 'addr' in the currently active mm.
*/
-void show_pte(struct mm_struct *mm, unsigned long addr)
+void show_pte(unsigned long addr)
{
+ struct mm_struct *mm;
pgd_t *pgd;
- if (!mm)
+ if (addr < TASK_SIZE) {
+ /* TTBR0 */
+ mm = current->active_mm;
+ if (mm == &init_mm) {
+ pr_alert("[%016lx] user address but active_mm is swapper\n",
+ addr);
+ return;
+ }
+ } else if (addr >= VA_START) {
+ /* TTBR1 */
mm = &init_mm;
+ } else {
+ pr_alert("[%016lx] address between user and kernel address ranges\n",
+ addr);
+ return;
+ }
- pr_alert("pgd = %p\n", mm->pgd);
+ pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgd = %p\n",
+ mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
+ VA_BITS, mm->pgd);
pgd = pgd_offset(mm, addr);
- pr_alert("[%08lx] *pgd=%016llx", addr, pgd_val(*pgd));
+ pr_alert("[%016lx] *pgd=%016llx", addr, pgd_val(*pgd));
do {
pud_t *pud;
@@ -196,8 +216,8 @@ static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs,
/*
* The kernel tried to access some page that wasn't present.
*/
-static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
- unsigned int esr, struct pt_regs *regs)
+static void __do_kernel_fault(unsigned long addr, unsigned int esr,
+ struct pt_regs *regs)
{
const char *msg;
@@ -227,7 +247,7 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
pr_alert("Unable to handle kernel %s at virtual address %08lx\n", msg,
addr);
- show_pte(mm, addr);
+ show_pte(addr);
die("Oops", regs, esr);
bust_spinlocks(0);
do_exit(SIGKILL);
@@ -239,18 +259,20 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
*/
static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
unsigned int esr, unsigned int sig, int code,
- struct pt_regs *regs)
+ struct pt_regs *regs, int fault)
{
struct siginfo si;
const struct fault_info *inf;
+ unsigned int lsb = 0;
if (unhandled_signal(tsk, sig) && show_unhandled_signals_ratelimited()) {
inf = esr_to_fault_info(esr);
- pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n",
+ pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x",
tsk->comm, task_pid_nr(tsk), inf->name, sig,
addr, esr);
- show_pte(tsk->mm, addr);
- show_regs(regs);
+ print_vma_addr(KERN_CONT ", in ", regs->pc);
+ pr_cont("\n");
+ __show_regs(regs);
}
tsk->thread.fault_address = addr;
@@ -259,13 +281,23 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
si.si_errno = 0;
si.si_code = code;
si.si_addr = (void __user *)addr;
+ /*
+ * Either small page or large page may be poisoned.
+ * In other words, VM_FAULT_HWPOISON_LARGE and
+ * VM_FAULT_HWPOISON are mutually exclusive.
+ */
+ if (fault & VM_FAULT_HWPOISON_LARGE)
+ lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
+ else if (fault & VM_FAULT_HWPOISON)
+ lsb = PAGE_SHIFT;
+ si.si_addr_lsb = lsb;
+
force_sig_info(sig, &si, tsk);
}
static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
struct task_struct *tsk = current;
- struct mm_struct *mm = tsk->active_mm;
const struct fault_info *inf;
/*
@@ -274,9 +306,9 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
*/
if (user_mode(regs)) {
inf = esr_to_fault_info(esr);
- __do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs);
+ __do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs, 0);
} else
- __do_kernel_fault(mm, addr, esr, regs);
+ __do_kernel_fault(addr, esr, regs);
}
#define VM_FAULT_BADMAP 0x010000
@@ -329,7 +361,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
{
struct task_struct *tsk;
struct mm_struct *mm;
- int fault, sig, code;
+ int fault, sig, code, major = 0;
unsigned long vm_flags = VM_READ | VM_WRITE;
unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
@@ -368,6 +400,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
die("Accessing user space memory outside uaccess.h routines", regs, esr);
}
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+
/*
* As per x86, we may deadlock here. However, since the kernel only
* validly references user space from well defined areas of the code,
@@ -391,24 +425,42 @@ retry:
}
fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk);
+ major |= fault & VM_FAULT_MAJOR;
- /*
- * If we need to retry but a fatal signal is pending, handle the
- * signal first. We do not need to release the mmap_sem because it
- * would already be released in __lock_page_or_retry in mm/filemap.c.
- */
- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
- return 0;
+ if (fault & VM_FAULT_RETRY) {
+ /*
+ * If we need to retry but a fatal signal is pending,
+ * handle the signal first. We do not need to release
+ * the mmap_sem because it would already be released
+ * in __lock_page_or_retry in mm/filemap.c.
+ */
+ if (fatal_signal_pending(current))
+ return 0;
+
+ /*
+ * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
+ * starvation.
+ */
+ if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
+ mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ mm_flags |= FAULT_FLAG_TRIED;
+ goto retry;
+ }
+ }
+ up_read(&mm->mmap_sem);
/*
- * Major/minor page fault accounting is only done on the initial
- * attempt. If we go through a retry, it is extremely likely that the
- * page will be found in page cache at that point.
+ * Handle the "normal" (no error) case first.
*/
-
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
- if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
+ if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
+ VM_FAULT_BADACCESS)))) {
+ /*
+ * Major/minor page fault accounting is only done
+ * once. If we go through a retry, it is extremely
+ * likely that the page will be found in page cache at
+ * that point.
+ */
+ if (major) {
tsk->maj_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
addr);
@@ -417,25 +469,9 @@ retry:
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
addr);
}
- if (fault & VM_FAULT_RETRY) {
- /*
- * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
- * starvation.
- */
- mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
- mm_flags |= FAULT_FLAG_TRIED;
- goto retry;
- }
- }
-
- up_read(&mm->mmap_sem);
- /*
- * Handle the "normal" case first - VM_FAULT_MAJOR
- */
- if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
- VM_FAULT_BADACCESS))))
return 0;
+ }
/*
* If we are in kernel mode at this point, we have no context to
@@ -461,6 +497,9 @@ retry:
*/
sig = SIGBUS;
code = BUS_ADRERR;
+ } else if (fault & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) {
+ sig = SIGBUS;
+ code = BUS_MCEERR_AR;
} else {
/*
* Something tried to access memory that isn't in our memory
@@ -471,11 +510,11 @@ retry:
SEGV_ACCERR : SEGV_MAPERR;
}
- __do_user_fault(tsk, addr, esr, sig, code, regs);
+ __do_user_fault(tsk, addr, esr, sig, code, regs, fault);
return 0;
no_context:
- __do_kernel_fault(mm, addr, esr, regs);
+ __do_kernel_fault(addr, esr, regs);
return 0;
}
@@ -522,6 +561,47 @@ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
return 1;
}
+/*
+ * This abort handler deals with Synchronous External Abort.
+ * It calls notifiers, and then returns "fault".
+ */
+static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+{
+ struct siginfo info;
+ const struct fault_info *inf;
+ int ret = 0;
+
+ inf = esr_to_fault_info(esr);
+ pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n",
+ inf->name, esr, addr);
+
+ /*
+ * Synchronous aborts may interrupt code which had interrupts masked.
+	 * Before calling out into the wider kernel, tell the interested
+	 * subsystems.
 * Before calling out into the wider kernel, tell the interested
+ */
+ if (IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
+ if (interrupts_enabled(regs))
+ nmi_enter();
+
+ ret = ghes_notify_sea();
+
+ if (interrupts_enabled(regs))
+ nmi_exit();
+ }
+
+ info.si_signo = SIGBUS;
+ info.si_errno = 0;
+ info.si_code = 0;
+ if (esr & ESR_ELx_FnV)
+ info.si_addr = NULL;
+ else
+ info.si_addr = (void __user *)addr;
+ arm64_notify_die("", regs, &info, esr);
+
+ return ret;
+}
+
static const struct fault_info fault_info[] = {
{ do_bad, SIGBUS, 0, "ttbr address size fault" },
{ do_bad, SIGBUS, 0, "level 1 address size fault" },
@@ -539,22 +619,22 @@ static const struct fault_info fault_info[] = {
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" },
- { do_bad, SIGBUS, 0, "synchronous external abort" },
+ { do_sea, SIGBUS, 0, "synchronous external abort" },
{ do_bad, SIGBUS, 0, "unknown 17" },
{ do_bad, SIGBUS, 0, "unknown 18" },
{ do_bad, SIGBUS, 0, "unknown 19" },
- { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous parity error" },
+ { do_sea, SIGBUS, 0, "level 0 (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 1 (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 2 (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 3 (translation table walk)" },
+ { do_sea, SIGBUS, 0, "synchronous parity or ECC error" },
{ do_bad, SIGBUS, 0, "unknown 25" },
{ do_bad, SIGBUS, 0, "unknown 26" },
{ do_bad, SIGBUS, 0, "unknown 27" },
- { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 0 synchronous parity error (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 1 synchronous parity error (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 2 synchronous parity error (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 3 synchronous parity error (translation table walk)" },
{ do_bad, SIGBUS, 0, "unknown 32" },
{ do_alignment_fault, SIGBUS, BUS_ADRALN, "alignment fault" },
{ do_bad, SIGBUS, 0, "unknown 34" },
@@ -590,6 +670,23 @@ static const struct fault_info fault_info[] = {
};
/*
+ * Handle Synchronous External Aborts that occur in a guest kernel.
+ *
+ * The return value will be zero if the SEA was successfully handled
+ * and non-zero if there was an error processing the error or there was
+ * no error to process.
+ */
+int handle_guest_sea(phys_addr_t addr, unsigned int esr)
+{
+ int ret = -ENOENT;
+
+ if (IS_ENABLED(CONFIG_ACPI_APEI_SEA))
+ ret = ghes_notify_sea();
+
+ return ret;
+}
+
+/*
* Dispatch a data abort to the relevant handler.
*/
asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
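The reworked show_pte() decides which mm to dump purely from the faulting address: TTBR0
(user) addresses sit below TASK_SIZE, TTBR1 (kernel) addresses at or above VA_START, and
anything in between belongs to neither translation regime. A small sketch of that
classification; the constants are illustrative values for a 48-bit VA configuration, whereas
the kernel derives them from VA_BITS.

#include <stdint.h>
#include <stdio.h>

#define TASK_SIZE	0x0001000000000000ULL	/* top of the TTBR0 (user) range, assumed */
#define VA_START	0xffff000000000000ULL	/* bottom of the TTBR1 (kernel) range, assumed */

static const char *classify(uint64_t addr)
{
	if (addr < TASK_SIZE)
		return "user (TTBR0) address";
	if (addr >= VA_START)
		return "kernel (TTBR1) address";
	return "between user and kernel address ranges";
}

int main(void)
{
	const uint64_t samples[] = {
		0x0000007fffff0000ULL,		/* typical user stack address */
		0xffff000008080000ULL,		/* typical kernel text address */
		0x8000000000000000ULL,		/* hits neither translation table */
	};

	for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("%016llx: %s\n",
		       (unsigned long long)samples[i], classify(samples[i]));
	return 0;
}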
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 7514a000e361..69b8200b1cfd 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -136,36 +136,27 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
pud_t *pud;
- pmd_t *pmd = NULL;
- pte_t *pte = NULL;
+ pmd_t *pmd;
pgd = pgd_offset(mm, addr);
pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd);
if (!pgd_present(*pgd))
return NULL;
+
pud = pud_offset(pgd, addr);
- if (!pud_present(*pud))
+ if (pud_none(*pud))
return NULL;
-
- if (pud_huge(*pud))
+ /* swap or huge page */
+ if (!pud_present(*pud) || pud_huge(*pud))
return (pte_t *)pud;
+ /* table; check the next level */
+
pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd))
+ if (pmd_none(*pmd))
return NULL;
-
- if (pte_cont(pmd_pte(*pmd))) {
- pmd = pmd_offset(
- pud, (addr & CONT_PMD_MASK));
- return (pte_t *)pmd;
- }
- if (pmd_huge(*pmd))
+ if (!pmd_present(*pmd) || pmd_huge(*pmd))
return (pte_t *)pmd;
- pte = pte_offset_kernel(pmd, addr);
- if (pte_present(*pte) && pte_cont(*pte)) {
- pte = pte_offset_kernel(
- pmd, (addr & CONT_PTE_MASK));
- return pte;
- }
+
return NULL;
}
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 7b0d55756eb1..adc208c2ae9c 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -18,6 +18,7 @@
#include <linux/elf.h>
#include <linux/fs.h>
+#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/export.h>
@@ -103,12 +104,18 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
*/
int valid_phys_addr_range(phys_addr_t addr, size_t size)
{
- if (addr < PHYS_OFFSET)
- return 0;
- if (addr + size > __pa(high_memory - 1) + 1)
- return 0;
-
- return 1;
+ /*
+ * Check whether addr is covered by a memory region without the
+ * MEMBLOCK_NOMAP attribute, and whether that region covers the
+ * entire range. In theory, this could lead to false negatives
+ * if the range is covered by distinct but adjacent memory regions
+	 * that only differ in other attributes. However, few such
+	 * attributes have been defined, and it is debatable whether it
+	 * follows that /dev/mem read() calls should be able to traverse
+	 * such boundaries.
+ */
+ return memblock_is_region_memory(addr, size) &&
+ memblock_is_map_memory(addr);
}
/*
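The new valid_phys_addr_range() delegates to memblock, which answers two questions: does the
whole [addr, addr+size) range lie inside known memory, and is that memory mapped (not
MEMBLOCK_NOMAP). Below is a toy version of the containment half over a static region table;
the region list and the attribute flag are invented, and unlike memblock it does not merge
adjacent regions.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define REGION_NOMAP	0x1	/* stand-in for MEMBLOCK_NOMAP */

struct region {
	uint64_t base;
	uint64_t size;
	unsigned int flags;
};

/* A made-up physical memory map. */
static const struct region regions[] = {
	{ 0x80000000, 0x40000000, 0 },			/* 1 GiB of normal RAM */
	{ 0xc0000000, 0x00200000, REGION_NOMAP },	/* firmware-reserved, unmapped */
};

/* True if [addr, addr+size) lies entirely inside one mapped region. */
static bool phys_range_valid(uint64_t addr, uint64_t size)
{
	for (unsigned i = 0; i < sizeof(regions) / sizeof(regions[0]); i++) {
		const struct region *r = &regions[i];

		if (addr >= r->base && addr - r->base + size <= r->size)
			return !(r->flags & REGION_NOMAP);
	}
	return false;
}

int main(void)
{
	printf("%d\n", phys_range_valid(0x80001000, 0x1000));	/* 1: inside RAM */
	printf("%d\n", phys_range_valid(0xbfffff00, 0x1000));	/* 0: crosses the region end */
	printf("%d\n", phys_range_valid(0xc0000000, 0x1000));	/* 0: NOMAP region */
	return 0;
}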
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 0c429ec6fde8..23c2d89a362e 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -31,6 +31,7 @@
#include <linux/fs.h>
#include <linux/io.h>
#include <linux/mm.h>
+#include <linux/vmalloc.h>
#include <asm/barrier.h>
#include <asm/cputype.h>
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index c870d6f01ac2..f32144b2e07f 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -70,7 +70,8 @@ struct jit_ctx {
int idx;
int epilogue_offset;
int *offset;
- u32 *image;
+ __le32 *image;
+ u32 stack_size;
};
static inline void emit(const u32 insn, struct jit_ctx *ctx)
@@ -130,7 +131,7 @@ static inline int bpf2a64_offset(int bpf_to, int bpf_from,
static void jit_fill_hole(void *area, unsigned int size)
{
- u32 *ptr;
+ __le32 *ptr;
/* We are guaranteed to have aligned memory. */
for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
*ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT);
@@ -147,16 +148,11 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
/* Stack must be multiples of 16B */
#define STACK_ALIGN(sz) (((sz) + 15) & ~15)
-#define _STACK_SIZE \
- (MAX_BPF_STACK \
- + 4 /* extra for skb_copy_bits buffer */)
-
-#define STACK_SIZE STACK_ALIGN(_STACK_SIZE)
-
#define PROLOGUE_OFFSET 8
static int build_prologue(struct jit_ctx *ctx)
{
+ const struct bpf_prog *prog = ctx->prog;
const u8 r6 = bpf2a64[BPF_REG_6];
const u8 r7 = bpf2a64[BPF_REG_7];
const u8 r8 = bpf2a64[BPF_REG_8];
@@ -178,9 +174,9 @@ static int build_prologue(struct jit_ctx *ctx)
* | |
* | ... | BPF prog stack
* | |
- * +-----+ <= (BPF_FP - MAX_BPF_STACK)
+ * +-----+ <= (BPF_FP - prog->aux->stack_depth)
* |RSVD | JIT scratchpad
- * current A64_SP => +-----+ <= (BPF_FP - STACK_SIZE)
+ * current A64_SP => +-----+ <= (BPF_FP - ctx->stack_size)
* | |
* | ... | Function call stack
* | |
@@ -204,8 +200,12 @@ static int build_prologue(struct jit_ctx *ctx)
/* Initialize tail_call_cnt */
emit(A64_MOVZ(1, tcc, 0, 0), ctx);
+ /* 4 byte extra for skb_copy_bits buffer */
+ ctx->stack_size = prog->aux->stack_depth + 4;
+ ctx->stack_size = STACK_ALIGN(ctx->stack_size);
+
/* Set up function call stack */
- emit(A64_SUB_I(1, A64_SP, A64_SP, STACK_SIZE), ctx);
+ emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
cur_offset = ctx->idx - idx0;
if (cur_offset != PROLOGUE_OFFSET) {
@@ -290,7 +290,7 @@ static void build_epilogue(struct jit_ctx *ctx)
const u8 fp = bpf2a64[BPF_REG_FP];
/* We're done with BPF stack */
- emit(A64_ADD_I(1, A64_SP, A64_SP, STACK_SIZE), ctx);
+ emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
/* Restore fp (x25) and x26 */
emit(A64_POP(fp, A64_R(26), A64_SP), ctx);
@@ -589,7 +589,7 @@ emit_cond_jmp:
break;
}
/* tail call */
- case BPF_JMP | BPF_CALL | BPF_X:
+ case BPF_JMP | BPF_TAIL_CALL:
if (emit_bpf_tail_call(ctx))
return -EFAULT;
break;
@@ -735,7 +735,7 @@ emit_cond_jmp:
return -EINVAL;
}
emit_a64_mov_i64(r3, size, ctx);
- emit(A64_SUB_I(1, r4, fp, STACK_SIZE), ctx);
+ emit(A64_SUB_I(1, r4, fp, ctx->stack_size), ctx);
emit_a64_mov_i64(r5, (unsigned long)bpf_load_pointer, ctx);
emit(A64_BLR(r5), ctx);
emit(A64_MOV(1, r0, A64_R(0)), ctx);
@@ -874,7 +874,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
/* 2. Now, the actual pass. */
- ctx.image = (u32 *)image_ptr;
+ ctx.image = (__le32 *)image_ptr;
ctx.idx = 0;
build_prologue(&ctx);
@@ -903,6 +903,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
bpf_jit_binary_lock_ro(header);
prog->bpf_func = (void *)ctx.image;
prog->jited = 1;
+ prog->jited_len = image_size;
out_off:
kfree(ctx.offset);
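The prologue change above sizes the JIT scratch area per program: prog->aux->stack_depth plus 4 bytes for the skb_copy_bits buffer, rounded up to a 16-byte multiple by STACK_ALIGN, instead of always reserving the fixed MAX_BPF_STACK-based STACK_SIZE. A standalone sketch of that arithmetic follows; the stack depths in the array are made-up values for illustration.

/* Standalone sketch of the per-program stack sizing used above. */
#include <stdio.h>

#define STACK_ALIGN(sz)	(((sz) + 15) & ~15)	/* keep A64_SP 16-byte aligned */

int main(void)
{
	/* Hypothetical verifier-reported stack depths, in bytes. */
	unsigned int depths[] = { 0, 20, 64, 509 };

	for (unsigned int i = 0; i < sizeof(depths) / sizeof(depths[0]); i++) {
		/* 4 extra bytes for the skb_copy_bits scratch buffer. */
		unsigned int stack_size = STACK_ALIGN(depths[i] + 4);

		printf("stack_depth=%3u -> stack_size=%3u\n",
		       depths[i], stack_size);
	}
	return 0;
}

The rounding keeps the stack pointer 16-byte aligned as the comment in the hunk requires, while small programs now reserve far less than the old fixed size.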
diff --git a/arch/blackfin/configs/BF609-EZKIT_defconfig b/arch/blackfin/configs/BF609-EZKIT_defconfig
index ba4267f658af..3ce77f07208a 100644
--- a/arch/blackfin/configs/BF609-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF609-EZKIT_defconfig
@@ -105,7 +105,7 @@ CONFIG_SPI=y
CONFIG_SPI_ADI_V3=y
CONFIG_GPIOLIB=y
CONFIG_GPIO_SYSFS=y
-CONFIG_GPIO_MCP23S08=y
+CONFIG_PINCTRL_MCP23S08=y
# CONFIG_HWMON is not set
CONFIG_WATCHDOG=y
CONFIG_BFIN_WDT=y
diff --git a/arch/blackfin/mach-bf527/boards/tll6527m.c b/arch/blackfin/mach-bf527/boards/tll6527m.c
index c1acce4c2e45..ce5488e8226b 100644
--- a/arch/blackfin/mach-bf527/boards/tll6527m.c
+++ b/arch/blackfin/mach-bf527/boards/tll6527m.c
@@ -348,14 +348,14 @@ static struct platform_device bfin_i2s = {
};
#endif
-#if IS_ENABLED(CONFIG_GPIO_MCP23S08)
+#if IS_ENABLED(CONFIG_PINCTRL_MCP23S08)
#include <linux/spi/mcp23s08.h>
static const struct mcp23s08_platform_data bfin_mcp23s08_sys_gpio_info = {
- .chip[0].is_present = true,
+ .spi_present_mask = BIT(0),
.base = 0x30,
};
static const struct mcp23s08_platform_data bfin_mcp23s08_usr_gpio_info = {
- .chip[2].is_present = true,
+ .spi_present_mask = BIT(2),
.base = 0x38,
};
#endif
@@ -423,7 +423,7 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
.mode = SPI_CPHA | SPI_CPOL,
},
#endif
-#if IS_ENABLED(CONFIG_GPIO_MCP23S08)
+#if IS_ENABLED(CONFIG_PINCTRL_MCP23S08)
{
.modalias = "mcp23s08",
.platform_data = &bfin_mcp23s08_sys_gpio_info,
diff --git a/arch/blackfin/mach-bf609/boards/ezkit.c b/arch/blackfin/mach-bf609/boards/ezkit.c
index 9231e5a72b93..51157a255824 100644
--- a/arch/blackfin/mach-bf609/boards/ezkit.c
+++ b/arch/blackfin/mach-bf609/boards/ezkit.c
@@ -1887,7 +1887,7 @@ static struct platform_device i2c_bfin_twi1_device = {
};
#endif
-#if IS_ENABLED(CONFIG_GPIO_MCP23S08)
+#if IS_ENABLED(CONFIG_PINCTRL_MCP23S08)
#include <linux/spi/mcp23s08.h>
static const struct mcp23s08_platform_data bfin_mcp23s08_soft_switch0 = {
.base = 120,
@@ -1929,7 +1929,7 @@ static struct i2c_board_info __initdata bfin_i2c_board_info0[] = {
I2C_BOARD_INFO("ssm2602", 0x1b),
},
#endif
-#if IS_ENABLED(CONFIG_GPIO_MCP23S08)
+#if IS_ENABLED(CONFIG_PINCTRL_MCP23S08)
{
I2C_BOARD_INFO("mcp23017", 0x21),
.platform_data = (void *)&bfin_mcp23s08_soft_switch0
diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h
index 1ccf45657472..f1e3b20dce9f 100644
--- a/arch/frv/include/uapi/asm/socket.h
+++ b/arch/frv/include/uapi/asm/socket.h
@@ -98,5 +98,9 @@
#define SO_COOKIE 57
+#define SCM_TIMESTAMPING_PKTINFO 58
+
+#define SO_PEERGROUPS 59
+
#endif /* _ASM_SOCKET_H */
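SCM_TIMESTAMPING_PKTINFO and SO_PEERGROUPS are added with matching values across the per-arch socket.h headers below (parisc keeps its own 0x40xx numbering). SO_PEERGROUPS is read with getsockopt() on a connected AF_UNIX socket and returns the peer's supplementary group list. The sketch below shows one plausible userspace call pattern; print_peer_groups() is an invented helper, and the retry-on-ERANGE convention is an assumption based on how similar credential options behave.

/*
 * Sketch: query a connected AF_UNIX peer's supplementary groups with
 * SO_PEERGROUPS. Error-handling convention (ERANGE reports the needed
 * buffer size) is assumed, not taken from this patch.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <sys/types.h>

#ifndef SO_PEERGROUPS
#define SO_PEERGROUPS 59	/* generic value from the hunks above */
#endif

static int print_peer_groups(int fd)
{
	socklen_t len = 0;
	gid_t *groups;

	/* First call with a zero-length buffer to learn the needed size. */
	if (getsockopt(fd, SOL_SOCKET, SO_PEERGROUPS, NULL, &len) == -1 &&
	    errno != ERANGE)
		return -1;

	groups = malloc(len);
	if (len && !groups)
		return -1;

	if (getsockopt(fd, SOL_SOCKET, SO_PEERGROUPS, groups, &len) == -1) {
		free(groups);
		return -1;
	}

	for (socklen_t i = 0; i < len / sizeof(gid_t); i++)
		printf("peer group: %u\n", (unsigned int)groups[i]);

	free(groups);
	return 0;
}

int main(void)
{
	int sv[2];

	/* A socketpair is enough to exercise the option against ourselves. */
	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1)
		return 1;
	return print_peer_groups(sv[0]) ? 1 : 0;
}

On most architectures the value tracks asm-generic/socket.h, which the powerpc header further down now simply includes instead of duplicating the whole list.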
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index 2c3f4b48042a..5dd5c5d0d642 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -107,4 +107,8 @@
#define SO_COOKIE 57
+#define SCM_TIMESTAMPING_PKTINFO 58
+
+#define SO_PEERGROUPS 59
+
#endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h
index ae6548d29a18..f8f7b47e247f 100644
--- a/arch/m32r/include/uapi/asm/socket.h
+++ b/arch/m32r/include/uapi/asm/socket.h
@@ -98,4 +98,8 @@
#define SO_COOKIE 57
+#define SCM_TIMESTAMPING_PKTINFO 58
+
+#define SO_PEERGROUPS 59
+
#endif /* _ASM_M32R_SOCKET_H */
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 2828ecde133d..45bcd1cfcec0 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -364,6 +364,7 @@ config MACH_INGENIC
select SYS_SUPPORTS_ZBOOT_UART16550
select DMA_NONCOHERENT
select IRQ_MIPS_CPU
+ select PINCTRL
select GPIOLIB
select COMMON_CLK
select GENERIC_IRQ_CHIP
diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts
index 1652d8d60b1e..fd138d9978c1 100644
--- a/arch/mips/boot/dts/ingenic/ci20.dts
+++ b/arch/mips/boot/dts/ingenic/ci20.dts
@@ -29,18 +29,30 @@
&uart0 {
status = "okay";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_uart0>;
};
&uart1 {
status = "okay";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_uart1>;
};
&uart3 {
status = "okay";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_uart2>;
};
&uart4 {
status = "okay";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_uart4>;
};
&nemc {
@@ -61,6 +73,13 @@
ingenic,nemc-tAW = <15>;
ingenic,nemc-tSTRV = <100>;
+ /*
+ * Only CLE/ALE are needed for the devices that are connected, rather
+ * than the full address line set.
+ */
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_nemc>;
+
nand@1 {
reg = <1>;
@@ -69,6 +88,9 @@
nand-ecc-mode = "hw";
nand-on-flash-bbt;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_nemc_cs1>;
+
partitions {
compatible = "fixed-partitions";
#address-cells = <2>;
@@ -106,3 +128,41 @@
&bch {
status = "okay";
};
+
+&pinctrl {
+ pins_uart0: uart0 {
+ function = "uart0";
+ groups = "uart0-data";
+ bias-disable;
+ };
+
+ pins_uart1: uart1 {
+ function = "uart1";
+ groups = "uart1-data";
+ bias-disable;
+ };
+
+ pins_uart2: uart2 {
+ function = "uart2";
+ groups = "uart2-data", "uart2-hwflow";
+ bias-disable;
+ };
+
+ pins_uart4: uart4 {
+ function = "uart4";
+ groups = "uart4-data";
+ bias-disable;
+ };
+
+ pins_nemc: nemc {
+ function = "nemc";
+ groups = "nemc-data", "nemc-cle-ale", "nemc-rd-we", "nemc-frd-fwe";
+ bias-disable;
+ };
+
+ pins_nemc_cs1: nemc-cs1 {
+ function = "nemc-cs1";
+ groups = "nemc-cs1";
+ bias-disable;
+ };
+};
diff --git a/arch/mips/boot/dts/ingenic/jz4740.dtsi b/arch/mips/boot/dts/ingenic/jz4740.dtsi
index 3e1587f1f77a..2ca7ce7481f1 100644
--- a/arch/mips/boot/dts/ingenic/jz4740.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4740.dtsi
@@ -55,6 +55,74 @@
clock-names = "rtc";
};
+ pinctrl: pin-controller@10010000 {
+ compatible = "ingenic,jz4740-pinctrl";
+ reg = <0x10010000 0x400>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ gpa: gpio@0 {
+ compatible = "ingenic,jz4740-gpio";
+ reg = <0>;
+
+ gpio-controller;
+ gpio-ranges = <&pinctrl 0 0 32>;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <28>;
+ };
+
+ gpb: gpio@1 {
+ compatible = "ingenic,jz4740-gpio";
+ reg = <1>;
+
+ gpio-controller;
+ gpio-ranges = <&pinctrl 0 32 32>;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <27>;
+ };
+
+ gpc: gpio@2 {
+ compatible = "ingenic,jz4740-gpio";
+ reg = <2>;
+
+ gpio-controller;
+ gpio-ranges = <&pinctrl 0 64 32>;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <26>;
+ };
+
+ gpd: gpio@3 {
+ compatible = "ingenic,jz4740-gpio";
+ reg = <3>;
+
+ gpio-controller;
+ gpio-ranges = <&pinctrl 0 96 32>;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <25>;
+ };
+ };
+
uart0: serial@10030000 {
compatible = "ingenic,jz4740-uart";
reg = <0x10030000 0x100>;
diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi
index b868b429add2..4853ef67b3ab 100644
--- a/arch/mips/boot/dts/ingenic/jz4780.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi
@@ -44,6 +44,104 @@
#clock-cells = <1>;
};
+ pinctrl: pin-controller@10010000 {
+ compatible = "ingenic,jz4780-pinctrl";
+ reg = <0x10010000 0x600>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ gpa: gpio@0 {
+ compatible = "ingenic,jz4780-gpio";
+ reg = <0>;
+
+ gpio-controller;
+ gpio-ranges = <&pinctrl 0 0 32>;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <17>;
+ };
+
+ gpb: gpio@1 {
+ compatible = "ingenic,jz4780-gpio";
+ reg = <1>;
+
+ gpio-controller;
+ gpio-ranges = <&pinctrl 0 32 32>;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <16>;
+ };
+
+ gpc: gpio@2 {
+ compatible = "ingenic,jz4780-gpio";
+ reg = <2>;
+
+ gpio-controller;
+ gpio-ranges = <&pinctrl 0 64 32>;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <15>;
+ };
+
+ gpd: gpio@3 {
+ compatible = "ingenic,jz4780-gpio";
+ reg = <3>;
+
+ gpio-controller;
+ gpio-ranges = <&pinctrl 0 96 32>;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <14>;
+ };
+
+ gpe: gpio@4 {
+ compatible = "ingenic,jz4780-gpio";
+ reg = <4>;
+
+ gpio-controller;
+ gpio-ranges = <&pinctrl 0 128 32>;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <13>;
+ };
+
+ gpf: gpio@5 {
+ compatible = "ingenic,jz4780-gpio";
+ reg = <5>;
+
+ gpio-controller;
+ gpio-ranges = <&pinctrl 0 160 32>;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <12>;
+ };
+ };
+
uart0: serial@10030000 {
compatible = "ingenic,jz4780-uart";
reg = <0x10030000 0x100>;
diff --git a/arch/mips/boot/dts/ingenic/qi_lb60.dts b/arch/mips/boot/dts/ingenic/qi_lb60.dts
index be1a7d3a3e1b..b715ee2ac2ee 100644
--- a/arch/mips/boot/dts/ingenic/qi_lb60.dts
+++ b/arch/mips/boot/dts/ingenic/qi_lb60.dts
@@ -17,3 +17,16 @@
&rtc_dev {
system-power-controller;
};
+
+&uart0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_uart0>;
+};
+
+&pinctrl {
+ pins_uart0: uart0 {
+ function = "uart0";
+ groups = "uart0-data";
+ bias-disable;
+ };
+};
diff --git a/arch/mips/include/asm/mach-jz4740/gpio.h b/arch/mips/include/asm/mach-jz4740/gpio.h
index 7c7708a23baa..fd847c984701 100644
--- a/arch/mips/include/asm/mach-jz4740/gpio.h
+++ b/arch/mips/include/asm/mach-jz4740/gpio.h
@@ -16,380 +16,9 @@
#ifndef _JZ_GPIO_H
#define _JZ_GPIO_H
-#include <linux/types.h>
-
-enum jz_gpio_function {
- JZ_GPIO_FUNC_NONE,
- JZ_GPIO_FUNC1,
- JZ_GPIO_FUNC2,
- JZ_GPIO_FUNC3,
-};
-
-/*
- Usually a driver for a SoC component has to request several gpio pins and
- configure them as function pins.
- jz_gpio_bulk_request can be used to ease this process.
- Usually one would do something like:
-
- static const struct jz_gpio_bulk_request i2c_pins[] = {
- JZ_GPIO_BULK_PIN(I2C_SDA),
- JZ_GPIO_BULK_PIN(I2C_SCK),
- };
-
- inside the probe function:
-
- ret = jz_gpio_bulk_request(i2c_pins, ARRAY_SIZE(i2c_pins));
- if (ret) {
- ...
-
- inside the remove function:
-
- jz_gpio_bulk_free(i2c_pins, ARRAY_SIZE(i2c_pins));
-
-*/
-
-struct jz_gpio_bulk_request {
- int gpio;
- const char *name;
- enum jz_gpio_function function;
-};
-
-#define JZ_GPIO_BULK_PIN(pin) { \
- .gpio = JZ_GPIO_ ## pin, \
- .name = #pin, \
- .function = JZ_GPIO_FUNC_ ## pin \
-}
-
-int jz_gpio_bulk_request(const struct jz_gpio_bulk_request *request, size_t num);
-void jz_gpio_bulk_free(const struct jz_gpio_bulk_request *request, size_t num);
-void jz_gpio_bulk_suspend(const struct jz_gpio_bulk_request *request, size_t num);
-void jz_gpio_bulk_resume(const struct jz_gpio_bulk_request *request, size_t num);
-void jz_gpio_enable_pullup(unsigned gpio);
-void jz_gpio_disable_pullup(unsigned gpio);
-int jz_gpio_set_function(int gpio, enum jz_gpio_function function);
-
-int jz_gpio_port_direction_input(int port, uint32_t mask);
-int jz_gpio_port_direction_output(int port, uint32_t mask);
-void jz_gpio_port_set_value(int port, uint32_t value, uint32_t mask);
-uint32_t jz_gpio_port_get_value(int port, uint32_t mask);
-
#define JZ_GPIO_PORTA(x) ((x) + 32 * 0)
#define JZ_GPIO_PORTB(x) ((x) + 32 * 1)
#define JZ_GPIO_PORTC(x) ((x) + 32 * 2)
#define JZ_GPIO_PORTD(x) ((x) + 32 * 3)
-/* Port A function pins */
-#define JZ_GPIO_MEM_DATA0 JZ_GPIO_PORTA(0)
-#define JZ_GPIO_MEM_DATA1 JZ_GPIO_PORTA(1)
-#define JZ_GPIO_MEM_DATA2 JZ_GPIO_PORTA(2)
-#define JZ_GPIO_MEM_DATA3 JZ_GPIO_PORTA(3)
-#define JZ_GPIO_MEM_DATA4 JZ_GPIO_PORTA(4)
-#define JZ_GPIO_MEM_DATA5 JZ_GPIO_PORTA(5)
-#define JZ_GPIO_MEM_DATA6 JZ_GPIO_PORTA(6)
-#define JZ_GPIO_MEM_DATA7 JZ_GPIO_PORTA(7)
-#define JZ_GPIO_MEM_DATA8 JZ_GPIO_PORTA(8)
-#define JZ_GPIO_MEM_DATA9 JZ_GPIO_PORTA(9)
-#define JZ_GPIO_MEM_DATA10 JZ_GPIO_PORTA(10)
-#define JZ_GPIO_MEM_DATA11 JZ_GPIO_PORTA(11)
-#define JZ_GPIO_MEM_DATA12 JZ_GPIO_PORTA(12)
-#define JZ_GPIO_MEM_DATA13 JZ_GPIO_PORTA(13)
-#define JZ_GPIO_MEM_DATA14 JZ_GPIO_PORTA(14)
-#define JZ_GPIO_MEM_DATA15 JZ_GPIO_PORTA(15)
-#define JZ_GPIO_MEM_DATA16 JZ_GPIO_PORTA(16)
-#define JZ_GPIO_MEM_DATA17 JZ_GPIO_PORTA(17)
-#define JZ_GPIO_MEM_DATA18 JZ_GPIO_PORTA(18)
-#define JZ_GPIO_MEM_DATA19 JZ_GPIO_PORTA(19)
-#define JZ_GPIO_MEM_DATA20 JZ_GPIO_PORTA(20)
-#define JZ_GPIO_MEM_DATA21 JZ_GPIO_PORTA(21)
-#define JZ_GPIO_MEM_DATA22 JZ_GPIO_PORTA(22)
-#define JZ_GPIO_MEM_DATA23 JZ_GPIO_PORTA(23)
-#define JZ_GPIO_MEM_DATA24 JZ_GPIO_PORTA(24)
-#define JZ_GPIO_MEM_DATA25 JZ_GPIO_PORTA(25)
-#define JZ_GPIO_MEM_DATA26 JZ_GPIO_PORTA(26)
-#define JZ_GPIO_MEM_DATA27 JZ_GPIO_PORTA(27)
-#define JZ_GPIO_MEM_DATA28 JZ_GPIO_PORTA(28)
-#define JZ_GPIO_MEM_DATA29 JZ_GPIO_PORTA(29)
-#define JZ_GPIO_MEM_DATA30 JZ_GPIO_PORTA(30)
-#define JZ_GPIO_MEM_DATA31 JZ_GPIO_PORTA(31)
-
-#define JZ_GPIO_FUNC_MEM_DATA0 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA1 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA2 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA3 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA4 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA5 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA6 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA7 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA8 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA9 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA10 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA11 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA12 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA13 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA14 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA15 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA16 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA17 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA18 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA19 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA20 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA21 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA22 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA23 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA24 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA25 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA26 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA27 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA28 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA29 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA30 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DATA31 JZ_GPIO_FUNC1
-
-/* Port B function pins */
-#define JZ_GPIO_MEM_ADDR0 JZ_GPIO_PORTB(0)
-#define JZ_GPIO_MEM_ADDR1 JZ_GPIO_PORTB(1)
-#define JZ_GPIO_MEM_ADDR2 JZ_GPIO_PORTB(2)
-#define JZ_GPIO_MEM_ADDR3 JZ_GPIO_PORTB(3)
-#define JZ_GPIO_MEM_ADDR4 JZ_GPIO_PORTB(4)
-#define JZ_GPIO_MEM_ADDR5 JZ_GPIO_PORTB(5)
-#define JZ_GPIO_MEM_ADDR6 JZ_GPIO_PORTB(6)
-#define JZ_GPIO_MEM_ADDR7 JZ_GPIO_PORTB(7)
-#define JZ_GPIO_MEM_ADDR8 JZ_GPIO_PORTB(8)
-#define JZ_GPIO_MEM_ADDR9 JZ_GPIO_PORTB(9)
-#define JZ_GPIO_MEM_ADDR10 JZ_GPIO_PORTB(10)
-#define JZ_GPIO_MEM_ADDR11 JZ_GPIO_PORTB(11)
-#define JZ_GPIO_MEM_ADDR12 JZ_GPIO_PORTB(12)
-#define JZ_GPIO_MEM_ADDR13 JZ_GPIO_PORTB(13)
-#define JZ_GPIO_MEM_ADDR14 JZ_GPIO_PORTB(14)
-#define JZ_GPIO_MEM_ADDR15 JZ_GPIO_PORTB(15)
-#define JZ_GPIO_MEM_ADDR16 JZ_GPIO_PORTB(16)
-#define JZ_GPIO_LCD_CLS JZ_GPIO_PORTB(17)
-#define JZ_GPIO_LCD_SPL JZ_GPIO_PORTB(18)
-#define JZ_GPIO_MEM_DCS JZ_GPIO_PORTB(19)
-#define JZ_GPIO_MEM_RAS JZ_GPIO_PORTB(20)
-#define JZ_GPIO_MEM_CAS JZ_GPIO_PORTB(21)
-#define JZ_GPIO_MEM_SDWE JZ_GPIO_PORTB(22)
-#define JZ_GPIO_MEM_CKE JZ_GPIO_PORTB(23)
-#define JZ_GPIO_MEM_CKO JZ_GPIO_PORTB(24)
-#define JZ_GPIO_MEM_CS0 JZ_GPIO_PORTB(25)
-#define JZ_GPIO_MEM_CS1 JZ_GPIO_PORTB(26)
-#define JZ_GPIO_MEM_CS2 JZ_GPIO_PORTB(27)
-#define JZ_GPIO_MEM_CS3 JZ_GPIO_PORTB(28)
-#define JZ_GPIO_MEM_RD JZ_GPIO_PORTB(29)
-#define JZ_GPIO_MEM_WR JZ_GPIO_PORTB(30)
-#define JZ_GPIO_MEM_WE0 JZ_GPIO_PORTB(31)
-
-#define JZ_GPIO_FUNC_MEM_ADDR0 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_ADDR1 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_ADDR2 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_ADDR3 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_ADDR4 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_ADDR5 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_ADDR6 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_ADDR7 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_ADDR8 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_ADDR9 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_ADDR10 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_ADDR11 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_ADDR12 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_ADDR13 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_ADDR14 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_ADDR15 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_ADDR16 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_LCD_CLS JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_LCD_SPL JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_DCS JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_RAS JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_CAS JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_SDWE JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_CKE JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_CKO JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_CS0 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_CS1 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_CS2 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_CS3 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_RD JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_WR JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_WE0 JZ_GPIO_FUNC1
-
-
-#define JZ_GPIO_MEM_ADDR21 JZ_GPIO_PORTB(17)
-#define JZ_GPIO_MEM_ADDR22 JZ_GPIO_PORTB(18)
-
-#define JZ_GPIO_FUNC_MEM_ADDR21 JZ_GPIO_FUNC2
-#define JZ_GPIO_FUNC_MEM_ADDR22 JZ_GPIO_FUNC2
-
-/* Port C function pins */
-#define JZ_GPIO_LCD_DATA0 JZ_GPIO_PORTC(0)
-#define JZ_GPIO_LCD_DATA1 JZ_GPIO_PORTC(1)
-#define JZ_GPIO_LCD_DATA2 JZ_GPIO_PORTC(2)
-#define JZ_GPIO_LCD_DATA3 JZ_GPIO_PORTC(3)
-#define JZ_GPIO_LCD_DATA4 JZ_GPIO_PORTC(4)
-#define JZ_GPIO_LCD_DATA5 JZ_GPIO_PORTC(5)
-#define JZ_GPIO_LCD_DATA6 JZ_GPIO_PORTC(6)
-#define JZ_GPIO_LCD_DATA7 JZ_GPIO_PORTC(7)
-#define JZ_GPIO_LCD_DATA8 JZ_GPIO_PORTC(8)
-#define JZ_GPIO_LCD_DATA9 JZ_GPIO_PORTC(9)
-#define JZ_GPIO_LCD_DATA10 JZ_GPIO_PORTC(10)
-#define JZ_GPIO_LCD_DATA11 JZ_GPIO_PORTC(11)
-#define JZ_GPIO_LCD_DATA12 JZ_GPIO_PORTC(12)
-#define JZ_GPIO_LCD_DATA13 JZ_GPIO_PORTC(13)
-#define JZ_GPIO_LCD_DATA14 JZ_GPIO_PORTC(14)
-#define JZ_GPIO_LCD_DATA15 JZ_GPIO_PORTC(15)
-#define JZ_GPIO_LCD_DATA16 JZ_GPIO_PORTC(16)
-#define JZ_GPIO_LCD_DATA17 JZ_GPIO_PORTC(17)
-#define JZ_GPIO_LCD_PCLK JZ_GPIO_PORTC(18)
-#define JZ_GPIO_LCD_HSYNC JZ_GPIO_PORTC(19)
-#define JZ_GPIO_LCD_VSYNC JZ_GPIO_PORTC(20)
-#define JZ_GPIO_LCD_DE JZ_GPIO_PORTC(21)
-#define JZ_GPIO_LCD_PS JZ_GPIO_PORTC(22)
-#define JZ_GPIO_LCD_REV JZ_GPIO_PORTC(23)
-#define JZ_GPIO_MEM_WE1 JZ_GPIO_PORTC(24)
-#define JZ_GPIO_MEM_WE2 JZ_GPIO_PORTC(25)
-#define JZ_GPIO_MEM_WE3 JZ_GPIO_PORTC(26)
-#define JZ_GPIO_MEM_WAIT JZ_GPIO_PORTC(27)
-#define JZ_GPIO_MEM_FRE JZ_GPIO_PORTC(28)
-#define JZ_GPIO_MEM_FWE JZ_GPIO_PORTC(29)
-
-#define JZ_GPIO_FUNC_LCD_DATA0 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_LCD_DATA1 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_LCD_DATA2 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_LCD_DATA3 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_LCD_DATA4 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_LCD_DATA5 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_LCD_DATA6 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_LCD_DATA7 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_LCD_DATA8 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_LCD_DATA9 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_LCD_DATA10 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_LCD_DATA11 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_LCD_DATA12 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_LCD_DATA13 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_LCD_DATA14 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_LCD_DATA15 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_LCD_DATA16 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_LCD_DATA17 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_LCD_PCLK JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_LCD_VSYNC JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_LCD_HSYNC JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_LCD_DE JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_LCD_PS JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_LCD_REV JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_WE1 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_WE2 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_WE3 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_WAIT JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_FRE JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MEM_FWE JZ_GPIO_FUNC1
-
-
-#define JZ_GPIO_MEM_ADDR19 JZ_GPIO_PORTB(22)
-#define JZ_GPIO_MEM_ADDR20 JZ_GPIO_PORTB(23)
-
-#define JZ_GPIO_FUNC_MEM_ADDR19 JZ_GPIO_FUNC2
-#define JZ_GPIO_FUNC_MEM_ADDR20 JZ_GPIO_FUNC2
-
-/* Port D function pins */
-#define JZ_GPIO_CIM_DATA0 JZ_GPIO_PORTD(0)
-#define JZ_GPIO_CIM_DATA1 JZ_GPIO_PORTD(1)
-#define JZ_GPIO_CIM_DATA2 JZ_GPIO_PORTD(2)
-#define JZ_GPIO_CIM_DATA3 JZ_GPIO_PORTD(3)
-#define JZ_GPIO_CIM_DATA4 JZ_GPIO_PORTD(4)
-#define JZ_GPIO_CIM_DATA5 JZ_GPIO_PORTD(5)
-#define JZ_GPIO_CIM_DATA6 JZ_GPIO_PORTD(6)
-#define JZ_GPIO_CIM_DATA7 JZ_GPIO_PORTD(7)
-#define JZ_GPIO_MSC_CMD JZ_GPIO_PORTD(8)
-#define JZ_GPIO_MSC_CLK JZ_GPIO_PORTD(9)
-#define JZ_GPIO_MSC_DATA0 JZ_GPIO_PORTD(10)
-#define JZ_GPIO_MSC_DATA1 JZ_GPIO_PORTD(11)
-#define JZ_GPIO_MSC_DATA2 JZ_GPIO_PORTD(12)
-#define JZ_GPIO_MSC_DATA3 JZ_GPIO_PORTD(13)
-#define JZ_GPIO_CIM_MCLK JZ_GPIO_PORTD(14)
-#define JZ_GPIO_CIM_PCLK JZ_GPIO_PORTD(15)
-#define JZ_GPIO_CIM_VSYNC JZ_GPIO_PORTD(16)
-#define JZ_GPIO_CIM_HSYNC JZ_GPIO_PORTD(17)
-#define JZ_GPIO_SPI_CLK JZ_GPIO_PORTD(18)
-#define JZ_GPIO_SPI_CE0 JZ_GPIO_PORTD(19)
-#define JZ_GPIO_SPI_DT JZ_GPIO_PORTD(20)
-#define JZ_GPIO_SPI_DR JZ_GPIO_PORTD(21)
-#define JZ_GPIO_SPI_CE1 JZ_GPIO_PORTD(22)
-#define JZ_GPIO_PWM0 JZ_GPIO_PORTD(23)
-#define JZ_GPIO_PWM1 JZ_GPIO_PORTD(24)
-#define JZ_GPIO_PWM2 JZ_GPIO_PORTD(25)
-#define JZ_GPIO_PWM3 JZ_GPIO_PORTD(26)
-#define JZ_GPIO_PWM4 JZ_GPIO_PORTD(27)
-#define JZ_GPIO_PWM5 JZ_GPIO_PORTD(28)
-#define JZ_GPIO_PWM6 JZ_GPIO_PORTD(30)
-#define JZ_GPIO_PWM7 JZ_GPIO_PORTD(31)
-
-#define JZ_GPIO_FUNC_CIM_DATA JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_CIM_DATA0 JZ_GPIO_FUNC_CIM_DATA
-#define JZ_GPIO_FUNC_CIM_DATA1 JZ_GPIO_FUNC_CIM_DATA
-#define JZ_GPIO_FUNC_CIM_DATA2 JZ_GPIO_FUNC_CIM_DATA
-#define JZ_GPIO_FUNC_CIM_DATA3 JZ_GPIO_FUNC_CIM_DATA
-#define JZ_GPIO_FUNC_CIM_DATA4 JZ_GPIO_FUNC_CIM_DATA
-#define JZ_GPIO_FUNC_CIM_DATA5 JZ_GPIO_FUNC_CIM_DATA
-#define JZ_GPIO_FUNC_CIM_DATA6 JZ_GPIO_FUNC_CIM_DATA
-#define JZ_GPIO_FUNC_CIM_DATA7 JZ_GPIO_FUNC_CIM_DATA
-#define JZ_GPIO_FUNC_MSC_CMD JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MSC_CLK JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MSC_DATA JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_MSC_DATA0 JZ_GPIO_FUNC_MSC_DATA
-#define JZ_GPIO_FUNC_MSC_DATA1 JZ_GPIO_FUNC_MSC_DATA
-#define JZ_GPIO_FUNC_MSC_DATA2 JZ_GPIO_FUNC_MSC_DATA
-#define JZ_GPIO_FUNC_MSC_DATA3 JZ_GPIO_FUNC_MSC_DATA
-#define JZ_GPIO_FUNC_CIM_MCLK JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_CIM_PCLK JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_CIM_VSYNC JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_CIM_HSYNC JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_SPI_CLK JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_SPI_CE0 JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_SPI_DT JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_SPI_DR JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_SPI_CE1 JZ_GPIO_FUNC1
-
-#define JZ_GPIO_FUNC_PWM JZ_GPIO_FUNC1
-#define JZ_GPIO_FUNC_PWM0 JZ_GPIO_FUNC_PWM
-#define JZ_GPIO_FUNC_PWM1 JZ_GPIO_FUNC_PWM
-#define JZ_GPIO_FUNC_PWM2 JZ_GPIO_FUNC_PWM
-#define JZ_GPIO_FUNC_PWM3 JZ_GPIO_FUNC_PWM
-#define JZ_GPIO_FUNC_PWM4 JZ_GPIO_FUNC_PWM
-#define JZ_GPIO_FUNC_PWM5 JZ_GPIO_FUNC_PWM
-#define JZ_GPIO_FUNC_PWM6 JZ_GPIO_FUNC_PWM
-#define JZ_GPIO_FUNC_PWM7 JZ_GPIO_FUNC_PWM
-
-#define JZ_GPIO_MEM_SCLK_RSTN JZ_GPIO_PORTD(18)
-#define JZ_GPIO_MEM_BCLK JZ_GPIO_PORTD(19)
-#define JZ_GPIO_MEM_SDATO JZ_GPIO_PORTD(20)
-#define JZ_GPIO_MEM_SDATI JZ_GPIO_PORTD(21)
-#define JZ_GPIO_MEM_SYNC JZ_GPIO_PORTD(22)
-#define JZ_GPIO_I2C_SDA JZ_GPIO_PORTD(23)
-#define JZ_GPIO_I2C_SCK JZ_GPIO_PORTD(24)
-#define JZ_GPIO_UART0_TXD JZ_GPIO_PORTD(25)
-#define JZ_GPIO_UART0_RXD JZ_GPIO_PORTD(26)
-#define JZ_GPIO_MEM_ADDR17 JZ_GPIO_PORTD(27)
-#define JZ_GPIO_MEM_ADDR18 JZ_GPIO_PORTD(28)
-#define JZ_GPIO_UART0_CTS JZ_GPIO_PORTD(30)
-#define JZ_GPIO_UART0_RTS JZ_GPIO_PORTD(31)
-
-#define JZ_GPIO_FUNC_MEM_SCLK_RSTN JZ_GPIO_FUNC2
-#define JZ_GPIO_FUNC_MEM_BCLK JZ_GPIO_FUNC2
-#define JZ_GPIO_FUNC_MEM_SDATO JZ_GPIO_FUNC2
-#define JZ_GPIO_FUNC_MEM_SDATI JZ_GPIO_FUNC2
-#define JZ_GPIO_FUNC_MEM_SYNC JZ_GPIO_FUNC2
-#define JZ_GPIO_FUNC_I2C_SDA JZ_GPIO_FUNC2
-#define JZ_GPIO_FUNC_I2C_SCK JZ_GPIO_FUNC2
-#define JZ_GPIO_FUNC_UART0_TXD JZ_GPIO_FUNC2
-#define JZ_GPIO_FUNC_UART0_RXD JZ_GPIO_FUNC2
-#define JZ_GPIO_FUNC_MEM_ADDR17 JZ_GPIO_FUNC2
-#define JZ_GPIO_FUNC_MEM_ADDR18 JZ_GPIO_FUNC2
-#define JZ_GPIO_FUNC_UART0_CTS JZ_GPIO_FUNC2
-#define JZ_GPIO_FUNC_UART0_RTS JZ_GPIO_FUNC2
-
-#define JZ_GPIO_UART1_RXD JZ_GPIO_PORTD(30)
-#define JZ_GPIO_UART1_TXD JZ_GPIO_PORTD(31)
-
-#define JZ_GPIO_FUNC_UART1_RXD JZ_GPIO_FUNC3
-#define JZ_GPIO_FUNC_UART1_TXD JZ_GPIO_FUNC3
-
#endif
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 3418ec9c1c50..882823bec153 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -116,4 +116,8 @@
#define SO_COOKIE 57
+#define SCM_TIMESTAMPING_PKTINFO 58
+
+#define SO_PEERGROUPS 59
+
#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/mips/jz4740/Makefile b/arch/mips/jz4740/Makefile
index 39d70bde8cfe..6b9c1f7c31c9 100644
--- a/arch/mips/jz4740/Makefile
+++ b/arch/mips/jz4740/Makefile
@@ -7,8 +7,6 @@
obj-y += prom.o time.o reset.o setup.o \
platform.o timer.o
-obj-$(CONFIG_MACH_JZ4740) += gpio.o
-
CFLAGS_setup.o = -I$(src)/../../../scripts/dtc/libfdt
# board specific support
diff --git a/arch/mips/jz4740/board-qi_lb60.c b/arch/mips/jz4740/board-qi_lb60.c
index a5bd94b95263..6d7f97552200 100644
--- a/arch/mips/jz4740/board-qi_lb60.c
+++ b/arch/mips/jz4740/board-qi_lb60.c
@@ -22,6 +22,8 @@
#include <linux/input/matrix_keypad.h>
#include <linux/spi/spi.h>
#include <linux/spi/spi_gpio.h>
+#include <linux/pinctrl/machine.h>
+#include <linux/pinctrl/pinconf-generic.h>
#include <linux/power_supply.h>
#include <linux/power/jz4740-battery.h>
#include <linux/power/gpio-charger.h>
@@ -159,7 +161,7 @@ static struct jz_nand_platform_data qi_lb60_nand_pdata = {
static struct gpiod_lookup_table qi_lb60_nand_gpio_table = {
.dev_id = "jz4740-nand.0",
.table = {
- GPIO_LOOKUP("Bank C", 30, "busy", 0),
+ GPIO_LOOKUP("GPIOC", 30, "busy", 0),
{ },
},
};
@@ -421,8 +423,8 @@ static struct platform_device qi_lb60_audio_device = {
static struct gpiod_lookup_table qi_lb60_audio_gpio_table = {
.dev_id = "qi-lb60-audio",
.table = {
- GPIO_LOOKUP("Bank B", 29, "snd", 0),
- GPIO_LOOKUP("Bank D", 4, "amp", 0),
+ GPIO_LOOKUP("GPIOB", 29, "snd", 0),
+ GPIO_LOOKUP("GPIOD", 4, "amp", 0),
{ },
},
};
@@ -447,13 +449,36 @@ static struct platform_device *jz_platform_devices[] __initdata = {
&qi_lb60_audio_device,
};
-static void __init board_gpio_setup(void)
-{
- /* We only need to enable/disable pullup here for pins used in generic
- * drivers. Everything else is done by the drivers themselves. */
- jz_gpio_disable_pullup(QI_LB60_GPIO_SD_VCC_EN_N);
- jz_gpio_disable_pullup(QI_LB60_GPIO_SD_CD);
-}
+static unsigned long pin_cfg_bias_disable[] = {
+ PIN_CONFIG_BIAS_DISABLE,
+};
+
+static struct pinctrl_map pin_map[] __initdata = {
+ /* NAND pin configuration */
+ PIN_MAP_MUX_GROUP_DEFAULT("jz4740-nand",
+ "10010000.jz4740-pinctrl", "nand", "nand-cs1"),
+
+ /* fbdev pin configuration */
+ PIN_MAP_MUX_GROUP("jz4740-fb", PINCTRL_STATE_DEFAULT,
+ "10010000.jz4740-pinctrl", "lcd", "lcd-8bit"),
+ PIN_MAP_MUX_GROUP("jz4740-fb", PINCTRL_STATE_SLEEP,
+ "10010000.jz4740-pinctrl", "lcd", "lcd-no-pins"),
+
+ /* MMC pin configuration */
+ PIN_MAP_MUX_GROUP_DEFAULT("jz4740-mmc.0",
+ "10010000.jz4740-pinctrl", "mmc", "mmc-1bit"),
+ PIN_MAP_MUX_GROUP_DEFAULT("jz4740-mmc.0",
+ "10010000.jz4740-pinctrl", "mmc", "mmc-4bit"),
+ PIN_MAP_CONFIGS_PIN_DEFAULT("jz4740-mmc.0",
+ "10010000.jz4740-pinctrl", "PD0", pin_cfg_bias_disable),
+ PIN_MAP_CONFIGS_PIN_DEFAULT("jz4740-mmc.0",
+ "10010000.jz4740-pinctrl", "PD2", pin_cfg_bias_disable),
+
+ /* PWM pin configuration */
+ PIN_MAP_MUX_GROUP_DEFAULT("jz4740-pwm",
+ "10010000.jz4740-pinctrl", "pwm4", "pwm4"),
+};
+
static int __init qi_lb60_init_platform_devices(void)
{
@@ -469,6 +494,7 @@ static int __init qi_lb60_init_platform_devices(void)
ARRAY_SIZE(qi_lb60_spi_board_info));
pwm_add_table(qi_lb60_pwm_lookup, ARRAY_SIZE(qi_lb60_pwm_lookup));
+ pinctrl_register_mappings(pin_map, ARRAY_SIZE(pin_map));
return platform_add_devices(jz_platform_devices,
ARRAY_SIZE(jz_platform_devices));
@@ -479,8 +505,6 @@ static int __init qi_lb60_board_setup(void)
{
printk(KERN_INFO "Qi Hardware JZ4740 QI LB60 setup\n");
- board_gpio_setup();
-
if (qi_lb60_init_platform_devices())
panic("Failed to initialize platform devices");
diff --git a/arch/mips/jz4740/gpio.c b/arch/mips/jz4740/gpio.c
deleted file mode 100644
index cac1ccde2214..000000000000
--- a/arch/mips/jz4740/gpio.c
+++ /dev/null
@@ -1,519 +0,0 @@
-/*
- * Copyright (C) 2009-2010, Lars-Peter Clausen <lars@metafoo.de>
- * JZ4740 platform GPIO support
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/init.h>
-
-#include <linux/io.h>
-#include <linux/gpio/driver.h>
-/* FIXME: needed for gpio_request(), try to remove consumer API from driver */
-#include <linux/gpio.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/irqchip/ingenic.h>
-#include <linux/bitops.h>
-
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-
-#include <asm/mach-jz4740/base.h>
-#include <asm/mach-jz4740/gpio.h>
-
-#define JZ4740_GPIO_BASE_A (32*0)
-#define JZ4740_GPIO_BASE_B (32*1)
-#define JZ4740_GPIO_BASE_C (32*2)
-#define JZ4740_GPIO_BASE_D (32*3)
-
-#define JZ4740_GPIO_NUM_A 32
-#define JZ4740_GPIO_NUM_B 32
-#define JZ4740_GPIO_NUM_C 31
-#define JZ4740_GPIO_NUM_D 32
-
-#define JZ4740_IRQ_GPIO_BASE_A (JZ4740_IRQ_GPIO(0) + JZ4740_GPIO_BASE_A)
-#define JZ4740_IRQ_GPIO_BASE_B (JZ4740_IRQ_GPIO(0) + JZ4740_GPIO_BASE_B)
-#define JZ4740_IRQ_GPIO_BASE_C (JZ4740_IRQ_GPIO(0) + JZ4740_GPIO_BASE_C)
-#define JZ4740_IRQ_GPIO_BASE_D (JZ4740_IRQ_GPIO(0) + JZ4740_GPIO_BASE_D)
-
-#define JZ_REG_GPIO_PIN 0x00
-#define JZ_REG_GPIO_DATA 0x10
-#define JZ_REG_GPIO_DATA_SET 0x14
-#define JZ_REG_GPIO_DATA_CLEAR 0x18
-#define JZ_REG_GPIO_MASK 0x20
-#define JZ_REG_GPIO_MASK_SET 0x24
-#define JZ_REG_GPIO_MASK_CLEAR 0x28
-#define JZ_REG_GPIO_PULL 0x30
-#define JZ_REG_GPIO_PULL_SET 0x34
-#define JZ_REG_GPIO_PULL_CLEAR 0x38
-#define JZ_REG_GPIO_FUNC 0x40
-#define JZ_REG_GPIO_FUNC_SET 0x44
-#define JZ_REG_GPIO_FUNC_CLEAR 0x48
-#define JZ_REG_GPIO_SELECT 0x50
-#define JZ_REG_GPIO_SELECT_SET 0x54
-#define JZ_REG_GPIO_SELECT_CLEAR 0x58
-#define JZ_REG_GPIO_DIRECTION 0x60
-#define JZ_REG_GPIO_DIRECTION_SET 0x64
-#define JZ_REG_GPIO_DIRECTION_CLEAR 0x68
-#define JZ_REG_GPIO_TRIGGER 0x70
-#define JZ_REG_GPIO_TRIGGER_SET 0x74
-#define JZ_REG_GPIO_TRIGGER_CLEAR 0x78
-#define JZ_REG_GPIO_FLAG 0x80
-#define JZ_REG_GPIO_FLAG_CLEAR 0x14
-
-#define GPIO_TO_BIT(gpio) BIT(gpio & 0x1f)
-#define GPIO_TO_REG(gpio, reg) (gpio_to_jz_gpio_chip(gpio)->base + (reg))
-#define CHIP_TO_REG(chip, reg) (gpio_chip_to_jz_gpio_chip(chip)->base + (reg))
-
-struct jz_gpio_chip {
- unsigned int irq;
- unsigned int irq_base;
- uint32_t edge_trigger_both;
-
- void __iomem *base;
-
- struct gpio_chip gpio_chip;
-};
-
-static struct jz_gpio_chip jz4740_gpio_chips[];
-
-static inline struct jz_gpio_chip *gpio_to_jz_gpio_chip(unsigned int gpio)
-{
- return &jz4740_gpio_chips[gpio >> 5];
-}
-
-static inline struct jz_gpio_chip *gpio_chip_to_jz_gpio_chip(struct gpio_chip *gc)
-{
- return gpiochip_get_data(gc);
-}
-
-static inline struct jz_gpio_chip *irq_to_jz_gpio_chip(struct irq_data *data)
-{
- struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
- return gc->private;
-}
-
-static inline void jz_gpio_write_bit(unsigned int gpio, unsigned int reg)
-{
- writel(GPIO_TO_BIT(gpio), GPIO_TO_REG(gpio, reg));
-}
-
-int jz_gpio_set_function(int gpio, enum jz_gpio_function function)
-{
- if (function == JZ_GPIO_FUNC_NONE) {
- jz_gpio_write_bit(gpio, JZ_REG_GPIO_FUNC_CLEAR);
- jz_gpio_write_bit(gpio, JZ_REG_GPIO_SELECT_CLEAR);
- jz_gpio_write_bit(gpio, JZ_REG_GPIO_TRIGGER_CLEAR);
- } else {
- jz_gpio_write_bit(gpio, JZ_REG_GPIO_FUNC_SET);
- jz_gpio_write_bit(gpio, JZ_REG_GPIO_TRIGGER_CLEAR);
- switch (function) {
- case JZ_GPIO_FUNC1:
- jz_gpio_write_bit(gpio, JZ_REG_GPIO_SELECT_CLEAR);
- break;
- case JZ_GPIO_FUNC3:
- jz_gpio_write_bit(gpio, JZ_REG_GPIO_TRIGGER_SET);
- case JZ_GPIO_FUNC2: /* Falltrough */
- jz_gpio_write_bit(gpio, JZ_REG_GPIO_SELECT_SET);
- break;
- default:
- BUG();
- break;
- }
- }
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(jz_gpio_set_function);
-
-int jz_gpio_bulk_request(const struct jz_gpio_bulk_request *request, size_t num)
-{
- size_t i;
- int ret;
-
- for (i = 0; i < num; ++i, ++request) {
- ret = gpio_request(request->gpio, request->name);
- if (ret)
- goto err;
- jz_gpio_set_function(request->gpio, request->function);
- }
-
- return 0;
-
-err:
- for (--request; i > 0; --i, --request) {
- gpio_free(request->gpio);
- jz_gpio_set_function(request->gpio, JZ_GPIO_FUNC_NONE);
- }
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(jz_gpio_bulk_request);
-
-void jz_gpio_bulk_free(const struct jz_gpio_bulk_request *request, size_t num)
-{
- size_t i;
-
- for (i = 0; i < num; ++i, ++request) {
- gpio_free(request->gpio);
- jz_gpio_set_function(request->gpio, JZ_GPIO_FUNC_NONE);
- }
-
-}
-EXPORT_SYMBOL_GPL(jz_gpio_bulk_free);
-
-void jz_gpio_bulk_suspend(const struct jz_gpio_bulk_request *request, size_t num)
-{
- size_t i;
-
- for (i = 0; i < num; ++i, ++request) {
- jz_gpio_set_function(request->gpio, JZ_GPIO_FUNC_NONE);
- jz_gpio_write_bit(request->gpio, JZ_REG_GPIO_DIRECTION_CLEAR);
- jz_gpio_write_bit(request->gpio, JZ_REG_GPIO_PULL_SET);
- }
-}
-EXPORT_SYMBOL_GPL(jz_gpio_bulk_suspend);
-
-void jz_gpio_bulk_resume(const struct jz_gpio_bulk_request *request, size_t num)
-{
- size_t i;
-
- for (i = 0; i < num; ++i, ++request)
- jz_gpio_set_function(request->gpio, request->function);
-}
-EXPORT_SYMBOL_GPL(jz_gpio_bulk_resume);
-
-void jz_gpio_enable_pullup(unsigned gpio)
-{
- jz_gpio_write_bit(gpio, JZ_REG_GPIO_PULL_CLEAR);
-}
-EXPORT_SYMBOL_GPL(jz_gpio_enable_pullup);
-
-void jz_gpio_disable_pullup(unsigned gpio)
-{
- jz_gpio_write_bit(gpio, JZ_REG_GPIO_PULL_SET);
-}
-EXPORT_SYMBOL_GPL(jz_gpio_disable_pullup);
-
-static int jz_gpio_get_value(struct gpio_chip *chip, unsigned gpio)
-{
- return !!(readl(CHIP_TO_REG(chip, JZ_REG_GPIO_PIN)) & BIT(gpio));
-}
-
-static void jz_gpio_set_value(struct gpio_chip *chip, unsigned gpio, int value)
-{
- uint32_t __iomem *reg = CHIP_TO_REG(chip, JZ_REG_GPIO_DATA_SET);
- reg += !value;
- writel(BIT(gpio), reg);
-}
-
-static int jz_gpio_direction_output(struct gpio_chip *chip, unsigned gpio,
- int value)
-{
- writel(BIT(gpio), CHIP_TO_REG(chip, JZ_REG_GPIO_DIRECTION_SET));
- jz_gpio_set_value(chip, gpio, value);
-
- return 0;
-}
-
-static int jz_gpio_direction_input(struct gpio_chip *chip, unsigned gpio)
-{
- writel(BIT(gpio), CHIP_TO_REG(chip, JZ_REG_GPIO_DIRECTION_CLEAR));
-
- return 0;
-}
-
-static int jz_gpio_to_irq(struct gpio_chip *chip, unsigned gpio)
-{
- struct jz_gpio_chip *jz_gpio = gpiochip_get_data(chip);
-
- return jz_gpio->irq_base + gpio;
-}
-
-int jz_gpio_port_direction_input(int port, uint32_t mask)
-{
- writel(mask, GPIO_TO_REG(port, JZ_REG_GPIO_DIRECTION_CLEAR));
-
- return 0;
-}
-EXPORT_SYMBOL(jz_gpio_port_direction_input);
-
-int jz_gpio_port_direction_output(int port, uint32_t mask)
-{
- writel(mask, GPIO_TO_REG(port, JZ_REG_GPIO_DIRECTION_SET));
-
- return 0;
-}
-EXPORT_SYMBOL(jz_gpio_port_direction_output);
-
-void jz_gpio_port_set_value(int port, uint32_t value, uint32_t mask)
-{
- writel(~value & mask, GPIO_TO_REG(port, JZ_REG_GPIO_DATA_CLEAR));
- writel(value & mask, GPIO_TO_REG(port, JZ_REG_GPIO_DATA_SET));
-}
-EXPORT_SYMBOL(jz_gpio_port_set_value);
-
-uint32_t jz_gpio_port_get_value(int port, uint32_t mask)
-{
- uint32_t value = readl(GPIO_TO_REG(port, JZ_REG_GPIO_PIN));
-
- return value & mask;
-}
-EXPORT_SYMBOL(jz_gpio_port_get_value);
-
-#define IRQ_TO_BIT(irq) BIT((irq - JZ4740_IRQ_GPIO(0)) & 0x1f)
-
-static void jz_gpio_check_trigger_both(struct jz_gpio_chip *chip, unsigned int irq)
-{
- uint32_t value;
- void __iomem *reg;
- uint32_t mask = IRQ_TO_BIT(irq);
-
- if (!(chip->edge_trigger_both & mask))
- return;
-
- reg = chip->base;
-
- value = readl(chip->base + JZ_REG_GPIO_PIN);
- if (value & mask)
- reg += JZ_REG_GPIO_DIRECTION_CLEAR;
- else
- reg += JZ_REG_GPIO_DIRECTION_SET;
-
- writel(mask, reg);
-}
-
-static void jz_gpio_irq_demux_handler(struct irq_desc *desc)
-{
- uint32_t flag;
- unsigned int gpio_irq;
- struct jz_gpio_chip *chip = irq_desc_get_handler_data(desc);
-
- flag = readl(chip->base + JZ_REG_GPIO_FLAG);
- if (!flag)
- return;
-
- gpio_irq = chip->irq_base + __fls(flag);
-
- jz_gpio_check_trigger_both(chip, gpio_irq);
-
- generic_handle_irq(gpio_irq);
-};
-
-static inline void jz_gpio_set_irq_bit(struct irq_data *data, unsigned int reg)
-{
- struct jz_gpio_chip *chip = irq_to_jz_gpio_chip(data);
- writel(IRQ_TO_BIT(data->irq), chip->base + reg);
-}
-
-static void jz_gpio_irq_unmask(struct irq_data *data)
-{
- struct jz_gpio_chip *chip = irq_to_jz_gpio_chip(data);
-
- jz_gpio_check_trigger_both(chip, data->irq);
- irq_gc_unmask_enable_reg(data);
-};
-
-/* TODO: Check if function is gpio */
-static unsigned int jz_gpio_irq_startup(struct irq_data *data)
-{
- jz_gpio_set_irq_bit(data, JZ_REG_GPIO_SELECT_SET);
- jz_gpio_irq_unmask(data);
- return 0;
-}
-
-static void jz_gpio_irq_shutdown(struct irq_data *data)
-{
- irq_gc_mask_disable_reg(data);
-
- /* Set direction to input */
- jz_gpio_set_irq_bit(data, JZ_REG_GPIO_DIRECTION_CLEAR);
- jz_gpio_set_irq_bit(data, JZ_REG_GPIO_SELECT_CLEAR);
-}
-
-static int jz_gpio_irq_set_type(struct irq_data *data, unsigned int flow_type)
-{
- struct jz_gpio_chip *chip = irq_to_jz_gpio_chip(data);
- unsigned int irq = data->irq;
-
- if (flow_type == IRQ_TYPE_EDGE_BOTH) {
- uint32_t value = readl(chip->base + JZ_REG_GPIO_PIN);
- if (value & IRQ_TO_BIT(irq))
- flow_type = IRQ_TYPE_EDGE_FALLING;
- else
- flow_type = IRQ_TYPE_EDGE_RISING;
- chip->edge_trigger_both |= IRQ_TO_BIT(irq);
- } else {
- chip->edge_trigger_both &= ~IRQ_TO_BIT(irq);
- }
-
- switch (flow_type) {
- case IRQ_TYPE_EDGE_RISING:
- jz_gpio_set_irq_bit(data, JZ_REG_GPIO_DIRECTION_SET);
- jz_gpio_set_irq_bit(data, JZ_REG_GPIO_TRIGGER_SET);
- break;
- case IRQ_TYPE_EDGE_FALLING:
- jz_gpio_set_irq_bit(data, JZ_REG_GPIO_DIRECTION_CLEAR);
- jz_gpio_set_irq_bit(data, JZ_REG_GPIO_TRIGGER_SET);
- break;
- case IRQ_TYPE_LEVEL_HIGH:
- jz_gpio_set_irq_bit(data, JZ_REG_GPIO_DIRECTION_SET);
- jz_gpio_set_irq_bit(data, JZ_REG_GPIO_TRIGGER_CLEAR);
- break;
- case IRQ_TYPE_LEVEL_LOW:
- jz_gpio_set_irq_bit(data, JZ_REG_GPIO_DIRECTION_CLEAR);
- jz_gpio_set_irq_bit(data, JZ_REG_GPIO_TRIGGER_CLEAR);
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int jz_gpio_irq_set_wake(struct irq_data *data, unsigned int on)
-{
- struct jz_gpio_chip *chip = irq_to_jz_gpio_chip(data);
-
- irq_gc_set_wake(data, on);
- irq_set_irq_wake(chip->irq, on);
-
- return 0;
-}
-
-#define JZ4740_GPIO_CHIP(_bank) { \
- .irq_base = JZ4740_IRQ_GPIO_BASE_ ## _bank, \
- .gpio_chip = { \
- .label = "Bank " # _bank, \
- .owner = THIS_MODULE, \
- .set = jz_gpio_set_value, \
- .get = jz_gpio_get_value, \
- .direction_output = jz_gpio_direction_output, \
- .direction_input = jz_gpio_direction_input, \
- .to_irq = jz_gpio_to_irq, \
- .base = JZ4740_GPIO_BASE_ ## _bank, \
- .ngpio = JZ4740_GPIO_NUM_ ## _bank, \
- }, \
-}
-
-static struct jz_gpio_chip jz4740_gpio_chips[] = {
- JZ4740_GPIO_CHIP(A),
- JZ4740_GPIO_CHIP(B),
- JZ4740_GPIO_CHIP(C),
- JZ4740_GPIO_CHIP(D),
-};
-
-static void jz4740_gpio_chip_init(struct jz_gpio_chip *chip, unsigned int id)
-{
- struct irq_chip_generic *gc;
- struct irq_chip_type *ct;
-
- chip->base = ioremap(JZ4740_GPIO_BASE_ADDR + (id * 0x100), 0x100);
-
- chip->irq = JZ4740_IRQ_INTC_GPIO(id);
- irq_set_chained_handler_and_data(chip->irq,
- jz_gpio_irq_demux_handler, chip);
-
- gc = irq_alloc_generic_chip(chip->gpio_chip.label, 1, chip->irq_base,
- chip->base, handle_level_irq);
-
- gc->wake_enabled = IRQ_MSK(chip->gpio_chip.ngpio);
- gc->private = chip;
-
- ct = gc->chip_types;
- ct->regs.enable = JZ_REG_GPIO_MASK_CLEAR;
- ct->regs.disable = JZ_REG_GPIO_MASK_SET;
- ct->regs.ack = JZ_REG_GPIO_FLAG_CLEAR;
-
- ct->chip.name = "GPIO";
- ct->chip.irq_mask = irq_gc_mask_disable_reg;
- ct->chip.irq_unmask = jz_gpio_irq_unmask;
- ct->chip.irq_ack = irq_gc_ack_set_bit;
- ct->chip.irq_suspend = ingenic_intc_irq_suspend;
- ct->chip.irq_resume = ingenic_intc_irq_resume;
- ct->chip.irq_startup = jz_gpio_irq_startup;
- ct->chip.irq_shutdown = jz_gpio_irq_shutdown;
- ct->chip.irq_set_type = jz_gpio_irq_set_type;
- ct->chip.irq_set_wake = jz_gpio_irq_set_wake;
- ct->chip.flags = IRQCHIP_SET_TYPE_MASKED;
-
- irq_setup_generic_chip(gc, IRQ_MSK(chip->gpio_chip.ngpio),
- IRQ_GC_INIT_NESTED_LOCK, 0, IRQ_NOPROBE | IRQ_LEVEL);
-
- gpiochip_add_data(&chip->gpio_chip, chip);
-}
-
-static int __init jz4740_gpio_init(void)
-{
- unsigned int i;
-
- for (i = 0; i < ARRAY_SIZE(jz4740_gpio_chips); ++i)
- jz4740_gpio_chip_init(&jz4740_gpio_chips[i], i);
-
- printk(KERN_INFO "JZ4740 GPIO initialized\n");
-
- return 0;
-}
-arch_initcall(jz4740_gpio_init);
-
-#ifdef CONFIG_DEBUG_FS
-
-static inline void gpio_seq_reg(struct seq_file *s, struct jz_gpio_chip *chip,
- const char *name, unsigned int reg)
-{
- seq_printf(s, "\t%s: %08x\n", name, readl(chip->base + reg));
-}
-
-static int gpio_regs_show(struct seq_file *s, void *unused)
-{
- struct jz_gpio_chip *chip = jz4740_gpio_chips;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(jz4740_gpio_chips); ++i, ++chip) {
- seq_printf(s, "==GPIO %d==\n", i);
- gpio_seq_reg(s, chip, "Pin", JZ_REG_GPIO_PIN);
- gpio_seq_reg(s, chip, "Data", JZ_REG_GPIO_DATA);
- gpio_seq_reg(s, chip, "Mask", JZ_REG_GPIO_MASK);
- gpio_seq_reg(s, chip, "Pull", JZ_REG_GPIO_PULL);
- gpio_seq_reg(s, chip, "Func", JZ_REG_GPIO_FUNC);
- gpio_seq_reg(s, chip, "Select", JZ_REG_GPIO_SELECT);
- gpio_seq_reg(s, chip, "Direction", JZ_REG_GPIO_DIRECTION);
- gpio_seq_reg(s, chip, "Trigger", JZ_REG_GPIO_TRIGGER);
- gpio_seq_reg(s, chip, "Flag", JZ_REG_GPIO_FLAG);
- }
-
- return 0;
-}
-
-static int gpio_regs_open(struct inode *inode, struct file *file)
-{
- return single_open(file, gpio_regs_show, NULL);
-}
-
-static const struct file_operations gpio_regs_operations = {
- .open = gpio_regs_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static int __init gpio_debugfs_init(void)
-{
- (void) debugfs_create_file("jz_regs_gpio", S_IFREG | S_IRUGO,
- NULL, NULL, &gpio_regs_operations);
- return 0;
-}
-subsys_initcall(gpio_debugfs_init);
-
-#endif
diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c
index a563759fd142..6a0d7040d882 100644
--- a/arch/mips/kvm/trap_emul.c
+++ b/arch/mips/kvm/trap_emul.c
@@ -1094,7 +1094,7 @@ static void kvm_trap_emul_check_requests(struct kvm_vcpu *vcpu, int cpu,
struct mm_struct *mm;
int i;
- if (likely(!vcpu->requests))
+ if (likely(!kvm_request_pending(vcpu)))
return;
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c
index 71d8856ade64..74805035edc8 100644
--- a/arch/mips/kvm/vz.c
+++ b/arch/mips/kvm/vz.c
@@ -2337,7 +2337,7 @@ static int kvm_vz_check_requests(struct kvm_vcpu *vcpu, int cpu)
int ret = 0;
int i;
- if (!vcpu->requests)
+ if (!kvm_request_pending(vcpu))
return 0;
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h
index 4526e92301a6..c710db354ff2 100644
--- a/arch/mn10300/include/uapi/asm/socket.h
+++ b/arch/mn10300/include/uapi/asm/socket.h
@@ -98,4 +98,8 @@
#define SO_COOKIE 57
+#define SCM_TIMESTAMPING_PKTINFO 58
+
+#define SO_PEERGROUPS 59
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/parisc/include/asm/dma-mapping.h b/arch/parisc/include/asm/dma-mapping.h
index 9a2a8956a695..2b16282add69 100644
--- a/arch/parisc/include/asm/dma-mapping.h
+++ b/arch/parisc/include/asm/dma-mapping.h
@@ -20,8 +20,6 @@
** flush/purge and allocate "regular" cacheable pages for everything.
*/
-#define DMA_ERROR_CODE (~(dma_addr_t)0)
-
#ifdef CONFIG_PA11
extern const struct dma_map_ops pcxl_dma_ops;
extern const struct dma_map_ops pcx_dma_ops;
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 514701840bd9..a0d4dc9f4eb2 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -97,4 +97,8 @@
#define SO_COOKIE 0x4032
+#define SCM_TIMESTAMPING_PKTINFO 0x4033
+
+#define SO_PEERGROUPS 0x4034
+
#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/powerpc/boot/dts/fsl/kmcent2.dts b/arch/powerpc/boot/dts/fsl/kmcent2.dts
index 47afa438602e..5922c1ea0e96 100644
--- a/arch/powerpc/boot/dts/fsl/kmcent2.dts
+++ b/arch/powerpc/boot/dts/fsl/kmcent2.dts
@@ -293,9 +293,7 @@
compatible = "fsl,ucc-hdlc";
rx-clock-name = "clk9";
tx-clock-name = "clk9";
- fsl,tx-timeslot-mask = <0xfffffffe>;
- fsl,rx-timeslot-mask = <0xfffffffe>;
- fsl,siram-entry-id = <0>;
+ fsl,hdlc-bus;
};
};
};
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index d73755fafbb0..57d38b504ff7 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -295,6 +295,8 @@
#define H_DISABLE_ALL_VIO_INTS 0x0A
#define H_DISABLE_VIO_INTERRUPT 0x0B
#define H_ENABLE_VIO_INTERRUPT 0x0C
+#define H_GET_SESSION_TOKEN 0x19
+#define H_SESSION_ERR_DETECTED 0x1A
/* Platform specific hcalls, used by KVM */
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 2bf35017ffc0..b8d5b8e35244 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -86,7 +86,6 @@ struct kvmppc_vcore {
u16 last_cpu;
u8 vcore_state;
u8 in_guest;
- struct kvmppc_vcore *master_vcore;
struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS];
struct list_head preempt_list;
spinlock_t lock;
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index b148496ffe36..7cea76f11c26 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -81,7 +81,7 @@ struct kvm_split_mode {
u8 subcore_size;
u8 do_nap;
u8 napped[MAX_SMT_THREADS];
- struct kvmppc_vcore *master_vcs[MAX_SUBCORES];
+ struct kvmppc_vcore *vc[MAX_SUBCORES];
};
/*
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 9c51ac4b8f36..8b3f1238d07f 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -35,6 +35,7 @@
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/hvcall.h>
+#include <asm/mce.h>
#define KVM_MAX_VCPUS NR_CPUS
#define KVM_MAX_VCORES NR_CPUS
@@ -52,8 +53,8 @@
#define KVM_IRQCHIP_NUM_PINS 256
/* PPC-specific vcpu->requests bit members */
-#define KVM_REQ_WATCHDOG 8
-#define KVM_REQ_EPR_EXIT 9
+#define KVM_REQ_WATCHDOG KVM_ARCH_REQ(0)
+#define KVM_REQ_EPR_EXIT KVM_ARCH_REQ(1)
#include <linux/mmu_notifier.h>
@@ -267,6 +268,8 @@ struct kvm_resize_hpt;
struct kvm_arch {
unsigned int lpid;
+ unsigned int smt_mode; /* # vcpus per virtual core */
+ unsigned int emul_smt_mode; /* emulated SMT mode, on P9 */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
unsigned int tlb_sets;
struct kvm_hpt_info hpt;
@@ -285,6 +288,7 @@ struct kvm_arch {
cpumask_t need_tlb_flush;
cpumask_t cpu_in_guest;
u8 radix;
+ u8 fwnmi_enabled;
pgd_t *pgtable;
u64 process_table;
struct dentry *debugfs_dir;
@@ -566,6 +570,7 @@ struct kvm_vcpu_arch {
ulong wort;
ulong tid;
ulong psscr;
+ ulong hfscr;
ulong shadow_srr1;
#endif
u32 vrsave; /* also USPRG0 */
@@ -579,7 +584,7 @@ struct kvm_vcpu_arch {
ulong mcsrr0;
ulong mcsrr1;
ulong mcsr;
- u32 dec;
+ ulong dec;
#ifdef CONFIG_BOOKE
u32 decar;
#endif
@@ -710,6 +715,7 @@ struct kvm_vcpu_arch {
unsigned long pending_exceptions;
u8 ceded;
u8 prodded;
+ u8 doorbell_request;
u32 last_inst;
struct swait_queue_head *wqp;
@@ -722,6 +728,7 @@ struct kvm_vcpu_arch {
int prev_cpu;
bool timer_running;
wait_queue_head_t cpu_run;
+ struct machine_check_event mce_evt; /* Valid if trap == 0x200 */
struct kvm_vcpu_arch_shared *shared;
#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index e0d88c38602b..ba5fadd6f3c9 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -315,6 +315,8 @@ struct kvmppc_ops {
struct irq_bypass_producer *);
int (*configure_mmu)(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg);
int (*get_rmmu_info)(struct kvm *kvm, struct kvm_ppc_rmmu_info *info);
+ int (*set_smt_mode)(struct kvm *kvm, unsigned long mode,
+ unsigned long flags);
};
extern struct kvmppc_ops *kvmppc_hv_ops;
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 3a8d278e7421..1a9b45198c06 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -103,6 +103,8 @@
#define OP_31_XOP_STBUX 247
#define OP_31_XOP_LHZX 279
#define OP_31_XOP_LHZUX 311
+#define OP_31_XOP_MSGSNDP 142
+#define OP_31_XOP_MSGCLRP 174
#define OP_31_XOP_MFSPR 339
#define OP_31_XOP_LWAX 341
#define OP_31_XOP_LHAX 343
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 07fbeb927834..8cf8f0c96906 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -60,6 +60,12 @@ struct kvm_regs {
#define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */
+/* flags for kvm_run.flags */
+#define KVM_RUN_PPC_NMI_DISP_MASK (3 << 0)
+#define KVM_RUN_PPC_NMI_DISP_FULLY_RECOV (1 << 0)
+#define KVM_RUN_PPC_NMI_DISP_LIMITED_RECOV (2 << 0)
+#define KVM_RUN_PPC_NMI_DISP_NOT_RECOV (3 << 0)
+
/*
* Feature bits indicate which sections of the sregs struct are valid,
* both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index 58e2ec0310fc..3c590c7c42c0 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -8,28 +8,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <asm/sockios.h>
-
-/* For setsockopt(2) */
-#define SOL_SOCKET 1
-
-#define SO_DEBUG 1
-#define SO_REUSEADDR 2
-#define SO_TYPE 3
-#define SO_ERROR 4
-#define SO_DONTROUTE 5
-#define SO_BROADCAST 6
-#define SO_SNDBUF 7
-#define SO_RCVBUF 8
-#define SO_SNDBUFFORCE 32
-#define SO_RCVBUFFORCE 33
-#define SO_KEEPALIVE 9
-#define SO_OOBINLINE 10
-#define SO_NO_CHECK 11
-#define SO_PRIORITY 12
-#define SO_LINGER 13
-#define SO_BSDCOMPAT 14
-#define SO_REUSEPORT 15
#define SO_RCVLOWAT 16
#define SO_SNDLOWAT 17
#define SO_RCVTIMEO 18
@@ -37,72 +15,6 @@
#define SO_PASSCRED 20
#define SO_PEERCRED 21
-/* Security levels - as per NRL IPv6 - don't actually do anything */
-#define SO_SECURITY_AUTHENTICATION 22
-#define SO_SECURITY_ENCRYPTION_TRANSPORT 23
-#define SO_SECURITY_ENCRYPTION_NETWORK 24
-
-#define SO_BINDTODEVICE 25
-
-/* Socket filtering */
-#define SO_ATTACH_FILTER 26
-#define SO_DETACH_FILTER 27
-#define SO_GET_FILTER SO_ATTACH_FILTER
-
-#define SO_PEERNAME 28
-#define SO_TIMESTAMP 29
-#define SCM_TIMESTAMP SO_TIMESTAMP
-
-#define SO_ACCEPTCONN 30
-
-#define SO_PEERSEC 31
-#define SO_PASSSEC 34
-#define SO_TIMESTAMPNS 35
-#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
-
-#define SO_MARK 36
-
-#define SO_TIMESTAMPING 37
-#define SCM_TIMESTAMPING SO_TIMESTAMPING
-
-#define SO_PROTOCOL 38
-#define SO_DOMAIN 39
-
-#define SO_RXQ_OVFL 40
-
-#define SO_WIFI_STATUS 41
-#define SCM_WIFI_STATUS SO_WIFI_STATUS
-#define SO_PEEK_OFF 42
-
-/* Instruct lower device to use last 4-bytes of skb data as FCS */
-#define SO_NOFCS 43
-
-#define SO_LOCK_FILTER 44
-
-#define SO_SELECT_ERR_QUEUE 45
-
-#define SO_BUSY_POLL 46
-
-#define SO_MAX_PACING_RATE 47
-
-#define SO_BPF_EXTENSIONS 48
-
-#define SO_INCOMING_CPU 49
-
-#define SO_ATTACH_BPF 50
-#define SO_DETACH_BPF SO_DETACH_FILTER
-
-#define SO_ATTACH_REUSEPORT_CBPF 51
-#define SO_ATTACH_REUSEPORT_EBPF 52
-
-#define SO_CNX_ADVICE 53
-
-#define SCM_TIMESTAMPING_OPT_STATS 54
-
-#define SO_MEMINFO 55
-
-#define SO_INCOMING_NAPI_ID 56
-
-#define SO_COOKIE 57
+#include <asm-generic/socket.h>
#endif /* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 709e23425317..ae8e89e0d083 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -485,6 +485,7 @@ int main(void)
OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls);
OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v);
OFFSET(KVM_RADIX, kvm, arch.radix);
+ OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled);
OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr);
OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar);
OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);
@@ -513,6 +514,7 @@ int main(void)
OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions);
OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded);
OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded);
+ OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request);
OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr);
OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc);
OFFSET(VCPU_SPMC, kvm_vcpu, arch.spmc);
@@ -542,6 +544,7 @@ int main(void)
OFFSET(VCPU_WORT, kvm_vcpu, arch.wort);
OFFSET(VCPU_TID, kvm_vcpu, arch.tid);
OFFSET(VCPU_PSSCR, kvm_vcpu, arch.psscr);
+ OFFSET(VCPU_HFSCR, kvm_vcpu, arch.hfscr);
OFFSET(VCORE_ENTRY_EXIT, kvmppc_vcore, entry_exit_map);
OFFSET(VCORE_IN_GUEST, kvmppc_vcore, in_guest);
OFFSET(VCORE_NAPPING_THREADS, kvmppc_vcore, napping_threads);
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 5f9eada3519b..a9bfa49f3698 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -405,6 +405,7 @@ void machine_check_print_event_info(struct machine_check_event *evt,
break;
}
}
+EXPORT_SYMBOL_GPL(machine_check_print_event_info);
uint64_t get_mce_fault_addr(struct machine_check_event *evt)
{
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index eae61b044e9e..496d6393bd41 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -792,21 +792,17 @@ static ssize_t dev_nvram_write(struct file *file, const char __user *buf,
count = min_t(size_t, count, size - *ppos);
count = min(count, PAGE_SIZE);
- ret = -ENOMEM;
- tmp = kmalloc(count, GFP_KERNEL);
- if (!tmp)
- goto out;
-
- ret = -EFAULT;
- if (copy_from_user(tmp, buf, count))
+ tmp = memdup_user(buf, count);
+ if (IS_ERR(tmp)) {
+ ret = PTR_ERR(tmp);
goto out;
+ }
ret = ppc_md.nvram_write(tmp, count, ppos);
-out:
kfree(tmp);
+out:
return ret;
-
}
static long dev_nvram_ioctl(struct file *file, unsigned int cmd,
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 773b35d16a0b..0b436df746fc 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -46,6 +46,8 @@
#include <linux/of.h>
#include <asm/reg.h>
+#include <asm/ppc-opcode.h>
+#include <asm/disassemble.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
@@ -645,6 +647,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
unsigned long stolen;
unsigned long core_stolen;
u64 now;
+ unsigned long flags;
dt = vcpu->arch.dtl_ptr;
vpa = vcpu->arch.vpa.pinned_addr;
@@ -652,10 +655,10 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
core_stolen = vcore_stolen_time(vc, now);
stolen = core_stolen - vcpu->arch.stolen_logged;
vcpu->arch.stolen_logged = core_stolen;
- spin_lock_irq(&vcpu->arch.tbacct_lock);
+ spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
stolen += vcpu->arch.busy_stolen;
vcpu->arch.busy_stolen = 0;
- spin_unlock_irq(&vcpu->arch.tbacct_lock);
+ spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
if (!dt || !vpa)
return;
memset(dt, 0, sizeof(struct dtl_entry));
@@ -675,6 +678,26 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
vcpu->arch.dtl.dirty = true;
}
+/* See if there is a doorbell interrupt pending for a vcpu */
+static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
+{
+ int thr;
+ struct kvmppc_vcore *vc;
+
+ if (vcpu->arch.doorbell_request)
+ return true;
+ /*
+ * Ensure that the read of vcore->dpdes comes after the read
+ * of vcpu->doorbell_request. This barrier matches the
+ * lwsync in book3s_hv_rmhandlers.S just before the
+ * fast_guest_return label.
+ */
+ smp_rmb();
+ vc = vcpu->arch.vcore;
+ thr = vcpu->vcpu_id - vc->first_vcpuid;
+ return !!(vc->dpdes & (1 << thr));
+}
+
static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207)
@@ -926,6 +949,101 @@ static int kvmppc_emulate_debug_inst(struct kvm_run *run,
}
}
+static void do_nothing(void *x)
+{
+}
+
+static unsigned long kvmppc_read_dpdes(struct kvm_vcpu *vcpu)
+{
+ int thr, cpu, pcpu, nthreads;
+ struct kvm_vcpu *v;
+ unsigned long dpdes;
+
+ nthreads = vcpu->kvm->arch.emul_smt_mode;
+ dpdes = 0;
+ cpu = vcpu->vcpu_id & ~(nthreads - 1);
+ for (thr = 0; thr < nthreads; ++thr, ++cpu) {
+ v = kvmppc_find_vcpu(vcpu->kvm, cpu);
+ if (!v)
+ continue;
+ /*
+ * If the vcpu is currently running on a physical cpu thread,
+ * interrupt it in order to pull it out of the guest briefly,
+ * which will update its vcore->dpdes value.
+ */
+ pcpu = READ_ONCE(v->cpu);
+ if (pcpu >= 0)
+ smp_call_function_single(pcpu, do_nothing, NULL, 1);
+ if (kvmppc_doorbell_pending(v))
+ dpdes |= 1 << thr;
+ }
+ return dpdes;
+}
+
+/*
+ * On POWER9, emulate doorbell-related instructions in order to
+ * give the guest the illusion of running on a multi-threaded core.
+ * The instructions emulated are msgsndp, msgclrp, mfspr TIR,
+ * and mfspr DPDES.
+ */
+static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
+{
+ u32 inst, rb, thr;
+ unsigned long arg;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_vcpu *tvcpu;
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return EMULATE_FAIL;
+ if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst) != EMULATE_DONE)
+ return RESUME_GUEST;
+ if (get_op(inst) != 31)
+ return EMULATE_FAIL;
+ rb = get_rb(inst);
+ thr = vcpu->vcpu_id & (kvm->arch.emul_smt_mode - 1);
+ switch (get_xop(inst)) {
+ case OP_31_XOP_MSGSNDP:
+ arg = kvmppc_get_gpr(vcpu, rb);
+ if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER)
+ break;
+ arg &= 0x3f;
+ if (arg >= kvm->arch.emul_smt_mode)
+ break;
+ tvcpu = kvmppc_find_vcpu(kvm, vcpu->vcpu_id - thr + arg);
+ if (!tvcpu)
+ break;
+ if (!tvcpu->arch.doorbell_request) {
+ tvcpu->arch.doorbell_request = 1;
+ kvmppc_fast_vcpu_kick_hv(tvcpu);
+ }
+ break;
+ case OP_31_XOP_MSGCLRP:
+ arg = kvmppc_get_gpr(vcpu, rb);
+ if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER)
+ break;
+ vcpu->arch.vcore->dpdes = 0;
+ vcpu->arch.doorbell_request = 0;
+ break;
+ case OP_31_XOP_MFSPR:
+ switch (get_sprn(inst)) {
+ case SPRN_TIR:
+ arg = thr;
+ break;
+ case SPRN_DPDES:
+ arg = kvmppc_read_dpdes(vcpu);
+ break;
+ default:
+ return EMULATE_FAIL;
+ }
+ kvmppc_set_gpr(vcpu, get_rt(inst), arg);
+ break;
+ default:
+ return EMULATE_FAIL;
+ }
+ kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
+ return RESUME_GUEST;
+}
+
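/*
 * Sketch only, not part of this patch: how the msgsndp RB operand is
 * interpreted by kvmppc_emulate_doorbell_instr() above.  The numeric
 * value used for PPC_DBELL_SERVER below is an assumption, not taken
 * from this diff.
 */
#include <stdbool.h>

#define SKETCH_DBELL_SERVER	5	/* assumed value of PPC_DBELL_SERVER */

static bool sketch_decode_msgsndp(unsigned long rb_val,
				  unsigned int emul_smt_mode,
				  unsigned int *target_thread)
{
	if (((rb_val >> 27) & 0xf) != SKETCH_DBELL_SERVER)
		return false;			/* not a directed doorbell message */
	*target_thread = rb_val & 0x3f;		/* thread within the emulated core */
	return *target_thread < emul_smt_mode;	/* must address an emulated thread */
}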
static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
struct task_struct *tsk)
{
@@ -971,15 +1089,20 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
break;
case BOOK3S_INTERRUPT_MACHINE_CHECK:
- /*
- * Deliver a machine check interrupt to the guest.
- * We have to do this, even if the host has handled the
- * machine check, because machine checks use SRR0/1 and
- * the interrupt might have trashed guest state in them.
- */
- kvmppc_book3s_queue_irqprio(vcpu,
- BOOK3S_INTERRUPT_MACHINE_CHECK);
- r = RESUME_GUEST;
+ /* Exit to guest with KVM_EXIT_NMI as exit reason */
+ run->exit_reason = KVM_EXIT_NMI;
+ run->hw.hardware_exit_reason = vcpu->arch.trap;
+ /* Clear out the old NMI status from run->flags */
+ run->flags &= ~KVM_RUN_PPC_NMI_DISP_MASK;
+ /* Now set the NMI status */
+ if (vcpu->arch.mce_evt.disposition == MCE_DISPOSITION_RECOVERED)
+ run->flags |= KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
+ else
+ run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV;
+
+ r = RESUME_HOST;
+ /* Print the MCE event to host console. */
+ machine_check_print_event_info(&vcpu->arch.mce_evt, false);
break;
case BOOK3S_INTERRUPT_PROGRAM:
{
@@ -1048,12 +1171,19 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
/*
* This occurs if the guest (kernel or userspace) does something that
- * is prohibited by HFSCR. We just generate a program interrupt to
- * the guest.
+ * is prohibited by HFSCR.
+ * On POWER9, this could be a doorbell instruction that we need
+ * to emulate.
+ * Otherwise, we just generate a program interrupt to the guest.
*/
case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
- kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
- r = RESUME_GUEST;
+ r = EMULATE_FAIL;
+ if ((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG)
+ r = kvmppc_emulate_doorbell_instr(vcpu);
+ if (r == EMULATE_FAIL) {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ r = RESUME_GUEST;
+ }
break;
case BOOK3S_INTERRUPT_HV_RM_HARD:
r = RESUME_PASSTHROUGH;
@@ -1143,6 +1273,12 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
if (cpu_has_feature(CPU_FTR_ARCH_207S))
mask |= LPCR_AIL;
+ /*
+ * On POWER9, allow userspace to enable large decrementer for the
+ * guest, whether or not the host has it enabled.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ mask |= LPCR_LD;
/* Broken 32-bit version of LPCR must not clear top bits */
if (preserve_top32)
@@ -1611,7 +1747,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
init_swait_queue_head(&vcore->wq);
vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr;
- vcore->first_vcpuid = core * threads_per_vcore();
+ vcore->first_vcpuid = core * kvm->arch.smt_mode;
vcore->kvm = kvm;
INIT_LIST_HEAD(&vcore->preempt_list);
@@ -1770,14 +1906,10 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
unsigned int id)
{
struct kvm_vcpu *vcpu;
- int err = -EINVAL;
+ int err;
int core;
struct kvmppc_vcore *vcore;
- core = id / threads_per_vcore();
- if (core >= KVM_MAX_VCORES)
- goto out;
-
err = -ENOMEM;
vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
if (!vcpu)
@@ -1808,6 +1940,20 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
vcpu->arch.busy_preempt = TB_NIL;
vcpu->arch.intr_msr = MSR_SF | MSR_ME;
+ /*
+ * Set the default HFSCR for the guest from the host value.
+ * This value is only used on POWER9.
+ * On POWER9 DD1, TM doesn't work, so we make sure to
+ * prevent the guest from using it.
+ * On POWER9, we want to virtualize the doorbell facility, so we
+ * clear the HFSCR_MSGP bit, which causes those instructions to trap.
+ */
+ vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
+ if (!cpu_has_feature(CPU_FTR_TM))
+ vcpu->arch.hfscr &= ~HFSCR_TM;
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ vcpu->arch.hfscr &= ~HFSCR_MSGP;
+
kvmppc_mmu_book3s_hv_init(vcpu);
vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
@@ -1815,11 +1961,17 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
init_waitqueue_head(&vcpu->arch.cpu_run);
mutex_lock(&kvm->lock);
- vcore = kvm->arch.vcores[core];
- if (!vcore) {
- vcore = kvmppc_vcore_create(kvm, core);
- kvm->arch.vcores[core] = vcore;
- kvm->arch.online_vcores++;
+ vcore = NULL;
+ err = -EINVAL;
+ core = id / kvm->arch.smt_mode;
+ if (core < KVM_MAX_VCORES) {
+ vcore = kvm->arch.vcores[core];
+ if (!vcore) {
+ err = -ENOMEM;
+ vcore = kvmppc_vcore_create(kvm, core);
+ kvm->arch.vcores[core] = vcore;
+ kvm->arch.online_vcores++;
+ }
}
mutex_unlock(&kvm->lock);
@@ -1847,6 +1999,43 @@ out:
return ERR_PTR(err);
}
+static int kvmhv_set_smt_mode(struct kvm *kvm, unsigned long smt_mode,
+ unsigned long flags)
+{
+ int err;
+ int esmt = 0;
+
+ if (flags)
+ return -EINVAL;
+ if (smt_mode > MAX_SMT_THREADS || !is_power_of_2(smt_mode))
+ return -EINVAL;
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /*
+ * On POWER8 (or POWER7), the threading mode is "strict",
+ * so we pack smt_mode vcpus per vcore.
+ */
+ if (smt_mode > threads_per_subcore)
+ return -EINVAL;
+ } else {
+ /*
+ * On POWER9, the threading mode is "loose",
+ * so each vcpu gets its own vcore.
+ */
+ esmt = smt_mode;
+ smt_mode = 1;
+ }
+ mutex_lock(&kvm->lock);
+ err = -EBUSY;
+ if (!kvm->arch.online_vcores) {
+ kvm->arch.smt_mode = smt_mode;
+ kvm->arch.emul_smt_mode = esmt;
+ err = 0;
+ }
+ mutex_unlock(&kvm->lock);
+
+ return err;
+}
+
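/*
 * Rough userspace sketch, not part of this patch: kvmhv_set_smt_mode()
 * is reached from the VM-level KVM_ENABLE_CAP handler added to
 * powerpc.c later in this diff, with the mode in args[0] and flags in
 * args[1].  vm_fd is assumed to be an open KVM VM descriptor, and the
 * call has to happen before any vcore exists or it returns -EBUSY.
 */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int sketch_request_smt4(int vm_fd)
{
	struct kvm_enable_cap cap = {
		.cap  = KVM_CAP_PPC_SMT,
		.args = { 4, 0 },	/* args[0] = smt_mode, args[1] = flags */
	};

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}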
static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
{
if (vpa->pinned_addr)
@@ -1897,7 +2086,7 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
}
}
-extern void __kvmppc_vcore_entry(void);
+extern int __kvmppc_vcore_entry(void);
static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
struct kvm_vcpu *vcpu)
@@ -1962,10 +2151,6 @@ static void kvmppc_release_hwthread(int cpu)
tpaca->kvm_hstate.kvm_split_mode = NULL;
}
-static void do_nothing(void *x)
-{
-}
-
static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
{
int i;
@@ -1983,11 +2168,35 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
smp_call_function_single(cpu + i, do_nothing, NULL, 1);
}
+static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+ /*
+ * With radix, the guest can do TLB invalidations itself,
+ * and it could choose to use the local form (tlbiel) if
+ * it is invalidating a translation that has only ever been
+ * used on one vcpu. However, that doesn't mean it has
+ * only ever been used on one physical cpu, since vcpus
+ * can move around between pcpus. To cope with this, when
+ * a vcpu moves from one pcpu to another, we need to tell
+ * any vcpus running on the same core as this vcpu previously
+ * ran to flush the TLB. The TLB is shared between threads,
+ * so we use a single bit in .need_tlb_flush for all 4 threads.
+ */
+ if (vcpu->arch.prev_cpu != pcpu) {
+ if (vcpu->arch.prev_cpu >= 0 &&
+ cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
+ cpu_first_thread_sibling(pcpu))
+ radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
+ vcpu->arch.prev_cpu = pcpu;
+ }
+}
+
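/*
 * Sketch, not part of this patch: the test above reduces to "did the
 * vcpu move to a different physical core?".  This assumes
 * cpu_first_thread_sibling() masks a cpu number down to the first
 * thread of its core, as it does in asm/cputhreads.h today.
 */
#include <stdbool.h>

static bool sketch_crossed_core(int prev_cpu, int new_cpu, int threads_per_core)
{
	if (prev_cpu < 0)
		return false;	/* vcpu has not run anywhere yet */
	return (prev_cpu & ~(threads_per_core - 1)) !=
	       (new_cpu & ~(threads_per_core - 1));
}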
static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
{
int cpu;
struct paca_struct *tpaca;
- struct kvmppc_vcore *mvc = vc->master_vcore;
struct kvm *kvm = vc->kvm;
cpu = vc->pcpu;
@@ -1997,36 +2206,16 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
vcpu->arch.timer_running = 0;
}
cpu += vcpu->arch.ptid;
- vcpu->cpu = mvc->pcpu;
+ vcpu->cpu = vc->pcpu;
vcpu->arch.thread_cpu = cpu;
-
- /*
- * With radix, the guest can do TLB invalidations itself,
- * and it could choose to use the local form (tlbiel) if
- * it is invalidating a translation that has only ever been
- * used on one vcpu. However, that doesn't mean it has
- * only ever been used on one physical cpu, since vcpus
- * can move around between pcpus. To cope with this, when
- * a vcpu moves from one pcpu to another, we need to tell
- * any vcpus running on the same core as this vcpu previously
- * ran to flush the TLB. The TLB is shared between threads,
- * so we use a single bit in .need_tlb_flush for all 4 threads.
- */
- if (kvm_is_radix(kvm) && vcpu->arch.prev_cpu != cpu) {
- if (vcpu->arch.prev_cpu >= 0 &&
- cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
- cpu_first_thread_sibling(cpu))
- radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
- vcpu->arch.prev_cpu = cpu;
- }
cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
}
tpaca = &paca[cpu];
tpaca->kvm_hstate.kvm_vcpu = vcpu;
- tpaca->kvm_hstate.ptid = cpu - mvc->pcpu;
+ tpaca->kvm_hstate.ptid = cpu - vc->pcpu;
/* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
smp_wmb();
- tpaca->kvm_hstate.kvm_vcore = mvc;
+ tpaca->kvm_hstate.kvm_vcore = vc;
if (cpu != smp_processor_id())
kvmppc_ipi_thread(cpu);
}
@@ -2155,8 +2344,7 @@ struct core_info {
int max_subcore_threads;
int total_threads;
int subcore_threads[MAX_SUBCORES];
- struct kvm *subcore_vm[MAX_SUBCORES];
- struct list_head vcs[MAX_SUBCORES];
+ struct kvmppc_vcore *vc[MAX_SUBCORES];
};
/*
@@ -2167,17 +2355,12 @@ static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
{
- int sub;
-
memset(cip, 0, sizeof(*cip));
cip->n_subcores = 1;
cip->max_subcore_threads = vc->num_threads;
cip->total_threads = vc->num_threads;
cip->subcore_threads[0] = vc->num_threads;
- cip->subcore_vm[0] = vc->kvm;
- for (sub = 0; sub < MAX_SUBCORES; ++sub)
- INIT_LIST_HEAD(&cip->vcs[sub]);
- list_add_tail(&vc->preempt_list, &cip->vcs[0]);
+ cip->vc[0] = vc;
}
static bool subcore_config_ok(int n_subcores, int n_threads)
@@ -2197,9 +2380,8 @@ static bool subcore_config_ok(int n_subcores, int n_threads)
return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS;
}
-static void init_master_vcore(struct kvmppc_vcore *vc)
+static void init_vcore_to_run(struct kvmppc_vcore *vc)
{
- vc->master_vcore = vc;
vc->entry_exit_map = 0;
vc->in_guest = 0;
vc->napping_threads = 0;
@@ -2224,9 +2406,9 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
++cip->n_subcores;
cip->total_threads += vc->num_threads;
cip->subcore_threads[sub] = vc->num_threads;
- cip->subcore_vm[sub] = vc->kvm;
- init_master_vcore(vc);
- list_move_tail(&vc->preempt_list, &cip->vcs[sub]);
+ cip->vc[sub] = vc;
+ init_vcore_to_run(vc);
+ list_del_init(&vc->preempt_list);
return true;
}
@@ -2294,6 +2476,18 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
spin_unlock(&lp->lock);
}
+static bool recheck_signals(struct core_info *cip)
+{
+ int sub, i;
+ struct kvm_vcpu *vcpu;
+
+ for (sub = 0; sub < cip->n_subcores; ++sub)
+ for_each_runnable_thread(i, vcpu, cip->vc[sub])
+ if (signal_pending(vcpu->arch.run_task))
+ return true;
+ return false;
+}
+
static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
{
int still_running = 0, i;
@@ -2331,7 +2525,6 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
wake_up(&vcpu->arch.cpu_run);
}
}
- list_del_init(&vc->preempt_list);
if (!is_master) {
if (still_running > 0) {
kvmppc_vcore_preempt(vc);
@@ -2393,6 +2586,21 @@ static inline int kvmppc_set_host_core(unsigned int cpu)
return 0;
}
+static void set_irq_happened(int trap)
+{
+ switch (trap) {
+ case BOOK3S_INTERRUPT_EXTERNAL:
+ local_paca->irq_happened |= PACA_IRQ_EE;
+ break;
+ case BOOK3S_INTERRUPT_H_DOORBELL:
+ local_paca->irq_happened |= PACA_IRQ_DBELL;
+ break;
+ case BOOK3S_INTERRUPT_HMI:
+ local_paca->irq_happened |= PACA_IRQ_HMI;
+ break;
+ }
+}
+
/*
* Run a set of guest threads on a physical core.
* Called with vc->lock held.
@@ -2403,7 +2611,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
int i;
int srcu_idx;
struct core_info core_info;
- struct kvmppc_vcore *pvc, *vcnext;
+ struct kvmppc_vcore *pvc;
struct kvm_split_mode split_info, *sip;
int split, subcore_size, active;
int sub;
@@ -2412,6 +2620,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
int pcpu, thr;
int target_threads;
int controlled_threads;
+ int trap;
/*
* Remove from the list any threads that have a signal pending
@@ -2426,7 +2635,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
/*
* Initialize *vc.
*/
- init_master_vcore(vc);
+ init_vcore_to_run(vc);
vc->preempt_tb = TB_NIL;
/*
@@ -2463,6 +2672,43 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
if (vc->num_threads < target_threads)
collect_piggybacks(&core_info, target_threads);
+ /*
+ * On radix, arrange for TLB flushing if necessary.
+ * This has to be done before disabling interrupts since
+ * it uses smp_call_function().
+ */
+ pcpu = smp_processor_id();
+ if (kvm_is_radix(vc->kvm)) {
+ for (sub = 0; sub < core_info.n_subcores; ++sub)
+ for_each_runnable_thread(i, vcpu, core_info.vc[sub])
+ kvmppc_prepare_radix_vcpu(vcpu, pcpu);
+ }
+
+ /*
+ * Hard-disable interrupts, and check resched flag and signals.
+ * If we need to reschedule or deliver a signal, clean up
+ * and return without going into the guest(s).
+ */
+ local_irq_disable();
+ hard_irq_disable();
+ if (lazy_irq_pending() || need_resched() ||
+ recheck_signals(&core_info)) {
+ local_irq_enable();
+ vc->vcore_state = VCORE_INACTIVE;
+ /* Unlock all except the primary vcore */
+ for (sub = 1; sub < core_info.n_subcores; ++sub) {
+ pvc = core_info.vc[sub];
+ /* Put back on to the preempted vcores list */
+ kvmppc_vcore_preempt(pvc);
+ spin_unlock(&pvc->lock);
+ }
+ for (i = 0; i < controlled_threads; ++i)
+ kvmppc_release_hwthread(pcpu + i);
+ return;
+ }
+
+ kvmppc_clear_host_core(pcpu);
+
/* Decide on micro-threading (split-core) mode */
subcore_size = threads_per_subcore;
cmd_bit = stat_bit = 0;
@@ -2486,13 +2732,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
split_info.ldbar = mfspr(SPRN_LDBAR);
split_info.subcore_size = subcore_size;
for (sub = 0; sub < core_info.n_subcores; ++sub)
- split_info.master_vcs[sub] =
- list_first_entry(&core_info.vcs[sub],
- struct kvmppc_vcore, preempt_list);
+ split_info.vc[sub] = core_info.vc[sub];
/* order writes to split_info before kvm_split_mode pointer */
smp_wmb();
}
- pcpu = smp_processor_id();
for (thr = 0; thr < controlled_threads; ++thr)
paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
@@ -2512,32 +2755,29 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
}
}
- kvmppc_clear_host_core(pcpu);
-
/* Start all the threads */
active = 0;
for (sub = 0; sub < core_info.n_subcores; ++sub) {
thr = subcore_thread_map[sub];
thr0_done = false;
active |= 1 << thr;
- list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
- pvc->pcpu = pcpu + thr;
- for_each_runnable_thread(i, vcpu, pvc) {
- kvmppc_start_thread(vcpu, pvc);
- kvmppc_create_dtl_entry(vcpu, pvc);
- trace_kvm_guest_enter(vcpu);
- if (!vcpu->arch.ptid)
- thr0_done = true;
- active |= 1 << (thr + vcpu->arch.ptid);
- }
- /*
- * We need to start the first thread of each subcore
- * even if it doesn't have a vcpu.
- */
- if (pvc->master_vcore == pvc && !thr0_done)
- kvmppc_start_thread(NULL, pvc);
- thr += pvc->num_threads;
+ pvc = core_info.vc[sub];
+ pvc->pcpu = pcpu + thr;
+ for_each_runnable_thread(i, vcpu, pvc) {
+ kvmppc_start_thread(vcpu, pvc);
+ kvmppc_create_dtl_entry(vcpu, pvc);
+ trace_kvm_guest_enter(vcpu);
+ if (!vcpu->arch.ptid)
+ thr0_done = true;
+ active |= 1 << (thr + vcpu->arch.ptid);
}
+ /*
+ * We need to start the first thread of each subcore
+ * even if it doesn't have a vcpu.
+ */
+ if (!thr0_done)
+ kvmppc_start_thread(NULL, pvc);
+ thr += pvc->num_threads;
}
/*
@@ -2564,17 +2804,27 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
trace_kvmppc_run_core(vc, 0);
for (sub = 0; sub < core_info.n_subcores; ++sub)
- list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list)
- spin_unlock(&pvc->lock);
+ spin_unlock(&core_info.vc[sub]->lock);
+
+ /*
+ * Interrupts will be enabled once we get into the guest,
+ * so tell lockdep that we're about to enable interrupts.
+ */
+ trace_hardirqs_on();
guest_enter();
srcu_idx = srcu_read_lock(&vc->kvm->srcu);
- __kvmppc_vcore_entry();
+ trap = __kvmppc_vcore_entry();
srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
+ guest_exit();
+
+ trace_hardirqs_off();
+ set_irq_happened(trap);
+
spin_lock(&vc->lock);
/* prevent other vcpu threads from doing kvmppc_start_thread() now */
vc->vcore_state = VCORE_EXITING;
@@ -2602,6 +2852,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
split_info.do_nap = 0;
}
+ kvmppc_set_host_core(pcpu);
+
+ local_irq_enable();
+
/* Let secondaries go back to the offline loop */
for (i = 0; i < controlled_threads; ++i) {
kvmppc_release_hwthread(pcpu + i);
@@ -2610,18 +2864,15 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest);
}
- kvmppc_set_host_core(pcpu);
-
spin_unlock(&vc->lock);
/* make sure updates to secondary vcpu structs are visible now */
smp_mb();
- guest_exit();
- for (sub = 0; sub < core_info.n_subcores; ++sub)
- list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub],
- preempt_list)
- post_guest_process(pvc, pvc == vc);
+ for (sub = 0; sub < core_info.n_subcores; ++sub) {
+ pvc = core_info.vc[sub];
+ post_guest_process(pvc, pvc == vc);
+ }
spin_lock(&vc->lock);
preempt_enable();
@@ -2666,6 +2917,30 @@ static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
vc->halt_poll_ns /= halt_poll_ns_shrink;
}
+#ifdef CONFIG_KVM_XICS
+static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
+{
+ if (!xive_enabled())
+ return false;
+ return vcpu->arch.xive_saved_state.pipr <
+ vcpu->arch.xive_saved_state.cppr;
+}
+#else
+static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
+{
+ return false;
+}
+#endif /* CONFIG_KVM_XICS */
+
+static bool kvmppc_vcpu_woken(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.pending_exceptions || vcpu->arch.prodded ||
+ kvmppc_doorbell_pending(vcpu) || xive_interrupt_pending(vcpu))
+ return true;
+
+ return false;
+}
+
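/*
 * Background for the PIPR/CPPR comparison above (sketch, not from this
 * patch): in XIVE a lower number is a more favoured priority, so an
 * interrupt is presented only while the pending interrupt priority is
 * strictly better than the current processor priority.  For example,
 * cppr == 0xff accepts everything, while cppr == 5 presents only
 * priorities 0-4.
 */
#include <stdbool.h>

static bool sketch_xive_pending(unsigned char pipr, unsigned char cppr)
{
	return pipr < cppr;	/* pending priority beats the current one */
}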
/*
* Check to see if any of the runnable vcpus on the vcore have pending
* exceptions or are no longer ceded
@@ -2676,8 +2951,7 @@ static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
int i;
for_each_runnable_thread(i, vcpu, vc) {
- if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded ||
- vcpu->arch.prodded)
+ if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu))
return 1;
}
@@ -2819,15 +3093,14 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
*/
if (!signal_pending(current)) {
if (vc->vcore_state == VCORE_PIGGYBACK) {
- struct kvmppc_vcore *mvc = vc->master_vcore;
- if (spin_trylock(&mvc->lock)) {
- if (mvc->vcore_state == VCORE_RUNNING &&
- !VCORE_IS_EXITING(mvc)) {
+ if (spin_trylock(&vc->lock)) {
+ if (vc->vcore_state == VCORE_RUNNING &&
+ !VCORE_IS_EXITING(vc)) {
kvmppc_create_dtl_entry(vcpu, vc);
kvmppc_start_thread(vcpu, vc);
trace_kvm_guest_enter(vcpu);
}
- spin_unlock(&mvc->lock);
+ spin_unlock(&vc->lock);
}
} else if (vc->vcore_state == VCORE_RUNNING &&
!VCORE_IS_EXITING(vc)) {
@@ -2863,7 +3136,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
break;
n_ceded = 0;
for_each_runnable_thread(i, v, vc) {
- if (!v->arch.pending_exceptions && !v->arch.prodded)
+ if (!kvmppc_vcpu_woken(v))
n_ceded += v->arch.ceded;
else
v->arch.ceded = 0;
@@ -3519,6 +3792,19 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
kvm_hv_vm_activated();
/*
+ * Initialize smt_mode depending on processor.
+ * POWER8 and earlier have to use "strict" threading, where
+ * all vCPUs in a vcore have to run on the same (sub)core,
+ * whereas on POWER9 the threads can each run a different
+ * guest.
+ */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ kvm->arch.smt_mode = threads_per_subcore;
+ else
+ kvm->arch.smt_mode = 1;
+ kvm->arch.emul_smt_mode = 1;
+
+ /*
* Create a debugfs directory for the VM
*/
snprintf(buf, sizeof(buf), "vm%d", current->pid);
@@ -3947,6 +4233,7 @@ static struct kvmppc_ops kvm_ops_hv = {
#endif
.configure_mmu = kvmhv_configure_mmu,
.get_rmmu_info = kvmhv_get_rmmu_info,
+ .set_smt_mode = kvmhv_set_smt_mode,
};
static int kvm_init_subcore_bitmap(void)
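With the machine-check exit change above, a FWNMI-capable guest's machine checks now reach userspace as KVM_EXIT_NMI with a disposition encoded in run->flags. A minimal sketch of the userspace check, assuming headers that already carry the KVM_RUN_PPC_NMI_DISP_* values introduced by this series:

#include <stdbool.h>
#include <linux/kvm.h>

/* True when the NMI exit reported a fully recovered machine check, so
 * userspace can build a "recovered" error log for the guest. */
static bool sketch_nmi_fully_recovered(const struct kvm_run *run)
{
	return run->exit_reason == KVM_EXIT_NMI &&
	       (run->flags & KVM_RUN_PPC_NMI_DISP_MASK) ==
			KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
}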
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index ee4c2558c305..90644db9d38e 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -307,7 +307,7 @@ void kvmhv_commence_exit(int trap)
return;
for (i = 0; i < MAX_SUBCORES; ++i) {
- vc = sip->master_vcs[i];
+ vc = sip->vc[i];
if (!vc)
break;
do {
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 404deb512844..dc54373c8780 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -61,13 +61,6 @@ BEGIN_FTR_SECTION
std r3, HSTATE_DABR(r13)
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
- /* Hard-disable interrupts */
- mfmsr r10
- std r10, HSTATE_HOST_MSR(r13)
- rldicl r10,r10,48,1
- rotldi r10,r10,16
- mtmsrd r10,1
-
/* Save host PMU registers */
BEGIN_FTR_SECTION
/* Work around P8 PMAE bug */
@@ -153,6 +146,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
*
* R1 = host R1
* R2 = host R2
+ * R3 = trap number on this thread
* R12 = exit handler id
* R13 = PACA
*/
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index 7ef0993214f3..c356f9a40b24 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -130,12 +130,28 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
out:
/*
+ * For a guest that supports the FWNMI capability, hook the MCE event
+ * into the vcpu structure. We are going to exit the guest with the
+ * KVM_EXIT_NMI exit reason; on the way out we will pull this event
+ * from the vcpu structure and print it from thread 0 of the core/subcore.
+ *
+ * For a guest that does not support the FWNMI capability (old QEMU):
* We are now going to enter the guest either through a machine check
* interrupt (for unhandled errors) or by continuing from the current
* HSRR0 (for handled errors) in the guest. Hence queue up the event
* so that we can log it from the host console later.
*/
- machine_check_queue_event();
+ if (vcpu->kvm->arch.fwnmi_enabled) {
+ /*
+ * Hook up the mce event on to vcpu structure.
+ * First clear the old event.
+ */
+ memset(&vcpu->arch.mce_evt, 0, sizeof(vcpu->arch.mce_evt));
+ if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) {
+ vcpu->arch.mce_evt = mce_evt;
+ }
+ } else
+ machine_check_queue_event();
return handled;
}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 4888dd494604..6ea4b53f4b16 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -45,7 +45,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define NAPPING_NOVCPU 2
/* Stack frame offsets for kvmppc_hv_entry */
-#define SFS 144
+#define SFS 160
#define STACK_SLOT_TRAP (SFS-4)
#define STACK_SLOT_TID (SFS-16)
#define STACK_SLOT_PSSCR (SFS-24)
@@ -54,6 +54,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define STACK_SLOT_CIABR (SFS-48)
#define STACK_SLOT_DAWR (SFS-56)
#define STACK_SLOT_DAWRX (SFS-64)
+#define STACK_SLOT_HFSCR (SFS-72)
/*
* Call kvmppc_hv_entry in real mode.
@@ -68,6 +69,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
std r0, PPC_LR_STKOFF(r1)
stdu r1, -112(r1)
mfmsr r10
+ std r10, HSTATE_HOST_MSR(r13)
LOAD_REG_ADDR(r5, kvmppc_call_hv_entry)
li r0,MSR_RI
andc r0,r10,r0
@@ -152,20 +154,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
stb r0, HSTATE_HWTHREAD_REQ(r13)
/*
- * For external and machine check interrupts, we need
- * to call the Linux handler to process the interrupt.
- * We do that by jumping to absolute address 0x500 for
- * external interrupts, or the machine_check_fwnmi label
- * for machine checks (since firmware might have patched
- * the vector area at 0x200). The [h]rfid at the end of the
- * handler will return to the book3s_hv_interrupts.S code.
- * For other interrupts we do the rfid to get back
- * to the book3s_hv_interrupts.S code here.
+ * For external interrupts we need to call the Linux
+ * handler to process the interrupt. We do that by jumping
+ * to absolute address 0x500 for external interrupts.
+ * The [h]rfid at the end of the handler will return to
+ * the book3s_hv_interrupts.S code. For other interrupts
+ * we do the rfid to get back to the book3s_hv_interrupts.S
+ * code here.
*/
ld r8, 112+PPC_LR_STKOFF(r1)
addi r1, r1, 112
ld r7, HSTATE_HOST_MSR(r13)
+ /* Return the trap number on this thread as the return value */
+ mr r3, r12
+
/*
* If we came back from the guest via a relocation-on interrupt,
* we will be in virtual mode at this point, which makes it a
@@ -175,59 +178,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
andi. r0, r0, MSR_IR /* in real mode? */
bne .Lvirt_return
- cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
- cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
- beq 11f
- cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
- beq 15f /* Invoke the H_DOORBELL handler */
- cmpwi cr2, r12, BOOK3S_INTERRUPT_HMI
- beq cr2, 14f /* HMI check */
-
- /* RFI into the highmem handler, or branch to interrupt handler */
+ /* RFI into the highmem handler */
mfmsr r6
li r0, MSR_RI
andc r6, r6, r0
mtmsrd r6, 1 /* Clear RI in MSR */
mtsrr0 r8
mtsrr1 r7
- beq cr1, 13f /* machine check */
RFI
- /* On POWER7, we have external interrupts set to use HSRR0/1 */
-11: mtspr SPRN_HSRR0, r8
- mtspr SPRN_HSRR1, r7
- ba 0x500
-
-13: b machine_check_fwnmi
-
-14: mtspr SPRN_HSRR0, r8
- mtspr SPRN_HSRR1, r7
- b hmi_exception_after_realmode
-
-15: mtspr SPRN_HSRR0, r8
- mtspr SPRN_HSRR1, r7
- ba 0xe80
-
- /* Virtual-mode return - can't get here for HMI or machine check */
+ /* Virtual-mode return */
.Lvirt_return:
- cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
- beq 16f
- cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
- beq 17f
- andi. r0, r7, MSR_EE /* were interrupts hard-enabled? */
- beq 18f
- mtmsrd r7, 1 /* if so then re-enable them */
-18: mtlr r8
+ mtlr r8
blr
-16: mtspr SPRN_HSRR0, r8 /* jump to reloc-on external vector */
- mtspr SPRN_HSRR1, r7
- b exc_virt_0x4500_hardware_interrupt
-
-17: mtspr SPRN_HSRR0, r8
- mtspr SPRN_HSRR1, r7
- b exc_virt_0x4e80_h_doorbell
-
kvmppc_primary_no_guest:
/* We handle this much like a ceded vcpu */
/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
@@ -769,6 +733,8 @@ BEGIN_FTR_SECTION
std r6, STACK_SLOT_PSSCR(r1)
std r7, STACK_SLOT_PID(r1)
std r8, STACK_SLOT_IAMR(r1)
+ mfspr r5, SPRN_HFSCR
+ std r5, STACK_SLOT_HFSCR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
mfspr r5, SPRN_CIABR
@@ -920,8 +886,10 @@ FTR_SECTION_ELSE
ld r5, VCPU_TID(r4)
ld r6, VCPU_PSSCR(r4)
oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */
+ ld r7, VCPU_HFSCR(r4)
mtspr SPRN_TIDR, r5
mtspr SPRN_PSSCR, r6
+ mtspr SPRN_HFSCR, r7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
8:
@@ -936,7 +904,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
mftb r7
subf r3,r7,r8
mtspr SPRN_DEC,r3
- stw r3,VCPU_DEC(r4)
+ std r3,VCPU_DEC(r4)
ld r5, VCPU_SPRG0(r4)
ld r6, VCPU_SPRG1(r4)
@@ -1048,7 +1016,13 @@ kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
li r0, BOOK3S_INTERRUPT_EXTERNAL
bne cr1, 12f
mfspr r0, SPRN_DEC
- cmpwi r0, 0
+BEGIN_FTR_SECTION
+ /* On POWER9 check whether the guest has large decrementer enabled */
+ andis. r8, r8, LPCR_LD@h
+ bne 15f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+ extsw r0, r0
+15: cmpdi r0, 0
li r0, BOOK3S_INTERRUPT_DECREMENTER
bge 5f
@@ -1058,6 +1032,23 @@ kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
mr r9, r4
bl kvmppc_msr_interrupt
5:
+BEGIN_FTR_SECTION
+ b fast_guest_return
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+ /* On POWER9, check for pending doorbell requests */
+ lbz r0, VCPU_DBELL_REQ(r4)
+ cmpwi r0, 0
+ beq fast_guest_return
+ ld r5, HSTATE_KVM_VCORE(r13)
+ /* Set DPDES register so the CPU will take a doorbell interrupt */
+ li r0, 1
+ mtspr SPRN_DPDES, r0
+ std r0, VCORE_DPDES(r5)
+ /* Make sure other cpus see vcore->dpdes set before dbell req clear */
+ lwsync
+ /* Clear the pending doorbell request */
+ li r0, 0
+ stb r0, VCPU_DBELL_REQ(r4)
/*
* Required state:
@@ -1232,6 +1223,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
stw r12,VCPU_TRAP(r9)
+ /*
+ * Now that we have saved away SRR0/1 and HSRR0/1,
+ * interrupts are recoverable in principle, so set MSR_RI.
+ * This becomes important for relocation-on interrupts from
+ * the guest, which we can get in radix mode on POWER9.
+ */
+ li r0, MSR_RI
+ mtmsrd r0, 1
+
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
addi r3, r9, VCPU_TB_RMINTR
mr r4, r9
@@ -1288,6 +1288,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
beq 4f
b guest_exit_cont
3:
+ /* If it's a hypervisor facility unavailable interrupt, save HFSCR */
+ cmpwi r12, BOOK3S_INTERRUPT_H_FAC_UNAVAIL
+ bne 14f
+ mfspr r3, SPRN_HFSCR
+ std r3, VCPU_HFSCR(r9)
+ b guest_exit_cont
+14:
/* External interrupt ? */
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
bne+ guest_exit_cont
@@ -1475,12 +1482,18 @@ mc_cont:
mtspr SPRN_SPURR,r4
/* Save DEC */
+ ld r3, HSTATE_KVM_VCORE(r13)
mfspr r5,SPRN_DEC
mftb r6
+ /* On P9, if the guest has large decr enabled, don't sign extend */
+BEGIN_FTR_SECTION
+ ld r4, VCORE_LPCR(r3)
+ andis. r4, r4, LPCR_LD@h
+ bne 16f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r5,r5
- add r5,r5,r6
+16: add r5,r5,r6
/* r5 is a guest timebase value here, convert to host TB */
- ld r3,HSTATE_KVM_VCORE(r13)
ld r4,VCORE_TB_OFFSET(r3)
subf r5,r4,r5
std r5,VCPU_DEC_EXPIRES(r9)
@@ -1525,6 +1538,9 @@ FTR_SECTION_ELSE
rldicl r6, r6, 4, 50 /* r6 &= PSSCR_GUEST_VIS */
rotldi r6, r6, 60
std r6, VCPU_PSSCR(r9)
+ /* Restore host HFSCR value */
+ ld r7, STACK_SLOT_HFSCR(r1)
+ mtspr SPRN_HFSCR, r7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
/*
* Restore various registers to 0, where non-zero values
@@ -2402,8 +2418,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
mfspr r3, SPRN_DEC
mfspr r4, SPRN_HDEC
mftb r5
+BEGIN_FTR_SECTION
+ /* On P9 check whether the guest has large decrementer mode enabled */
+ ld r6, HSTATE_KVM_VCORE(r13)
+ ld r6, VCORE_LPCR(r6)
+ andis. r6, r6, LPCR_LD@h
+ bne 68f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r3, r3
- EXTEND_HDEC(r4)
+68: EXTEND_HDEC(r4)
cmpd r3, r4
ble 67f
mtspr SPRN_DEC, r4
@@ -2589,22 +2612,32 @@ machine_check_realmode:
ld r9, HSTATE_KVM_VCPU(r13)
li r12, BOOK3S_INTERRUPT_MACHINE_CHECK
/*
- * Deliver unhandled/fatal (e.g. UE) MCE errors to guest through
- * machine check interrupt (set HSRR0 to 0x200). And for handled
- * errors (no-fatal), just go back to guest execution with current
- * HSRR0 instead of exiting guest. This new approach will inject
- * machine check to guest for fatal error causing guest to crash.
- *
- * The old code used to return to host for unhandled errors which
- * was causing guest to hang with soft lockups inside guest and
- * makes it difficult to recover guest instance.
+ * For a guest that is FWNMI capable, deliver all MCE errors (handled
+ * or unhandled) by exiting the guest with the KVM_EXIT_NMI exit
+ * reason. This new approach delivers the machine check to the guest
+ * together with additional information in the form of an RTAS event,
+ * enabling the guest kernel to handle such errors suitably.
*
+ * For a guest that is not FWNMI capable (old QEMU), fall back to the
+ * old behaviour for backward compatibility:
+ * Deliver unhandled/fatal (e.g. UE) MCE errors to the guest through
+ * a machine check interrupt (set HSRR0 to 0x200).
+ * For handled (non-fatal) errors, just go back to guest execution
+ * with the current HSRR0.
* if we receive machine check with MSR(RI=0) then deliver it to
* guest as machine check causing guest to crash.
*/
ld r11, VCPU_MSR(r9)
rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
bne mc_cont /* if so, exit to host */
+ /* Check if guest is capable of handling NMI exit */
+ ld r10, VCPU_KVM(r9)
+ lbz r10, KVM_FWNMI(r10)
+ cmpdi r10, 1 /* FWNMI capable? */
+ beq mc_cont /* if so, exit with KVM_EXIT_NMI. */
+
+ /* if not, fall through for backward compatibility. */
andi. r10, r11, MSR_RI /* check for unrecoverable exception */
beq 1f /* Deliver a machine check to guest */
ld r10, VCPU_PC(r9)
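The decrementer handling added above (the LPCR_LD checks around extsw in the cede path, the DEC save path, and the timer comparison) all encode one rule: with the large decrementer enabled the register is a full 64-bit signed value, otherwise only the low 32 bits are architected and must be sign-extended. The same rule in C, as a sketch:

#include <stdint.h>

/* Interpret a raw decrementer read according to whether LPCR[LD] is set. */
static int64_t sketch_dec_value(uint64_t raw_dec, int large_dec_enabled)
{
	if (large_dec_enabled)
		return (int64_t)raw_dec;	/* large decrementer: use as-is */
	return (int64_t)(int32_t)raw_dec;	/* legacy: sign-extend low 32 bits */
}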
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index ffe1da95033a..08b200a0bbce 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -1257,8 +1257,8 @@ static void xive_pre_save_scan(struct kvmppc_xive *xive)
if (!xc)
continue;
for (j = 0; j < KVMPPC_XIVE_Q_COUNT; j++) {
- if (xc->queues[i].qpage)
- xive_pre_save_queue(xive, &xc->queues[i]);
+ if (xc->queues[j].qpage)
+ xive_pre_save_queue(xive, &xc->queues[j]);
}
}
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 3eaac3809977..071b87ee682f 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -687,7 +687,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
kvmppc_core_check_exceptions(vcpu);
- if (vcpu->requests) {
+ if (kvm_request_pending(vcpu)) {
/* Exception delivery raised request; start over */
return 1;
}
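The open-coded vcpu->requests tests here and in powerpc.c below are replaced by kvm_request_pending(); the generic helper is essentially a READ_ONCE of the request bitmap. A sketch of the idea (the exact accessor is assumed, not quoted from this diff):

#include <linux/kvm_host.h>

/* Roughly what the generic helper provides to the callers above. */
static inline bool sketch_request_pending(struct kvm_vcpu *vcpu)
{
	return READ_ONCE(vcpu->requests) != 0;
}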
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index c873ffe55362..4d8b4d6cebff 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -39,7 +39,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
unsigned long dec_nsec;
unsigned long long dec_time;
- pr_debug("mtDEC: %x\n", vcpu->arch.dec);
+ pr_debug("mtDEC: %lx\n", vcpu->arch.dec);
hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
#ifdef CONFIG_PPC_BOOK3S
@@ -109,7 +109,7 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
case SPRN_TBWU: break;
case SPRN_DEC:
- vcpu->arch.dec = spr_val;
+ vcpu->arch.dec = (u32) spr_val;
kvmppc_emulate_dec(vcpu);
break;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 7f71ab5fcad1..1a75c0b5f4ca 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -55,8 +55,7 @@ EXPORT_SYMBOL_GPL(kvmppc_pr_ops);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
- return !!(v->arch.pending_exceptions) ||
- v->requests;
+ return !!(v->arch.pending_exceptions) || kvm_request_pending(v);
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
@@ -108,7 +107,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
*/
smp_mb();
- if (vcpu->requests) {
+ if (kvm_request_pending(vcpu)) {
/* Make sure we process requests while preemptible */
local_irq_enable();
trace_kvm_check_requests(vcpu);
@@ -554,13 +553,28 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
case KVM_CAP_PPC_SMT:
r = 0;
- if (hv_enabled) {
+ if (kvm) {
+ if (kvm->arch.emul_smt_mode > 1)
+ r = kvm->arch.emul_smt_mode;
+ else
+ r = kvm->arch.smt_mode;
+ } else if (hv_enabled) {
if (cpu_has_feature(CPU_FTR_ARCH_300))
r = 1;
else
r = threads_per_subcore;
}
break;
+ case KVM_CAP_PPC_SMT_POSSIBLE:
+ r = 1;
+ if (hv_enabled) {
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ r = ((threads_per_subcore << 1) - 1);
+ else
+ /* P9 can emulate dbells, so allow any mode */
+ r = 8 | 4 | 2 | 1;
+ }
+ break;
case KVM_CAP_PPC_RMA:
r = 0;
break;
@@ -619,6 +633,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = !!hv_enabled && !cpu_has_feature(CPU_FTR_ARCH_300);
break;
#endif
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ case KVM_CAP_PPC_FWNMI:
+ r = hv_enabled;
+ break;
+#endif
case KVM_CAP_PPC_HTM:
r = cpu_has_feature(CPU_FTR_TM_COMP) &&
is_kvmppc_hv_enabled(kvm);
@@ -1538,6 +1557,15 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
break;
}
#endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ case KVM_CAP_PPC_FWNMI:
+ r = -EINVAL;
+ if (!is_kvmppc_hv_enabled(vcpu->kvm))
+ break;
+ r = 0;
+ vcpu->kvm->arch.fwnmi_enabled = true;
+ break;
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
default:
r = -EINVAL;
break;
@@ -1712,6 +1740,15 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
r = 0;
break;
}
+ case KVM_CAP_PPC_SMT: {
+ unsigned long mode = cap->args[0];
+ unsigned long flags = cap->args[1];
+
+ r = -EINVAL;
+ if (kvm->arch.kvm_ops->set_smt_mode)
+ r = kvm->arch.kvm_ops->set_smt_mode(kvm, mode, flags);
+ break;
+ }
#endif
default:
r = -EINVAL;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index aee2bb817ac6..861c5af1c9c4 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -938,7 +938,7 @@ common_load:
/*
* Tail call
*/
- case BPF_JMP | BPF_CALL | BPF_X:
+ case BPF_JMP | BPF_TAIL_CALL:
ctx->seen |= SEEN_TAILCALL;
bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
break;
@@ -1052,6 +1052,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
fp->bpf_func = (void *)image;
fp->jited = 1;
+ fp->jited_len = alloclen;
bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index d0441ad2a990..e508dff92535 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -59,7 +59,9 @@ union ctlreg0 {
unsigned long lap : 1; /* Low-address-protection control */
unsigned long : 4;
unsigned long edat : 1; /* Enhanced-DAT-enablement control */
- unsigned long : 4;
+ unsigned long : 2;
+ unsigned long iep : 1; /* Instruction-Execution-Protection */
+ unsigned long : 1;
unsigned long afp : 1; /* AFP-register control */
unsigned long vx : 1; /* Vector enablement control */
unsigned long : 7;
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index 8acf482162ed..88162bb5c190 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -8,6 +8,7 @@
#ifndef _ASM_S390_DIAG_H
#define _ASM_S390_DIAG_H
+#include <linux/if_ether.h>
#include <linux/percpu.h>
enum diag_stat_enum {
@@ -24,6 +25,7 @@ enum diag_stat_enum {
DIAG_STAT_X224,
DIAG_STAT_X250,
DIAG_STAT_X258,
+ DIAG_STAT_X26C,
DIAG_STAT_X288,
DIAG_STAT_X2C4,
DIAG_STAT_X2FC,
@@ -225,6 +227,30 @@ struct diag204_x_phys_block {
struct diag204_x_phys_cpu cpus[];
} __packed;
+enum diag26c_sc {
+ DIAG26C_MAC_SERVICES = 0x00000030
+};
+
+enum diag26c_version {
+ DIAG26C_VERSION2 = 0x00000002 /* z/VM 5.4.0 */
+};
+
+#define DIAG26C_GET_MAC 0x0000
+struct diag26c_mac_req {
+ u32 resp_buf_len;
+ u32 resp_version;
+ u16 op_code;
+ u16 devno;
+ u8 res[4];
+};
+
+struct diag26c_mac_resp {
+ u32 version;
+ u8 mac[ETH_ALEN];
+ u8 res[2];
+} __aligned(8);
+
int diag204(unsigned long subcode, unsigned long size, void *addr);
int diag224(void *ptr);
+int diag26c(void *req, void *resp, enum diag26c_sc subcode);
#endif /* _ASM_S390_DIAG_H */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 6baae236f461..a409d5991934 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -42,9 +42,11 @@
#define KVM_HALT_POLL_NS_DEFAULT 80000
/* s390-specific vcpu->requests bit members */
-#define KVM_REQ_ENABLE_IBS 8
-#define KVM_REQ_DISABLE_IBS 9
-#define KVM_REQ_ICPT_OPEREXC 10
+#define KVM_REQ_ENABLE_IBS KVM_ARCH_REQ(0)
+#define KVM_REQ_DISABLE_IBS KVM_ARCH_REQ(1)
+#define KVM_REQ_ICPT_OPEREXC KVM_ARCH_REQ(2)
+#define KVM_REQ_START_MIGRATION KVM_ARCH_REQ(3)
+#define KVM_REQ_STOP_MIGRATION KVM_ARCH_REQ(4)
#define SIGP_CTRL_C 0x80
#define SIGP_CTRL_SCN_MASK 0x3f
@@ -56,7 +58,7 @@ union bsca_sigp_ctrl {
__u8 r : 1;
__u8 scn : 6;
};
-} __packed;
+};
union esca_sigp_ctrl {
__u16 value;
@@ -65,14 +67,14 @@ union esca_sigp_ctrl {
__u8 reserved: 7;
__u8 scn;
};
-} __packed;
+};
struct esca_entry {
union esca_sigp_ctrl sigp_ctrl;
__u16 reserved1[3];
__u64 sda;
__u64 reserved2[6];
-} __packed;
+};
struct bsca_entry {
__u8 reserved0;
@@ -80,7 +82,7 @@ struct bsca_entry {
__u16 reserved[3];
__u64 sda;
__u64 reserved2[2];
-} __attribute__((packed));
+};
union ipte_control {
unsigned long val;
@@ -97,7 +99,7 @@ struct bsca_block {
__u64 mcn;
__u64 reserved2;
struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
-} __attribute__((packed));
+};
struct esca_block {
union ipte_control ipte_control;
@@ -105,7 +107,7 @@ struct esca_block {
__u64 mcn[4];
__u64 reserved2[20];
struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
-} __packed;
+};
/*
* This struct is used to store some machine check info from lowcore
@@ -274,7 +276,7 @@ struct kvm_s390_sie_block {
struct kvm_s390_itdb {
__u8 data[256];
-} __packed;
+};
struct sie_page {
struct kvm_s390_sie_block sie_block;
@@ -282,7 +284,7 @@ struct sie_page {
__u8 reserved218[1000]; /* 0x0218 */
struct kvm_s390_itdb itdb; /* 0x0600 */
__u8 reserved700[2304]; /* 0x0700 */
-} __packed;
+};
struct kvm_vcpu_stat {
u64 exit_userspace;
@@ -695,7 +697,7 @@ struct sie_page2 {
__u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */
struct kvm_s390_crypto_cb crycb; /* 0x0800 */
u8 reserved900[0x1000 - 0x900]; /* 0x0900 */
-} __packed;
+};
struct kvm_s390_vsie {
struct mutex mutex;
@@ -705,6 +707,12 @@ struct kvm_s390_vsie {
struct page *pages[KVM_MAX_VCPUS];
};
+struct kvm_s390_migration_state {
+ unsigned long bitmap_size; /* in bits (number of guest pages) */
+ atomic64_t dirty_pages; /* number of dirty pages */
+ unsigned long *pgste_bitmap;
+};
+
struct kvm_arch{
void *sca;
int use_esca;
@@ -732,6 +740,7 @@ struct kvm_arch{
struct kvm_s390_crypto crypto;
struct kvm_s390_vsie vsie;
u64 epoch;
+ struct kvm_s390_migration_state *migration_state;
/* subset of available cpu features enabled by user space */
DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
};
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index 13623b9991d4..9d91cf3e427f 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -26,6 +26,12 @@
#define MCCK_CODE_PSW_MWP_VALID _BITUL(63 - 20)
#define MCCK_CODE_PSW_IA_VALID _BITUL(63 - 23)
+#define MCCK_CR14_CR_PENDING_SUB_MASK (1 << 28)
+#define MCCK_CR14_RECOVERY_SUB_MASK (1 << 27)
+#define MCCK_CR14_DEGRAD_SUB_MASK (1 << 26)
+#define MCCK_CR14_EXT_DAMAGE_SUB_MASK (1 << 25)
+#define MCCK_CR14_WARN_SUB_MASK (1 << 24)
+
#ifndef __ASSEMBLY__
union mci {
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 3dd2a1d308dd..69d09c39bbcd 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -28,6 +28,7 @@
#define KVM_DEV_FLIC_CLEAR_IO_IRQ 8
#define KVM_DEV_FLIC_AISM 9
#define KVM_DEV_FLIC_AIRQ_INJECT 10
+#define KVM_DEV_FLIC_AISM_ALL 11
/*
* We can have up to 4*64k pending subchannels + 8 adapter interrupts,
* as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
@@ -53,6 +54,11 @@ struct kvm_s390_ais_req {
__u16 mode;
};
+struct kvm_s390_ais_all {
+ __u8 simm;
+ __u8 nimm;
+};
+
#define KVM_S390_IO_ADAPTER_MASK 1
#define KVM_S390_IO_ADAPTER_MAP 2
#define KVM_S390_IO_ADAPTER_UNMAP 3
@@ -70,6 +76,7 @@ struct kvm_s390_io_adapter_req {
#define KVM_S390_VM_TOD 1
#define KVM_S390_VM_CRYPTO 2
#define KVM_S390_VM_CPU_MODEL 3
+#define KVM_S390_VM_MIGRATION 4
/* kvm attributes for mem_ctrl */
#define KVM_S390_VM_MEM_ENABLE_CMMA 0
@@ -151,6 +158,11 @@ struct kvm_s390_vm_cpu_subfunc {
#define KVM_S390_VM_CRYPTO_DISABLE_AES_KW 2
#define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW 3
+/* kvm attributes for migration mode */
+#define KVM_S390_VM_MIGRATION_STOP 0
+#define KVM_S390_VM_MIGRATION_START 1
+#define KVM_S390_VM_MIGRATION_STATUS 2
+
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
/* general purpose regs for s390 */
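The new KVM_S390_VM_MIGRATION group and its START/STOP/STATUS attributes are driven from userspace through the VM-level device-attribute ioctl, like the other KVM_S390_VM_* groups. A rough sketch of turning migration mode on, assuming an open VM file descriptor:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Enable migration mode so the kernel starts tracking dirty CMMA state. */
static int sketch_start_migration(int vm_fd)
{
	struct kvm_device_attr attr = {
		.group = KVM_S390_VM_MIGRATION,
		.attr  = KVM_S390_VM_MIGRATION_START,
	};

	return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
}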
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index e8e5ecf673fd..52a63f4175cb 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -104,4 +104,8 @@
#define SO_COOKIE 57
+#define SCM_TIMESTAMPING_PKTINFO 58
+
+#define SO_PEERGROUPS 59
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index ac6abcd3fe6a..349914571772 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -38,6 +38,7 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = {
[DIAG_STAT_X224] = { .code = 0x224, .name = "EBCDIC-Name Table" },
[DIAG_STAT_X250] = { .code = 0x250, .name = "Block I/O" },
[DIAG_STAT_X258] = { .code = 0x258, .name = "Page-Reference Services" },
+ [DIAG_STAT_X26C] = { .code = 0x26c, .name = "Certain System Information" },
[DIAG_STAT_X288] = { .code = 0x288, .name = "Time Bomb" },
[DIAG_STAT_X2C4] = { .code = 0x2c4, .name = "FTP Services" },
[DIAG_STAT_X2FC] = { .code = 0x2fc, .name = "Guest Performance Data" },
@@ -236,3 +237,31 @@ int diag224(void *ptr)
return rc;
}
EXPORT_SYMBOL(diag224);
+
+/*
+ * Diagnose 26C: Access Certain System Information
+ */
+static inline int __diag26c(void *req, void *resp, enum diag26c_sc subcode)
+{
+ register unsigned long _req asm("2") = (addr_t) req;
+ register unsigned long _resp asm("3") = (addr_t) resp;
+ register unsigned long _subcode asm("4") = subcode;
+ register unsigned long _rc asm("5") = -EOPNOTSUPP;
+
+ asm volatile(
+ " sam31\n"
+ " diag %[rx],%[ry],0x26c\n"
+ "0: sam64\n"
+ EX_TABLE(0b,0b)
+ : "+d" (_rc)
+ : [rx] "d" (_req), "d" (_resp), [ry] "d" (_subcode)
+ : "cc", "memory");
+ return _rc;
+}
+
+int diag26c(void *req, void *resp, enum diag26c_sc subcode)
+{
+ diag_stat_inc(DIAG_STAT_X26C);
+ return __diag26c(req, resp, subcode);
+}
+EXPORT_SYMBOL(diag26c);
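The diag26c() wrapper pairs with the request/response layouts added to asm/diag.h earlier in this diff. A sketch of a caller asking z/VM for the MAC address bound to a device number; field usage is inferred from those structures, and a real caller may need 31-bit addressable buffers since the wrapper switches to 31-bit addressing mode:

#include <linux/types.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/if_ether.h>
#include <asm/diag.h>

static int sketch_get_mac(u16 devno, u8 mac[ETH_ALEN])
{
	struct diag26c_mac_req req = {
		.resp_buf_len = sizeof(struct diag26c_mac_resp),
		.resp_version = DIAG26C_VERSION2,
		.op_code      = DIAG26C_GET_MAC,
		.devno        = devno,
	};
	struct diag26c_mac_resp resp = {};
	int rc;

	rc = diag26c(&req, &resp, DIAG26C_MAC_SERVICES);
	if (rc)
		return rc;
	if (resp.version != DIAG26C_VERSION2)
		return -EOPNOTSUPP;
	memcpy(mac, resp.mac, ETH_ALEN);
	return 0;
}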
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 875f8bea8c67..653cae5e1ee1 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -89,7 +89,7 @@ struct region3_table_entry_fc1 {
unsigned long f : 1; /* Fetch-Protection Bit */
unsigned long fc : 1; /* Format-Control */
unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long co : 1; /* Change-Recording Override */
+ unsigned long iep: 1; /* Instruction-Execution-Protection */
unsigned long : 2;
unsigned long i : 1; /* Region-Invalid Bit */
unsigned long cr : 1; /* Common-Region Bit */
@@ -131,7 +131,7 @@ struct segment_entry_fc1 {
unsigned long f : 1; /* Fetch-Protection Bit */
unsigned long fc : 1; /* Format-Control */
unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long co : 1; /* Change-Recording Override */
+ unsigned long iep: 1; /* Instruction-Execution-Protection */
unsigned long : 2;
unsigned long i : 1; /* Segment-Invalid Bit */
unsigned long cs : 1; /* Common-Segment Bit */
@@ -168,7 +168,8 @@ union page_table_entry {
unsigned long z : 1; /* Zero Bit */
unsigned long i : 1; /* Page-Invalid Bit */
unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long : 9;
+ unsigned long iep: 1; /* Instruction-Execution-Protection */
+ unsigned long : 8;
};
};
@@ -241,7 +242,7 @@ struct ale {
unsigned long asteo : 25; /* ASN-Second-Table-Entry Origin */
unsigned long : 6;
unsigned long astesn : 32; /* ASTE Sequence Number */
-} __packed;
+};
struct aste {
unsigned long i : 1; /* ASX-Invalid Bit */
@@ -257,7 +258,7 @@ struct aste {
unsigned long ald : 32;
unsigned long astesn : 32;
/* .. more fields there */
-} __packed;
+};
int ipte_lock_held(struct kvm_vcpu *vcpu)
{
@@ -485,6 +486,7 @@ enum prot_type {
PROT_TYPE_KEYC = 1,
PROT_TYPE_ALC = 2,
PROT_TYPE_DAT = 3,
+ PROT_TYPE_IEP = 4,
};
static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
@@ -500,6 +502,9 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
switch (code) {
case PGM_PROTECTION:
switch (prot) {
+ case PROT_TYPE_IEP:
+ tec->b61 = 1;
+ /* FALL THROUGH */
case PROT_TYPE_LA:
tec->b56 = 1;
break;
@@ -591,6 +596,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
* @gpa: points to where guest physical (absolute) address should be stored
* @asce: effective asce
* @mode: indicates the access mode to be used
+ * @prot: returns the type for protection exceptions
*
* Translate a guest virtual address into a guest absolute address by means
* of dynamic address translation as specified by the architecture.
@@ -606,19 +612,21 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
*/
static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
unsigned long *gpa, const union asce asce,
- enum gacc_mode mode)
+ enum gacc_mode mode, enum prot_type *prot)
{
union vaddress vaddr = {.addr = gva};
union raddress raddr = {.addr = gva};
union page_table_entry pte;
int dat_protection = 0;
+ int iep_protection = 0;
union ctlreg0 ctlreg0;
unsigned long ptr;
- int edat1, edat2;
+ int edat1, edat2, iep;
ctlreg0.val = vcpu->arch.sie_block->gcr[0];
edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
+ iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
if (asce.r)
goto real_address;
ptr = asce.origin * 4096;
@@ -702,6 +710,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
return PGM_TRANSLATION_SPEC;
if (rtte.fc && edat2) {
dat_protection |= rtte.fc1.p;
+ iep_protection = rtte.fc1.iep;
raddr.rfaa = rtte.fc1.rfaa;
goto absolute_address;
}
@@ -729,6 +738,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
return PGM_TRANSLATION_SPEC;
if (ste.fc && edat1) {
dat_protection |= ste.fc1.p;
+ iep_protection = ste.fc1.iep;
raddr.sfaa = ste.fc1.sfaa;
goto absolute_address;
}
@@ -745,12 +755,19 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
if (pte.z)
return PGM_TRANSLATION_SPEC;
dat_protection |= pte.p;
+ iep_protection = pte.iep;
raddr.pfra = pte.pfra;
real_address:
raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
absolute_address:
- if (mode == GACC_STORE && dat_protection)
+ if (mode == GACC_STORE && dat_protection) {
+ *prot = PROT_TYPE_DAT;
return PGM_PROTECTION;
+ }
+ if (mode == GACC_IFETCH && iep_protection && iep) {
+ *prot = PROT_TYPE_IEP;
+ return PGM_PROTECTION;
+ }
if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
return PGM_ADDRESSING;
*gpa = raddr.addr;
@@ -782,6 +799,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
int lap_enabled, rc = 0;
+ enum prot_type prot;
lap_enabled = low_address_protection_enabled(vcpu, asce);
while (nr_pages) {
@@ -791,7 +809,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
PROT_TYPE_LA);
ga &= PAGE_MASK;
if (psw_bits(*psw).dat) {
- rc = guest_translate(vcpu, ga, pages, asce, mode);
+ rc = guest_translate(vcpu, ga, pages, asce, mode, &prot);
if (rc < 0)
return rc;
} else {
@@ -800,7 +818,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
rc = PGM_ADDRESSING;
}
if (rc)
- return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_DAT);
+ return trans_exc(vcpu, rc, ga, ar, mode, prot);
ga += PAGE_SIZE;
pages++;
nr_pages--;
@@ -886,6 +904,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
unsigned long *gpa, enum gacc_mode mode)
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ enum prot_type prot;
union asce asce;
int rc;
@@ -900,9 +919,9 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
}
if (psw_bits(*psw).dat && !asce.r) { /* Use DAT? */
- rc = guest_translate(vcpu, gva, gpa, asce, mode);
+ rc = guest_translate(vcpu, gva, gpa, asce, mode, &prot);
if (rc > 0)
- return trans_exc(vcpu, rc, gva, 0, mode, PROT_TYPE_DAT);
+ return trans_exc(vcpu, rc, gva, 0, mode, prot);
} else {
*gpa = kvm_s390_real_to_abs(vcpu, gva);
if (kvm_is_error_gpa(vcpu->kvm, *gpa))
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 2d120fef7d90..a619ddae610d 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -251,8 +251,13 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
__clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask);
if (psw_mchk_disabled(vcpu))
active_mask &= ~IRQ_PEND_MCHK_MASK;
+ /*
+ * Check the cr14 of both the floating and the local interrupt state,
+ * because bit IRQ_PEND_MCHK_REP could be set in either case.
+ */
if (!(vcpu->arch.sie_block->gcr[14] &
- vcpu->kvm->arch.float_int.mchk.cr14))
+ (vcpu->kvm->arch.float_int.mchk.cr14 |
+ vcpu->arch.local_int.irq.mchk.cr14)))
__clear_bit(IRQ_PEND_MCHK_REP, &active_mask);
/*
@@ -1876,6 +1881,28 @@ out:
return ret < 0 ? ret : n;
}
+static int flic_ais_mode_get_all(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+ struct kvm_s390_ais_all ais;
+
+ if (attr->attr < sizeof(ais))
+ return -EINVAL;
+
+ if (!test_kvm_facility(kvm, 72))
+ return -ENOTSUPP;
+
+ mutex_lock(&fi->ais_lock);
+ ais.simm = fi->simm;
+ ais.nimm = fi->nimm;
+ mutex_unlock(&fi->ais_lock);
+
+ if (copy_to_user((void __user *)attr->addr, &ais, sizeof(ais)))
+ return -EFAULT;
+
+ return 0;
+}
+
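/*
 * Userspace sketch, not part of this patch: KVM_DEV_FLIC_AISM_ALL is
 * read through the FLIC device's attribute interface; note that the
 * attr field carries the userspace buffer size, which the handler
 * above checks against sizeof(struct kvm_s390_ais_all).  flic_fd is
 * assumed to come from a prior KVM_CREATE_DEVICE call.
 */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int sketch_get_ais_all(int flic_fd, struct kvm_s390_ais_all *ais)
{
	struct kvm_device_attr attr = {
		.group = KVM_DEV_FLIC_AISM_ALL,
		.attr  = sizeof(*ais),
		.addr  = (__u64)(unsigned long)ais,
	};

	return ioctl(flic_fd, KVM_GET_DEVICE_ATTR, &attr);
}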
static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
int r;
@@ -1885,6 +1912,9 @@ static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
r = get_all_floating_irqs(dev->kvm, (u8 __user *) attr->addr,
attr->attr);
break;
+ case KVM_DEV_FLIC_AISM_ALL:
+ r = flic_ais_mode_get_all(dev->kvm, attr);
+ break;
default:
r = -EINVAL;
}
@@ -2235,6 +2265,25 @@ static int flic_inject_airq(struct kvm *kvm, struct kvm_device_attr *attr)
return kvm_s390_inject_airq(kvm, adapter);
}
+static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+ struct kvm_s390_ais_all ais;
+
+ if (!test_kvm_facility(kvm, 72))
+ return -ENOTSUPP;
+
+ if (copy_from_user(&ais, (void __user *)attr->addr, sizeof(ais)))
+ return -EFAULT;
+
+ mutex_lock(&fi->ais_lock);
+ fi->simm = ais.simm;
+ fi->nimm = ais.nimm;
+ mutex_unlock(&fi->ais_lock);
+
+ return 0;
+}
+
static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
int r = 0;
@@ -2277,6 +2326,9 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
case KVM_DEV_FLIC_AIRQ_INJECT:
r = flic_inject_airq(dev->kvm, attr);
break;
+ case KVM_DEV_FLIC_AISM_ALL:
+ r = flic_ais_mode_set_all(dev->kvm, attr);
+ break;
default:
r = -EINVAL;
}
@@ -2298,6 +2350,7 @@ static int flic_has_attr(struct kvm_device *dev,
case KVM_DEV_FLIC_CLEAR_IO_IRQ:
case KVM_DEV_FLIC_AISM:
case KVM_DEV_FLIC_AIRQ_INJECT:
+ case KVM_DEV_FLIC_AISM_ALL:
return 0;
}
return -ENXIO;
@@ -2415,6 +2468,42 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
return ret;
}
+/*
+ * Inject the machine check into the guest.
+ */
+void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
+ struct mcck_volatile_info *mcck_info)
+{
+ struct kvm_s390_interrupt_info inti;
+ struct kvm_s390_irq irq;
+ struct kvm_s390_mchk_info *mchk;
+ union mci mci;
+ __u64 cr14 = 0; /* upper bits are not used */
+
+ mci.val = mcck_info->mcic;
+ if (mci.sr)
+ cr14 |= MCCK_CR14_RECOVERY_SUB_MASK;
+ if (mci.dg)
+ cr14 |= MCCK_CR14_DEGRAD_SUB_MASK;
+ if (mci.w)
+ cr14 |= MCCK_CR14_WARN_SUB_MASK;
+
+ mchk = mci.ck ? &inti.mchk : &irq.u.mchk;
+ mchk->cr14 = cr14;
+ mchk->mcic = mcck_info->mcic;
+ mchk->ext_damage_code = mcck_info->ext_damage_code;
+ mchk->failing_storage_address = mcck_info->failing_storage_address;
+ if (mci.ck) {
+ /* Inject the floating machine check */
+ inti.type = KVM_S390_MCHK;
+ WARN_ON_ONCE(__inject_vm(vcpu->kvm, &inti));
+ } else {
+ /* Inject the machine check into the specified vcpu */
+ irq.type = KVM_S390_MCHK;
+ WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
+ }
+}
+
int kvm_set_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index b0d7de5a533d..3f2884e99ed4 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -30,6 +30,7 @@
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
+#include <linux/string.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
@@ -386,6 +387,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_SKEYS:
case KVM_CAP_S390_IRQ_STATE:
case KVM_CAP_S390_USER_INSTR0:
+ case KVM_CAP_S390_CMMA_MIGRATION:
case KVM_CAP_S390_AIS:
r = 1;
break;
@@ -749,6 +751,129 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
return 0;
}
+static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
+{
+ int cx;
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(cx, vcpu, kvm)
+ kvm_s390_sync_request(req, vcpu);
+}
+
+/*
+ * Must be called with kvm->srcu held to avoid races on memslots, and with
+ * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
+ */
+static int kvm_s390_vm_start_migration(struct kvm *kvm)
+{
+ struct kvm_s390_migration_state *mgs;
+ struct kvm_memory_slot *ms;
+ /* should be the only one */
+ struct kvm_memslots *slots;
+ unsigned long ram_pages;
+ int slotnr;
+
+ /* migration mode already enabled */
+ if (kvm->arch.migration_state)
+ return 0;
+
+ slots = kvm_memslots(kvm);
+ if (!slots || !slots->used_slots)
+ return -EINVAL;
+
+ mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
+ if (!mgs)
+ return -ENOMEM;
+ kvm->arch.migration_state = mgs;
+
+ if (kvm->arch.use_cmma) {
+ /*
+ * Get the last slot. They should be sorted by base_gfn, so the
+ * last slot is also the one at the end of the address space.
+ * We have verified above that at least one slot is present.
+ */
+ ms = slots->memslots + slots->used_slots - 1;
+ /* round up so we only use full longs */
+ ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
+ /* allocate enough bytes to store all the bits */
+ mgs->pgste_bitmap = vmalloc(ram_pages / 8);
+ if (!mgs->pgste_bitmap) {
+ kfree(mgs);
+ kvm->arch.migration_state = NULL;
+ return -ENOMEM;
+ }
+
+ mgs->bitmap_size = ram_pages;
+ atomic64_set(&mgs->dirty_pages, ram_pages);
+ /* mark all the pages in active slots as dirty */
+ for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
+ ms = slots->memslots + slotnr;
+ bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
+ }
+
+ kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
+ }
+ return 0;
+}
+
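
To make the sizing in the use_cmma branch above concrete, a worked example with illustrative numbers (not taken from the patch):

    /*
     * Assuming 4 KiB pages and BITS_PER_LONG == 64, a guest whose last
     * memslot ends at gfn 0xa0000 (2.5 GiB of guest memory) gets:
     *
     *      ram_pages    = roundup(0xa0000, 64) = 0xa0000  (655360 pages)
     *      pgste_bitmap = ram_pages / 8        = 81920 bytes (80 KiB)
     *
     * i.e. one bit of migration state per guest page, with dirty_pages
     * initialized to ram_pages because every page starts out dirty.
     */
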
+/*
+ * Must be called with kvm->lock to avoid races with ourselves and
+ * kvm_s390_vm_start_migration.
+ */
+static int kvm_s390_vm_stop_migration(struct kvm *kvm)
+{
+ struct kvm_s390_migration_state *mgs;
+
+ /* migration mode already disabled */
+ if (!kvm->arch.migration_state)
+ return 0;
+ mgs = kvm->arch.migration_state;
+ kvm->arch.migration_state = NULL;
+
+ if (kvm->arch.use_cmma) {
+ kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
+ vfree(mgs->pgste_bitmap);
+ }
+ kfree(mgs);
+ return 0;
+}
+
+static int kvm_s390_vm_set_migration(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ int idx, res = -ENXIO;
+
+ mutex_lock(&kvm->lock);
+ switch (attr->attr) {
+ case KVM_S390_VM_MIGRATION_START:
+ idx = srcu_read_lock(&kvm->srcu);
+ res = kvm_s390_vm_start_migration(kvm);
+ srcu_read_unlock(&kvm->srcu, idx);
+ break;
+ case KVM_S390_VM_MIGRATION_STOP:
+ res = kvm_s390_vm_stop_migration(kvm);
+ break;
+ default:
+ break;
+ }
+ mutex_unlock(&kvm->lock);
+
+ return res;
+}
+
+static int kvm_s390_vm_get_migration(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ u64 mig = (kvm->arch.migration_state != NULL);
+
+ if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
+ return -ENXIO;
+
+ if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
+ return -EFAULT;
+ return 0;
+}
+
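
The attribute group added above is driven through the generic device-attr ioctls on the VM fd; a hedged sketch using the constants introduced here (set_migration_mode() and the fd handling are assumptions):

    static int set_migration_mode(int vm_fd, int on)
    {
            struct kvm_device_attr attr = {
                    .group = KVM_S390_VM_MIGRATION,
                    .attr  = on ? KVM_S390_VM_MIGRATION_START
                                : KVM_S390_VM_MIGRATION_STOP,
            };

            /* KVM_S390_VM_MIGRATION_STATUS is read back the same way via
             * KVM_GET_DEVICE_ATTR with attr.addr pointing to a __u64 */
            return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
    }
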
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
u8 gtod_high;
@@ -1089,6 +1214,9 @@ static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_CRYPTO:
ret = kvm_s390_vm_set_crypto(kvm, attr);
break;
+ case KVM_S390_VM_MIGRATION:
+ ret = kvm_s390_vm_set_migration(kvm, attr);
+ break;
default:
ret = -ENXIO;
break;
@@ -1111,6 +1239,9 @@ static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_CPU_MODEL:
ret = kvm_s390_get_cpu_model(kvm, attr);
break;
+ case KVM_S390_VM_MIGRATION:
+ ret = kvm_s390_vm_get_migration(kvm, attr);
+ break;
default:
ret = -ENXIO;
break;
@@ -1178,6 +1309,9 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
break;
}
break;
+ case KVM_S390_VM_MIGRATION:
+ ret = 0;
+ break;
default:
ret = -ENXIO;
break;
@@ -1285,6 +1419,182 @@ out:
return r;
}
+/*
+ * Base address and length must be sent at the start of each block, therefore
+ * it's cheaper to send some clean data, as long as it's less than the size of
+ * two longs.
+ */
+#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
+/* for consistency */
+#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
+
+/*
+ * This function searches for the next page with dirty CMMA attributes, and
+ * saves the attributes in the buffer up to either the end of the buffer or
+ * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
+ * no trailing clean bytes are saved.
+ * In case no dirty bits were found, or if CMMA was not enabled or used, the
+ * output buffer will indicate 0 as length.
+ */
+static int kvm_s390_get_cmma_bits(struct kvm *kvm,
+ struct kvm_s390_cmma_log *args)
+{
+ struct kvm_s390_migration_state *s = kvm->arch.migration_state;
+ unsigned long bufsize, hva, pgstev, i, next, cur;
+ int srcu_idx, peek, r = 0, rr;
+ u8 *res;
+
+ cur = args->start_gfn;
+ i = next = pgstev = 0;
+
+ if (unlikely(!kvm->arch.use_cmma))
+ return -ENXIO;
+ /* Invalid/unsupported flags were specified */
+ if (args->flags & ~KVM_S390_CMMA_PEEK)
+ return -EINVAL;
+ /* Migration mode query, and we are not doing a migration */
+ peek = !!(args->flags & KVM_S390_CMMA_PEEK);
+ if (!peek && !s)
+ return -EINVAL;
+ /* CMMA is disabled or was not used, or the buffer has length zero */
+ bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
+ if (!bufsize || !kvm->mm->context.use_cmma) {
+ memset(args, 0, sizeof(*args));
+ return 0;
+ }
+
+ if (!peek) {
+ /* We are not peeking, and there are no dirty pages */
+ if (!atomic64_read(&s->dirty_pages)) {
+ memset(args, 0, sizeof(*args));
+ return 0;
+ }
+ cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
+ args->start_gfn);
+ if (cur >= s->bitmap_size) /* nothing found, loop back */
+ cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
+ if (cur >= s->bitmap_size) { /* again! (very unlikely) */
+ memset(args, 0, sizeof(*args));
+ return 0;
+ }
+ next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
+ }
+
+ res = vmalloc(bufsize);
+ if (!res)
+ return -ENOMEM;
+
+ args->start_gfn = cur;
+
+ down_read(&kvm->mm->mmap_sem);
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ while (i < bufsize) {
+ hva = gfn_to_hva(kvm, cur);
+ if (kvm_is_error_hva(hva)) {
+ r = -EFAULT;
+ break;
+ }
+ /* decrement only if we actually flipped the bit to 0 */
+ if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
+ atomic64_dec(&s->dirty_pages);
+ r = get_pgste(kvm->mm, hva, &pgstev);
+ if (r < 0)
+ pgstev = 0;
+ /* save the value */
+ res[i++] = (pgstev >> 24) & 0x3;
+ /*
+ * if the next bit is too far away, stop.
+ * if we reached the previous "next", find the next one
+ */
+ if (!peek) {
+ if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
+ break;
+ if (cur == next)
+ next = find_next_bit(s->pgste_bitmap,
+ s->bitmap_size, cur + 1);
+ /* reached the end of the bitmap or of the buffer, stop */
+ if ((next >= s->bitmap_size) ||
+ (next >= args->start_gfn + bufsize))
+ break;
+ }
+ cur++;
+ }
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ up_read(&kvm->mm->mmap_sem);
+ args->count = i;
+ args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
+
+ rr = copy_to_user((void __user *)args->values, res, args->count);
+ if (rr)
+ r = -EFAULT;
+
+ vfree(res);
+ return r;
+}
+
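
A consumer of the new KVM_S390_GET_CMMA_BITS ioctl might drain the dirty CMMA state roughly as sketched below. The loop shape follows the semantics implemented above (the kernel rewrites start_gfn and count, remaining reports the dirty pages left); drain_cmma() and the fd plumbing are assumptions:

    #include <linux/kvm.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/ioctl.h>

    static int drain_cmma(int vm_fd)
    {
            uint8_t values[4096];
            struct kvm_s390_cmma_log log = {
                    .start_gfn = 0,
                    .count     = sizeof(values),
                    .flags     = 0, /* 0: consume dirty bits; KVM_S390_CMMA_PEEK: read only */
                    .values    = (uint64_t)(uintptr_t)values,
            };

            for (;;) {
                    if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) < 0)
                            return -1;
                    if (!log.count)         /* nothing (more) to transfer */
                            break;
                    /* log.count PGSTE values for gfns from log.start_gfn are now in values[] */
                    printf("gfn %llu: %u values, %llu dirty pages remaining\n",
                           (unsigned long long)log.start_gfn, log.count,
                           (unsigned long long)log.remaining);
                    if (!log.remaining)
                            break;
                    log.start_gfn += log.count;
                    log.count = sizeof(values);
            }
            return 0;
    }
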
+/*
+ * This function sets the CMMA attributes for the given pages. If the input
+ * buffer has zero length, no action is taken, otherwise the attributes are
+ * set and the mm->context.use_cmma flag is set.
+ */
+static int kvm_s390_set_cmma_bits(struct kvm *kvm,
+ const struct kvm_s390_cmma_log *args)
+{
+ unsigned long hva, mask, pgstev, i;
+ uint8_t *bits;
+ int srcu_idx, r = 0;
+
+ mask = args->mask;
+
+ if (!kvm->arch.use_cmma)
+ return -ENXIO;
+ /* invalid/unsupported flags */
+ if (args->flags != 0)
+ return -EINVAL;
+ /* Enforce sane limit on memory allocation */
+ if (args->count > KVM_S390_CMMA_SIZE_MAX)
+ return -EINVAL;
+ /* Nothing to do */
+ if (args->count == 0)
+ return 0;
+
+ bits = vmalloc(sizeof(*bits) * args->count);
+ if (!bits)
+ return -ENOMEM;
+
+ r = copy_from_user(bits, (void __user *)args->values, args->count);
+ if (r) {
+ r = -EFAULT;
+ goto out;
+ }
+
+ down_read(&kvm->mm->mmap_sem);
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ for (i = 0; i < args->count; i++) {
+ hva = gfn_to_hva(kvm, args->start_gfn + i);
+ if (kvm_is_error_hva(hva)) {
+ r = -EFAULT;
+ break;
+ }
+
+ pgstev = bits[i];
+ pgstev = pgstev << 24;
+ mask &= _PGSTE_GPS_USAGE_MASK;
+ set_pgste_bits(kvm->mm, hva, mask, pgstev);
+ }
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ up_read(&kvm->mm->mmap_sem);
+
+ if (!kvm->mm->context.use_cmma) {
+ down_write(&kvm->mm->mmap_sem);
+ kvm->mm->context.use_cmma = 1;
+ up_write(&kvm->mm->mmap_sem);
+ }
+out:
+ vfree(bits);
+ return r;
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -1363,6 +1673,29 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_s390_set_skeys(kvm, &args);
break;
}
+ case KVM_S390_GET_CMMA_BITS: {
+ struct kvm_s390_cmma_log args;
+
+ r = -EFAULT;
+ if (copy_from_user(&args, argp, sizeof(args)))
+ break;
+ r = kvm_s390_get_cmma_bits(kvm, &args);
+ if (!r) {
+ r = copy_to_user(argp, &args, sizeof(args));
+ if (r)
+ r = -EFAULT;
+ }
+ break;
+ }
+ case KVM_S390_SET_CMMA_BITS: {
+ struct kvm_s390_cmma_log args;
+
+ r = -EFAULT;
+ if (copy_from_user(&args, argp, sizeof(args)))
+ break;
+ r = kvm_s390_set_cmma_bits(kvm, &args);
+ break;
+ }
default:
r = -ENOTTY;
}
@@ -1631,6 +1964,10 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_s390_destroy_adapters(kvm);
kvm_s390_clear_float_irqs(kvm);
kvm_s390_vsie_destroy(kvm);
+ if (kvm->arch.migration_state) {
+ vfree(kvm->arch.migration_state->pgste_bitmap);
+ kfree(kvm->arch.migration_state);
+ }
KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
@@ -1975,7 +2312,6 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
if (!vcpu->arch.sie_block->cbrlo)
return -ENOMEM;
- vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
return 0;
}
@@ -2439,7 +2775,7 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
kvm_s390_vcpu_request_handled(vcpu);
- if (!vcpu->requests)
+ if (!kvm_request_pending(vcpu))
return 0;
/*
* We use MMU_RELOAD just to re-arm the ipte notifier for the
@@ -2488,6 +2824,27 @@ retry:
goto retry;
}
+ if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
+ /*
+ * Disable CMMA virtualization; we will emulate the ESSA
+ * instruction manually, in order to provide additional
+ * functionalities needed for live migration.
+ */
+ vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
+ goto retry;
+ }
+
+ if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
+ /*
+ * Re-enable CMMA virtualization if CMMA is available and
+ * was used.
+ */
+ if ((vcpu->kvm->arch.use_cmma) &&
+ (vcpu->kvm->mm->context.use_cmma))
+ vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
+ goto retry;
+ }
+
/* nothing to do, just clear the request */
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
@@ -2682,6 +3039,9 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
+ struct mcck_volatile_info *mcck_info;
+ struct sie_page *sie_page;
+
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
vcpu->arch.sie_block->icptcode);
trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
@@ -2692,6 +3052,15 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
+ if (exit_reason == -EINTR) {
+ VCPU_EVENT(vcpu, 3, "%s", "machine check");
+ sie_page = container_of(vcpu->arch.sie_block,
+ struct sie_page, sie_block);
+ mcck_info = &sie_page->mcck_info;
+ kvm_s390_reinject_machine_check(vcpu, mcck_info);
+ return 0;
+ }
+
if (vcpu->arch.sie_block->icptcode > 0) {
int rc = kvm_handle_sie_intercept(vcpu);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 55f5c8457d6d..6fedc8bc7a37 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -397,4 +397,6 @@ static inline int kvm_s390_use_sca_entries(void)
*/
return sclp.has_sigpif;
}
+void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
+ struct mcck_volatile_info *mcck_info);
#endif
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index e53292a89257..8a1dac793d6b 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -24,6 +24,7 @@
#include <asm/ebcdic.h>
#include <asm/sysinfo.h>
#include <asm/pgtable.h>
+#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/io.h>
@@ -949,13 +950,72 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
return 0;
}
+static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
+{
+ struct kvm_s390_migration_state *ms = vcpu->kvm->arch.migration_state;
+ int r1, r2, nappended, entries;
+ unsigned long gfn, hva, res, pgstev, ptev;
+ unsigned long *cbrlo;
+
+ /*
+ * We don't need to set SD.FPF.SK to 1 here, because if a machine
+ * check happens here we either handle it or crash.
+ */
+
+ kvm_s390_get_regs_rre(vcpu, &r1, &r2);
+ gfn = vcpu->run->s.regs.gprs[r2] >> PAGE_SHIFT;
+ hva = gfn_to_hva(vcpu->kvm, gfn);
+ entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
+
+ if (kvm_is_error_hva(hva))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+ nappended = pgste_perform_essa(vcpu->kvm->mm, hva, orc, &ptev, &pgstev);
+ if (nappended < 0) {
+ res = orc ? 0x10 : 0;
+ vcpu->run->s.regs.gprs[r1] = res; /* Exception Indication */
+ return 0;
+ }
+ res = (pgstev & _PGSTE_GPS_USAGE_MASK) >> 22;
+ /*
+ * Set the block-content state part of the result. 0 means resident, so
+ * nothing to do if the page is valid. 2 is for preserved pages
+ * (non-present and non-zero), and 3 for zero pages (non-present and
+ * zero).
+ */
+ if (ptev & _PAGE_INVALID) {
+ res |= 2;
+ if (pgstev & _PGSTE_GPS_ZERO)
+ res |= 1;
+ }
+ vcpu->run->s.regs.gprs[r1] = res;
+ /*
+ * It is possible that all the normal 511 slots were full, in which case
+ * we will now write in the 512th slot, which is reserved for host use.
+ * Either way we let the normal ESSA handling code process all the
+ * slots, including the reserved one, if needed.
+ */
+ if (nappended > 0) {
+ cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo & PAGE_MASK);
+ cbrlo[entries] = gfn << PAGE_SHIFT;
+ }
+
+ if (orc) {
+ /* increment only if we are really flipping the bit to 1 */
+ if (!test_and_set_bit(gfn, ms->pgste_bitmap))
+ atomic64_inc(&ms->dirty_pages);
+ }
+
+ return nappended;
+}
+
static int handle_essa(struct kvm_vcpu *vcpu)
{
/* entries expected to be 1FF */
int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
unsigned long *cbrlo;
struct gmap *gmap;
- int i;
+ int i, orc;
VCPU_EVENT(vcpu, 4, "ESSA: release %d pages", entries);
gmap = vcpu->arch.gmap;
@@ -965,12 +1025,45 @@ static int handle_essa(struct kvm_vcpu *vcpu)
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
-
- if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6)
+ /* Check for invalid operation request code */
+ orc = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
+ if (orc > ESSA_MAX)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- /* Retry the ESSA instruction */
- kvm_s390_retry_instr(vcpu);
+ if (likely(!vcpu->kvm->arch.migration_state)) {
+ /*
+ * CMMA is enabled in the KVM settings, but is disabled in
+ * the SIE block and in the mm_context, and we are not doing
+ * a migration. Enable CMMA in the mm_context.
+ * Since we need to take a write lock to write to the context
+ * to avoid races with storage keys handling, we check if the
+ * value really needs to be written to; if the value is
+ * already correct, we do nothing and avoid the lock.
+ */
+ if (vcpu->kvm->mm->context.use_cmma == 0) {
+ down_write(&vcpu->kvm->mm->mmap_sem);
+ vcpu->kvm->mm->context.use_cmma = 1;
+ up_write(&vcpu->kvm->mm->mmap_sem);
+ }
+ /*
+ * If we are here, we are supposed to have CMMA enabled in
+ * the SIE block. Enabling CMMA works on a per-CPU basis,
+ * while the context use_cmma flag is per process.
+ * It's possible that the context flag is enabled and the
+ * SIE flag is not, so we set the flag always; if it was
+ * already set, nothing changes, otherwise we enable it
+ * on this CPU too.
+ */
+ vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
+ /* Retry the ESSA instruction */
+ kvm_s390_retry_instr(vcpu);
+ } else {
+ /* Account for the possible extra cbrl entry */
+ i = do_essa(vcpu, orc);
+ if (i < 0)
+ return i;
+ entries += i;
+ }
vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
down_read(&gmap->mm->mmap_sem);
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 4719ecb9ab42..715c19c45d9a 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -26,16 +26,21 @@
struct vsie_page {
struct kvm_s390_sie_block scb_s; /* 0x0000 */
+ /*
+ * the backup info for machine check. ensure it's at
+ * the same offset as that in struct sie_page!
+ */
+ struct mcck_volatile_info mcck_info; /* 0x0200 */
/* the pinned original scb */
- struct kvm_s390_sie_block *scb_o; /* 0x0200 */
+ struct kvm_s390_sie_block *scb_o; /* 0x0218 */
/* the shadow gmap in use by the vsie_page */
- struct gmap *gmap; /* 0x0208 */
+ struct gmap *gmap; /* 0x0220 */
/* address of the last reported fault to guest2 */
- unsigned long fault_addr; /* 0x0210 */
- __u8 reserved[0x0700 - 0x0218]; /* 0x0218 */
+ unsigned long fault_addr; /* 0x0228 */
+ __u8 reserved[0x0700 - 0x0230]; /* 0x0230 */
struct kvm_s390_crypto_cb crycb; /* 0x0700 */
__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */
-} __packed;
+};
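
The layout constraint spelled out in the comment (mcck_info must sit at the same offset as in struct sie_page) could also be enforced at compile time; a minimal sketch, to be placed inside any function once both structures are visible:

    BUILD_BUG_ON(offsetof(struct vsie_page, mcck_info) !=
                 offsetof(struct sie_page, mcck_info));
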
/* trigger a validity icpt for the given scb */
static int set_validity_icpt(struct kvm_s390_sie_block *scb,
@@ -801,6 +806,8 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+ struct mcck_volatile_info *mcck_info;
+ struct sie_page *sie_page;
int rc;
handle_last_fault(vcpu, vsie_page);
@@ -822,6 +829,14 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
local_irq_enable();
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ if (rc == -EINTR) {
+ VCPU_EVENT(vcpu, 3, "%s", "machine check");
+ sie_page = container_of(scb_s, struct sie_page, sie_block);
+ mcck_info = &sie_page->mcck_info;
+ kvm_s390_reinject_machine_check(vcpu, mcck_info);
+ return 0;
+ }
+
if (rc > 0)
rc = 0; /* we could still have an icpt */
else if (rc == -EFAULT)
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 6e97a2e3fd8d..01c6fbc3e85b 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -991,7 +991,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
}
break;
}
- case BPF_JMP | BPF_CALL | BPF_X:
+ case BPF_JMP | BPF_TAIL_CALL:
/*
* Implicit input:
* B1: pointer to ctx
@@ -1329,6 +1329,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
bpf_jit_binary_lock_ro(header);
fp->bpf_func = (void *) jit.prg_buf;
fp->jited = 1;
+ fp->jited_len = jit.size;
free_addrs:
kfree(jit.addrs);
out:
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 3f4ad19d9ec7..186fd8199f54 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -94,6 +94,10 @@
#define SO_COOKIE 0x003b
+#define SCM_TIMESTAMPING_PKTINFO 0x003c
+
+#define SO_PEERGROUPS 0x003d
+
/* Security levels - as per NRL IPv6 - don't actually do anything */
#define SO_SECURITY_AUTHENTICATION 0x5001
#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 21de77419f48..8799ae9a8788 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -802,8 +802,13 @@ static void build_prologue(struct jit_ctx *ctx)
{
s32 stack_needed = BASE_STACKFRAME;
- if (ctx->saw_frame_pointer || ctx->saw_tail_call)
- stack_needed += MAX_BPF_STACK;
+ if (ctx->saw_frame_pointer || ctx->saw_tail_call) {
+ struct bpf_prog *prog = ctx->prog;
+ u32 stack_depth;
+
+ stack_depth = prog->aux->stack_depth;
+ stack_needed += round_up(stack_depth, 16);
+ }
if (ctx->saw_tail_call)
stack_needed += 8;
@@ -1217,7 +1222,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
}
/* tail call */
- case BPF_JMP | BPF_CALL |BPF_X:
+ case BPF_JMP | BPF_TAIL_CALL:
emit_tail_call(ctx);
break;
@@ -1555,6 +1560,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
prog->bpf_func = (void *)ctx.image;
prog->jited = 1;
+ prog->jited_len = image_size;
out_off:
kfree(ctx.offset);
diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S
index 910565547163..8739cf7795de 100644
--- a/arch/x86/crypto/aes-x86_64-asm_64.S
+++ b/arch/x86/crypto/aes-x86_64-asm_64.S
@@ -42,17 +42,15 @@
#define R5E %esi
#define R6 %rdi
#define R6E %edi
-#define R7 %rbp
-#define R7E %ebp
+#define R7 %r9 /* don't use %rbp; it breaks stack traces */
+#define R7E %r9d
#define R8 %r8
-#define R9 %r9
#define R10 %r10
#define R11 %r11
-#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
+#define prologue(FUNC,KEY,B128,B192,r1,r2,r5,r6,r7,r8,r9,r10,r11) \
ENTRY(FUNC); \
movq r1,r2; \
- movq r3,r4; \
leaq KEY+48(r8),r9; \
movq r10,r11; \
movl (r7),r5 ## E; \
@@ -70,9 +68,8 @@
je B192; \
leaq 32(r9),r9;
-#define epilogue(FUNC,r1,r2,r3,r4,r5,r6,r7,r8,r9) \
+#define epilogue(FUNC,r1,r2,r5,r6,r7,r8,r9) \
movq r1,r2; \
- movq r3,r4; \
movl r5 ## E,(r9); \
movl r6 ## E,4(r9); \
movl r7 ## E,8(r9); \
@@ -88,12 +85,12 @@
movl TAB(,r6,4),r6 ## E; \
roll $16,r2 ## E; \
shrl $16,r4 ## E; \
- movzbl r4 ## H,r7 ## E; \
- movzbl r4 ## L,r4 ## E; \
+ movzbl r4 ## L,r7 ## E; \
+ movzbl r4 ## H,r4 ## E; \
xorl OFFSET(r8),ra ## E; \
xorl OFFSET+4(r8),rb ## E; \
- xorl TAB+3072(,r7,4),r5 ## E;\
- xorl TAB+2048(,r4,4),r6 ## E;\
+ xorl TAB+3072(,r4,4),r5 ## E;\
+ xorl TAB+2048(,r7,4),r6 ## E;\
movzbl r1 ## L,r7 ## E; \
movzbl r1 ## H,r4 ## E; \
movl TAB+1024(,r4,4),r4 ## E;\
@@ -101,19 +98,19 @@
roll $16,r1 ## E; \
shrl $16,r3 ## E; \
xorl TAB(,r7,4),r5 ## E; \
- movzbl r3 ## H,r7 ## E; \
- movzbl r3 ## L,r3 ## E; \
- xorl TAB+3072(,r7,4),r4 ## E;\
- xorl TAB+2048(,r3,4),r5 ## E;\
- movzbl r1 ## H,r7 ## E; \
- movzbl r1 ## L,r3 ## E; \
+ movzbl r3 ## L,r7 ## E; \
+ movzbl r3 ## H,r3 ## E; \
+ xorl TAB+3072(,r3,4),r4 ## E;\
+ xorl TAB+2048(,r7,4),r5 ## E;\
+ movzbl r1 ## L,r7 ## E; \
+ movzbl r1 ## H,r3 ## E; \
shrl $16,r1 ## E; \
- xorl TAB+3072(,r7,4),r6 ## E;\
- movl TAB+2048(,r3,4),r3 ## E;\
- movzbl r1 ## H,r7 ## E; \
- movzbl r1 ## L,r1 ## E; \
- xorl TAB+1024(,r7,4),r6 ## E;\
- xorl TAB(,r1,4),r3 ## E; \
+ xorl TAB+3072(,r3,4),r6 ## E;\
+ movl TAB+2048(,r7,4),r3 ## E;\
+ movzbl r1 ## L,r7 ## E; \
+ movzbl r1 ## H,r1 ## E; \
+ xorl TAB+1024(,r1,4),r6 ## E;\
+ xorl TAB(,r7,4),r3 ## E; \
movzbl r2 ## H,r1 ## E; \
movzbl r2 ## L,r7 ## E; \
shrl $16,r2 ## E; \
@@ -131,9 +128,9 @@
movl r4 ## E,r2 ## E;
#define entry(FUNC,KEY,B128,B192) \
- prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
+ prologue(FUNC,KEY,B128,B192,R2,R8,R1,R3,R4,R6,R10,R5,R11)
-#define return(FUNC) epilogue(FUNC,R8,R2,R9,R7,R5,R6,R3,R4,R11)
+#define return(FUNC) epilogue(FUNC,R8,R2,R5,R6,R3,R4,R11)
#define encrypt_round(TAB,OFFSET) \
round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 3c465184ff8a..16627fec80b2 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -89,6 +89,29 @@ SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
ALL_F: .octa 0xffffffffffffffffffffffffffffffff
.octa 0x00000000000000000000000000000000
+.section .rodata
+.align 16
+.type aad_shift_arr, @object
+.size aad_shift_arr, 272
+aad_shift_arr:
+ .octa 0xffffffffffffffffffffffffffffffff
+ .octa 0xffffffffffffffffffffffffffffff0C
+ .octa 0xffffffffffffffffffffffffffff0D0C
+ .octa 0xffffffffffffffffffffffffff0E0D0C
+ .octa 0xffffffffffffffffffffffff0F0E0D0C
+ .octa 0xffffffffffffffffffffff0C0B0A0908
+ .octa 0xffffffffffffffffffff0D0C0B0A0908
+ .octa 0xffffffffffffffffff0E0D0C0B0A0908
+ .octa 0xffffffffffffffff0F0E0D0C0B0A0908
+ .octa 0xffffffffffffff0C0B0A090807060504
+ .octa 0xffffffffffff0D0C0B0A090807060504
+ .octa 0xffffffffff0E0D0C0B0A090807060504
+ .octa 0xffffffff0F0E0D0C0B0A090807060504
+ .octa 0xffffff0C0B0A09080706050403020100
+ .octa 0xffff0D0C0B0A09080706050403020100
+ .octa 0xff0E0D0C0B0A09080706050403020100
+ .octa 0x0F0E0D0C0B0A09080706050403020100
+
.text
@@ -252,32 +275,66 @@ XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
mov arg8, %r12 # %r12 = aadLen
mov %r12, %r11
pxor %xmm\i, %xmm\i
+ pxor \XMM2, \XMM2
-_get_AAD_loop\num_initial_blocks\operation:
- movd (%r10), \TMP1
- pslldq $12, \TMP1
- psrldq $4, %xmm\i
+ cmp $16, %r11
+ jl _get_AAD_rest8\num_initial_blocks\operation
+_get_AAD_blocks\num_initial_blocks\operation:
+ movdqu (%r10), %xmm\i
+ PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
+ pxor %xmm\i, \XMM2
+ GHASH_MUL \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+ add $16, %r10
+ sub $16, %r12
+ sub $16, %r11
+ cmp $16, %r11
+ jge _get_AAD_blocks\num_initial_blocks\operation
+
+ movdqu \XMM2, %xmm\i
+ cmp $0, %r11
+ je _get_AAD_done\num_initial_blocks\operation
+
+ pxor %xmm\i,%xmm\i
+
+ /* read the last <16B of AAD. since we have at least 4B of
+ data right after the AAD (the ICV, and maybe some CT), we can
+ read 4B/8B blocks safely, and then get rid of the extra stuff */
+_get_AAD_rest8\num_initial_blocks\operation:
+ cmp $4, %r11
+ jle _get_AAD_rest4\num_initial_blocks\operation
+ movq (%r10), \TMP1
+ add $8, %r10
+ sub $8, %r11
+ pslldq $8, \TMP1
+ psrldq $8, %xmm\i
pxor \TMP1, %xmm\i
+ jmp _get_AAD_rest8\num_initial_blocks\operation
+_get_AAD_rest4\num_initial_blocks\operation:
+ cmp $0, %r11
+ jle _get_AAD_rest0\num_initial_blocks\operation
+ mov (%r10), %eax
+ movq %rax, \TMP1
add $4, %r10
- sub $4, %r12
- jne _get_AAD_loop\num_initial_blocks\operation
-
- cmp $16, %r11
- je _get_AAD_loop2_done\num_initial_blocks\operation
-
- mov $16, %r12
-_get_AAD_loop2\num_initial_blocks\operation:
+ sub $4, %r10
+ pslldq $12, \TMP1
psrldq $4, %xmm\i
- sub $4, %r12
- cmp %r11, %r12
- jne _get_AAD_loop2\num_initial_blocks\operation
-
-_get_AAD_loop2_done\num_initial_blocks\operation:
+ pxor \TMP1, %xmm\i
+_get_AAD_rest0\num_initial_blocks\operation:
+ /* finalize: shift out the extra bytes we read, and align
+ left. since pslldq can only shift by an immediate, we use
+ vpshufb and an array of shuffle masks */
+ movq %r12, %r11
+ salq $4, %r11
+ movdqu aad_shift_arr(%r11), \TMP1
+ PSHUFB_XMM \TMP1, %xmm\i
+_get_AAD_rest_final\num_initial_blocks\operation:
PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
+ pxor \XMM2, %xmm\i
+ GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+_get_AAD_done\num_initial_blocks\operation:
xor %r11, %r11 # initialise the data pointer offset as zero
-
- # start AES for num_initial_blocks blocks
+ # start AES for num_initial_blocks blocks
mov %arg5, %rax # %rax = *Y0
movdqu (%rax), \XMM0 # XMM0 = Y0
@@ -322,7 +379,7 @@ aes_loop_initial_dec\num_initial_blocks:
# prepare plaintext/ciphertext for GHASH computation
.endr
.endif
- GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+
# apply GHASH on num_initial_blocks blocks
.if \i == 5
@@ -477,28 +534,66 @@ XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
mov arg8, %r12 # %r12 = aadLen
mov %r12, %r11
pxor %xmm\i, %xmm\i
-_get_AAD_loop\num_initial_blocks\operation:
- movd (%r10), \TMP1
- pslldq $12, \TMP1
- psrldq $4, %xmm\i
+ pxor \XMM2, \XMM2
+
+ cmp $16, %r11
+ jl _get_AAD_rest8\num_initial_blocks\operation
+_get_AAD_blocks\num_initial_blocks\operation:
+ movdqu (%r10), %xmm\i
+ PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
+ pxor %xmm\i, \XMM2
+ GHASH_MUL \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+ add $16, %r10
+ sub $16, %r12
+ sub $16, %r11
+ cmp $16, %r11
+ jge _get_AAD_blocks\num_initial_blocks\operation
+
+ movdqu \XMM2, %xmm\i
+ cmp $0, %r11
+ je _get_AAD_done\num_initial_blocks\operation
+
+ pxor %xmm\i,%xmm\i
+
+ /* read the last <16B of AAD. since we have at least 4B of
+ data right after the AAD (the ICV, and maybe some PT), we can
+ read 4B/8B blocks safely, and then get rid of the extra stuff */
+_get_AAD_rest8\num_initial_blocks\operation:
+ cmp $4, %r11
+ jle _get_AAD_rest4\num_initial_blocks\operation
+ movq (%r10), \TMP1
+ add $8, %r10
+ sub $8, %r11
+ pslldq $8, \TMP1
+ psrldq $8, %xmm\i
pxor \TMP1, %xmm\i
+ jmp _get_AAD_rest8\num_initial_blocks\operation
+_get_AAD_rest4\num_initial_blocks\operation:
+ cmp $0, %r11
+ jle _get_AAD_rest0\num_initial_blocks\operation
+ mov (%r10), %eax
+ movq %rax, \TMP1
add $4, %r10
- sub $4, %r12
- jne _get_AAD_loop\num_initial_blocks\operation
- cmp $16, %r11
- je _get_AAD_loop2_done\num_initial_blocks\operation
- mov $16, %r12
-_get_AAD_loop2\num_initial_blocks\operation:
+ sub $4, %r10
+ pslldq $12, \TMP1
psrldq $4, %xmm\i
- sub $4, %r12
- cmp %r11, %r12
- jne _get_AAD_loop2\num_initial_blocks\operation
-_get_AAD_loop2_done\num_initial_blocks\operation:
+ pxor \TMP1, %xmm\i
+_get_AAD_rest0\num_initial_blocks\operation:
+ /* finalize: shift out the extra bytes we read, and align
+ left. since pslldq can only shift by an immediate, we use
+ vpshufb and an array of shuffle masks */
+ movq %r12, %r11
+ salq $4, %r11
+ movdqu aad_shift_arr(%r11), \TMP1
+ PSHUFB_XMM \TMP1, %xmm\i
+_get_AAD_rest_final\num_initial_blocks\operation:
PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
+ pxor \XMM2, %xmm\i
+ GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+_get_AAD_done\num_initial_blocks\operation:
xor %r11, %r11 # initialise the data pointer offset as zero
-
- # start AES for num_initial_blocks blocks
+ # start AES for num_initial_blocks blocks
mov %arg5, %rax # %rax = *Y0
movdqu (%rax), \XMM0 # XMM0 = Y0
@@ -543,7 +638,7 @@ aes_loop_initial_enc\num_initial_blocks:
# prepare plaintext/ciphertext for GHASH computation
.endr
.endif
- GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+
# apply GHASH on num_initial_blocks blocks
.if \i == 5
@@ -1454,18 +1549,35 @@ _return_T_decrypt:
mov arg10, %r11 # %r11 = auth_tag_len
cmp $16, %r11
je _T_16_decrypt
- cmp $12, %r11
- je _T_12_decrypt
+ cmp $8, %r11
+ jl _T_4_decrypt
_T_8_decrypt:
MOVQ_R64_XMM %xmm0, %rax
mov %rax, (%r10)
- jmp _return_T_done_decrypt
-_T_12_decrypt:
- MOVQ_R64_XMM %xmm0, %rax
- mov %rax, (%r10)
+ add $8, %r10
+ sub $8, %r11
psrldq $8, %xmm0
+ cmp $0, %r11
+ je _return_T_done_decrypt
+_T_4_decrypt:
+ movd %xmm0, %eax
+ mov %eax, (%r10)
+ add $4, %r10
+ sub $4, %r11
+ psrldq $4, %xmm0
+ cmp $0, %r11
+ je _return_T_done_decrypt
+_T_123_decrypt:
movd %xmm0, %eax
- mov %eax, 8(%r10)
+ cmp $2, %r11
+ jl _T_1_decrypt
+ mov %ax, (%r10)
+ cmp $2, %r11
+ je _return_T_done_decrypt
+ add $2, %r10
+ sar $16, %eax
+_T_1_decrypt:
+ mov %al, (%r10)
jmp _return_T_done_decrypt
_T_16_decrypt:
movdqu %xmm0, (%r10)
@@ -1718,18 +1830,35 @@ _return_T_encrypt:
mov arg10, %r11 # %r11 = auth_tag_len
cmp $16, %r11
je _T_16_encrypt
- cmp $12, %r11
- je _T_12_encrypt
+ cmp $8, %r11
+ jl _T_4_encrypt
_T_8_encrypt:
MOVQ_R64_XMM %xmm0, %rax
mov %rax, (%r10)
- jmp _return_T_done_encrypt
-_T_12_encrypt:
- MOVQ_R64_XMM %xmm0, %rax
- mov %rax, (%r10)
+ add $8, %r10
+ sub $8, %r11
psrldq $8, %xmm0
+ cmp $0, %r11
+ je _return_T_done_encrypt
+_T_4_encrypt:
+ movd %xmm0, %eax
+ mov %eax, (%r10)
+ add $4, %r10
+ sub $4, %r11
+ psrldq $4, %xmm0
+ cmp $0, %r11
+ je _return_T_done_encrypt
+_T_123_encrypt:
movd %xmm0, %eax
- mov %eax, 8(%r10)
+ cmp $2, %r11
+ jl _T_1_encrypt
+ mov %ax, (%r10)
+ cmp $2, %r11
+ je _return_T_done_encrypt
+ add $2, %r10
+ sar $16, %eax
+_T_1_encrypt:
+ mov %al, (%r10)
jmp _return_T_done_encrypt
_T_16_encrypt:
movdqu %xmm0, (%r10)
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
index d664382c6e56..faecb1518bf8 100644
--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -155,6 +155,30 @@ SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
ALL_F: .octa 0xffffffffffffffffffffffffffffffff
.octa 0x00000000000000000000000000000000
+.section .rodata
+.align 16
+.type aad_shift_arr, @object
+.size aad_shift_arr, 272
+aad_shift_arr:
+ .octa 0xffffffffffffffffffffffffffffffff
+ .octa 0xffffffffffffffffffffffffffffff0C
+ .octa 0xffffffffffffffffffffffffffff0D0C
+ .octa 0xffffffffffffffffffffffffff0E0D0C
+ .octa 0xffffffffffffffffffffffff0F0E0D0C
+ .octa 0xffffffffffffffffffffff0C0B0A0908
+ .octa 0xffffffffffffffffffff0D0C0B0A0908
+ .octa 0xffffffffffffffffff0E0D0C0B0A0908
+ .octa 0xffffffffffffffff0F0E0D0C0B0A0908
+ .octa 0xffffffffffffff0C0B0A090807060504
+ .octa 0xffffffffffff0D0C0B0A090807060504
+ .octa 0xffffffffff0E0D0C0B0A090807060504
+ .octa 0xffffffff0F0E0D0C0B0A090807060504
+ .octa 0xffffff0C0B0A09080706050403020100
+ .octa 0xffff0D0C0B0A09080706050403020100
+ .octa 0xff0E0D0C0B0A09080706050403020100
+ .octa 0x0F0E0D0C0B0A09080706050403020100
+
+
.text
@@ -372,41 +396,72 @@ VARIABLE_OFFSET = 16*8
.macro INITIAL_BLOCKS_AVX num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC
i = (8-\num_initial_blocks)
+ j = 0
setreg
- mov arg6, %r10 # r10 = AAD
- mov arg7, %r12 # r12 = aadLen
-
-
- mov %r12, %r11
-
- vpxor reg_i, reg_i, reg_i
-_get_AAD_loop\@:
- vmovd (%r10), \T1
- vpslldq $12, \T1, \T1
- vpsrldq $4, reg_i, reg_i
- vpxor \T1, reg_i, reg_i
-
- add $4, %r10
- sub $4, %r12
- jg _get_AAD_loop\@
-
-
- cmp $16, %r11
- je _get_AAD_loop2_done\@
- mov $16, %r12
-
-_get_AAD_loop2\@:
- vpsrldq $4, reg_i, reg_i
- sub $4, %r12
- cmp %r11, %r12
- jg _get_AAD_loop2\@
-
-_get_AAD_loop2_done\@:
-
- #byte-reflect the AAD data
- vpshufb SHUF_MASK(%rip), reg_i, reg_i
-
+ mov arg6, %r10 # r10 = AAD
+ mov arg7, %r12 # r12 = aadLen
+
+
+ mov %r12, %r11
+
+ vpxor reg_j, reg_j, reg_j
+ vpxor reg_i, reg_i, reg_i
+ cmp $16, %r11
+ jl _get_AAD_rest8\@
+_get_AAD_blocks\@:
+ vmovdqu (%r10), reg_i
+ vpshufb SHUF_MASK(%rip), reg_i, reg_i
+ vpxor reg_i, reg_j, reg_j
+ GHASH_MUL_AVX reg_j, \T2, \T1, \T3, \T4, \T5, \T6
+ add $16, %r10
+ sub $16, %r12
+ sub $16, %r11
+ cmp $16, %r11
+ jge _get_AAD_blocks\@
+ vmovdqu reg_j, reg_i
+ cmp $0, %r11
+ je _get_AAD_done\@
+
+ vpxor reg_i, reg_i, reg_i
+
+ /* read the last <16B of AAD. since we have at least 4B of
+ data right after the AAD (the ICV, and maybe some CT), we can
+ read 4B/8B blocks safely, and then get rid of the extra stuff */
+_get_AAD_rest8\@:
+ cmp $4, %r11
+ jle _get_AAD_rest4\@
+ movq (%r10), \T1
+ add $8, %r10
+ sub $8, %r11
+ vpslldq $8, \T1, \T1
+ vpsrldq $8, reg_i, reg_i
+ vpxor \T1, reg_i, reg_i
+ jmp _get_AAD_rest8\@
+_get_AAD_rest4\@:
+ cmp $0, %r11
+ jle _get_AAD_rest0\@
+ mov (%r10), %eax
+ movq %rax, \T1
+ add $4, %r10
+ sub $4, %r11
+ vpslldq $12, \T1, \T1
+ vpsrldq $4, reg_i, reg_i
+ vpxor \T1, reg_i, reg_i
+_get_AAD_rest0\@:
+ /* finalize: shift out the extra bytes we read, and align
+ left. since pslldq can only shift by an immediate, we use
+ vpshufb and an array of shuffle masks */
+ movq %r12, %r11
+ salq $4, %r11
+ movdqu aad_shift_arr(%r11), \T1
+ vpshufb \T1, reg_i, reg_i
+_get_AAD_rest_final\@:
+ vpshufb SHUF_MASK(%rip), reg_i, reg_i
+ vpxor reg_j, reg_i, reg_i
+ GHASH_MUL_AVX reg_i, \T2, \T1, \T3, \T4, \T5, \T6
+
+_get_AAD_done\@:
# initialize the data pointer offset as zero
xor %r11, %r11
@@ -480,7 +535,6 @@ _get_AAD_loop2_done\@:
i = (8-\num_initial_blocks)
j = (9-\num_initial_blocks)
setreg
- GHASH_MUL_AVX reg_i, \T2, \T1, \T3, \T4, \T5, \T6
.rep \num_initial_blocks
vpxor reg_i, reg_j, reg_j
@@ -1427,19 +1481,36 @@ _return_T\@:
cmp $16, %r11
je _T_16\@
- cmp $12, %r11
- je _T_12\@
+ cmp $8, %r11
+ jl _T_4\@
_T_8\@:
vmovq %xmm9, %rax
mov %rax, (%r10)
- jmp _return_T_done\@
-_T_12\@:
- vmovq %xmm9, %rax
- mov %rax, (%r10)
+ add $8, %r10
+ sub $8, %r11
vpsrldq $8, %xmm9, %xmm9
+ cmp $0, %r11
+ je _return_T_done\@
+_T_4\@:
vmovd %xmm9, %eax
- mov %eax, 8(%r10)
+ mov %eax, (%r10)
+ add $4, %r10
+ sub $4, %r11
+ vpsrldq $4, %xmm9, %xmm9
+ cmp $0, %r11
+ je _return_T_done\@
+_T_123\@:
+ vmovd %xmm9, %eax
+ cmp $2, %r11
+ jl _T_1\@
+ mov %ax, (%r10)
+ cmp $2, %r11
+ je _return_T_done\@
+ add $2, %r10
+ sar $16, %eax
+_T_1\@:
+ mov %al, (%r10)
jmp _return_T_done\@
_T_16\@:
@@ -1631,41 +1702,73 @@ ENDPROC(aesni_gcm_dec_avx_gen2)
.macro INITIAL_BLOCKS_AVX2 num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER
i = (8-\num_initial_blocks)
+ j = 0
setreg
- mov arg6, %r10 # r10 = AAD
- mov arg7, %r12 # r12 = aadLen
-
-
- mov %r12, %r11
-
- vpxor reg_i, reg_i, reg_i
-_get_AAD_loop\@:
- vmovd (%r10), \T1
- vpslldq $12, \T1, \T1
- vpsrldq $4, reg_i, reg_i
- vpxor \T1, reg_i, reg_i
-
- add $4, %r10
- sub $4, %r12
- jg _get_AAD_loop\@
-
-
- cmp $16, %r11
- je _get_AAD_loop2_done\@
- mov $16, %r12
-
-_get_AAD_loop2\@:
- vpsrldq $4, reg_i, reg_i
- sub $4, %r12
- cmp %r11, %r12
- jg _get_AAD_loop2\@
-
-_get_AAD_loop2_done\@:
-
- #byte-reflect the AAD data
- vpshufb SHUF_MASK(%rip), reg_i, reg_i
-
+ mov arg6, %r10 # r10 = AAD
+ mov arg7, %r12 # r12 = aadLen
+
+
+ mov %r12, %r11
+
+ vpxor reg_j, reg_j, reg_j
+ vpxor reg_i, reg_i, reg_i
+
+ cmp $16, %r11
+ jl _get_AAD_rest8\@
+_get_AAD_blocks\@:
+ vmovdqu (%r10), reg_i
+ vpshufb SHUF_MASK(%rip), reg_i, reg_i
+ vpxor reg_i, reg_j, reg_j
+ GHASH_MUL_AVX2 reg_j, \T2, \T1, \T3, \T4, \T5, \T6
+ add $16, %r10
+ sub $16, %r12
+ sub $16, %r11
+ cmp $16, %r11
+ jge _get_AAD_blocks\@
+ vmovdqu reg_j, reg_i
+ cmp $0, %r11
+ je _get_AAD_done\@
+
+ vpxor reg_i, reg_i, reg_i
+
+ /* read the last <16B of AAD. since we have at least 4B of
+ data right after the AAD (the ICV, and maybe some CT), we can
+ read 4B/8B blocks safely, and then get rid of the extra stuff */
+_get_AAD_rest8\@:
+ cmp $4, %r11
+ jle _get_AAD_rest4\@
+ movq (%r10), \T1
+ add $8, %r10
+ sub $8, %r11
+ vpslldq $8, \T1, \T1
+ vpsrldq $8, reg_i, reg_i
+ vpxor \T1, reg_i, reg_i
+ jmp _get_AAD_rest8\@
+_get_AAD_rest4\@:
+ cmp $0, %r11
+ jle _get_AAD_rest0\@
+ mov (%r10), %eax
+ movq %rax, \T1
+ add $4, %r10
+ sub $4, %r11
+ vpslldq $12, \T1, \T1
+ vpsrldq $4, reg_i, reg_i
+ vpxor \T1, reg_i, reg_i
+_get_AAD_rest0\@:
+ /* finalize: shift out the extra bytes we read, and align
+ left. since pslldq can only shift by an immediate, we use
+ vpshufb and an array of shuffle masks */
+ movq %r12, %r11
+ salq $4, %r11
+ movdqu aad_shift_arr(%r11), \T1
+ vpshufb \T1, reg_i, reg_i
+_get_AAD_rest_final\@:
+ vpshufb SHUF_MASK(%rip), reg_i, reg_i
+ vpxor reg_j, reg_i, reg_i
+ GHASH_MUL_AVX2 reg_i, \T2, \T1, \T3, \T4, \T5, \T6
+
+_get_AAD_done\@:
# initialize the data pointer offset as zero
xor %r11, %r11
@@ -1740,7 +1843,6 @@ _get_AAD_loop2_done\@:
i = (8-\num_initial_blocks)
j = (9-\num_initial_blocks)
setreg
- GHASH_MUL_AVX2 reg_i, \T2, \T1, \T3, \T4, \T5, \T6
.rep \num_initial_blocks
vpxor reg_i, reg_j, reg_j
@@ -2702,19 +2804,36 @@ _return_T\@:
cmp $16, %r11
je _T_16\@
- cmp $12, %r11
- je _T_12\@
+ cmp $8, %r11
+ jl _T_4\@
_T_8\@:
vmovq %xmm9, %rax
mov %rax, (%r10)
- jmp _return_T_done\@
-_T_12\@:
- vmovq %xmm9, %rax
- mov %rax, (%r10)
+ add $8, %r10
+ sub $8, %r11
vpsrldq $8, %xmm9, %xmm9
+ cmp $0, %r11
+ je _return_T_done\@
+_T_4\@:
vmovd %xmm9, %eax
- mov %eax, 8(%r10)
+ mov %eax, (%r10)
+ add $4, %r10
+ sub $4, %r11
+ vpsrldq $4, %xmm9, %xmm9
+ cmp $0, %r11
+ je _return_T_done\@
+_T_123\@:
+ vmovd %xmm9, %eax
+ cmp $2, %r11
+ jl _T_1\@
+ mov %ax, (%r10)
+ cmp $2, %r11
+ je _return_T_done\@
+ add $2, %r10
+ sar $16, %eax
+_T_1\@:
+ mov %al, (%r10)
jmp _return_T_done\@
_T_16\@:
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 93de8ea51548..4a55cdcdc008 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -61,6 +61,11 @@ struct aesni_rfc4106_gcm_ctx {
u8 nonce[4];
};
+struct generic_gcmaes_ctx {
+ u8 hash_subkey[16] AESNI_ALIGN_ATTR;
+ struct crypto_aes_ctx aes_key_expanded AESNI_ALIGN_ATTR;
+};
+
struct aesni_xts_ctx {
u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR;
u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR;
@@ -102,13 +107,11 @@ asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out,
* u8 *out, Ciphertext output. Encrypt in-place is allowed.
* const u8 *in, Plaintext input
* unsigned long plaintext_len, Length of data in bytes for encryption.
- * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association)
- * concatenated with 8 byte Initialisation Vector (from IPSec ESP
- * Payload) concatenated with 0x00000001. 16-byte aligned pointer.
+ * u8 *iv, Pre-counter block j0: 12 byte IV concatenated with 0x00000001.
+ * 16-byte aligned pointer.
* u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
* const u8 *aad, Additional Authentication Data (AAD)
- * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this
- * is going to be 8 or 12 bytes
+ * unsigned long aad_len, Length of AAD in bytes.
* u8 *auth_tag, Authenticated Tag output.
* unsigned long auth_tag_len), Authenticated Tag Length in bytes.
* Valid values are 16 (most likely), 12 or 8.
@@ -123,9 +126,8 @@ asmlinkage void aesni_gcm_enc(void *ctx, u8 *out,
* u8 *out, Plaintext output. Decrypt in-place is allowed.
* const u8 *in, Ciphertext input
* unsigned long ciphertext_len, Length of data in bytes for decryption.
- * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association)
- * concatenated with 8 byte Initialisation Vector (from IPSec ESP
- * Payload) concatenated with 0x00000001. 16-byte aligned pointer.
+ * u8 *iv, Pre-counter block j0: 12 byte IV concatenated with 0x00000001.
+ * 16-byte aligned pointer.
* u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
* const u8 *aad, Additional Authentication Data (AAD)
* unsigned long aad_len, Length of AAD in bytes. With RFC4106 this is going
@@ -275,6 +277,16 @@ aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
align = 1;
return PTR_ALIGN(crypto_aead_ctx(tfm), align);
}
+
+static inline struct
+generic_gcmaes_ctx *generic_gcmaes_ctx_get(struct crypto_aead *tfm)
+{
+ unsigned long align = AESNI_ALIGN;
+
+ if (align <= crypto_tfm_ctx_alignment())
+ align = 1;
+ return PTR_ALIGN(crypto_aead_ctx(tfm), align);
+}
#endif
static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
@@ -712,32 +724,34 @@ static int rfc4106_set_authsize(struct crypto_aead *parent,
return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
}
-static int helper_rfc4106_encrypt(struct aead_request *req)
+static int generic_gcmaes_set_authsize(struct crypto_aead *tfm,
+ unsigned int authsize)
+{
+ switch (authsize) {
+ case 4:
+ case 8:
+ case 12:
+ case 13:
+ case 14:
+ case 15:
+ case 16:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen,
+ u8 *hash_subkey, u8 *iv, void *aes_ctx)
{
u8 one_entry_in_sg = 0;
u8 *src, *dst, *assoc;
- __be32 counter = cpu_to_be32(1);
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
- void *aes_ctx = &(ctx->aes_key_expanded);
unsigned long auth_tag_len = crypto_aead_authsize(tfm);
- u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
struct scatter_walk src_sg_walk;
struct scatter_walk dst_sg_walk = {};
- unsigned int i;
-
- /* Assuming we are supporting rfc4106 64-bit extended */
- /* sequence numbers We need to have the AAD length equal */
- /* to 16 or 20 bytes */
- if (unlikely(req->assoclen != 16 && req->assoclen != 20))
- return -EINVAL;
-
- /* IV below built */
- for (i = 0; i < 4; i++)
- *(iv+i) = ctx->nonce[i];
- for (i = 0; i < 8; i++)
- *(iv+4+i) = req->iv[i];
- *((__be32 *)(iv+12)) = counter;
if (sg_is_last(req->src) &&
(!PageHighMem(sg_page(req->src)) ||
@@ -768,7 +782,7 @@ static int helper_rfc4106_encrypt(struct aead_request *req)
kernel_fpu_begin();
aesni_gcm_enc_tfm(aes_ctx, dst, src, req->cryptlen, iv,
- ctx->hash_subkey, assoc, req->assoclen - 8,
+ hash_subkey, assoc, assoclen,
dst + req->cryptlen, auth_tag_len);
kernel_fpu_end();
@@ -791,37 +805,20 @@ static int helper_rfc4106_encrypt(struct aead_request *req)
return 0;
}
-static int helper_rfc4106_decrypt(struct aead_request *req)
+static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen,
+ u8 *hash_subkey, u8 *iv, void *aes_ctx)
{
u8 one_entry_in_sg = 0;
u8 *src, *dst, *assoc;
unsigned long tempCipherLen = 0;
- __be32 counter = cpu_to_be32(1);
- int retval = 0;
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
- void *aes_ctx = &(ctx->aes_key_expanded);
unsigned long auth_tag_len = crypto_aead_authsize(tfm);
- u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
u8 authTag[16];
struct scatter_walk src_sg_walk;
struct scatter_walk dst_sg_walk = {};
- unsigned int i;
-
- if (unlikely(req->assoclen != 16 && req->assoclen != 20))
- return -EINVAL;
-
- /* Assuming we are supporting rfc4106 64-bit extended */
- /* sequence numbers We need to have the AAD length */
- /* equal to 16 or 20 bytes */
+ int retval = 0;
tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len);
- /* IV below built */
- for (i = 0; i < 4; i++)
- *(iv+i) = ctx->nonce[i];
- for (i = 0; i < 8; i++)
- *(iv+4+i) = req->iv[i];
- *((__be32 *)(iv+12)) = counter;
if (sg_is_last(req->src) &&
(!PageHighMem(sg_page(req->src)) ||
@@ -838,7 +835,6 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
scatterwalk_start(&dst_sg_walk, req->dst);
dst = scatterwalk_map(&dst_sg_walk) + req->assoclen;
}
-
} else {
/* Allocate memory for src, dst, assoc */
assoc = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC);
@@ -850,9 +846,10 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
dst = src;
}
+
kernel_fpu_begin();
aesni_gcm_dec_tfm(aes_ctx, dst, src, tempCipherLen, iv,
- ctx->hash_subkey, assoc, req->assoclen - 8,
+ hash_subkey, assoc, assoclen,
authTag, auth_tag_len);
kernel_fpu_end();
@@ -875,6 +872,60 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
kfree(assoc);
}
return retval;
+
+}
+
+static int helper_rfc4106_encrypt(struct aead_request *req)
+{
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+ void *aes_ctx = &(ctx->aes_key_expanded);
+ u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+ unsigned int i;
+ __be32 counter = cpu_to_be32(1);
+
+ /* Assuming we are supporting rfc4106 64-bit extended */
+ /* sequence numbers. We need to have the AAD length equal */
+ /* to 16 or 20 bytes */
+ if (unlikely(req->assoclen != 16 && req->assoclen != 20))
+ return -EINVAL;
+
+ /* IV below built */
+ for (i = 0; i < 4; i++)
+ *(iv+i) = ctx->nonce[i];
+ for (i = 0; i < 8; i++)
+ *(iv+4+i) = req->iv[i];
+ *((__be32 *)(iv+12)) = counter;
+
+ return gcmaes_encrypt(req, req->assoclen - 8, ctx->hash_subkey, iv,
+ aes_ctx);
+}
+
+static int helper_rfc4106_decrypt(struct aead_request *req)
+{
+ __be32 counter = cpu_to_be32(1);
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+ void *aes_ctx = &(ctx->aes_key_expanded);
+ u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+ unsigned int i;
+
+ if (unlikely(req->assoclen != 16 && req->assoclen != 20))
+ return -EINVAL;
+
+ /* Assuming we are supporting rfc4106 64-bit extended */
+ /* sequence numbers. We need to have the AAD length */
+ /* equal to 16 or 20 bytes */
+
+ /* IV below built */
+ for (i = 0; i < 4; i++)
+ *(iv+i) = ctx->nonce[i];
+ for (i = 0; i < 8; i++)
+ *(iv+4+i) = req->iv[i];
+ *((__be32 *)(iv+12)) = counter;
+
+ return gcmaes_decrypt(req, req->assoclen - 8, ctx->hash_subkey, iv,
+ aes_ctx);
}
static int rfc4106_encrypt(struct aead_request *req)
@@ -1035,6 +1086,46 @@ struct {
};
#ifdef CONFIG_X86_64
+static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key,
+ unsigned int key_len)
+{
+ struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(aead);
+
+ return aes_set_key_common(crypto_aead_tfm(aead),
+ &ctx->aes_key_expanded, key, key_len) ?:
+ rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
+}
+
+static int generic_gcmaes_encrypt(struct aead_request *req)
+{
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm);
+ void *aes_ctx = &(ctx->aes_key_expanded);
+ u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+ __be32 counter = cpu_to_be32(1);
+
+ memcpy(iv, req->iv, 12);
+ *((__be32 *)(iv+12)) = counter;
+
+ return gcmaes_encrypt(req, req->assoclen, ctx->hash_subkey, iv,
+ aes_ctx);
+}
+
+static int generic_gcmaes_decrypt(struct aead_request *req)
+{
+ __be32 counter = cpu_to_be32(1);
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm);
+ void *aes_ctx = &(ctx->aes_key_expanded);
+ u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+
+ memcpy(iv, req->iv, 12);
+ *((__be32 *)(iv+12)) = counter;
+
+ return gcmaes_decrypt(req, req->assoclen, ctx->hash_subkey, iv,
+ aes_ctx);
+}
+
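
Unlike the rfc4106 template, the new "generic-gcm-aesni" algorithm takes a plain AES key (no 4-byte nonce appended) and a full 12-byte IV. A hedged in-kernel usage sketch, assuming it wins the priority selection for "gcm(aes)":

    const u8 key[16] = { /* AES-128 key bytes */ };
    struct crypto_aead *tfm = crypto_alloc_aead("gcm(aes)", 0, 0);

    if (!IS_ERR(tfm)) {
            crypto_aead_setkey(tfm, key, sizeof(key));  /* plain AES key, no appended nonce */
            crypto_aead_setauthsize(tfm, 16);           /* 4, 8 and 12-16 are also accepted */
            /* then build an aead_request with a 12-byte IV, as usual for gcm(aes) */
    }
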
static struct aead_alg aesni_aead_algs[] = { {
.setkey = common_rfc4106_set_key,
.setauthsize = common_rfc4106_set_authsize,
@@ -1069,6 +1160,23 @@ static struct aead_alg aesni_aead_algs[] = { {
.cra_ctxsize = sizeof(struct cryptd_aead *),
.cra_module = THIS_MODULE,
},
+}, {
+ .setkey = generic_gcmaes_set_key,
+ .setauthsize = generic_gcmaes_set_authsize,
+ .encrypt = generic_gcmaes_encrypt,
+ .decrypt = generic_gcmaes_decrypt,
+ .ivsize = 12,
+ .maxauthsize = 16,
+ .base = {
+ .cra_name = "gcm(aes)",
+ .cra_driver_name = "generic-gcm-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_ASYNC,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct generic_gcmaes_ctx),
+ .cra_alignmask = AESNI_ALIGN - 1,
+ .cra_module = THIS_MODULE,
+ },
} };
#else
static struct aead_alg aesni_aead_algs[0];
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index 24ac9fad832d..d61e57960fe0 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -176,9 +176,6 @@ __glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
src -= 1;
dst -= 1;
} while (nbytes >= func_bytes);
-
- if (nbytes < bsize)
- goto done;
}
}
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c
index 2dd3674b5a1e..458409b7568d 100644
--- a/arch/x86/crypto/sha512-mb/sha512_mb.c
+++ b/arch/x86/crypto/sha512-mb/sha512_mb.c
@@ -269,19 +269,19 @@ static struct sha512_hash_ctx
* LAST
*/
ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
- return ctx;
+ goto unlock;
}
if (ctx->status & HASH_CTX_STS_PROCESSING) {
/* Cannot submit to a currently processing job. */
ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
- return ctx;
+ goto unlock;
}
if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) {
/* Cannot update a finished job. */
ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
- return ctx;
+ goto unlock;
}
@@ -363,6 +363,7 @@ static struct sha512_hash_ctx
}
ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
+unlock:
spin_unlock_irqrestore(&cstate->work_lock, irqflags);
return ctx;
}
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 695605eb1dfb..1588e9e3dc01 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -48,28 +48,31 @@
#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS
/* x86-specific vcpu->requests bit members */
-#define KVM_REQ_MIGRATE_TIMER 8
-#define KVM_REQ_REPORT_TPR_ACCESS 9
-#define KVM_REQ_TRIPLE_FAULT 10
-#define KVM_REQ_MMU_SYNC 11
-#define KVM_REQ_CLOCK_UPDATE 12
-#define KVM_REQ_EVENT 14
-#define KVM_REQ_APF_HALT 15
-#define KVM_REQ_STEAL_UPDATE 16
-#define KVM_REQ_NMI 17
-#define KVM_REQ_PMU 18
-#define KVM_REQ_PMI 19
-#define KVM_REQ_SMI 20
-#define KVM_REQ_MASTERCLOCK_UPDATE 21
-#define KVM_REQ_MCLOCK_INPROGRESS (22 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
-#define KVM_REQ_SCAN_IOAPIC (23 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
-#define KVM_REQ_GLOBAL_CLOCK_UPDATE 24
-#define KVM_REQ_APIC_PAGE_RELOAD (25 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
-#define KVM_REQ_HV_CRASH 26
-#define KVM_REQ_IOAPIC_EOI_EXIT 27
-#define KVM_REQ_HV_RESET 28
-#define KVM_REQ_HV_EXIT 29
-#define KVM_REQ_HV_STIMER 30
+#define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0)
+#define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1)
+#define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2)
+#define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3)
+#define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4)
+#define KVM_REQ_EVENT KVM_ARCH_REQ(6)
+#define KVM_REQ_APF_HALT KVM_ARCH_REQ(7)
+#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8)
+#define KVM_REQ_NMI KVM_ARCH_REQ(9)
+#define KVM_REQ_PMU KVM_ARCH_REQ(10)
+#define KVM_REQ_PMI KVM_ARCH_REQ(11)
+#define KVM_REQ_SMI KVM_ARCH_REQ(12)
+#define KVM_REQ_MASTERCLOCK_UPDATE KVM_ARCH_REQ(13)
+#define KVM_REQ_MCLOCK_INPROGRESS \
+ KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_SCAN_IOAPIC \
+ KVM_ARCH_REQ_FLAGS(15, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_GLOBAL_CLOCK_UPDATE KVM_ARCH_REQ(16)
+#define KVM_REQ_APIC_PAGE_RELOAD \
+ KVM_ARCH_REQ_FLAGS(17, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_HV_CRASH KVM_ARCH_REQ(18)
+#define KVM_REQ_IOAPIC_EOI_EXIT KVM_ARCH_REQ(19)
+#define KVM_REQ_HV_RESET KVM_ARCH_REQ(20)
+#define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21)
+#define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22)
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -254,7 +257,8 @@ union kvm_mmu_page_role {
unsigned cr0_wp:1;
unsigned smep_andnot_wp:1;
unsigned smap_andnot_wp:1;
- unsigned :8;
+ unsigned ad_disabled:1;
+ unsigned :7;
/*
* This is left at the top of the word so that
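
The request renumbering above leans on KVM_ARCH_REQ()/KVM_ARCH_REQ_FLAGS() to place architecture-specific requests after the generic ones and to fold behaviour flags into high bits, so the x86 requests keep their previous bit positions. The model below only approximates that encoding; the base and flag positions are assumptions, not the generic-code definitions:

#include <assert.h>
#include <stdio.h>

#define KVM_REQUEST_ARCH_BASE	8u		/* first arch-specific bit (assumed) */
#define KVM_REQUEST_NO_WAKEUP	(1u << 29)	/* illustrative flag positions */
#define KVM_REQUEST_WAIT	(1u << 30)

#define KVM_ARCH_REQ_FLAGS(nr, flags)	(((nr) + KVM_REQUEST_ARCH_BASE) | (flags))
#define KVM_ARCH_REQ(nr)		KVM_ARCH_REQ_FLAGS(nr, 0)

int main(void)
{
	/* KVM_REQ_MIGRATE_TIMER used to be bit 8; KVM_ARCH_REQ(0) keeps it there. */
	assert((KVM_ARCH_REQ(0) & 0xffu) == 8u);

	/* Low byte carries the bit index, high bits carry behaviour flags. */
	unsigned req = KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP);
	printf("bit=%u wait=%d no_wakeup=%d\n", req & 0xffu,
	       !!(req & KVM_REQUEST_WAIT), !!(req & KVM_REQUEST_NO_WAKEUP));
	return 0;
}
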
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index d406894cd9a2..5573c75f8e4c 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -426,6 +426,8 @@
#define MSR_IA32_TSC_ADJUST 0x0000003b
#define MSR_IA32_BNDCFGS 0x00000d90
+#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc
+
#define MSR_IA32_XSS 0x00000da0
#define FEATURE_CONTROL_LOCKED (1<<0)
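
MSR_IA32_BNDCFGS_RSVD (0xffc) names bits 11:2, which must be written as zero; the vmx.c hunk further below rejects guest writes that set any of them or whose bound-directory base is not canonical. A small check in the same spirit, assuming 48-bit canonical addresses:

#include <assert.h>
#include <stdint.h>

#define BNDCFGS_RSVD	0x0000000000000ffcull	/* bits 11:2 reserved */
#define BND_PAGE_MASK	(~0xfffull)

static int is_canonical_48(uint64_t a)
{
	uint64_t top = a >> 47;			/* bits 63:47 must agree */
	return top == 0 || top == 0x1ffff;
}

static int bndcfgs_write_ok(uint64_t data)
{
	if (data & BNDCFGS_RSVD)
		return 0;			/* reserved bits set */
	if (!is_canonical_48(data & BND_PAGE_MASK))
		return 0;			/* bad bound-directory base */
	return 1;
}

int main(void)
{
	assert(bndcfgs_write_ok(0x1));				/* enable bit only */
	assert(!bndcfgs_write_ok(0x4));				/* reserved bit 2 */
	assert(!bndcfgs_write_ok(0x0000800000000001ull));	/* non-canonical base */
	return 0;
}
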
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index f6d20f6cca12..11071fcd630e 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -43,6 +43,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
+#include <asm/smap.h>
#include <xen/interface/xen.h>
#include <xen/interface/sched.h>
@@ -50,6 +51,8 @@
#include <xen/interface/platform.h>
#include <xen/interface/xen-mca.h>
+struct xen_dm_op_buf;
+
/*
* The hypercall asms have to meet several constraints:
* - Work on 32- and 64-bit.
@@ -214,10 +217,12 @@ privcmd_call(unsigned call,
__HYPERCALL_DECLS;
__HYPERCALL_5ARG(a1, a2, a3, a4, a5);
+ stac();
asm volatile("call *%[call]"
: __HYPERCALL_5PARAM
: [call] "a" (&hypercall_page[call])
: __HYPERCALL_CLOBBER5);
+ clac();
return (long)__res;
}
@@ -474,9 +479,13 @@ HYPERVISOR_xenpmu_op(unsigned int op, void *arg)
static inline int
HYPERVISOR_dm_op(
- domid_t dom, unsigned int nr_bufs, void *bufs)
+ domid_t dom, unsigned int nr_bufs, struct xen_dm_op_buf *bufs)
{
- return _hypercall3(int, dm_op, dom, nr_bufs, bufs);
+ int ret;
+ stac();
+ ret = _hypercall3(int, dm_op, dom, nr_bufs, bufs);
+ clac();
+ return ret;
}
static inline void
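
privcmd_call() and HYPERVISOR_dm_op() are now bracketed with stac()/clac() because the buffers they pass may sit in user memory and would otherwise trip SMAP when they are accessed on the kernel's behalf. A condensed sketch of that bracketed-access pattern, with plain stub functions standing in for stac()/clac() and for the real hypercall asm:

/* uaccess_allow()/uaccess_deny() model stac()/clac() (set/clear EFLAGS.AC);
 * do_hypercall() models the asm call through the hypercall page. */
#include <stdio.h>

static void uaccess_allow(void) { }
static void uaccess_deny(void)  { }
static long do_hypercall(unsigned call, void *buf) { (void)call; (void)buf; return 0; }

static long dm_op(unsigned domid, void *user_bufs)
{
	long ret;

	uaccess_allow();		/* user buffers become accessible */
	ret = do_hypercall(domid, user_bufs);
	uaccess_deny();			/* close the window right away */
	return ret;
}

int main(void)
{
	printf("%ld\n", dm_op(0, NULL));
	return 0;
}
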
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index a6fd40aade7c..da6728383052 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -144,6 +144,14 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
return best && (best->ebx & bit(X86_FEATURE_RTM));
}
+static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 7, 0);
+ return best && (best->ebx & bit(X86_FEATURE_MPX));
+}
+
static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 80890dee66ce..fb0055953fbc 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -900,7 +900,7 @@ static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
if (rc != X86EMUL_CONTINUE) \
goto done; \
ctxt->_eip += sizeof(_type); \
- _x = *(_type __aligned(1) *) ctxt->fetch.ptr; \
+ memcpy(&_x, ctxt->fetch.ptr, sizeof(_type)); \
ctxt->fetch.ptr += sizeof(_type); \
_x; \
})
@@ -3942,6 +3942,25 @@ static int check_fxsr(struct x86_emulate_ctxt *ctxt)
}
/*
+ * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but does save
+ * and restore MXCSR.
+ */
+static size_t __fxstate_size(int nregs)
+{
+ return offsetof(struct fxregs_state, xmm_space[0]) + nregs * 16;
+}
+
+static inline size_t fxstate_size(struct x86_emulate_ctxt *ctxt)
+{
+ bool cr4_osfxsr;
+ if (ctxt->mode == X86EMUL_MODE_PROT64)
+ return __fxstate_size(16);
+
+ cr4_osfxsr = ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR;
+ return __fxstate_size(cr4_osfxsr ? 8 : 0);
+}
+
+/*
* FXSAVE and FXRSTOR have 4 different formats depending on execution mode,
* 1) 16 bit mode
* 2) 32 bit mode
@@ -3962,7 +3981,6 @@ static int check_fxsr(struct x86_emulate_ctxt *ctxt)
static int em_fxsave(struct x86_emulate_ctxt *ctxt)
{
struct fxregs_state fx_state;
- size_t size;
int rc;
rc = check_fxsr(ctxt);
@@ -3978,68 +3996,42 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
- if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR)
- size = offsetof(struct fxregs_state, xmm_space[8 * 16/4]);
- else
- size = offsetof(struct fxregs_state, xmm_space[0]);
-
- return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
-}
-
-static int fxrstor_fixup(struct x86_emulate_ctxt *ctxt,
- struct fxregs_state *new)
-{
- int rc = X86EMUL_CONTINUE;
- struct fxregs_state old;
-
- rc = asm_safe("fxsave %[fx]", , [fx] "+m"(old));
- if (rc != X86EMUL_CONTINUE)
- return rc;
-
- /*
- * 64 bit host will restore XMM 8-15, which is not correct on non-64
- * bit guests. Load the current values in order to preserve 64 bit
- * XMMs after fxrstor.
- */
-#ifdef CONFIG_X86_64
- /* XXX: accessing XMM 8-15 very awkwardly */
- memcpy(&new->xmm_space[8 * 16/4], &old.xmm_space[8 * 16/4], 8 * 16);
-#endif
-
- /*
- * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but
- * does save and restore MXCSR.
- */
- if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))
- memcpy(new->xmm_space, old.xmm_space, 8 * 16);
-
- return rc;
+ return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state,
+ fxstate_size(ctxt));
}
static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
{
struct fxregs_state fx_state;
int rc;
+ size_t size;
rc = check_fxsr(ctxt);
if (rc != X86EMUL_CONTINUE)
return rc;
- rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, 512);
- if (rc != X86EMUL_CONTINUE)
- return rc;
+ ctxt->ops->get_fpu(ctxt);
- if (fx_state.mxcsr >> 16)
- return emulate_gp(ctxt, 0);
+ size = fxstate_size(ctxt);
+ if (size < __fxstate_size(16)) {
+ rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
+ if (rc != X86EMUL_CONTINUE)
+ goto out;
+ }
- ctxt->ops->get_fpu(ctxt);
+ rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
+ if (rc != X86EMUL_CONTINUE)
+ goto out;
- if (ctxt->mode < X86EMUL_MODE_PROT64)
- rc = fxrstor_fixup(ctxt, &fx_state);
+ if (fx_state.mxcsr >> 16) {
+ rc = emulate_gp(ctxt, 0);
+ goto out;
+ }
if (rc == X86EMUL_CONTINUE)
rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
+out:
ctxt->ops->put_fpu(ctxt);
return rc;
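
fxstate_size() limits the guest-memory transfer to what the CPU would actually touch: XMM0-15 in 64-bit mode, XMM0-7 in 32-bit mode with CR4.OSFXSR set, and only the 160-byte legacy area (which still carries MXCSR) without it. Worked numbers, assuming the usual fxregs_state layout with xmm_space at byte offset 160:

#include <stdio.h>

#define FX_XMM_OFFSET	160u	/* offsetof(struct fxregs_state, xmm_space), assumed */

static unsigned fxstate_size(int long_mode, int cr4_osfxsr)
{
	unsigned nregs = long_mode ? 16 : (cr4_osfxsr ? 8 : 0);
	return FX_XMM_OFFSET + nregs * 16;
}

int main(void)
{
	printf("64-bit mode:        %u bytes\n", fxstate_size(1, 1));	/* 416 */
	printf("32-bit, OSFXSR=1:   %u bytes\n", fxstate_size(0, 1));	/* 288 */
	printf("32-bit, OSFXSR=0:   %u bytes\n", fxstate_size(0, 0));	/* 160 */
	return 0;
}
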
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index d24c8742d9b0..2819d4c123eb 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1495,6 +1495,7 @@ EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
static void cancel_hv_timer(struct kvm_lapic *apic)
{
+ WARN_ON(!apic->lapic_timer.hv_timer_in_use);
preempt_disable();
kvm_x86_ops->cancel_hv_timer(apic->vcpu);
apic->lapic_timer.hv_timer_in_use = false;
@@ -1503,25 +1504,56 @@ static void cancel_hv_timer(struct kvm_lapic *apic)
static bool start_hv_timer(struct kvm_lapic *apic)
{
- u64 tscdeadline = apic->lapic_timer.tscdeadline;
+ struct kvm_timer *ktimer = &apic->lapic_timer;
+ int r;
- if ((atomic_read(&apic->lapic_timer.pending) &&
- !apic_lvtt_period(apic)) ||
- kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
- if (apic->lapic_timer.hv_timer_in_use)
- cancel_hv_timer(apic);
- } else {
- apic->lapic_timer.hv_timer_in_use = true;
- hrtimer_cancel(&apic->lapic_timer.timer);
+ if (!kvm_x86_ops->set_hv_timer)
+ return false;
+
+ if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
+ return false;
- /* In case the sw timer triggered in the window */
- if (atomic_read(&apic->lapic_timer.pending) &&
- !apic_lvtt_period(apic))
- cancel_hv_timer(apic);
+ r = kvm_x86_ops->set_hv_timer(apic->vcpu, ktimer->tscdeadline);
+ if (r < 0)
+ return false;
+
+ ktimer->hv_timer_in_use = true;
+ hrtimer_cancel(&ktimer->timer);
+
+ /*
+ * Also recheck ktimer->pending, in case the sw timer triggered in
+ * the window. For periodic timer, leave the hv timer running for
+ * simplicity, and the deadline will be recomputed on the next vmexit.
+ */
+ if (!apic_lvtt_period(apic) && (r || atomic_read(&ktimer->pending))) {
+ if (r)
+ apic_timer_expired(apic);
+ return false;
}
- trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
- apic->lapic_timer.hv_timer_in_use);
- return apic->lapic_timer.hv_timer_in_use;
+
+ trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, true);
+ return true;
+}
+
+static void start_sw_timer(struct kvm_lapic *apic)
+{
+ struct kvm_timer *ktimer = &apic->lapic_timer;
+ if (apic->lapic_timer.hv_timer_in_use)
+ cancel_hv_timer(apic);
+ if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
+ return;
+
+ if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
+ start_sw_period(apic);
+ else if (apic_lvtt_tscdeadline(apic))
+ start_sw_tscdeadline(apic);
+ trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false);
+}
+
+static void restart_apic_timer(struct kvm_lapic *apic)
+{
+ if (!start_hv_timer(apic))
+ start_sw_timer(apic);
}
void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
@@ -1535,19 +1567,14 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
advance_periodic_target_expiration(apic);
- if (!start_hv_timer(apic))
- start_sw_period(apic);
+ restart_apic_timer(apic);
}
}
EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
{
- struct kvm_lapic *apic = vcpu->arch.apic;
-
- WARN_ON(apic->lapic_timer.hv_timer_in_use);
-
- start_hv_timer(apic);
+ restart_apic_timer(vcpu->arch.apic);
}
EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
@@ -1556,33 +1583,28 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
struct kvm_lapic *apic = vcpu->arch.apic;
/* Possibly the TSC deadline timer is not enabled yet */
- if (!apic->lapic_timer.hv_timer_in_use)
- return;
-
- cancel_hv_timer(apic);
+ if (apic->lapic_timer.hv_timer_in_use)
+ start_sw_timer(apic);
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
- if (atomic_read(&apic->lapic_timer.pending))
- return;
+void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
- if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
- start_sw_period(apic);
- else if (apic_lvtt_tscdeadline(apic))
- start_sw_tscdeadline(apic);
+ WARN_ON(!apic->lapic_timer.hv_timer_in_use);
+ restart_apic_timer(apic);
}
-EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
static void start_apic_timer(struct kvm_lapic *apic)
{
atomic_set(&apic->lapic_timer.pending, 0);
- if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
- if (set_target_expiration(apic) &&
- !(kvm_x86_ops->set_hv_timer && start_hv_timer(apic)))
- start_sw_period(apic);
- } else if (apic_lvtt_tscdeadline(apic)) {
- if (!(kvm_x86_ops->set_hv_timer && start_hv_timer(apic)))
- start_sw_tscdeadline(apic);
- }
+ if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
+ && !set_target_expiration(apic))
+ return;
+
+ restart_apic_timer(apic);
}
static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
@@ -1813,16 +1835,6 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
* LAPIC interface
*----------------------------------------------------------------------
*/
-u64 kvm_get_lapic_target_expiration_tsc(struct kvm_vcpu *vcpu)
-{
- struct kvm_lapic *apic = vcpu->arch.apic;
-
- if (!lapic_in_kernel(vcpu))
- return 0;
-
- return apic->lapic_timer.tscdeadline;
-}
-
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
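
restart_apic_timer() funnels every timer restart through one policy: try the hardware (VMX preemption timer) path first and fall back to the hrtimer-based software timer when it is unavailable, already pending in software, or fails to program. A condensed model of that control flow with simplified names:

#include <stdbool.h>
#include <stdio.h>

struct lapic_timer { bool hv_in_use; bool pending; bool periodic; };

static bool hv_timer_supported = true;	/* models kvm_x86_ops->set_hv_timer != NULL */

static bool start_hv_timer(struct lapic_timer *t)
{
	if (!hv_timer_supported)
		return false;
	if (!t->periodic && t->pending)	/* already fired via the sw timer */
		return false;
	t->hv_in_use = true;		/* program the hardware deadline */
	return true;
}

static void start_sw_timer(struct lapic_timer *t)
{
	t->hv_in_use = false;		/* arm the hrtimer instead */
}

static void restart_apic_timer(struct lapic_timer *t)
{
	if (!start_hv_timer(t))
		start_sw_timer(t);
}

int main(void)
{
	struct lapic_timer t = { false, false, false };

	restart_apic_timer(&t);
	printf("hv timer in use: %d\n", t.hv_in_use);

	hv_timer_supported = false;	/* e.g. no preemption timer on this CPU */
	restart_apic_timer(&t);
	printf("hv timer in use: %d\n", t.hv_in_use);
	return 0;
}
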
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index bcbe811f3b97..29caa2c3dff9 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -87,7 +87,6 @@ int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
-u64 kvm_get_lapic_target_expiration_tsc(struct kvm_vcpu *vcpu);
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
@@ -216,4 +215,5 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu);
void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu);
void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu);
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu);
+void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu);
#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index cb8225969255..aafd399cf8c6 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -183,13 +183,13 @@ static u64 __read_mostly shadow_user_mask;
static u64 __read_mostly shadow_accessed_mask;
static u64 __read_mostly shadow_dirty_mask;
static u64 __read_mostly shadow_mmio_mask;
+static u64 __read_mostly shadow_mmio_value;
static u64 __read_mostly shadow_present_mask;
/*
- * The mask/value to distinguish a PTE that has been marked not-present for
- * access tracking purposes.
- * The mask would be either 0 if access tracking is disabled, or
- * SPTE_SPECIAL_MASK|VMX_EPT_RWX_MASK if access tracking is enabled.
+ * SPTEs used by MMUs without A/D bits are marked with shadow_acc_track_value.
+ * Non-present SPTEs with shadow_acc_track_value set are in place for access
+ * tracking.
*/
static u64 __read_mostly shadow_acc_track_mask;
static const u64 shadow_acc_track_value = SPTE_SPECIAL_MASK;
@@ -207,16 +207,40 @@ static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIF
static void mmu_spte_set(u64 *sptep, u64 spte);
static void mmu_free_roots(struct kvm_vcpu *vcpu);
-void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask)
+void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value)
{
+ BUG_ON((mmio_mask & mmio_value) != mmio_value);
+ shadow_mmio_value = mmio_value | SPTE_SPECIAL_MASK;
shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK;
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
+static inline bool sp_ad_disabled(struct kvm_mmu_page *sp)
+{
+ return sp->role.ad_disabled;
+}
+
+static inline bool spte_ad_enabled(u64 spte)
+{
+ MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value);
+ return !(spte & shadow_acc_track_value);
+}
+
+static inline u64 spte_shadow_accessed_mask(u64 spte)
+{
+ MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value);
+ return spte_ad_enabled(spte) ? shadow_accessed_mask : 0;
+}
+
+static inline u64 spte_shadow_dirty_mask(u64 spte)
+{
+ MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value);
+ return spte_ad_enabled(spte) ? shadow_dirty_mask : 0;
+}
+
static inline bool is_access_track_spte(u64 spte)
{
- /* Always false if shadow_acc_track_mask is zero. */
- return (spte & shadow_acc_track_mask) == shadow_acc_track_value;
+ return !spte_ad_enabled(spte) && (spte & shadow_acc_track_mask) == 0;
}
/*
@@ -270,7 +294,7 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
u64 mask = generation_mmio_spte_mask(gen);
access &= ACC_WRITE_MASK | ACC_USER_MASK;
- mask |= shadow_mmio_mask | access | gfn << PAGE_SHIFT;
+ mask |= shadow_mmio_value | access | gfn << PAGE_SHIFT;
trace_mark_mmio_spte(sptep, gfn, access, gen);
mmu_spte_set(sptep, mask);
@@ -278,7 +302,7 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
static bool is_mmio_spte(u64 spte)
{
- return (spte & shadow_mmio_mask) == shadow_mmio_mask;
+ return (spte & shadow_mmio_mask) == shadow_mmio_value;
}
static gfn_t get_mmio_spte_gfn(u64 spte)
@@ -315,12 +339,20 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte)
return likely(kvm_gen == spte_gen);
}
+/*
+ * Sets the shadow PTE masks used by the MMU.
+ *
+ * Assumptions:
+ * - Setting either @accessed_mask or @dirty_mask requires setting both
+ * - At least one of @accessed_mask or @acc_track_mask must be set
+ */
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask,
u64 acc_track_mask)
{
- if (acc_track_mask != 0)
- acc_track_mask |= SPTE_SPECIAL_MASK;
+ BUG_ON(!dirty_mask != !accessed_mask);
+ BUG_ON(!accessed_mask && !acc_track_mask);
+ BUG_ON(acc_track_mask & shadow_acc_track_value);
shadow_user_mask = user_mask;
shadow_accessed_mask = accessed_mask;
@@ -329,7 +361,6 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
shadow_x_mask = x_mask;
shadow_present_mask = p_mask;
shadow_acc_track_mask = acc_track_mask;
- WARN_ON(shadow_accessed_mask != 0 && shadow_acc_track_mask != 0);
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
@@ -549,7 +580,7 @@ static bool spte_has_volatile_bits(u64 spte)
is_access_track_spte(spte))
return true;
- if (shadow_accessed_mask) {
+ if (spte_ad_enabled(spte)) {
if ((spte & shadow_accessed_mask) == 0 ||
(is_writable_pte(spte) && (spte & shadow_dirty_mask) == 0))
return true;
@@ -560,14 +591,17 @@ static bool spte_has_volatile_bits(u64 spte)
static bool is_accessed_spte(u64 spte)
{
- return shadow_accessed_mask ? spte & shadow_accessed_mask
- : !is_access_track_spte(spte);
+ u64 accessed_mask = spte_shadow_accessed_mask(spte);
+
+ return accessed_mask ? spte & accessed_mask
+ : !is_access_track_spte(spte);
}
static bool is_dirty_spte(u64 spte)
{
- return shadow_dirty_mask ? spte & shadow_dirty_mask
- : spte & PT_WRITABLE_MASK;
+ u64 dirty_mask = spte_shadow_dirty_mask(spte);
+
+ return dirty_mask ? spte & dirty_mask : spte & PT_WRITABLE_MASK;
}
/* Rules for using mmu_spte_set:
@@ -707,10 +741,10 @@ static u64 mmu_spte_get_lockless(u64 *sptep)
static u64 mark_spte_for_access_track(u64 spte)
{
- if (shadow_accessed_mask != 0)
+ if (spte_ad_enabled(spte))
return spte & ~shadow_accessed_mask;
- if (shadow_acc_track_mask == 0 || is_access_track_spte(spte))
+ if (is_access_track_spte(spte))
return spte;
/*
@@ -729,7 +763,6 @@ static u64 mark_spte_for_access_track(u64 spte)
spte |= (spte & shadow_acc_track_saved_bits_mask) <<
shadow_acc_track_saved_bits_shift;
spte &= ~shadow_acc_track_mask;
- spte |= shadow_acc_track_value;
return spte;
}
@@ -741,6 +774,7 @@ static u64 restore_acc_track_spte(u64 spte)
u64 saved_bits = (spte >> shadow_acc_track_saved_bits_shift)
& shadow_acc_track_saved_bits_mask;
+ WARN_ON_ONCE(spte_ad_enabled(spte));
WARN_ON_ONCE(!is_access_track_spte(spte));
new_spte &= ~shadow_acc_track_mask;
@@ -759,7 +793,7 @@ static bool mmu_spte_age(u64 *sptep)
if (!is_accessed_spte(spte))
return false;
- if (shadow_accessed_mask) {
+ if (spte_ad_enabled(spte)) {
clear_bit((ffs(shadow_accessed_mask) - 1),
(unsigned long *)sptep);
} else {
@@ -1390,6 +1424,22 @@ static bool spte_clear_dirty(u64 *sptep)
return mmu_spte_update(sptep, spte);
}
+static bool wrprot_ad_disabled_spte(u64 *sptep)
+{
+ bool was_writable = test_and_clear_bit(PT_WRITABLE_SHIFT,
+ (unsigned long *)sptep);
+ if (was_writable)
+ kvm_set_pfn_dirty(spte_to_pfn(*sptep));
+
+ return was_writable;
+}
+
+/*
+ * Gets the GFN ready for another round of dirty logging by clearing the
+ * - D bit on ad-enabled SPTEs, and
+ * - W bit on ad-disabled SPTEs.
+ * Returns true iff any D or W bits were cleared.
+ */
static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
{
u64 *sptep;
@@ -1397,7 +1447,10 @@ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
bool flush = false;
for_each_rmap_spte(rmap_head, &iter, sptep)
- flush |= spte_clear_dirty(sptep);
+ if (spte_ad_enabled(*sptep))
+ flush |= spte_clear_dirty(sptep);
+ else
+ flush |= wrprot_ad_disabled_spte(sptep);
return flush;
}
@@ -1420,7 +1473,8 @@ static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
bool flush = false;
for_each_rmap_spte(rmap_head, &iter, sptep)
- flush |= spte_set_dirty(sptep);
+ if (spte_ad_enabled(*sptep))
+ flush |= spte_set_dirty(sptep);
return flush;
}
@@ -1452,7 +1506,8 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
}
/**
- * kvm_mmu_clear_dirty_pt_masked - clear MMU D-bit for PT level pages
+ * kvm_mmu_clear_dirty_pt_masked - clear MMU D-bit for PT level pages, or write
+ * protect the page if the D-bit isn't supported.
* @kvm: kvm instance
* @slot: slot to clear D-bit
* @gfn_offset: start of the BITS_PER_LONG pages we care about
@@ -1766,18 +1821,9 @@ static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
u64 *sptep;
struct rmap_iterator iter;
- /*
- * If there's no access bit in the secondary pte set by the hardware and
- * fast access tracking is also not enabled, it's up to gup-fast/gup to
- * set the access bit in the primary pte or in the page structure.
- */
- if (!shadow_accessed_mask && !shadow_acc_track_mask)
- goto out;
-
for_each_rmap_spte(rmap_head, &iter, sptep)
if (is_accessed_spte(*sptep))
return 1;
-out:
return 0;
}
@@ -1798,18 +1844,6 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
- /*
- * In case of absence of EPT Access and Dirty Bits supports,
- * emulate the accessed bit for EPT, by checking if this page has
- * an EPT mapping, and clearing it if it does. On the next access,
- * a new EPT mapping will be established.
- * This has some overhead, but not as much as the cost of swapping
- * out actively used pages or breaking up actively used hugepages.
- */
- if (!shadow_accessed_mask && !shadow_acc_track_mask)
- return kvm_handle_hva_range(kvm, start, end, 0,
- kvm_unmap_rmapp);
-
return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
}
@@ -2398,7 +2432,12 @@ static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep,
BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK);
spte = __pa(sp->spt) | shadow_present_mask | PT_WRITABLE_MASK |
- shadow_user_mask | shadow_x_mask | shadow_accessed_mask;
+ shadow_user_mask | shadow_x_mask;
+
+ if (sp_ad_disabled(sp))
+ spte |= shadow_acc_track_value;
+ else
+ spte |= shadow_accessed_mask;
mmu_spte_set(sptep, spte);
@@ -2666,10 +2705,15 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
{
u64 spte = 0;
int ret = 0;
+ struct kvm_mmu_page *sp;
if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
return 0;
+ sp = page_header(__pa(sptep));
+ if (sp_ad_disabled(sp))
+ spte |= shadow_acc_track_value;
+
/*
* For the EPT case, shadow_present_mask is 0 if hardware
* supports exec-only page table entries. In that case,
@@ -2678,7 +2722,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
*/
spte |= shadow_present_mask;
if (!speculative)
- spte |= shadow_accessed_mask;
+ spte |= spte_shadow_accessed_mask(spte);
if (pte_access & ACC_EXEC_MASK)
spte |= shadow_x_mask;
@@ -2735,7 +2779,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
if (pte_access & ACC_WRITE_MASK) {
kvm_vcpu_mark_page_dirty(vcpu, gfn);
- spte |= shadow_dirty_mask;
+ spte |= spte_shadow_dirty_mask(spte);
}
if (speculative)
@@ -2877,16 +2921,16 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
{
struct kvm_mmu_page *sp;
+ sp = page_header(__pa(sptep));
+
/*
- * Since it's no accessed bit on EPT, it's no way to
- * distinguish between actually accessed translations
- * and prefetched, so disable pte prefetch if EPT is
- * enabled.
+ * Without accessed bits, there's no way to distinguish between
+ * actually accessed translations and prefetched, so disable pte
+ * prefetch if accessed bits aren't available.
*/
- if (!shadow_accessed_mask)
+ if (sp_ad_disabled(sp))
return;
- sp = page_header(__pa(sptep));
if (sp->role.level > PT_PAGE_TABLE_LEVEL)
return;
@@ -4290,6 +4334,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
context->base_role.word = 0;
context->base_role.smm = is_smm(vcpu);
+ context->base_role.ad_disabled = (shadow_accessed_mask == 0);
context->page_fault = tdp_page_fault;
context->sync_page = nonpaging_sync_page;
context->invlpg = nonpaging_invlpg;
@@ -4377,6 +4422,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
context->root_level = context->shadow_root_level;
context->root_hpa = INVALID_PAGE;
context->direct_map = false;
+ context->base_role.ad_disabled = !accessed_dirty;
update_permission_bitmask(vcpu, context, true);
update_pkru_bitmask(vcpu, context, true);
@@ -4636,6 +4682,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
mask.smep_andnot_wp = 1;
mask.smap_andnot_wp = 1;
mask.smm = 1;
+ mask.ad_disabled = 1;
/*
* If we don't have indirect shadow pages, it means no page is
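
The MMU now answers the accessed/dirty question per SPTE: shadow pages record role.ad_disabled, SPTEs under them carry shadow_acc_track_value, and MMIO SPTEs are matched against a mask/value pair rather than against the mask alone. A simplified model of those predicates; the bit positions below are placeholders, not the real mask values:

#include <assert.h>
#include <stdint.h>

#define SPTE_SPECIAL		(1ull << 62)		/* placeholder bit */
#define MMIO_MASK		(0x7ull | SPTE_SPECIAL)
#define MMIO_VALUE		(0x6ull | SPTE_SPECIAL)	/* e.g. a WX misconfig pattern */
#define ACC_TRACK_VALUE		SPTE_SPECIAL

static int is_mmio_spte(uint64_t spte)
{
	return (spte & MMIO_MASK) == MMIO_VALUE;	/* mask/value pair, not mask only */
}

static int spte_ad_enabled(uint64_t spte)
{
	return !(spte & ACC_TRACK_VALUE);		/* decided per SPTE, no global flag */
}

int main(void)
{
	assert(is_mmio_spte(MMIO_VALUE));
	assert(!is_mmio_spte(MMIO_MASK));		/* all mask bits set is not MMIO */
	assert(spte_ad_enabled(0));
	assert(!spte_ad_enabled(ACC_TRACK_VALUE));	/* A/D tracking disabled for this SPTE */
	return 0;
}
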
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 330bf3a811fb..a276834950c1 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -51,7 +51,7 @@ static inline u64 rsvd_bits(int s, int e)
return ((1ULL << (e - s + 1)) - 1) << s;
}
-void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
+void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value);
void
reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index 5a24b846a1cb..8b97a6cba8d1 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -30,8 +30,9 @@
\
role.word = __entry->role; \
\
- trace_seq_printf(p, "sp gen %lx gfn %llx %u%s q%u%s %s%s" \
- " %snxe root %u %s%c", __entry->mmu_valid_gen, \
+ trace_seq_printf(p, "sp gen %lx gfn %llx l%u%s q%u%s %s%s" \
+ " %snxe %sad root %u %s%c", \
+ __entry->mmu_valid_gen, \
__entry->gfn, role.level, \
role.cr4_pae ? " pae" : "", \
role.quadrant, \
@@ -39,6 +40,7 @@
access_str[role.access], \
role.invalid ? " invalid" : "", \
role.nxe ? "" : "!", \
+ role.ad_disabled ? "!" : "", \
__entry->root_count, \
__entry->unsync ? "unsync" : "sync", 0); \
saved_ptr; \
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 33460fcdeef9..905ea6052517 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -190,6 +190,7 @@ struct vcpu_svm {
struct nested_state nested;
bool nmi_singlestep;
+ u64 nmi_singlestep_guest_rflags;
unsigned int3_injected;
unsigned long int3_rip;
@@ -964,6 +965,18 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
}
+static void disable_nmi_singlestep(struct vcpu_svm *svm)
+{
+ svm->nmi_singlestep = false;
+ if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
+ /* Clear our flags if they were not set by the guest */
+ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
+ svm->vmcb->save.rflags &= ~X86_EFLAGS_TF;
+ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
+ svm->vmcb->save.rflags &= ~X86_EFLAGS_RF;
+ }
+}
+
/* Note:
* This hash table is used to map VM_ID to a struct kvm_arch,
* when handling AMD IOMMU GALOG notification to schedule in
@@ -1713,11 +1726,24 @@ static void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
{
- return to_svm(vcpu)->vmcb->save.rflags;
+ struct vcpu_svm *svm = to_svm(vcpu);
+ unsigned long rflags = svm->vmcb->save.rflags;
+
+ if (svm->nmi_singlestep) {
+ /* Hide our flags if they were not set by the guest */
+ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
+ rflags &= ~X86_EFLAGS_TF;
+ if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
+ rflags &= ~X86_EFLAGS_RF;
+ }
+ return rflags;
}
static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
+ if (to_svm(vcpu)->nmi_singlestep)
+ rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
+
/*
* Any change of EFLAGS.VM is accompanied by a reload of SS
* (caused by either a task switch or an inter-privilege IRET),
@@ -2112,10 +2138,7 @@ static int db_interception(struct vcpu_svm *svm)
}
if (svm->nmi_singlestep) {
- svm->nmi_singlestep = false;
- if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
- svm->vmcb->save.rflags &=
- ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+ disable_nmi_singlestep(svm);
}
if (svm->vcpu.guest_debug &
@@ -2370,8 +2393,8 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
static int nested_svm_check_permissions(struct vcpu_svm *svm)
{
- if (!(svm->vcpu.arch.efer & EFER_SVME)
- || !is_paging(&svm->vcpu)) {
+ if (!(svm->vcpu.arch.efer & EFER_SVME) ||
+ !is_paging(&svm->vcpu)) {
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
}
@@ -2381,7 +2404,7 @@ static int nested_svm_check_permissions(struct vcpu_svm *svm)
return 1;
}
- return 0;
+ return 0;
}
static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
@@ -2534,6 +2557,31 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}
+/* DB exceptions for our internal use must not cause vmexit */
+static int nested_svm_intercept_db(struct vcpu_svm *svm)
+{
+ unsigned long dr6;
+
+ /* if we're not singlestepping, it's not ours */
+ if (!svm->nmi_singlestep)
+ return NESTED_EXIT_DONE;
+
+ /* if it's not a singlestep exception, it's not ours */
+ if (kvm_get_dr(&svm->vcpu, 6, &dr6))
+ return NESTED_EXIT_DONE;
+ if (!(dr6 & DR6_BS))
+ return NESTED_EXIT_DONE;
+
+ /* if the guest is singlestepping, it should get the vmexit */
+ if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) {
+ disable_nmi_singlestep(svm);
+ return NESTED_EXIT_DONE;
+ }
+
+ /* it's ours, the nested hypervisor must not see this one */
+ return NESTED_EXIT_HOST;
+}
+
static int nested_svm_exit_special(struct vcpu_svm *svm)
{
u32 exit_code = svm->vmcb->control.exit_code;
@@ -2589,8 +2637,12 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
}
case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
- if (svm->nested.intercept_exceptions & excp_bits)
- vmexit = NESTED_EXIT_DONE;
+ if (svm->nested.intercept_exceptions & excp_bits) {
+ if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
+ vmexit = nested_svm_intercept_db(svm);
+ else
+ vmexit = NESTED_EXIT_DONE;
+ }
/* async page fault always cause vmexit */
else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
svm->apf_reason != 0)
@@ -4627,10 +4679,17 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
== HF_NMI_MASK)
return; /* IRET will cause a vm exit */
+ if ((svm->vcpu.arch.hflags & HF_GIF_MASK) == 0)
+ return; /* STGI will cause a vm exit */
+
+ if (svm->nested.exit_required)
+ return; /* we're not going to run the guest yet */
+
/*
	 * Something prevents NMI from being injected. Single step over the
	 * possible problem (IRET or exception injection or interrupt shadow)
*/
+ svm->nmi_singlestep_guest_rflags = svm_get_rflags(vcpu);
svm->nmi_singlestep = true;
svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
}
@@ -4771,6 +4830,22 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
if (unlikely(svm->nested.exit_required))
return;
+ /*
+ * Disable singlestep if we're injecting an interrupt/exception.
+ * We don't want our modified rflags to be pushed on the stack where
+ * we might not be able to easily reset them if we disabled NMI
+ * singlestep later.
+ */
+ if (svm->nmi_singlestep && svm->vmcb->control.event_inj) {
+ /*
+ * Event injection happens before external interrupts cause a
+ * vmexit and interrupts are disabled here, so smp_send_reschedule
+ * is enough to force an immediate vmexit.
+ */
+ disable_nmi_singlestep(svm);
+ smp_send_reschedule(vcpu->cpu);
+ }
+
pre_svm_run(svm);
sync_lapic_to_cr8(vcpu);
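
The SVM changes snapshot the guest's RFLAGS before setting TF|RF for NMI single-stepping, strip those bits in svm_get_rflags() and re-add them in svm_set_rflags(), so the guest never observes flags KVM set for its own bookkeeping. A small model of that hide/restore logic:

#include <assert.h>
#include <stdint.h>

#define EFLAGS_TF	(1u << 8)
#define EFLAGS_RF	(1u << 16)

struct vcpu {
	uint32_t rflags;		/* what the VMCB save area holds */
	int nmi_singlestep;
	uint32_t guest_rflags_snap;	/* guest value before TF|RF were forced */
};

static uint32_t get_rflags(const struct vcpu *v)
{
	uint32_t r = v->rflags;

	if (v->nmi_singlestep) {	/* hide bits the guest did not set itself */
		if (!(v->guest_rflags_snap & EFLAGS_TF))
			r &= ~EFLAGS_TF;
		if (!(v->guest_rflags_snap & EFLAGS_RF))
			r &= ~EFLAGS_RF;
	}
	return r;
}

static void set_rflags(struct vcpu *v, uint32_t r)
{
	if (v->nmi_singlestep)		/* keep the stepping bits alive */
		r |= EFLAGS_TF | EFLAGS_RF;
	v->rflags = r;
}

int main(void)
{
	struct vcpu v = { 0, 0, 0 };

	v.guest_rflags_snap = get_rflags(&v);	/* snapshot: guest set neither bit */
	v.nmi_singlestep = 1;
	v.rflags |= EFLAGS_TF | EFLAGS_RF;	/* as in enable_nmi_window() */

	assert(get_rflags(&v) == 0);		/* guest still sees clean flags */
	set_rflags(&v, 0);
	assert(v.rflags == (EFLAGS_TF | EFLAGS_RF));
	return 0;
}
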
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6dcc4873e435..f76efad248ab 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -913,8 +913,9 @@ static void nested_release_page_clean(struct page *page)
kvm_release_page_clean(page);
}
+static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu);
static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
-static u64 construct_eptp(unsigned long root_hpa);
+static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
static bool vmx_xsaves_supported(void);
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
static void vmx_set_segment(struct kvm_vcpu *vcpu,
@@ -2772,7 +2773,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
if (enable_ept_ad_bits) {
vmx->nested.nested_vmx_secondary_ctls_high |=
SECONDARY_EXEC_ENABLE_PML;
- vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
+ vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
}
} else
vmx->nested.nested_vmx_ept_caps = 0;
@@ -3198,7 +3199,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
break;
case MSR_IA32_BNDCFGS:
- if (!kvm_mpx_supported())
+ if (!kvm_mpx_supported() ||
+ (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu)))
return 1;
msr_info->data = vmcs_read64(GUEST_BNDCFGS);
break;
@@ -3280,7 +3282,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
case MSR_IA32_BNDCFGS:
- if (!kvm_mpx_supported())
+ if (!kvm_mpx_supported() ||
+ (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu)))
+ return 1;
+ if (is_noncanonical_address(data & PAGE_MASK) ||
+ (data & MSR_IA32_BNDCFGS_RSVD))
return 1;
vmcs_write64(GUEST_BNDCFGS, data);
break;
@@ -4013,7 +4019,7 @@ static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid)
if (enable_ept) {
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
return;
- ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
+ ept_sync_context(construct_eptp(vcpu, vcpu->arch.mmu.root_hpa));
} else {
vpid_sync_context(vpid);
}
@@ -4188,14 +4194,15 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
vmx->emulation_required = emulation_required(vcpu);
}
-static u64 construct_eptp(unsigned long root_hpa)
+static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
{
u64 eptp;
/* TODO write the value reading from MSR */
eptp = VMX_EPT_DEFAULT_MT |
VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
- if (enable_ept_ad_bits)
+ if (enable_ept_ad_bits &&
+ (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu)))
eptp |= VMX_EPT_AD_ENABLE_BIT;
eptp |= (root_hpa & PAGE_MASK);
@@ -4209,7 +4216,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
guest_cr3 = cr3;
if (enable_ept) {
- eptp = construct_eptp(cr3);
+ eptp = construct_eptp(vcpu, cr3);
vmcs_write64(EPT_POINTER, eptp);
if (is_paging(vcpu) || is_guest_mode(vcpu))
guest_cr3 = kvm_read_cr3(vcpu);
@@ -5170,7 +5177,8 @@ static void ept_set_mmio_spte_mask(void)
* EPT Misconfigurations can be generated if the value of bits 2:0
* of an EPT paging-structure entry is 110b (write/execute).
*/
- kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE);
+ kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK,
+ VMX_EPT_MISCONFIG_WX_VALUE);
}
#define VMX_XSS_EXIT_BITMAP 0
@@ -6220,17 +6228,6 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
- if (is_guest_mode(vcpu)
- && !(exit_qualification & EPT_VIOLATION_GVA_TRANSLATED)) {
- /*
- * Fix up exit_qualification according to whether guest
- * page table accesses are reads or writes.
- */
- u64 eptp = nested_ept_get_cr3(vcpu);
- if (!(eptp & VMX_EPT_AD_ENABLE_BIT))
- exit_qualification &= ~EPT_VIOLATION_ACC_WRITE;
- }
-
/*
* EPT violation happened while executing iret from NMI,
* "blocked by NMI" bit has to be set before next VM entry.
@@ -6453,7 +6450,7 @@ void vmx_enable_tdp(void)
enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull,
0ull, VMX_EPT_EXECUTABLE_MASK,
cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK,
- enable_ept_ad_bits ? 0ull : VMX_EPT_RWX_MASK);
+ VMX_EPT_RWX_MASK);
ept_set_mmio_spte_mask();
kvm_enable_tdp();
@@ -6557,7 +6554,6 @@ static __init int hardware_setup(void)
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
- vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
vmx_msr_bitmap_legacy, PAGE_SIZE);
@@ -7661,7 +7657,10 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
unsigned long type, types;
gva_t gva;
struct x86_exception e;
- int vpid;
+ struct {
+ u64 vpid;
+ u64 gla;
+ } operand;
if (!(vmx->nested.nested_vmx_secondary_ctls_high &
SECONDARY_EXEC_ENABLE_VPID) ||
@@ -7691,17 +7690,28 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
vmx_instruction_info, false, &gva))
return 1;
- if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vpid,
- sizeof(u32), &e)) {
+ if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
+ sizeof(operand), &e)) {
kvm_inject_page_fault(vcpu, &e);
return 1;
}
+ if (operand.vpid >> 16) {
+ nested_vmx_failValid(vcpu,
+ VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
switch (type) {
case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
+ if (is_noncanonical_address(operand.gla)) {
+ nested_vmx_failValid(vcpu,
+ VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+ /* fall through */
case VMX_VPID_EXTENT_SINGLE_CONTEXT:
case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
- if (!vpid) {
+ if (!operand.vpid) {
nested_vmx_failValid(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
return kvm_skip_emulated_instruction(vcpu);
@@ -9394,6 +9404,11 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
vmcs12->guest_physical_address = fault->address;
}
+static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu)
+{
+ return nested_ept_get_cr3(vcpu) & VMX_EPT_AD_ENABLE_BIT;
+}
+
/* Callbacks for nested_ept_init_mmu_context: */
static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
@@ -9404,18 +9419,18 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
{
- u64 eptp;
+ bool wants_ad;
WARN_ON(mmu_is_nested(vcpu));
- eptp = nested_ept_get_cr3(vcpu);
- if ((eptp & VMX_EPT_AD_ENABLE_BIT) && !enable_ept_ad_bits)
+ wants_ad = nested_ept_ad_enabled(vcpu);
+ if (wants_ad && !enable_ept_ad_bits)
return 1;
kvm_mmu_unload(vcpu);
kvm_init_shadow_ept_mmu(vcpu,
to_vmx(vcpu)->nested.nested_vmx_ept_caps &
VMX_EPT_EXECUTE_ONLY_BIT,
- eptp & VMX_EPT_AD_ENABLE_BIT);
+ wants_ad);
vcpu->arch.mmu.set_cr3 = vmx_set_cr3;
vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3;
vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
@@ -10728,8 +10743,7 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
}
- if (nested_cpu_has_ept(vmcs12))
- vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
+ vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
if (nested_cpu_has_vid(vmcs12))
vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);
@@ -10754,8 +10768,6 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
if (kvm_mpx_supported())
vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
- if (nested_cpu_has_xsaves(vmcs12))
- vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP);
}
/*
@@ -11152,7 +11164,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
vmx->hv_deadline_tsc = tscl + delta_tsc;
vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
PIN_BASED_VMX_PREEMPTION_TIMER);
- return 0;
+
+ return delta_tsc == 0;
}
static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
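
handle_invvpid() now reads the full 128-bit INVVPID descriptor (a 64-bit VPID field of which only bits 15:0 may be set, plus a 64-bit linear address) instead of a bare 32-bit value, and validates both the reserved VPID bits and, for the individual-address type, that the address is canonical. A sketch of those validity checks, assuming 48-bit canonical form:

#include <assert.h>
#include <stdint.h>

struct invvpid_desc {
	uint64_t vpid;	/* only bits 15:0 may be non-zero */
	uint64_t gla;	/* guest linear address */
};

static int is_canonical_48(uint64_t a)
{
	uint64_t top = a >> 47;		/* bits 63:47 must agree */
	return top == 0 || top == 0x1ffff;
}

static int invvpid_desc_valid(const struct invvpid_desc *d, int individual_addr)
{
	if (d->vpid >> 16)
		return 0;		/* reserved VPID bits set */
	if (individual_addr && !is_canonical_48(d->gla))
		return 0;		/* non-canonical target address */
	return 1;
}

int main(void)
{
	struct invvpid_desc ok  = { 0x0001, 0x00007f0000000000ull };
	struct invvpid_desc bad = { 0x10000, 0 };

	assert(invvpid_desc_valid(&ok, 1));
	assert(!invvpid_desc_valid(&bad, 0));
	return 0;
}
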
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0e846f0cb83b..6c7266f7766d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2841,10 +2841,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_vcpu_write_tsc_offset(vcpu, offset);
vcpu->arch.tsc_catchup = 1;
}
- if (kvm_lapic_hv_timer_in_use(vcpu) &&
- kvm_x86_ops->set_hv_timer(vcpu,
- kvm_get_lapic_target_expiration_tsc(vcpu)))
- kvm_lapic_switch_to_sw_timer(vcpu);
+
+ if (kvm_lapic_hv_timer_in_use(vcpu))
+ kvm_lapic_restart_hv_timer(vcpu);
+
/*
* On a host with synchronized TSC, there is no need to update
* kvmclock on vcpu->cpu migration
@@ -6011,7 +6011,7 @@ static void kvm_set_mmio_spte_mask(void)
mask &= ~1ull;
#endif
- kvm_mmu_set_mmio_spte_mask(mask);
+ kvm_mmu_set_mmio_spte_mask(mask, mask);
}
#ifdef CONFIG_X86_64
@@ -6733,7 +6733,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
bool req_immediate_exit = false;
- if (vcpu->requests) {
+ if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
kvm_mmu_unload(vcpu);
if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
@@ -6897,7 +6897,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_x86_ops->sync_pir_to_irr(vcpu);
}
- if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
+ if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu)
|| need_resched() || signal_pending(current)) {
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
index f2a7faf4706e..b33093f84528 100644
--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S
@@ -19,9 +19,6 @@
*/
#define SKBDATA %r10
#define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */
-#define MAX_BPF_STACK (512 /* from filter.h */ + \
- 32 /* space for rbx,r13,r14,r15 */ + \
- 8 /* space for skb_copy_bits */)
#define FUNC(name) \
.globl name; \
@@ -66,7 +63,7 @@ FUNC(sk_load_byte_positive_offset)
/* rsi contains offset and can be scratched */
#define bpf_slow_path_common(LEN) \
- lea -MAX_BPF_STACK + 32(%rbp), %rdx;\
+ lea 32(%rbp), %rdx;\
FRAME_BEGIN; \
mov %rbx, %rdi; /* arg1 == skb */ \
push %r9; \
@@ -83,14 +80,14 @@ FUNC(sk_load_byte_positive_offset)
bpf_slow_path_word:
bpf_slow_path_common(4)
js bpf_error
- mov - MAX_BPF_STACK + 32(%rbp),%eax
+ mov 32(%rbp),%eax
bswap %eax
ret
bpf_slow_path_half:
bpf_slow_path_common(2)
js bpf_error
- mov - MAX_BPF_STACK + 32(%rbp),%ax
+ mov 32(%rbp),%ax
rol $8,%ax
movzwl %ax,%eax
ret
@@ -98,7 +95,7 @@ bpf_slow_path_half:
bpf_slow_path_byte:
bpf_slow_path_common(1)
js bpf_error
- movzbl - MAX_BPF_STACK + 32(%rbp),%eax
+ movzbl 32(%rbp),%eax
ret
#define sk_negative_common(SIZE) \
@@ -148,9 +145,10 @@ FUNC(sk_load_byte_negative_offset)
bpf_error:
# force a return 0 from jit handler
xor %eax,%eax
- mov - MAX_BPF_STACK(%rbp),%rbx
- mov - MAX_BPF_STACK + 8(%rbp),%r13
- mov - MAX_BPF_STACK + 16(%rbp),%r14
- mov - MAX_BPF_STACK + 24(%rbp),%r15
+ mov (%rbp),%rbx
+ mov 8(%rbp),%r13
+ mov 16(%rbp),%r14
+ mov 24(%rbp),%r15
+ add $40, %rbp
leaveq
ret
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index f58939393eef..e1324f280e06 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -197,17 +197,16 @@ struct jit_context {
#define BPF_MAX_INSN_SIZE 128
#define BPF_INSN_SAFETY 64
-#define STACKSIZE \
- (MAX_BPF_STACK + \
- 32 /* space for rbx, r13, r14, r15 */ + \
+#define AUX_STACK_SPACE \
+ (32 /* space for rbx, r13, r14, r15 */ + \
8 /* space for skb_copy_bits() buffer */)
-#define PROLOGUE_SIZE 48
+#define PROLOGUE_SIZE 37
/* emit x64 prologue code for BPF program and check it's size.
* bpf_tail_call helper will skip it while jumping into another program
*/
-static void emit_prologue(u8 **pprog)
+static void emit_prologue(u8 **pprog, u32 stack_depth)
{
u8 *prog = *pprog;
int cnt = 0;
@@ -215,13 +214,17 @@ static void emit_prologue(u8 **pprog)
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */
- /* sub rsp, STACKSIZE */
- EMIT3_off32(0x48, 0x81, 0xEC, STACKSIZE);
+ /* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */
+ EMIT3_off32(0x48, 0x81, 0xEC,
+ round_up(stack_depth, 8) + AUX_STACK_SPACE);
+
+ /* sub rbp, AUX_STACK_SPACE */
+ EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE);
/* all classic BPF filters use R6(rbx) save it */
- /* mov qword ptr [rbp-X],rbx */
- EMIT3_off32(0x48, 0x89, 0x9D, -STACKSIZE);
+ /* mov qword ptr [rbp+0],rbx */
+ EMIT4(0x48, 0x89, 0x5D, 0);
/* bpf_convert_filter() maps classic BPF register X to R7 and uses R8
* as temporary, so all tcpdump filters need to spill/fill R7(r13) and
@@ -231,12 +234,12 @@ static void emit_prologue(u8 **pprog)
* than synthetic ones. Therefore not worth adding complexity.
*/
- /* mov qword ptr [rbp-X],r13 */
- EMIT3_off32(0x4C, 0x89, 0xAD, -STACKSIZE + 8);
- /* mov qword ptr [rbp-X],r14 */
- EMIT3_off32(0x4C, 0x89, 0xB5, -STACKSIZE + 16);
- /* mov qword ptr [rbp-X],r15 */
- EMIT3_off32(0x4C, 0x89, 0xBD, -STACKSIZE + 24);
+ /* mov qword ptr [rbp+8],r13 */
+ EMIT4(0x4C, 0x89, 0x6D, 8);
+ /* mov qword ptr [rbp+16],r14 */
+ EMIT4(0x4C, 0x89, 0x75, 16);
+ /* mov qword ptr [rbp+24],r15 */
+ EMIT4(0x4C, 0x89, 0x7D, 24);
/* Clear the tail call counter (tail_call_cnt): for eBPF tail calls
* we need to reset the counter to 0. It's done in two instructions,
@@ -246,8 +249,8 @@ static void emit_prologue(u8 **pprog)
/* xor eax, eax */
EMIT2(0x31, 0xc0);
- /* mov qword ptr [rbp-X], rax */
- EMIT3_off32(0x48, 0x89, 0x85, -STACKSIZE + 32);
+ /* mov qword ptr [rbp+32], rax */
+ EMIT4(0x48, 0x89, 0x45, 32);
BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
*pprog = prog;
@@ -289,13 +292,13 @@ static void emit_bpf_tail_call(u8 **pprog)
/* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
* goto out;
*/
- EMIT2_off32(0x8B, 0x85, -STACKSIZE + 36); /* mov eax, dword ptr [rbp - 516] */
+ EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
#define OFFSET2 36
EMIT2(X86_JA, OFFSET2); /* ja out */
label2 = cnt;
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
- EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */
+ EMIT2_off32(0x89, 0x85, 36); /* mov dword ptr [rbp + 36], eax */
/* prog = array->ptrs[index]; */
EMIT4_off32(0x48, 0x8D, 0x84, 0xD6, /* lea rax, [rsi + rdx * 8 + offsetof(...)] */
@@ -361,7 +364,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int proglen = 0;
u8 *prog = temp;
- emit_prologue(&prog);
+ emit_prologue(&prog, bpf_prog->aux->stack_depth);
if (seen_ld_abs)
emit_load_skb_data_hlen(&prog);
@@ -877,7 +880,7 @@ xadd: if (is_imm8(insn->off))
}
break;
- case BPF_JMP | BPF_CALL | BPF_X:
+ case BPF_JMP | BPF_TAIL_CALL:
emit_bpf_tail_call(&prog);
break;
@@ -1036,15 +1039,17 @@ common_load:
seen_exit = true;
/* update cleanup_addr */
ctx->cleanup_addr = proglen;
- /* mov rbx, qword ptr [rbp-X] */
- EMIT3_off32(0x48, 0x8B, 0x9D, -STACKSIZE);
- /* mov r13, qword ptr [rbp-X] */
- EMIT3_off32(0x4C, 0x8B, 0xAD, -STACKSIZE + 8);
- /* mov r14, qword ptr [rbp-X] */
- EMIT3_off32(0x4C, 0x8B, 0xB5, -STACKSIZE + 16);
- /* mov r15, qword ptr [rbp-X] */
- EMIT3_off32(0x4C, 0x8B, 0xBD, -STACKSIZE + 24);
-
+ /* mov rbx, qword ptr [rbp+0] */
+ EMIT4(0x48, 0x8B, 0x5D, 0);
+ /* mov r13, qword ptr [rbp+8] */
+ EMIT4(0x4C, 0x8B, 0x6D, 8);
+ /* mov r14, qword ptr [rbp+16] */
+ EMIT4(0x4C, 0x8B, 0x75, 16);
+ /* mov r15, qword ptr [rbp+24] */
+ EMIT4(0x4C, 0x8B, 0x7D, 24);
+
+ /* add rbp, AUX_STACK_SPACE */
+ EMIT4(0x48, 0x83, 0xC5, AUX_STACK_SPACE);
EMIT1(0xC9); /* leave */
EMIT1(0xC3); /* ret */
break;
@@ -1162,6 +1167,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
bpf_jit_binary_lock_ro(header);
prog->bpf_func = (void *)image;
prog->jited = 1;
+ prog->jited_len = proglen;
} else {
prog = orig_prog;
}
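
The JIT prologue now reserves only the program's verifier-reported stack_depth, rounded up to 8 bytes, plus a fixed 40-byte spill area, and drops RBP below that area so the callee-saved registers and the tail-call counter sit at small positive offsets that fit one-byte displacements. The frame arithmetic, with AUX_STACK_SPACE = 40 as in the patch:

#include <stdio.h>

#define AUX_STACK_SPACE	40u	/* rbx, r13, r14, r15 spills + skb_copy_bits buffer */

static unsigned round_up8(unsigned x)
{
	return (x + 7u) & ~7u;
}

/* Bytes subtracted from RSP by the new prologue for a given stack_depth. */
static unsigned frame_size(unsigned stack_depth)
{
	return round_up8(stack_depth) + AUX_STACK_SPACE;
}

int main(void)
{
	/* The old JIT always reserved 512 + 40 = 552 bytes. */
	printf("stack_depth=0:   %u bytes\n", frame_size(0));	/* 40 */
	printf("stack_depth=12:  %u bytes\n", frame_size(12));	/* 56 */
	printf("stack_depth=512: %u bytes\n", frame_size(512));	/* 552 */
	return 0;
}
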
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index a5ffcbb20cc0..0e7ef69e8531 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -106,15 +106,83 @@ int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int),
return rc >= 0 ? 0 : rc;
}
-static void clamp_max_cpus(void)
+static int xen_vcpu_setup_restore(int cpu)
{
-#ifdef CONFIG_SMP
- if (setup_max_cpus > MAX_VIRT_CPUS)
- setup_max_cpus = MAX_VIRT_CPUS;
-#endif
+ int rc = 0;
+
+ /* Any per_cpu(xen_vcpu) is stale, so reset it */
+ xen_vcpu_info_reset(cpu);
+
+ /*
+ * For PVH and PVHVM, setup online VCPUs only. The rest will
+ * be handled by hotplug.
+ */
+ if (xen_pv_domain() ||
+ (xen_hvm_domain() && cpu_online(cpu))) {
+ rc = xen_vcpu_setup(cpu);
+ }
+
+ return rc;
+}
+
+/*
+ * On restore, set the vcpu placement up again.
+ * If it fails, then we're in a bad state, since
+ * we can't back out from using it...
+ */
+void xen_vcpu_restore(void)
+{
+ int cpu, rc;
+
+ for_each_possible_cpu(cpu) {
+ bool other_cpu = (cpu != smp_processor_id());
+ bool is_up;
+
+ if (xen_vcpu_nr(cpu) == XEN_VCPU_ID_INVALID)
+ continue;
+
+ /* Only Xen 4.5 and higher support this. */
+ is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up,
+ xen_vcpu_nr(cpu), NULL) > 0;
+
+ if (other_cpu && is_up &&
+ HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL))
+ BUG();
+
+ if (xen_pv_domain() || xen_feature(XENFEAT_hvm_safe_pvclock))
+ xen_setup_runstate_info(cpu);
+
+ rc = xen_vcpu_setup_restore(cpu);
+ if (rc)
+ pr_emerg_once("vcpu restore failed for cpu=%d err=%d. "
+ "System will hang.\n", cpu, rc);
+ /*
+ * In case xen_vcpu_setup_restore() fails, do not bring up the
+ * VCPU. This helps us avoid the resulting OOPS when the VCPU
+ * accesses pvclock_vcpu_time via xen_vcpu (which is NULL.)
+ * Note that this does not improve the situation much -- now the
+ * VM hangs instead of OOPSing -- with the VCPUs that did not
+ * fail, spinning in stop_machine(), waiting for the failed
+ * VCPUs to come up.
+ */
+ if (other_cpu && is_up && (rc == 0) &&
+ HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL))
+ BUG();
+ }
}
-void xen_vcpu_setup(int cpu)
+void xen_vcpu_info_reset(int cpu)
+{
+ if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS) {
+ per_cpu(xen_vcpu, cpu) =
+ &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
+ } else {
+ /* Set to NULL so that if somebody accesses it we get an OOPS */
+ per_cpu(xen_vcpu, cpu) = NULL;
+ }
+}
+
+int xen_vcpu_setup(int cpu)
{
struct vcpu_register_vcpu_info info;
int err;
@@ -123,11 +191,11 @@ void xen_vcpu_setup(int cpu)
BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
/*
- * This path is called twice on PVHVM - first during bootup via
- * smp_init -> xen_hvm_cpu_notify, and then if the VCPU is being
- * hotplugged: cpu_up -> xen_hvm_cpu_notify.
- * As we can only do the VCPUOP_register_vcpu_info once lets
- * not over-write its result.
+ * This path is called on PVHVM at bootup (xen_hvm_smp_prepare_boot_cpu)
+ * and at restore (xen_vcpu_restore). Also called for hotplugged
+ * VCPUs (cpu_init -> xen_hvm_cpu_prepare_hvm).
+ * However, the hypercall can only be done once (see below) so if a VCPU
+ * is offlined and comes back online then let's not redo the hypercall.
*
* For PV it is called during restore (xen_vcpu_restore) and bootup
* (xen_setup_vcpu_info_placement). The hotplug mechanism does not
@@ -135,42 +203,44 @@ void xen_vcpu_setup(int cpu)
*/
if (xen_hvm_domain()) {
if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu))
- return;
+ return 0;
}
- if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS)
- per_cpu(xen_vcpu, cpu) =
- &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
- if (!xen_have_vcpu_info_placement) {
- if (cpu >= MAX_VIRT_CPUS)
- clamp_max_cpus();
- return;
+ if (xen_have_vcpu_info_placement) {
+ vcpup = &per_cpu(xen_vcpu_info, cpu);
+ info.mfn = arbitrary_virt_to_mfn(vcpup);
+ info.offset = offset_in_page(vcpup);
+
+ /*
+ * Check to see if the hypervisor will put the vcpu_info
+ * structure where we want it, which allows direct access via
+ * a percpu-variable.
+ * N.B. This hypercall can _only_ be called once per CPU.
+ * Subsequent calls will error out with -EINVAL. This is due to
+	 * the fact that the hypervisor has no unregister variant and this
+	 * hypercall does not allow over-writing info.mfn and
+ * info.offset.
+ */
+ err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info,
+ xen_vcpu_nr(cpu), &info);
+
+ if (err) {
+ pr_warn_once("register_vcpu_info failed: cpu=%d err=%d\n",
+ cpu, err);
+ xen_have_vcpu_info_placement = 0;
+ } else {
+ /*
+ * This cpu is using the registered vcpu info, even if
+ * later ones fail to.
+ */
+ per_cpu(xen_vcpu, cpu) = vcpup;
+ }
}
- vcpup = &per_cpu(xen_vcpu_info, cpu);
- info.mfn = arbitrary_virt_to_mfn(vcpup);
- info.offset = offset_in_page(vcpup);
-
- /* Check to see if the hypervisor will put the vcpu_info
- structure where we want it, which allows direct access via
- a percpu-variable.
- N.B. This hypercall can _only_ be called once per CPU. Subsequent
- calls will error out with -EINVAL. This is due to the fact that
- hypervisor has no unregister variant and this hypercall does not
- allow to over-write info.mfn and info.offset.
- */
- err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu),
- &info);
+ if (!xen_have_vcpu_info_placement)
+ xen_vcpu_info_reset(cpu);
- if (err) {
- printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
- xen_have_vcpu_info_placement = 0;
- clamp_max_cpus();
- } else {
- /* This cpu is using the registered vcpu info, even if
- later ones fail to. */
- per_cpu(xen_vcpu, cpu) = vcpup;
- }
+ return ((per_cpu(xen_vcpu, cpu) == NULL) ? -ENODEV : 0);
}
void xen_reboot(int reason)
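
xen_vcpu_setup() now reports failure instead of clamping the CPU count: the vcpu_info registration hypercall may only be issued once per vCPU, and when placement is off or registration fails the code falls back to the shared_info slot, which exists only for vCPU ids below MAX_VIRT_CPUS. The decision model below is a rough simplification with stub names; the real hypercall and fallback details differ:

#include <stdio.h>

#define MAX_VIRT_CPUS	32

static int have_placement = 1;

/* Stands in for VCPUOP_register_vcpu_info; return -22 (-EINVAL) here to
 * simulate a hypervisor that refuses the registration. */
static int register_vcpu_info(int cpu)
{
	(void)cpu;
	return -22;
}

/* Returns 0 if the cpu ends up with a usable vcpu_info pointer. */
static int vcpu_setup(int cpu, int already_registered)
{
	if (already_registered)
		return 0;			/* never redo the one-shot hypercall */

	if (have_placement && register_vcpu_info(cpu) == 0)
		return 0;			/* per-cpu placed copy in use */

	have_placement = 0;			/* registration unavailable from now on */
	return cpu < MAX_VIRT_CPUS ? 0 : -1;	/* shared_info slot or -ENODEV */
}

int main(void)
{
	printf("cpu 0:  %d\n", vcpu_setup(0, 0));
	printf("cpu 40: %d\n", vcpu_setup(40, 0));
	return 0;
}
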
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index a6d014f47e52..87d791356ea9 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -1,5 +1,6 @@
#include <linux/cpu.h>
#include <linux/kexec.h>
+#include <linux/memblock.h>
#include <xen/features.h>
#include <xen/events.h>
@@ -10,9 +11,11 @@
#include <asm/reboot.h>
#include <asm/setup.h>
#include <asm/hypervisor.h>
+#include <asm/e820/api.h>
#include <asm/xen/cpuid.h>
#include <asm/xen/hypervisor.h>
+#include <asm/xen/page.h>
#include "xen-ops.h"
#include "mmu.h"
@@ -20,37 +23,34 @@
void __ref xen_hvm_init_shared_info(void)
{
- int cpu;
struct xen_add_to_physmap xatp;
- static struct shared_info *shared_info_page;
+ u64 pa;
+
+ if (HYPERVISOR_shared_info == &xen_dummy_shared_info) {
+ /*
+ * Search for a free page starting at 4kB physical address.
+ * Low memory is preferred to avoid an EPT large page split up
+ * by the mapping.
+ * Starting below X86_RESERVE_LOW (usually 64kB) is fine as
+ * the BIOS used for HVM guests is well behaved and won't
+ * clobber memory other than the first 4kB.
+ */
+ for (pa = PAGE_SIZE;
+ !e820__mapped_all(pa, pa + PAGE_SIZE, E820_TYPE_RAM) ||
+ memblock_is_reserved(pa);
+ pa += PAGE_SIZE)
+ ;
+
+ memblock_reserve(pa, PAGE_SIZE);
+ HYPERVISOR_shared_info = __va(pa);
+ }
- if (!shared_info_page)
- shared_info_page = (struct shared_info *)
- extend_brk(PAGE_SIZE, PAGE_SIZE);
xatp.domid = DOMID_SELF;
xatp.idx = 0;
xatp.space = XENMAPSPACE_shared_info;
- xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
+ xatp.gpfn = virt_to_pfn(HYPERVISOR_shared_info);
if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
BUG();
-
- HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
-
- /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
- * page, we use it in the event channel upcall and in some pvclock
- * related functions. We don't need the vcpu_info placement
- * optimizations because we don't use any pv_mmu or pv_irq op on
- * HVM.
- * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
- * online but xen_hvm_init_shared_info is run at resume time too and
- * in that case multiple vcpus might be online. */
- for_each_online_cpu(cpu) {
- /* Leave it to be NULL. */
- if (xen_vcpu_nr(cpu) >= MAX_VIRT_CPUS)
- continue;
- per_cpu(xen_vcpu, cpu) =
- &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
- }
}
static void __init init_hvm_pv_info(void)
@@ -106,7 +106,7 @@ static void xen_hvm_crash_shutdown(struct pt_regs *regs)
static int xen_cpu_up_prepare_hvm(unsigned int cpu)
{
- int rc;
+ int rc = 0;
/*
* This can happen if CPU was offlined earlier and
@@ -121,7 +121,9 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu)
per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
else
per_cpu(xen_vcpu_id, cpu) = cpu;
- xen_vcpu_setup(cpu);
+ rc = xen_vcpu_setup(cpu);
+ if (rc)
+ return rc;
if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock))
xen_setup_timer(cpu);
@@ -130,9 +132,8 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu)
if (rc) {
WARN(1, "xen_smp_intr_init() for CPU %d failed: %d\n",
cpu, rc);
- return rc;
}
- return 0;
+ return rc;
}
static int xen_cpu_dead_hvm(unsigned int cpu)
@@ -154,6 +155,13 @@ static void __init xen_hvm_guest_init(void)
xen_hvm_init_shared_info();
+ /*
+ * xen_vcpu is a pointer to the vcpu_info struct in the shared_info
+ * page, we use it in the event channel upcall and in some pvclock
+ * related functions.
+ */
+ xen_vcpu_info_reset(0);
+
xen_panic_handler_init();
if (xen_feature(XENFEAT_hvm_callback_vector))
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index f33eef4ebd12..811e4ddb3f37 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -89,8 +89,6 @@
void *xen_initial_gdt;
-RESERVE_BRK(shared_info_page_brk, PAGE_SIZE);
-
static int xen_cpu_up_prepare_pv(unsigned int cpu);
static int xen_cpu_dead_pv(unsigned int cpu);
@@ -107,35 +105,6 @@ struct tls_descs {
*/
static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
-/*
- * On restore, set the vcpu placement up again.
- * If it fails, then we're in a bad state, since
- * we can't back out from using it...
- */
-void xen_vcpu_restore(void)
-{
- int cpu;
-
- for_each_possible_cpu(cpu) {
- bool other_cpu = (cpu != smp_processor_id());
- bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu),
- NULL);
-
- if (other_cpu && is_up &&
- HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL))
- BUG();
-
- xen_setup_runstate_info(cpu);
-
- if (xen_have_vcpu_info_placement)
- xen_vcpu_setup(cpu);
-
- if (other_cpu && is_up &&
- HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL))
- BUG();
- }
-}
-
static void __init xen_banner(void)
{
unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL);
@@ -960,30 +929,43 @@ void xen_setup_shared_info(void)
HYPERVISOR_shared_info =
(struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
-#ifndef CONFIG_SMP
- /* In UP this is as good a place as any to set up shared info */
- xen_setup_vcpu_info_placement();
-#endif
-
xen_setup_mfn_list_list();
- /*
- * Now that shared info is set up we can start using routines that
- * point to pvclock area.
- */
- if (system_state == SYSTEM_BOOTING)
+ if (system_state == SYSTEM_BOOTING) {
+#ifndef CONFIG_SMP
+ /*
+ * In UP this is as good a place as any to set up shared info.
+ * Limit this to boot only, at restore vcpu setup is done via
+ * xen_vcpu_restore().
+ */
+ xen_setup_vcpu_info_placement();
+#endif
+ /*
+ * Now that shared info is set up we can start using routines
+ * that point to pvclock area.
+ */
xen_init_time_ops();
+ }
}
/* This is called once we have the cpu_possible_mask */
-void xen_setup_vcpu_info_placement(void)
+void __ref xen_setup_vcpu_info_placement(void)
{
int cpu;
for_each_possible_cpu(cpu) {
/* Set up direct vCPU id mapping for PV guests. */
per_cpu(xen_vcpu_id, cpu) = cpu;
- xen_vcpu_setup(cpu);
+
+ /*
+ * xen_vcpu_setup(cpu) can fail -- in which case it
+ * falls back to the shared_info version for cpus
+ * where xen_vcpu_nr(cpu) < MAX_VIRT_CPUS.
+ *
+ * xen_cpu_up_prepare_pv() handles the rest by failing
+ * them in hotplug.
+ */
+ (void) xen_vcpu_setup(cpu);
}
/*
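
For context, the contract that the comment in the hunk above relies on can be sketched as below. The real xen_vcpu_setup() is defined outside this diff, and register_vcpu_info() is only a stand-in name for the VCPUOP_register_vcpu_info path; this is a hedged sketch, not the actual implementation.

/* Hypothetical stand-in for the VCPUOP_register_vcpu_info registration. */
static int register_vcpu_info(int cpu);

int xen_vcpu_setup_sketch(int cpu)
{
	/* Try per-cpu vcpu_info placement first, if it is still enabled. */
	if (xen_have_vcpu_info_placement && register_vcpu_info(cpu) == 0)
		return 0;

	/* Fall back to the fixed slot in the shared_info page. */
	xen_vcpu_info_reset(cpu);

	/* CPUs at or beyond MAX_VIRT_CPUS end up with no vcpu_info at all. */
	return per_cpu(xen_vcpu, cpu) ? 0 : -ENODEV;
}

CPU hotplug handles the remaining failure mode: xen_cpu_up_prepare_pv() below refuses to bring up any CPU whose xen_vcpu pointer stayed NULL.
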
@@ -1332,9 +1314,17 @@ asmlinkage __visible void __init xen_start_kernel(void)
*/
acpi_numa = -1;
#endif
- /* Don't do the full vcpu_info placement stuff until we have a
- possible map and a non-dummy shared_info. */
- per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
+ /* Let's presume PV guests always boot on vCPU with id 0. */
+ per_cpu(xen_vcpu_id, 0) = 0;
+
+ /*
+ * Setup xen_vcpu early because start_kernel needs it for
+ * local_irq_disable(), irqs_disabled().
+ *
+ * Don't do the full vcpu_info placement stuff until we have
+ * the cpu_possible_mask and a non-dummy shared_info.
+ */
+ xen_vcpu_info_reset(0);
WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv));
@@ -1431,9 +1421,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
#endif
xen_raw_console_write("about to get started...\n");
- /* Let's presume PV guests always boot on vCPU with id 0. */
- per_cpu(xen_vcpu_id, 0) = 0;
-
+ /* We need this for printk timestamps */
xen_setup_runstate_info(0);
xen_efi_init();
@@ -1451,6 +1439,9 @@ static int xen_cpu_up_prepare_pv(unsigned int cpu)
{
int rc;
+ if (per_cpu(xen_vcpu, cpu) == NULL)
+ return -ENODEV;
+
xen_setup_timer(cpu);
rc = xen_smp_intr_init(cpu);
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index a5bf7c451435..c81046323ebc 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -499,7 +499,7 @@ static unsigned long __init xen_foreach_remap_area(unsigned long nr_pages,
void __init xen_remap_memory(void)
{
unsigned long buf = (unsigned long)&xen_remap_buf;
- unsigned long mfn_save, mfn, pfn;
+ unsigned long mfn_save, pfn;
unsigned long remapped = 0;
unsigned int i;
unsigned long pfn_s = ~0UL;
@@ -515,8 +515,7 @@ void __init xen_remap_memory(void)
pfn = xen_remap_buf.target_pfn;
for (i = 0; i < xen_remap_buf.size; i++) {
- mfn = xen_remap_buf.mfns[i];
- xen_update_mem_tables(pfn, mfn);
+ xen_update_mem_tables(pfn, xen_remap_buf.mfns[i]);
remapped++;
pfn++;
}
@@ -530,8 +529,6 @@ void __init xen_remap_memory(void)
pfn_s = xen_remap_buf.target_pfn;
len = xen_remap_buf.size;
}
-
- mfn = xen_remap_mfn;
xen_remap_mfn = xen_remap_buf.next_area_mfn;
}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 82ac611f2fc1..e7f02eb73727 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -1,4 +1,5 @@
#include <linux/smp.h>
+#include <linux/cpu.h>
#include <linux/slab.h>
#include <linux/cpumask.h>
#include <linux/percpu.h>
@@ -114,6 +115,36 @@ int xen_smp_intr_init(unsigned int cpu)
return rc;
}
+void __init xen_smp_cpus_done(unsigned int max_cpus)
+{
+ int cpu, rc, count = 0;
+
+ if (xen_hvm_domain())
+ native_smp_cpus_done(max_cpus);
+
+ if (xen_have_vcpu_info_placement)
+ return;
+
+ for_each_online_cpu(cpu) {
+ if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS)
+ continue;
+
+ rc = cpu_down(cpu);
+
+ if (rc == 0) {
+ /*
+ * Reset vcpu_info so this cpu cannot be onlined again.
+ */
+ xen_vcpu_info_reset(cpu);
+ count++;
+ } else {
+ pr_warn("%s: failed to bring CPU %d down, error %d\n",
+ __func__, cpu, rc);
+ }
+ }
+ WARN(count, "%s: brought %d CPUs offline\n", __func__, count);
+}
+
void xen_smp_send_reschedule(int cpu)
{
xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
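
The MAX_VIRT_CPUS limit that xen_smp_cpus_done() enforces comes from the layout of the shared_info page itself, roughly as below (other members omitted; see the Xen public headers for the full definition):

struct shared_info {
	struct vcpu_info vcpu_info[MAX_VIRT_CPUS];
	/* ... event channel and wallclock fields omitted in this sketch ... */
};

Without per-cpu vcpu_info placement, a CPU whose xen_vcpu_nr() is at or past that array has nowhere to keep its vcpu_info, which is why such CPUs are taken back down and have their vcpu_info reset so they cannot be onlined again.
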
diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h
index 8ebb6acca64a..87d3c76cba37 100644
--- a/arch/x86/xen/smp.h
+++ b/arch/x86/xen/smp.h
@@ -14,6 +14,8 @@ extern void xen_smp_intr_free(unsigned int cpu);
int xen_smp_intr_init_pv(unsigned int cpu);
void xen_smp_intr_free_pv(unsigned int cpu);
+void xen_smp_cpus_done(unsigned int max_cpus);
+
void xen_smp_send_reschedule(int cpu);
void xen_smp_send_call_function_ipi(const struct cpumask *mask);
void xen_smp_send_call_function_single_ipi(int cpu);
diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c
index f18561bbf5c9..fd60abedf658 100644
--- a/arch/x86/xen/smp_hvm.c
+++ b/arch/x86/xen/smp_hvm.c
@@ -12,7 +12,8 @@ static void __init xen_hvm_smp_prepare_boot_cpu(void)
native_smp_prepare_boot_cpu();
/*
- * Setup vcpu_info for boot CPU.
+ * Setup vcpu_info for boot CPU. Secondary CPUs get their vcpu_info
+ * in xen_cpu_up_prepare_hvm().
*/
xen_vcpu_setup(0);
@@ -27,10 +28,20 @@ static void __init xen_hvm_smp_prepare_boot_cpu(void)
static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
{
+ int cpu;
+
native_smp_prepare_cpus(max_cpus);
WARN_ON(xen_smp_intr_init(0));
xen_init_lock_cpu(0);
+
+ for_each_possible_cpu(cpu) {
+ if (cpu == 0)
+ continue;
+
+ /* Set default vcpu_id to make sure that we don't use cpu-0's */
+ per_cpu(xen_vcpu_id, cpu) = XEN_VCPU_ID_INVALID;
+ }
}
#ifdef CONFIG_HOTPLUG_CPU
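
The XEN_VCPU_ID_INVALID pre-seeding in the loop above matters because everything that talks to the hypervisor about a vCPU goes through xen_vcpu_nr(), which is just a read of that per-cpu variable (shown here for reference, as defined in include/xen/xen-ops.h in this era):

static inline uint32_t xen_vcpu_nr(int cpu)
{
	return per_cpu(xen_vcpu_id, cpu);
}

Until xen_cpu_up_prepare_hvm() installs the real ACPI id, a not-yet-prepared secondary CPU therefore reports an invalid vCPU number instead of silently aliasing vCPU 0.
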
@@ -60,4 +71,5 @@ void __init xen_hvm_smp_init(void)
smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi;
smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi;
smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu;
+ smp_ops.smp_cpus_done = xen_smp_cpus_done;
}
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index aae32535f4ec..1ea598e5f030 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -371,10 +371,6 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle)
return 0;
}
-static void xen_pv_smp_cpus_done(unsigned int max_cpus)
-{
-}
-
#ifdef CONFIG_HOTPLUG_CPU
static int xen_pv_cpu_disable(void)
{
@@ -469,7 +465,7 @@ static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id)
static const struct smp_ops xen_smp_ops __initconst = {
.smp_prepare_boot_cpu = xen_pv_smp_prepare_boot_cpu,
.smp_prepare_cpus = xen_pv_smp_prepare_cpus,
- .smp_cpus_done = xen_pv_smp_cpus_done,
+ .smp_cpus_done = xen_smp_cpus_done,
.cpu_up = xen_pv_cpu_up,
.cpu_die = xen_pv_cpu_die,
diff --git a/arch/x86/xen/suspend_hvm.c b/arch/x86/xen/suspend_hvm.c
index 01afcadde50a..484999416d8b 100644
--- a/arch/x86/xen/suspend_hvm.c
+++ b/arch/x86/xen/suspend_hvm.c
@@ -8,15 +8,10 @@
void xen_hvm_post_suspend(int suspend_cancelled)
{
- int cpu;
-
- if (!suspend_cancelled)
+ if (!suspend_cancelled) {
xen_hvm_init_shared_info();
+ xen_vcpu_restore();
+ }
xen_callback_vector();
xen_unplug_emulated_devices();
- if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
- for_each_online_cpu(cpu) {
- xen_setup_runstate_info(cpu);
- }
- }
}
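
xen_vcpu_restore() now covers both removed fragments: the PV copy deleted from enlighten_pv.c earlier in this patch and the HVM runstate loop deleted just above. A hedged sketch of the shared version, assembled from those two pieces (the actual definition lives outside this diff):

void xen_vcpu_restore(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		bool other_cpu = (cpu != smp_processor_id());
		bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu),
						NULL);

		if (other_cpu && is_up &&
		    HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL))
			BUG();

		/* Runstate setup, previously done per guest type. */
		if (xen_pv_domain() || xen_feature(XENFEAT_hvm_safe_pvclock))
			xen_setup_runstate_info(cpu);

		if (xen_have_vcpu_info_placement)
			xen_vcpu_setup(cpu);

		if (other_cpu && is_up &&
		    HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL))
			BUG();
	}
}
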
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9a440a42c618..0d5004477db6 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -78,7 +78,8 @@ bool xen_vcpu_stolen(int vcpu);
extern int xen_have_vcpu_info_placement;
-void xen_vcpu_setup(int cpu);
+int xen_vcpu_setup(int cpu);
+void xen_vcpu_info_reset(int cpu);
void xen_setup_vcpu_info_placement(void);
#ifdef CONFIG_SMP
diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
index 1eb6d2fe70d3..3eed2761c149 100644
--- a/arch/xtensa/include/uapi/asm/socket.h
+++ b/arch/xtensa/include/uapi/asm/socket.h
@@ -109,4 +109,8 @@
#define SO_COOKIE 57
+#define SCM_TIMESTAMPING_PKTINFO 58
+
+#define SO_PEERGROUPS 59
+
#endif /* _XTENSA_SOCKET_H */
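
SO_PEERGROUPS is the socket-level option for reading a connected AF_UNIX peer's supplementary groups; the xtensa header only mirrors the value used by the other architectures. A userspace-side sketch of the intended two-step query (probe the required length, then read the gid array); none of this code is part of the patch, and the error handling is deliberately minimal:

#include <errno.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <sys/types.h>

static gid_t *fetch_peer_groups(int fd, int *ngroups)
{
	socklen_t len = 0;
	gid_t *groups;

	/* With a too-small buffer the call fails with ERANGE and sets len. */
	if (getsockopt(fd, SOL_SOCKET, SO_PEERGROUPS, NULL, &len) == -1 &&
	    errno != ERANGE)
		return NULL;

	groups = malloc(len);
	if (!groups)
		return NULL;

	if (getsockopt(fd, SOL_SOCKET, SO_PEERGROUPS, groups, &len) == -1) {
		free(groups);
		return NULL;
	}

	*ngroups = len / sizeof(gid_t);
	return groups;
}
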