diff options
Diffstat (limited to 'arch')
189 files changed, 2914 insertions, 2597 deletions
diff --git a/arch/alpha/include/asm/uaccess.h b/arch/alpha/include/asm/uaccess.h index 7b82dc9a8556..133a4884ed44 100644 --- a/arch/alpha/include/asm/uaccess.h +++ b/arch/alpha/include/asm/uaccess.h @@ -326,7 +326,6 @@ clear_user(void __user *to, long len) (uaccess_kernel() ? ~0UL : TASK_SIZE) extern long strncpy_from_user(char *dest, const char __user *src, long count); -extern __must_check long strlen_user(const char __user *str); extern __must_check long strnlen_user(const char __user *str, long n); #include <asm/extable.h> diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index a56e608db2f9..b37153ecf2ac 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h @@ -10,7 +10,6 @@ #define __ARCH_WANT_SYS_GETHOSTNAME #define __ARCH_WANT_SYS_FADVISE64 #define __ARCH_WANT_SYS_GETPGRP -#define __ARCH_WANT_SYS_OLD_GETRLIMIT #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_FORK diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index b23d6fbbb225..df0d0a5e9353 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -564,25 +564,20 @@ SYSCALL_DEFINE0(getdtablesize) */ SYSCALL_DEFINE2(osf_getdomainname, char __user *, name, int, namelen) { - unsigned len; - int i; + int len, err = 0; + char *kname; - if (!access_ok(VERIFY_WRITE, name, namelen)) - return -EFAULT; - - len = namelen; - if (len > 32) - len = 32; + if (namelen > 32) + namelen = 32; down_read(&uts_sem); - for (i = 0; i < len; ++i) { - __put_user(utsname()->domainname[i], name + i); - if (utsname()->domainname[i] == '\0') - break; - } + kname = utsname()->domainname; + len = strnlen(kname, namelen); + if (copy_to_user(name, kname, min(len + 1, namelen))) + err = -EFAULT; up_read(&uts_sem); - return 0; + return err; } /* @@ -718,9 +713,8 @@ SYSCALL_DEFINE2(osf_sigstack, struct sigstack __user *, uss, if (uoss) { error = -EFAULT; - if (! 
access_ok(VERIFY_WRITE, uoss, sizeof(*uoss)) - || __put_user(oss_sp, &uoss->ss_sp) - || __put_user(oss_os, &uoss->ss_onstack)) + if (put_user(oss_sp, &uoss->ss_sp) || + put_user(oss_os, &uoss->ss_onstack)) goto out; } @@ -957,37 +951,45 @@ struct itimerval32 static inline long get_tv32(struct timeval *o, struct timeval32 __user *i) { - return (!access_ok(VERIFY_READ, i, sizeof(*i)) || - (__get_user(o->tv_sec, &i->tv_sec) | - __get_user(o->tv_usec, &i->tv_usec))); + struct timeval32 tv; + if (copy_from_user(&tv, i, sizeof(struct timeval32))) + return -EFAULT; + o->tv_sec = tv.tv_sec; + o->tv_usec = tv.tv_usec; + return 0; } static inline long put_tv32(struct timeval32 __user *o, struct timeval *i) { - return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) || - (__put_user(i->tv_sec, &o->tv_sec) | - __put_user(i->tv_usec, &o->tv_usec))); + return copy_to_user(o, &(struct timeval32){ /* fill from kernel-side *i; o is only the __user destination */ + .tv_sec = i->tv_sec, + .tv_usec = i->tv_usec}, + sizeof(struct timeval32)); } static inline long get_it32(struct itimerval *o, struct itimerval32 __user *i) { - return (!access_ok(VERIFY_READ, i, sizeof(*i)) || - (__get_user(o->it_interval.tv_sec, &i->it_interval.tv_sec) | - __get_user(o->it_interval.tv_usec, &i->it_interval.tv_usec) | - __get_user(o->it_value.tv_sec, &i->it_value.tv_sec) | - __get_user(o->it_value.tv_usec, &i->it_value.tv_usec))); + struct itimerval32 itv; + if (copy_from_user(&itv, i, sizeof(struct itimerval32))) + return -EFAULT; + o->it_interval.tv_sec = itv.it_interval.tv_sec; + o->it_interval.tv_usec = itv.it_interval.tv_usec; + o->it_value.tv_sec = itv.it_value.tv_sec; + o->it_value.tv_usec = itv.it_value.tv_usec; + return 0; } static inline long put_it32(struct itimerval32 __user *o, struct itimerval *i) { - return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) || - (__put_user(i->it_interval.tv_sec, &o->it_interval.tv_sec) | - __put_user(i->it_interval.tv_usec, &o->it_interval.tv_usec) | - __put_user(i->it_value.tv_sec, &o->it_value.tv_sec) | - 
__put_user(i->it_value.tv_usec, &o->it_value.tv_usec))); + return copy_to_user(o, &(struct itimerval32){ /* fill from kernel-side *i; o is only the __user destination */ + .it_interval.tv_sec = i->it_interval.tv_sec, + .it_interval.tv_usec = i->it_interval.tv_usec, + .it_value.tv_sec = i->it_value.tv_sec, + .it_value.tv_usec = i->it_value.tv_usec}, + sizeof(struct itimerval32)); } static inline void @@ -1106,20 +1108,17 @@ SYSCALL_DEFINE5(osf_select, int, n, fd_set __user *, inp, fd_set __user *, outp, { struct timespec end_time, *to = NULL; if (tvp) { - time_t sec, usec; - + struct timeval tv; to = &end_time; - if (!access_ok(VERIFY_READ, tvp, sizeof(*tvp)) - || __get_user(sec, &tvp->tv_sec) - || __get_user(usec, &tvp->tv_usec)) { + if (get_tv32(&tv, tvp)) return -EFAULT; - } - if (sec < 0 || usec < 0) + if (tv.tv_sec < 0 || tv.tv_usec < 0) return -EINVAL; - if (poll_select_set_timeout(to, sec, usec * NSEC_PER_USEC)) + if (poll_select_set_timeout(to, tv.tv_sec, + tv.tv_usec * NSEC_PER_USEC)) return -EINVAL; } diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index abd59fad1a34..0b731e8ab17e 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -22,6 +22,7 @@ config ARM select CLONE_BACKWARDS select CPU_PM if (SUSPEND || CPU_IDLE) select DCACHE_WORD_ACCESS if HAVE_EFFICIENT_UNALIGNED_ACCESS + select DMA_NOOP_OPS if !MMU select EDAC_SUPPORT select EDAC_ATOMIC_SCRUB select GENERIC_ALLOCATOR diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c index 9b1b7be2ec0e..9a92de63426f 100644 --- a/arch/arm/common/dmabounce.c +++ b/arch/arm/common/dmabounce.c @@ -33,6 +33,7 @@ #include <linux/scatterlist.h> #include <asm/cacheflush.h> +#include <asm/dma-iommu.h> #undef STATS @@ -256,7 +257,7 @@ static inline dma_addr_t map_single(struct device *dev, void *ptr, size_t size, if (buf == NULL) { dev_err(dev, "%s: unable to map unsafe buffer %p!\n", __func__, ptr); - return DMA_ERROR_CODE; + return ARM_MAPPING_ERROR; } dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", @@ -326,7 +327,7 @@ static dma_addr_t 
dmabounce_map_page(struct device *dev, struct page *page, ret = needs_bounce(dev, dma_addr, size); if (ret < 0) - return DMA_ERROR_CODE; + return ARM_MAPPING_ERROR; if (ret == 0) { arm_dma_ops.sync_single_for_device(dev, dma_addr, size, dir); @@ -335,7 +336,7 @@ static dma_addr_t dmabounce_map_page(struct device *dev, struct page *page, if (PageHighMem(page)) { dev_err(dev, "DMA buffer bouncing of HIGHMEM pages is not supported\n"); - return DMA_ERROR_CODE; + return ARM_MAPPING_ERROR; } return map_single(dev, page_address(page) + offset, size, dir, attrs); @@ -444,12 +445,17 @@ static void dmabounce_sync_for_device(struct device *dev, arm_dma_ops.sync_single_for_device(dev, handle, size, dir); } -static int dmabounce_set_mask(struct device *dev, u64 dma_mask) +static int dmabounce_dma_supported(struct device *dev, u64 dma_mask) { if (dev->archdata.dmabounce) return 0; - return arm_dma_ops.set_dma_mask(dev, dma_mask); + return arm_dma_ops.dma_supported(dev, dma_mask); +} + +static int dmabounce_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return arm_dma_ops.mapping_error(dev, dma_addr); } static const struct dma_map_ops dmabounce_ops = { @@ -465,7 +471,8 @@ static const struct dma_map_ops dmabounce_ops = { .unmap_sg = arm_dma_unmap_sg, .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu, .sync_sg_for_device = arm_dma_sync_sg_for_device, - .set_dma_mask = dmabounce_set_mask, + .dma_supported = dmabounce_dma_supported, + .mapping_error = dmabounce_mapping_error, }; static int dmabounce_init_pool(struct dmabounce_pool *pool, struct device *dev, diff --git a/arch/arm/configs/lpc32xx_defconfig b/arch/arm/configs/lpc32xx_defconfig index 6ba430d2b5b2..e15fa5f168bb 100644 --- a/arch/arm/configs/lpc32xx_defconfig +++ b/arch/arm/configs/lpc32xx_defconfig @@ -112,7 +112,7 @@ CONFIG_GPIO_SX150X=y CONFIG_GPIO_74X164=y CONFIG_GPIO_MAX7301=y CONFIG_GPIO_MC33880=y -CONFIG_GPIO_MCP23S08=y +CONFIG_PINCTRL_MCP23S08=y CONFIG_SENSORS_DS620=y CONFIG_SENSORS_MAX6639=y 
CONFIG_WATCHDOG=y diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h index 2ef282f96651..c090ec675eac 100644 --- a/arch/arm/include/asm/dma-iommu.h +++ b/arch/arm/include/asm/dma-iommu.h @@ -9,6 +9,8 @@ #include <linux/kmemcheck.h> #include <linux/kref.h> +#define ARM_MAPPING_ERROR (~(dma_addr_t)0x0) + struct dma_iommu_mapping { /* iommu specific data */ struct iommu_domain *domain; @@ -33,5 +35,7 @@ int arm_iommu_attach_device(struct device *dev, struct dma_iommu_mapping *mapping); void arm_iommu_detach_device(struct device *dev); +int arm_dma_supported(struct device *dev, u64 mask); + #endif /* __KERNEL__ */ #endif diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index 680d3f3889e7..4e0285a66ef8 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -12,18 +12,14 @@ #include <xen/xen.h> #include <asm/xen/hypervisor.h> -#define DMA_ERROR_CODE (~(dma_addr_t)0x0) extern const struct dma_map_ops arm_dma_ops; extern const struct dma_map_ops arm_coherent_dma_ops; static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { - return &arm_dma_ops; + return IS_ENABLED(CONFIG_MMU) ? 
&arm_dma_ops : &dma_noop_ops; } -#define HAVE_ARCH_DMA_SUPPORTED 1 -extern int dma_supported(struct device *dev, u64 mask); - #ifdef __arch_page_to_dma #error Please update to __arch_pfn_to_dma #endif diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index f0e66577ce05..127e2dd2e21c 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -44,7 +44,9 @@ #define KVM_MAX_VCPUS VGIC_V2_MAX_CPUS #endif -#define KVM_REQ_VCPU_EXIT (8 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_SLEEP \ + KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); int __attribute_const__ kvm_target_cpu(void); @@ -233,8 +235,6 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void); struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); void kvm_arm_halt_guest(struct kvm *kvm); void kvm_arm_resume_guest(struct kvm *kvm); -void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu); -void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu); int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu); @@ -291,20 +291,12 @@ static inline void kvm_arm_init_debug(void) {} static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {} -static inline int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, - struct kvm_device_attr *attr) -{ - return -ENXIO; -} -static inline int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, - struct kvm_device_attr *attr) -{ - return -ENXIO; -} -static inline int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, - struct kvm_device_attr *attr) -{ - return -ENXIO; -} + +int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr); +int kvm_arm_vcpu_arch_get_attr(struct 
kvm_vcpu *vcpu, + struct kvm_device_attr *attr); +int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr); #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 2577405d082d..6838abc04279 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -526,7 +526,6 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo /* These are from lib/ code, and use __get_user() and friends */ extern long strncpy_from_user(char *dest, const char __user *src, long count); -extern __must_check long strlen_user(const char __user *str); extern __must_check long strnlen_user(const char __user *str, long n); #endif /* _ASMARM_UACCESS_H */ diff --git a/arch/arm/include/asm/xen/events.h b/arch/arm/include/asm/xen/events.h index 71e473d05fcc..620dc75362e5 100644 --- a/arch/arm/include/asm/xen/events.h +++ b/arch/arm/include/asm/xen/events.h @@ -16,7 +16,7 @@ static inline int xen_irqs_disabled(struct pt_regs *regs) return raw_irqs_disabled_flags(regs->ARM_cpsr); } -#define xchg_xen_ulong(ptr, val) atomic64_xchg(container_of((ptr), \ +#define xchg_xen_ulong(ptr, val) atomic64_xchg(container_of((long long*)(ptr),\ atomic64_t, \ counter), (val)) diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 5e3c673fa3f4..5db2d4c6a55f 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -203,6 +203,14 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff #define VGIC_LEVEL_INFO_LINE_LEVEL 0 +/* Device Control API on vcpu fd */ +#define KVM_ARM_VCPU_PMU_V3_CTRL 0 +#define KVM_ARM_VCPU_PMU_V3_IRQ 0 +#define KVM_ARM_VCPU_PMU_V3_INIT 1 +#define KVM_ARM_VCPU_TIMER_CTRL 1 +#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 +#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 + #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 #define KVM_DEV_ARM_ITS_SAVE_TABLES 1 #define KVM_DEV_ARM_ITS_RESTORE_TABLES 2 
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index fa6182a40941..1e0784ebbfd6 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -301,3 +301,54 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, { return -EINVAL; } + +int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret; + + switch (attr->group) { + case KVM_ARM_VCPU_TIMER_CTRL: + ret = kvm_arm_timer_set_attr(vcpu, attr); + break; + default: + ret = -ENXIO; + break; + } + + return ret; +} + +int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret; + + switch (attr->group) { + case KVM_ARM_VCPU_TIMER_CTRL: + ret = kvm_arm_timer_get_attr(vcpu, attr); + break; + default: + ret = -ENXIO; + break; + } + + return ret; +} + +int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret; + + switch (attr->group) { + case KVM_ARM_VCPU_TIMER_CTRL: + ret = kvm_arm_timer_has_attr(vcpu, attr); + break; + default: + ret = -ENXIO; + break; + } + + return ret; +} diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index f86a9aaef462..54442e375354 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -72,6 +72,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) trace_kvm_wfx(*vcpu_pc(vcpu), false); vcpu->stat.wfi_exit_stat++; kvm_vcpu_block(vcpu); + kvm_clear_request(KVM_REQ_UNHALT, vcpu); } kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c index 624a510d31df..ebd2dd46adf7 100644 --- a/arch/arm/kvm/hyp/switch.c +++ b/arch/arm/kvm/hyp/switch.c @@ -237,8 +237,10 @@ void __hyp_text __noreturn __hyp_panic(int cause) vcpu = (struct kvm_vcpu *)read_sysreg(HTPIDR); host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + __timer_save_state(vcpu); __deactivate_traps(vcpu); __deactivate_vm(vcpu); + __banked_restore_state(host_ctxt); 
__sysreg_restore_state(host_ctxt); } diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index 1da8b2d14550..5ed0c3ee33d6 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -37,16 +37,6 @@ static struct kvm_regs cortexa_regs_reset = { .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, }; -static const struct kvm_irq_level cortexa_ptimer_irq = { - { .irq = 30 }, - .level = 1, -}; - -static const struct kvm_irq_level cortexa_vtimer_irq = { - { .irq = 27 }, - .level = 1, -}; - /******************************************************************************* * Exported reset function @@ -62,16 +52,12 @@ static const struct kvm_irq_level cortexa_vtimer_irq = { int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { struct kvm_regs *reset_regs; - const struct kvm_irq_level *cpu_vtimer_irq; - const struct kvm_irq_level *cpu_ptimer_irq; switch (vcpu->arch.target) { case KVM_ARM_TARGET_CORTEX_A7: case KVM_ARM_TARGET_CORTEX_A15: reset_regs = &cortexa_regs_reset; vcpu->arch.midr = read_cpuid_id(); - cpu_vtimer_irq = &cortexa_vtimer_irq; - cpu_ptimer_irq = &cortexa_ptimer_irq; break; default: return -ENODEV; @@ -84,5 +70,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) kvm_reset_coprocs(vcpu); /* Reset arch_timer context */ - return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq, cpu_ptimer_irq); + return kvm_timer_vcpu_reset(vcpu); } diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index c6c4c9c8824b..877a0e3fd17d 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -1045,8 +1045,8 @@ config ARM_L1_CACHE_SHIFT default 5 config ARM_DMA_MEM_BUFFERABLE - bool "Use non-cacheable memory for DMA" if (CPU_V6 || CPU_V6K) && !CPU_V7 - default y if CPU_V6 || CPU_V6K || CPU_V7 + bool "Use non-cacheable memory for DMA" if (CPU_V6 || CPU_V6K || CPU_V7M) && !CPU_V7 + default y if CPU_V6 || CPU_V6K || CPU_V7 || CPU_V7M help Historically, the kernel has used strongly ordered mappings to provide DMA coherent memory. 
With the advent of ARMv7, mapping @@ -1061,6 +1061,10 @@ config ARM_DMA_MEM_BUFFERABLE and therefore turning this on may result in unpredictable driver behaviour. Therefore, we offer this as an option. + On some of the beefier ARMv7-M machines (with DMA and write + buffers) you likely want this enabled, while those that + didn't need it until now also won't need it in the future. + You are recommended say 'Y' here and debug any affected drivers. config ARM_HEAVY_MB diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index b3dea80715b4..950d19babb5f 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -2,9 +2,8 @@ # Makefile for the linux arm-specific parts of the memory manager. # -obj-y := dma-mapping.o extable.o fault.o init.o \ - iomap.o - +obj-y := extable.o fault.o init.o iomap.o +obj-y += dma-mapping$(MMUEXT).o obj-$(CONFIG_MMU) += fault-armv.o flush.o idmap.o ioremap.o \ mmap.o pgd.o mmu.o pageattr.o diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c new file mode 100644 index 000000000000..90ee354d803e --- /dev/null +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -0,0 +1,228 @@ +/* + * Based on linux/arch/arm/mm/dma-mapping.c + * + * Copyright (C) 2000-2004 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/export.h> +#include <linux/mm.h> +#include <linux/dma-mapping.h> +#include <linux/scatterlist.h> + +#include <asm/cachetype.h> +#include <asm/cacheflush.h> +#include <asm/outercache.h> +#include <asm/cp15.h> + +#include "dma.h" + +/* + * dma_noop_ops is used if + * - MMU/MPU is off + * - cpu is v7m w/o cache support + * - device is coherent + * otherwise arm_nommu_dma_ops is used. + * + * arm_nommu_dma_ops rely on consistent DMA memory (please, refer to + * [1] on how to declare such memory). 
+ * + * [1] Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt + */ + +static void *arm_nommu_dma_alloc(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, + unsigned long attrs) + +{ + const struct dma_map_ops *ops = &dma_noop_ops; + + /* + * We are here because: + * - no consistent DMA region has been defined, so we can't + * continue. + * - there is no space left in consistent DMA region, so we + * only can fallback to generic allocator if we are + * advertised that consistency is not required. + */ + + if (attrs & DMA_ATTR_NON_CONSISTENT) + return ops->alloc(dev, size, dma_handle, gfp, attrs); + + WARN_ON_ONCE(1); + return NULL; +} + +static void arm_nommu_dma_free(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_addr, + unsigned long attrs) +{ + const struct dma_map_ops *ops = &dma_noop_ops; + + if (attrs & DMA_ATTR_NON_CONSISTENT) + ops->free(dev, size, cpu_addr, dma_addr, attrs); + else + WARN_ON_ONCE(1); + + return; +} + +static void __dma_page_cpu_to_dev(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) +{ + dmac_map_area(__va(paddr), size, dir); + + if (dir == DMA_FROM_DEVICE) + outer_inv_range(paddr, paddr + size); + else + outer_clean_range(paddr, paddr + size); +} + +static void __dma_page_dev_to_cpu(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) +{ + if (dir != DMA_TO_DEVICE) { + outer_inv_range(paddr, paddr + size); + dmac_unmap_area(__va(paddr), size, dir); + } +} + +static dma_addr_t arm_nommu_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + dma_addr_t handle = page_to_phys(page) + offset; + + __dma_page_cpu_to_dev(handle, size, dir); + + return handle; +} + +static void arm_nommu_dma_unmap_page(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + __dma_page_dev_to_cpu(handle, size, dir); +} + + +static int 
arm_nommu_dma_map_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, + unsigned long attrs) +{ + int i; + struct scatterlist *sg; + + for_each_sg(sgl, sg, nents, i) { + sg_dma_address(sg) = sg_phys(sg); + sg_dma_len(sg) = sg->length; + __dma_page_cpu_to_dev(sg_dma_address(sg), sg_dma_len(sg), dir); + } + + return nents; +} + +static void arm_nommu_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, + unsigned long attrs) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) + __dma_page_dev_to_cpu(sg_dma_address(sg), sg_dma_len(sg), dir); +} + +static void arm_nommu_dma_sync_single_for_device(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + __dma_page_cpu_to_dev(handle, size, dir); +} + +static void arm_nommu_dma_sync_single_for_cpu(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + __dma_page_dev_to_cpu(handle, size, dir); /* same helper as arm_nommu_dma_sync_sg_for_cpu */ +} + +static void arm_nommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) + __dma_page_cpu_to_dev(sg_dma_address(sg), sg_dma_len(sg), dir); +} + +static void arm_nommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) + __dma_page_dev_to_cpu(sg_dma_address(sg), sg_dma_len(sg), dir); +} + +const struct dma_map_ops arm_nommu_dma_ops = { + .alloc = arm_nommu_dma_alloc, + .free = arm_nommu_dma_free, + .map_page = arm_nommu_dma_map_page, + .unmap_page = arm_nommu_dma_unmap_page, + .map_sg = arm_nommu_dma_map_sg, + .unmap_sg = arm_nommu_dma_unmap_sg, + .sync_single_for_device = arm_nommu_dma_sync_single_for_device, + .sync_single_for_cpu = arm_nommu_dma_sync_single_for_cpu, + .sync_sg_for_device = + 
arm_nommu_dma_sync_sg_for_device, + .sync_sg_for_cpu = arm_nommu_dma_sync_sg_for_cpu, +}; +EXPORT_SYMBOL(arm_nommu_dma_ops); + +static const struct dma_map_ops *arm_nommu_get_dma_map_ops(bool coherent) +{ + return coherent ? &dma_noop_ops : &arm_nommu_dma_ops; +} + +void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + const struct iommu_ops *iommu, bool coherent) +{ + const struct dma_map_ops *dma_ops; + + if (IS_ENABLED(CONFIG_CPU_V7M)) { + /* + * Cache support for v7m is optional, so can be treated as + * coherent if no cache has been detected. Note that it is not + * enough to check if MPU is in use or not since in absense of + * MPU system memory map is used. + */ + dev->archdata.dma_coherent = (cacheid) ? coherent : true; + } else { + /* + * Assume coherent DMA in case MMU/MPU has not been set up. + */ + dev->archdata.dma_coherent = (get_cr() & CR_M) ? coherent : true; + } + + dma_ops = arm_nommu_get_dma_map_ops(dev->archdata.dma_coherent); + + set_dma_ops(dev, dma_ops); +} + +void arch_teardown_dma_ops(struct device *dev) +{ +} + +#define PREALLOC_DMA_DEBUG_ENTRIES 4096 + +static int __init dma_debug_do_init(void) +{ + dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); + return 0; +} +core_initcall(dma_debug_do_init); diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index bd83c531828a..e7380bafbfa6 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -180,6 +180,11 @@ static void arm_dma_sync_single_for_device(struct device *dev, __dma_page_cpu_to_dev(page, offset, size, dir); } +static int arm_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return dma_addr == ARM_MAPPING_ERROR; +} + const struct dma_map_ops arm_dma_ops = { .alloc = arm_dma_alloc, .free = arm_dma_free, @@ -193,6 +198,8 @@ const struct dma_map_ops arm_dma_ops = { .sync_single_for_device = arm_dma_sync_single_for_device, .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu, .sync_sg_for_device = arm_dma_sync_sg_for_device, + .mapping_error 
= arm_dma_mapping_error, + .dma_supported = arm_dma_supported, }; EXPORT_SYMBOL(arm_dma_ops); @@ -211,6 +218,8 @@ const struct dma_map_ops arm_coherent_dma_ops = { .get_sgtable = arm_dma_get_sgtable, .map_page = arm_coherent_dma_map_page, .map_sg = arm_dma_map_sg, + .mapping_error = arm_dma_mapping_error, + .dma_supported = arm_dma_supported, }; EXPORT_SYMBOL(arm_coherent_dma_ops); @@ -344,8 +353,6 @@ static void __dma_free_buffer(struct page *page, size_t size) } } -#ifdef CONFIG_MMU - static void *__alloc_from_contiguous(struct device *dev, size_t size, pgprot_t prot, struct page **ret_page, const void *caller, bool want_vaddr, @@ -647,22 +654,6 @@ static inline pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot) return prot; } -#define nommu() 0 - -#else /* !CONFIG_MMU */ - -#define nommu() 1 - -#define __get_dma_pgprot(attrs, prot) __pgprot(0) -#define __alloc_remap_buffer(dev, size, gfp, prot, ret, c, wv) NULL -#define __alloc_from_pool(size, ret_page) NULL -#define __alloc_from_contiguous(dev, size, prot, ret, c, wv, coherent_flag, gfp) NULL -#define __free_from_pool(cpu_addr, size) do { } while (0) -#define __free_from_contiguous(dev, page, cpu_addr, size, wv) do { } while (0) -#define __dma_free_remap(cpu_addr, size) do { } while (0) - -#endif /* CONFIG_MMU */ - static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp, struct page **ret_page) { @@ -799,13 +790,13 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp &= ~(__GFP_COMP); args.gfp = gfp; - *handle = DMA_ERROR_CODE; + *handle = ARM_MAPPING_ERROR; allowblock = gfpflags_allow_blocking(gfp); cma = allowblock ? 
dev_get_cma_area(dev) : false; if (cma) buf->allocator = &cma_allocator; - else if (nommu() || is_coherent) + else if (is_coherent) buf->allocator = &simple_allocator; else if (allowblock) buf->allocator = &remap_allocator; @@ -854,8 +845,7 @@ static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { - int ret = -ENXIO; -#ifdef CONFIG_MMU + int ret; unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; unsigned long pfn = dma_to_pfn(dev, dma_addr); @@ -870,10 +860,6 @@ static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, vma->vm_end - vma->vm_start, vma->vm_page_prot); } -#else - ret = vm_iomap_memory(vma, vma->vm_start, - (vma->vm_end - vma->vm_start)); -#endif /* CONFIG_MMU */ return ret; } @@ -892,9 +878,7 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { -#ifdef CONFIG_MMU vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); -#endif /* CONFIG_MMU */ return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); } @@ -1177,11 +1161,10 @@ void arm_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, * during bus mastering, then you would pass 0x00ffffff as the mask * to this function. 
*/ -int dma_supported(struct device *dev, u64 mask) +int arm_dma_supported(struct device *dev, u64 mask) { return __dma_supported(dev, mask, false); } -EXPORT_SYMBOL(dma_supported); #define PREALLOC_DMA_DEBUG_ENTRIES 4096 @@ -1254,7 +1237,7 @@ static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping, if (i == mapping->nr_bitmaps) { if (extend_iommu_mapping(mapping)) { spin_unlock_irqrestore(&mapping->lock, flags); - return DMA_ERROR_CODE; + return ARM_MAPPING_ERROR; } start = bitmap_find_next_zero_area(mapping->bitmaps[i], @@ -1262,7 +1245,7 @@ static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping, if (start > mapping->bits) { spin_unlock_irqrestore(&mapping->lock, flags); - return DMA_ERROR_CODE; + return ARM_MAPPING_ERROR; } bitmap_set(mapping->bitmaps[i], start, count); @@ -1445,7 +1428,7 @@ __iommu_create_mapping(struct device *dev, struct page **pages, size_t size, int i; dma_addr = __alloc_iova(mapping, size); - if (dma_addr == DMA_ERROR_CODE) + if (dma_addr == ARM_MAPPING_ERROR) return dma_addr; iova = dma_addr; @@ -1472,7 +1455,7 @@ __iommu_create_mapping(struct device *dev, struct page **pages, size_t size, fail: iommu_unmap(mapping->domain, dma_addr, iova-dma_addr); __free_iova(mapping, dma_addr, size); - return DMA_ERROR_CODE; + return ARM_MAPPING_ERROR; } static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size) @@ -1533,7 +1516,7 @@ static void *__iommu_alloc_simple(struct device *dev, size_t size, gfp_t gfp, return NULL; *handle = __iommu_create_mapping(dev, &page, size, attrs); - if (*handle == DMA_ERROR_CODE) + if (*handle == ARM_MAPPING_ERROR) goto err_mapping; return addr; @@ -1561,7 +1544,7 @@ static void *__arm_iommu_alloc_attrs(struct device *dev, size_t size, struct page **pages; void *addr = NULL; - *handle = DMA_ERROR_CODE; + *handle = ARM_MAPPING_ERROR; size = PAGE_ALIGN(size); if (coherent_flag == COHERENT || !gfpflags_allow_blocking(gfp)) @@ -1582,7 +1565,7 @@ static void 
*__arm_iommu_alloc_attrs(struct device *dev, size_t size, return NULL; *handle = __iommu_create_mapping(dev, pages, size, attrs); - if (*handle == DMA_ERROR_CODE) + if (*handle == ARM_MAPPING_ERROR) goto err_buffer; if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) @@ -1732,10 +1715,10 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, int prot; size = PAGE_ALIGN(size); - *handle = DMA_ERROR_CODE; + *handle = ARM_MAPPING_ERROR; iova_base = iova = __alloc_iova(mapping, size); - if (iova == DMA_ERROR_CODE) + if (iova == ARM_MAPPING_ERROR) return -ENOMEM; for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s)) { @@ -1775,7 +1758,7 @@ static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, for (i = 1; i < nents; i++) { s = sg_next(s); - s->dma_address = DMA_ERROR_CODE; + s->dma_address = ARM_MAPPING_ERROR; s->dma_length = 0; if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) { @@ -1950,7 +1933,7 @@ static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *p int ret, prot, len = PAGE_ALIGN(size + offset); dma_addr = __alloc_iova(mapping, len); - if (dma_addr == DMA_ERROR_CODE) + if (dma_addr == ARM_MAPPING_ERROR) return dma_addr; prot = __dma_info_to_prot(dir, attrs); @@ -1962,7 +1945,7 @@ static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *p return dma_addr + offset; fail: __free_iova(mapping, dma_addr, len); - return DMA_ERROR_CODE; + return ARM_MAPPING_ERROR; } /** @@ -2056,7 +2039,7 @@ static dma_addr_t arm_iommu_map_resource(struct device *dev, size_t len = PAGE_ALIGN(size + offset); dma_addr = __alloc_iova(mapping, len); - if (dma_addr == DMA_ERROR_CODE) + if (dma_addr == ARM_MAPPING_ERROR) return dma_addr; prot = __dma_info_to_prot(dir, attrs) | IOMMU_MMIO; @@ -2068,7 +2051,7 @@ static dma_addr_t arm_iommu_map_resource(struct device *dev, return dma_addr + offset; fail: __free_iova(mapping, dma_addr, len); - return DMA_ERROR_CODE; + return 
ARM_MAPPING_ERROR; } /** @@ -2140,6 +2123,9 @@ const struct dma_map_ops iommu_ops = { .map_resource = arm_iommu_map_resource, .unmap_resource = arm_iommu_unmap_resource, + + .mapping_error = arm_dma_mapping_error, + .dma_supported = arm_dma_supported, }; const struct dma_map_ops iommu_coherent_ops = { @@ -2156,6 +2142,9 @@ const struct dma_map_ops iommu_coherent_ops = { .map_resource = arm_iommu_map_resource, .unmap_resource = arm_iommu_unmap_resource, + + .mapping_error = arm_dma_mapping_error, + .dma_supported = arm_dma_supported, }; /** diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index f0325d96b97a..785d2a562a23 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -185,23 +185,6 @@ EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); const struct dma_map_ops *xen_dma_ops; EXPORT_SYMBOL(xen_dma_ops); -static const struct dma_map_ops xen_swiotlb_dma_ops = { - .alloc = xen_swiotlb_alloc_coherent, - .free = xen_swiotlb_free_coherent, - .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu, - .sync_single_for_device = xen_swiotlb_sync_single_for_device, - .sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu, - .sync_sg_for_device = xen_swiotlb_sync_sg_for_device, - .map_sg = xen_swiotlb_map_sg_attrs, - .unmap_sg = xen_swiotlb_unmap_sg_attrs, - .map_page = xen_swiotlb_map_page, - .unmap_page = xen_swiotlb_unmap_page, - .dma_supported = xen_swiotlb_dma_supported, - .set_dma_mask = xen_swiotlb_set_dma_mask, - .mmap = xen_swiotlb_dma_mmap, - .get_sgtable = xen_swiotlb_get_sgtable, -}; - int __init xen_mm_init(void) { struct gnttab_cache_flush cflush; diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index 0ed01f2d5ee4..e71eefa2e427 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -144,17 +144,17 @@ bool __set_phys_to_machine_multi(unsigned long pfn, return true; } - p2m_entry = kzalloc(sizeof(struct xen_p2m_entry), GFP_NOWAIT); - if (!p2m_entry) { - pr_warn("cannot allocate xen_p2m_entry\n"); + p2m_entry = kzalloc(sizeof(*p2m_entry), GFP_NOWAIT); + if 
(!p2m_entry) return false; - } + p2m_entry->pfn = pfn; p2m_entry->nr_pages = nr_pages; p2m_entry->mfn = mfn; write_lock_irqsave(&p2m_lock, irqflags); - if ((rc = xen_add_phys_to_mach_entry(p2m_entry)) < 0) { + rc = xen_add_phys_to_mach_entry(p2m_entry); + if (rc < 0) { write_unlock_irqrestore(&p2m_lock, irqflags); return false; } diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index ff925ece82d6..8addb851ab5e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -488,6 +488,17 @@ config CAVIUM_ERRATUM_27456 If unsure, say Y. +config CAVIUM_ERRATUM_30115 + bool "Cavium erratum 30115: Guest may disable interrupts in host" + default y + help + On ThunderX T88 pass 1.x through 2.2, T81 pass 1.0 through + 1.2, and T83 Pass 1.0, KVM guest execution may disable + interrupts in host. Trapping both GICv3 group-0 and group-1 + accesses sidesteps the issue. + + If unsure, say Y. + config QCOM_FALKOR_ERRATUM_1003 bool "Falkor E1003: Incorrect translation due to ASID change" default y diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 1a98bc8602a2..8cef47fa2218 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -89,7 +89,7 @@ static inline void gic_write_ctlr(u32 val) static inline void gic_write_grpen1(u32 val) { - write_sysreg_s(val, SYS_ICC_GRPEN1_EL1); + write_sysreg_s(val, SYS_ICC_IGRPEN1_EL1); isb(); } diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index b3aab8a17868..8d2272c6822c 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -38,7 +38,8 @@ #define ARM64_WORKAROUND_REPEAT_TLBI 17 #define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18 #define ARM64_WORKAROUND_858921 19 +#define ARM64_WORKAROUND_CAVIUM_30115 20 -#define ARM64_NCAPS 20 +#define ARM64_NCAPS 21 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 0984d1b3a8f2..235e77d98261 100644 --- 
a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -86,6 +86,7 @@ #define CAVIUM_CPU_PART_THUNDERX 0x0A1 #define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2 +#define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3 #define BRCM_CPU_PART_VULCAN 0x516 @@ -96,6 +97,7 @@ #define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) +#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index f72779aad276..0df756b24863 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -24,7 +24,6 @@ #include <xen/xen.h> #include <asm/xen/hypervisor.h> -#define DMA_ERROR_CODE (~(dma_addr_t)0) extern const struct dma_map_ops dummy_dma_ops; static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 28bf02efce76..8cabd57b6348 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -19,6 +19,7 @@ #define __ASM_ESR_H #include <asm/memory.h> +#include <asm/sysreg.h> #define ESR_ELx_EC_UNKNOWN (0x00) #define ESR_ELx_EC_WFx (0x01) @@ -182,6 +183,29 @@ #define ESR_ELx_SYS64_ISS_SYS_CNTFRQ (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 0, 14, 0) | \ ESR_ELx_SYS64_ISS_DIR_READ) +#define esr_sys64_to_sysreg(e) \ + sys_reg((((e) & ESR_ELx_SYS64_ISS_OP0_MASK) >> \ + ESR_ELx_SYS64_ISS_OP0_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_OP1_MASK) >> \ + ESR_ELx_SYS64_ISS_OP1_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_CRN_MASK) >> \ + ESR_ELx_SYS64_ISS_CRN_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_CRM_MASK) >> \ + ESR_ELx_SYS64_ISS_CRM_SHIFT), 
\ + (((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >> \ + ESR_ELx_SYS64_ISS_OP2_SHIFT)) + +#define esr_cp15_to_sysreg(e) \ + sys_reg(3, \ + (((e) & ESR_ELx_SYS64_ISS_OP1_MASK) >> \ + ESR_ELx_SYS64_ISS_OP1_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_CRN_MASK) >> \ + ESR_ELx_SYS64_ISS_CRN_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_CRM_MASK) >> \ + ESR_ELx_SYS64_ISS_CRM_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >> \ + ESR_ELx_SYS64_ISS_OP2_SHIFT)) + #ifndef __ASSEMBLY__ #include <asm/types.h> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 1f252a95bc02..d68630007b14 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -42,7 +42,9 @@ #define KVM_VCPU_MAX_FEATURES 4 -#define KVM_REQ_VCPU_EXIT (8 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_SLEEP \ + KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); @@ -334,8 +336,6 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); void kvm_arm_halt_guest(struct kvm *kvm); void kvm_arm_resume_guest(struct kvm *kvm); -void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu); -void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu); u64 __kvm_call_hyp(void *hypfn, ...); #define kvm_call_hyp(f, ...) 
__kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__) diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index b18e852d27e8..4572a9b560fa 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -127,6 +127,7 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); void __vgic_v3_save_state(struct kvm_vcpu *vcpu); void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); +int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); void __timer_save_state(struct kvm_vcpu *vcpu); void __timer_restore_state(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index b4d13d9267ff..16e44fa9b3b6 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -180,14 +180,31 @@ #define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0) +#define SYS_ICC_IAR0_EL1 sys_reg(3, 0, 12, 8, 0) +#define SYS_ICC_EOIR0_EL1 sys_reg(3, 0, 12, 8, 1) +#define SYS_ICC_HPPIR0_EL1 sys_reg(3, 0, 12, 8, 2) +#define SYS_ICC_BPR0_EL1 sys_reg(3, 0, 12, 8, 3) +#define SYS_ICC_AP0Rn_EL1(n) sys_reg(3, 0, 12, 8, 4 | n) +#define SYS_ICC_AP0R0_EL1 SYS_ICC_AP0Rn_EL1(0) +#define SYS_ICC_AP0R1_EL1 SYS_ICC_AP0Rn_EL1(1) +#define SYS_ICC_AP0R2_EL1 SYS_ICC_AP0Rn_EL1(2) +#define SYS_ICC_AP0R3_EL1 SYS_ICC_AP0Rn_EL1(3) +#define SYS_ICC_AP1Rn_EL1(n) sys_reg(3, 0, 12, 9, n) +#define SYS_ICC_AP1R0_EL1 SYS_ICC_AP1Rn_EL1(0) +#define SYS_ICC_AP1R1_EL1 SYS_ICC_AP1Rn_EL1(1) +#define SYS_ICC_AP1R2_EL1 SYS_ICC_AP1Rn_EL1(2) +#define SYS_ICC_AP1R3_EL1 SYS_ICC_AP1Rn_EL1(3) #define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1) +#define SYS_ICC_RPR_EL1 sys_reg(3, 0, 12, 11, 3) #define SYS_ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5) #define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) #define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1) +#define SYS_ICC_HPPIR1_EL1 sys_reg(3, 0, 12, 12, 2) #define SYS_ICC_BPR1_EL1 sys_reg(3, 0, 12, 12, 3) #define SYS_ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4) #define SYS_ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 
5) -#define SYS_ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) +#define SYS_ICC_IGRPEN0_EL1 sys_reg(3, 0, 12, 12, 6) +#define SYS_ICC_IGRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) #define SYS_CONTEXTIDR_EL1 sys_reg(3, 0, 13, 0, 1) #define SYS_TPIDR_EL1 sys_reg(3, 0, 13, 0, 4) @@ -287,8 +304,8 @@ #define SCTLR_ELx_M 1 #define SCTLR_EL2_RES1 ((1 << 4) | (1 << 5) | (1 << 11) | (1 << 16) | \ - (1 << 16) | (1 << 18) | (1 << 22) | (1 << 23) | \ - (1 << 28) | (1 << 29)) + (1 << 18) | (1 << 22) | (1 << 23) | (1 << 28) | \ + (1 << 29)) #define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ SCTLR_ELx_SA | SCTLR_ELx_I) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 7b8a04789cef..59f09e6a6cb8 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -349,7 +349,6 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo extern long strncpy_from_user(char *dest, const char __user *src, long count); -extern __must_check long strlen_user(const char __user *str); extern __must_check long strnlen_user(const char __user *str, long n); #endif /* __ASM_UACCESS_H */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 70eea2ecc663..9f3ca24bbcc6 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -232,6 +232,9 @@ struct kvm_arch_memory_slot { #define KVM_ARM_VCPU_PMU_V3_CTRL 0 #define KVM_ARM_VCPU_PMU_V3_IRQ 0 #define KVM_ARM_VCPU_PMU_V3_INIT 1 +#define KVM_ARM_VCPU_TIMER_CTRL 1 +#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 +#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 2ed2a7657711..0e27f86ee709 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -133,6 +133,27 @@ const struct arm64_cpu_capabilities arm64_errata[] = { 
MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00), }, #endif +#ifdef CONFIG_CAVIUM_ERRATUM_30115 + { + /* Cavium ThunderX, T88 pass 1.x - 2.2 */ + .desc = "Cavium erratum 30115", + .capability = ARM64_WORKAROUND_CAVIUM_30115, + MIDR_RANGE(MIDR_THUNDERX, 0x00, + (1 << MIDR_VARIANT_SHIFT) | 2), + }, + { + /* Cavium ThunderX, T81 pass 1.0 - 1.2 */ + .desc = "Cavium erratum 30115", + .capability = ARM64_WORKAROUND_CAVIUM_30115, + MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x02), + }, + { + /* Cavium ThunderX, T83 pass 1.0 */ + .desc = "Cavium erratum 30115", + .capability = ARM64_WORKAROUND_CAVIUM_30115, + MIDR_RANGE(MIDR_THUNDERX_83XX, 0x00, 0x00), + }, +#endif { .desc = "Mismatched cache line size", .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE, diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index b37446a8ffdb..5c7f657dd207 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -390,6 +390,9 @@ int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, case KVM_ARM_VCPU_PMU_V3_CTRL: ret = kvm_arm_pmu_v3_set_attr(vcpu, attr); break; + case KVM_ARM_VCPU_TIMER_CTRL: + ret = kvm_arm_timer_set_attr(vcpu, attr); + break; default: ret = -ENXIO; break; @@ -407,6 +410,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, case KVM_ARM_VCPU_PMU_V3_CTRL: ret = kvm_arm_pmu_v3_get_attr(vcpu, attr); break; + case KVM_ARM_VCPU_TIMER_CTRL: + ret = kvm_arm_timer_get_attr(vcpu, attr); + break; default: ret = -ENXIO; break; @@ -424,6 +430,9 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, case KVM_ARM_VCPU_PMU_V3_CTRL: ret = kvm_arm_pmu_v3_has_attr(vcpu, attr); break; + case KVM_ARM_VCPU_TIMER_CTRL: + ret = kvm_arm_timer_has_attr(vcpu, attr); + break; default: ret = -ENXIO; break; diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index fa1b18e364fc..17d8a1677a0b 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -89,6 +89,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) 
trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false); vcpu->stat.wfi_exit_stat++; kvm_vcpu_block(vcpu); + kvm_clear_request(KVM_REQ_UNHALT, vcpu); } kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index aede1658aeda..945e79c641c4 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -350,6 +350,20 @@ again: } } + if (static_branch_unlikely(&vgic_v3_cpuif_trap) && + exit_code == ARM_EXCEPTION_TRAP && + (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 || + kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) { + int ret = __vgic_v3_perform_cpuif_access(vcpu); + + if (ret == 1) { + __skip_instr(vcpu); + goto again; + } + + /* 0 falls through to be handled out of EL2 */ + } + fp_enabled = __fpsimd_enabled(); __sysreg_save_guest_state(guest_ctxt); @@ -422,6 +436,7 @@ void __hyp_text __noreturn __hyp_panic(void) vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2); host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + __timer_save_state(vcpu); __deactivate_traps(vcpu); __deactivate_vm(vcpu); __sysreg_restore_host_state(host_ctxt); diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 561badf93de8..3256b9228e75 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -46,16 +46,6 @@ static const struct kvm_regs default_regs_reset32 = { COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT), }; -static const struct kvm_irq_level default_ptimer_irq = { - .irq = 30, - .level = 1, -}; - -static const struct kvm_irq_level default_vtimer_irq = { - .irq = 27, - .level = 1, -}; - static bool cpu_has_32bit_el1(void) { u64 pfr0; @@ -108,8 +98,6 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) */ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { - const struct kvm_irq_level *cpu_vtimer_irq; - const struct kvm_irq_level *cpu_ptimer_irq; const struct kvm_regs *cpu_reset; switch (vcpu->arch.target) { @@ -122,8 +110,6 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) cpu_reset = 
&default_regs_reset; } - cpu_vtimer_irq = &default_vtimer_irq; - cpu_ptimer_irq = &default_ptimer_irq; break; } @@ -137,5 +123,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) kvm_pmu_vcpu_reset(vcpu); /* Reset timer */ - return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq, cpu_ptimer_irq); + return kvm_timer_vcpu_reset(vcpu); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 0fe27024a2e1..77862881ae86 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -56,7 +56,8 @@ */ static bool read_from_write_only(struct kvm_vcpu *vcpu, - const struct sys_reg_params *params) + struct sys_reg_params *params, + const struct sys_reg_desc *r) { WARN_ONCE(1, "Unexpected sys_reg read to write-only register\n"); print_sys_reg_instr(params); @@ -64,6 +65,16 @@ static bool read_from_write_only(struct kvm_vcpu *vcpu, return false; } +static bool write_to_read_only(struct kvm_vcpu *vcpu, + struct sys_reg_params *params, + const struct sys_reg_desc *r) +{ + WARN_ONCE(1, "Unexpected sys_reg write to read-only register\n"); + print_sys_reg_instr(params); + kvm_inject_undefined(vcpu); + return false; +} + /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */ static u32 cache_levels; @@ -93,7 +104,7 @@ static bool access_dcsw(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { if (!p->is_write) - return read_from_write_only(vcpu, p); + return read_from_write_only(vcpu, p, r); kvm_set_way_flush(vcpu); return true; @@ -135,7 +146,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { if (!p->is_write) - return read_from_write_only(vcpu, p); + return read_from_write_only(vcpu, p, r); vgic_v3_dispatch_sgi(vcpu, p->regval); @@ -773,7 +784,7 @@ static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return trap_raz_wi(vcpu, p, r); if (!p->is_write) - return read_from_write_only(vcpu, p); + return read_from_write_only(vcpu, p, r); if (pmu_write_swinc_el0_disabled(vcpu)) return 
false; @@ -953,7 +964,15 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 }, + { SYS_DESC(SYS_ICC_IAR0_EL1), write_to_read_only }, + { SYS_DESC(SYS_ICC_EOIR0_EL1), read_from_write_only }, + { SYS_DESC(SYS_ICC_HPPIR0_EL1), write_to_read_only }, + { SYS_DESC(SYS_ICC_DIR_EL1), read_from_write_only }, + { SYS_DESC(SYS_ICC_RPR_EL1), write_to_read_only }, { SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi }, + { SYS_DESC(SYS_ICC_IAR1_EL1), write_to_read_only }, + { SYS_DESC(SYS_ICC_EOIR1_EL1), read_from_write_only }, + { SYS_DESC(SYS_ICC_HPPIR1_EL1), write_to_read_only }, { SYS_DESC(SYS_ICC_SRE_EL1), access_gic_sre }, { SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 }, diff --git a/arch/arm64/kvm/trace.h b/arch/arm64/kvm/trace.h index 7fb0008c4fa3..5188c7007169 100644 --- a/arch/arm64/kvm/trace.h +++ b/arch/arm64/kvm/trace.h @@ -93,6 +93,8 @@ TRACE_EVENT(kvm_arm_set_dreg32, TP_printk("%s: 0x%08x", __entry->name, __entry->value) ); +TRACE_DEFINE_SIZEOF(__u64); + TRACE_EVENT(kvm_arm_set_regset, TP_PROTO(const char *type, int len, __u64 *control, __u64 *value), TP_ARGS(type, len, control, value), diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c index 6260b69e5622..116786d2e8e8 100644 --- a/arch/arm64/kvm/vgic-sys-reg-v3.c +++ b/arch/arm64/kvm/vgic-sys-reg-v3.c @@ -268,36 +268,21 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return true; } static const struct sys_reg_desc gic_v3_icc_reg_descs[] = { - /* ICC_PMR_EL1 */ - { Op0(3), Op1(0), CRn(4), CRm(6), Op2(0), access_gic_pmr }, - /* ICC_BPR0_EL1 */ - { Op0(3), Op1(0), CRn(12), CRm(8), Op2(3), access_gic_bpr0 }, - /* ICC_AP0R0_EL1 */ - { Op0(3), Op1(0), CRn(12), CRm(8), Op2(4), access_gic_ap0r }, - /* ICC_AP0R1_EL1 */ - { Op0(3), Op1(0), CRn(12), CRm(8), Op2(5), access_gic_ap0r }, - /* ICC_AP0R2_EL1 */ - { Op0(3), Op1(0), CRn(12), CRm(8), Op2(6), access_gic_ap0r }, - /* 
ICC_AP0R3_EL1 */ - { Op0(3), Op1(0), CRn(12), CRm(8), Op2(7), access_gic_ap0r }, - /* ICC_AP1R0_EL1 */ - { Op0(3), Op1(0), CRn(12), CRm(9), Op2(0), access_gic_ap1r }, - /* ICC_AP1R1_EL1 */ - { Op0(3), Op1(0), CRn(12), CRm(9), Op2(1), access_gic_ap1r }, - /* ICC_AP1R2_EL1 */ - { Op0(3), Op1(0), CRn(12), CRm(9), Op2(2), access_gic_ap1r }, - /* ICC_AP1R3_EL1 */ - { Op0(3), Op1(0), CRn(12), CRm(9), Op2(3), access_gic_ap1r }, - /* ICC_BPR1_EL1 */ - { Op0(3), Op1(0), CRn(12), CRm(12), Op2(3), access_gic_bpr1 }, - /* ICC_CTLR_EL1 */ - { Op0(3), Op1(0), CRn(12), CRm(12), Op2(4), access_gic_ctlr }, - /* ICC_SRE_EL1 */ - { Op0(3), Op1(0), CRn(12), CRm(12), Op2(5), access_gic_sre }, - /* ICC_IGRPEN0_EL1 */ - { Op0(3), Op1(0), CRn(12), CRm(12), Op2(6), access_gic_grpen0 }, - /* ICC_GRPEN1_EL1 */ - { Op0(3), Op1(0), CRn(12), CRm(12), Op2(7), access_gic_grpen1 }, + { SYS_DESC(SYS_ICC_PMR_EL1), access_gic_pmr }, + { SYS_DESC(SYS_ICC_BPR0_EL1), access_gic_bpr0 }, + { SYS_DESC(SYS_ICC_AP0R0_EL1), access_gic_ap0r }, + { SYS_DESC(SYS_ICC_AP0R1_EL1), access_gic_ap0r }, + { SYS_DESC(SYS_ICC_AP0R2_EL1), access_gic_ap0r }, + { SYS_DESC(SYS_ICC_AP0R3_EL1), access_gic_ap0r }, + { SYS_DESC(SYS_ICC_AP1R0_EL1), access_gic_ap1r }, + { SYS_DESC(SYS_ICC_AP1R1_EL1), access_gic_ap1r }, + { SYS_DESC(SYS_ICC_AP1R2_EL1), access_gic_ap1r }, + { SYS_DESC(SYS_ICC_AP1R3_EL1), access_gic_ap1r }, + { SYS_DESC(SYS_ICC_BPR1_EL1), access_gic_bpr1 }, + { SYS_DESC(SYS_ICC_CTLR_EL1), access_gic_ctlr }, + { SYS_DESC(SYS_ICC_SRE_EL1), access_gic_sre }, + { SYS_DESC(SYS_ICC_IGRPEN0_EL1), access_gic_grpen0 }, + { SYS_DESC(SYS_ICC_IGRPEN1_EL1), access_gic_grpen1 }, }; int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id, diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 3e340b625436..e90cd1db42a8 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -175,7 +175,6 @@ static void *__dma_alloc(struct device *dev, size_t size, no_map: 
__dma_free_coherent(dev, size, ptr, *dma_handle, attrs); no_mem: - *dma_handle = DMA_ERROR_CODE; return NULL; } @@ -478,7 +477,7 @@ static dma_addr_t __dummy_map_page(struct device *dev, struct page *page, enum dma_data_direction dir, unsigned long attrs) { - return DMA_ERROR_CODE; + return 0; } static void __dummy_unmap_page(struct device *dev, dma_addr_t dev_addr, diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index 3c1bd640042a..89bdb8264305 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -41,6 +41,7 @@ config BLACKFIN select MODULES_USE_ELF_RELA select HAVE_DEBUG_STACKOVERFLOW select HAVE_NMI + select ARCH_NO_COHERENT_DMA_MMAP config GENERIC_CSUM def_bool y diff --git a/arch/blackfin/configs/BF609-EZKIT_defconfig b/arch/blackfin/configs/BF609-EZKIT_defconfig index ba4267f658af..3ce77f07208a 100644 --- a/arch/blackfin/configs/BF609-EZKIT_defconfig +++ b/arch/blackfin/configs/BF609-EZKIT_defconfig @@ -105,7 +105,7 @@ CONFIG_SPI=y CONFIG_SPI_ADI_V3=y CONFIG_GPIOLIB=y CONFIG_GPIO_SYSFS=y -CONFIG_GPIO_MCP23S08=y +CONFIG_PINCTRL_MCP23S08=y # CONFIG_HWMON is not set CONFIG_WATCHDOG=y CONFIG_BFIN_WDT=y diff --git a/arch/blackfin/include/asm/uaccess.h b/arch/blackfin/include/asm/uaccess.h index f54a34f31cea..45da4bcb050e 100644 --- a/arch/blackfin/include/asm/uaccess.h +++ b/arch/blackfin/include/asm/uaccess.h @@ -194,13 +194,6 @@ static inline long __must_check strnlen_user(const char __user *src, long n) return strnlen((const char __force *)src, n) + 1; } -static inline long __must_check strlen_user(const char __user *src) -{ - if (!access_ok(VERIFY_READ, src, 1)) - return 0; - return strlen((const char __force *)src) + 1; -} - /* * Zero Userspace */ diff --git a/arch/blackfin/mach-bf527/boards/tll6527m.c b/arch/blackfin/mach-bf527/boards/tll6527m.c index c1acce4c2e45..ce5488e8226b 100644 --- a/arch/blackfin/mach-bf527/boards/tll6527m.c +++ b/arch/blackfin/mach-bf527/boards/tll6527m.c @@ -348,14 +348,14 @@ static struct platform_device 
bfin_i2s = { }; #endif -#if IS_ENABLED(CONFIG_GPIO_MCP23S08) +#if IS_ENABLED(CONFIG_PINCTRL_MCP23S08) #include <linux/spi/mcp23s08.h> static const struct mcp23s08_platform_data bfin_mcp23s08_sys_gpio_info = { - .chip[0].is_present = true, + .spi_present_mask = BIT(0), .base = 0x30, }; static const struct mcp23s08_platform_data bfin_mcp23s08_usr_gpio_info = { - .chip[2].is_present = true, + .spi_present_mask = BIT(2), .base = 0x38, }; #endif @@ -423,7 +423,7 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = { .mode = SPI_CPHA | SPI_CPOL, }, #endif -#if IS_ENABLED(CONFIG_GPIO_MCP23S08) +#if IS_ENABLED(CONFIG_PINCTRL_MCP23S08) { .modalias = "mcp23s08", .platform_data = &bfin_mcp23s08_sys_gpio_info, diff --git a/arch/blackfin/mach-bf609/boards/ezkit.c b/arch/blackfin/mach-bf609/boards/ezkit.c index 9231e5a72b93..51157a255824 100644 --- a/arch/blackfin/mach-bf609/boards/ezkit.c +++ b/arch/blackfin/mach-bf609/boards/ezkit.c @@ -1887,7 +1887,7 @@ static struct platform_device i2c_bfin_twi1_device = { }; #endif -#if IS_ENABLED(CONFIG_GPIO_MCP23S08) +#if IS_ENABLED(CONFIG_PINCTRL_MCP23S08) #include <linux/spi/mcp23s08.h> static const struct mcp23s08_platform_data bfin_mcp23s08_soft_switch0 = { .base = 120, @@ -1929,7 +1929,7 @@ static struct i2c_board_info __initdata bfin_i2c_board_info0[] = { I2C_BOARD_INFO("ssm2602", 0x1b), }, #endif -#if IS_ENABLED(CONFIG_GPIO_MCP23S08) +#if IS_ENABLED(CONFIG_PINCTRL_MCP23S08) { I2C_BOARD_INFO("mcp23017", 0x21), .platform_data = (void *)&bfin_mcp23s08_soft_switch0 diff --git a/arch/c6x/include/asm/dma-mapping.h b/arch/c6x/include/asm/dma-mapping.h index aca9f755e4f8..05daf1038111 100644 --- a/arch/c6x/include/asm/dma-mapping.h +++ b/arch/c6x/include/asm/dma-mapping.h @@ -12,11 +12,6 @@ #ifndef _ASM_C6X_DMA_MAPPING_H #define _ASM_C6X_DMA_MAPPING_H -/* - * DMA errors are defined by all-bits-set in the DMA address. 
- */ -#define DMA_ERROR_CODE ~0 - extern const struct dma_map_ops c6x_dma_ops; static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) diff --git a/arch/cris/include/asm/uaccess.h b/arch/cris/include/asm/uaccess.h index 0d473aec3066..b0c6b077b632 100644 --- a/arch/cris/include/asm/uaccess.h +++ b/arch/cris/include/asm/uaccess.h @@ -173,12 +173,6 @@ extern unsigned long __copy_user_in(void *to, const void __user *from, unsigned extern unsigned long __do_clear_user(void __user *to, unsigned long n); static inline long -__strncpy_from_user(char *dst, const char __user *src, long count) -{ - return __do_strncpy_from_user(dst, src, count); -} - -static inline long strncpy_from_user(char *dst, const char __user *src, long count) { long res = -EFAULT; @@ -363,6 +357,4 @@ __clear_user(void __user *to, unsigned long n) return __do_clear_user(to, n); } -#define strlen_user(str) strnlen_user((str), 0x7ffffffe) - #endif /* _CRIS_UACCESS_H */ diff --git a/arch/frv/include/asm/uaccess.h b/arch/frv/include/asm/uaccess.h index e4e33b4cd3ae..ff9562dc6825 100644 --- a/arch/frv/include/asm/uaccess.h +++ b/arch/frv/include/asm/uaccess.h @@ -282,6 +282,4 @@ clear_user(void __user *to, unsigned long n) extern long strncpy_from_user(char *dst, const char __user *src, long count); extern long strnlen_user(const char __user *src, long count); -#define strlen_user(str) strnlen_user(str, 32767) - #endif /* _ASM_UACCESS_H */ diff --git a/arch/hexagon/include/asm/dma-mapping.h b/arch/hexagon/include/asm/dma-mapping.h index d3a87bd9b686..463dbc18f853 100644 --- a/arch/hexagon/include/asm/dma-mapping.h +++ b/arch/hexagon/include/asm/dma-mapping.h @@ -29,8 +29,6 @@ #include <asm/io.h> struct device; -extern int bad_dma_address; -#define DMA_ERROR_CODE bad_dma_address extern const struct dma_map_ops *dma_ops; @@ -39,9 +37,6 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return dma_ops; } -#define HAVE_ARCH_DMA_SUPPORTED 1 -extern int 
dma_supported(struct device *dev, u64 mask); -extern int dma_is_consistent(struct device *dev, dma_addr_t dma_handle); extern void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction); diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c index e74b65009587..546792d176a4 100644 --- a/arch/hexagon/kernel/dma.c +++ b/arch/hexagon/kernel/dma.c @@ -25,25 +25,16 @@ #include <linux/module.h> #include <asm/page.h> +#define HEXAGON_MAPPING_ERROR 0 + const struct dma_map_ops *dma_ops; EXPORT_SYMBOL(dma_ops); -int bad_dma_address; /* globals are automatically initialized to zero */ - static inline void *dma_addr_to_virt(dma_addr_t dma_addr) { return phys_to_virt((unsigned long) dma_addr); } -int dma_supported(struct device *dev, u64 mask) -{ - if (mask == DMA_BIT_MASK(32)) - return 1; - else - return 0; -} -EXPORT_SYMBOL(dma_supported); - static struct gen_pool *coherent_pool; @@ -181,7 +172,7 @@ static dma_addr_t hexagon_map_page(struct device *dev, struct page *page, WARN_ON(size == 0); if (!check_addr("map_single", dev, bus, size)) - return bad_dma_address; + return HEXAGON_MAPPING_ERROR; if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) dma_sync(dma_addr_to_virt(bus), size, dir); @@ -203,6 +194,11 @@ static void hexagon_sync_single_for_device(struct device *dev, dma_sync(dma_addr_to_virt(dma_handle), size, dir); } +static int hexagon_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return dma_addr == HEXAGON_MAPPING_ERROR; +} + const struct dma_map_ops hexagon_dma_ops = { .alloc = hexagon_dma_alloc_coherent, .free = hexagon_free_coherent, @@ -210,6 +206,7 @@ const struct dma_map_ops hexagon_dma_ops = { .map_page = hexagon_map_page, .sync_single_for_cpu = hexagon_sync_single_for_cpu, .sync_single_for_device = hexagon_sync_single_for_device, + .mapping_error = hexagon_mapping_error, .is_phys = 1, }; diff --git a/arch/hexagon/kernel/hexagon_ksyms.c b/arch/hexagon/kernel/hexagon_ksyms.c index 00bcad9cbd8f..aa248f595431 
100644 --- a/arch/hexagon/kernel/hexagon_ksyms.c +++ b/arch/hexagon/kernel/hexagon_ksyms.c @@ -40,7 +40,6 @@ EXPORT_SYMBOL(memset); /* Additional variables */ EXPORT_SYMBOL(__phys_offset); EXPORT_SYMBOL(_dflt_cache_att); -EXPORT_SYMBOL(bad_dma_address); #define DECLARE_EXPORT(name) \ extern void name(void); EXPORT_SYMBOL(name) diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index 73ec3c6f4cfe..3ce5ab4339f3 100644 --- a/arch/ia64/include/asm/dma-mapping.h +++ b/arch/ia64/include/asm/dma-mapping.h @@ -12,8 +12,6 @@ #define ARCH_HAS_DMA_GET_REQUIRED_MASK -#define DMA_ERROR_CODE 0 - extern const struct dma_map_ops *dma_ops; extern struct ia64_machine_vector ia64_mv; extern void set_iommu_machvec(void); diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h index 82a7646c4416..b2106b01e84f 100644 --- a/arch/ia64/include/asm/uaccess.h +++ b/arch/ia64/include/asm/uaccess.h @@ -277,18 +277,6 @@ extern long __must_check __strncpy_from_user (char *to, const char __user *from, __sfu_ret; \ }) -/* Returns: 0 if bad, string length+1 (memory size) of string if ok */ -extern unsigned long __strlen_user (const char __user *); - -#define strlen_user(str) \ -({ \ - const char __user *__su_str = (str); \ - unsigned long __su_ret = 0; \ - if (__access_ok(__su_str, 0)) \ - __su_ret = __strlen_user(__su_str); \ - __su_ret; \ -}) - /* * Returns: 0 if exception before NUL or reaching the supplied limit * (N), a value greater than N if the limit would be exceeded, else diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile index 0a40b14407b1..1a36a3a39624 100644 --- a/arch/ia64/lib/Makefile +++ b/arch/ia64/lib/Makefile @@ -5,7 +5,7 @@ lib-y := io.o __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \ __divdi3.o __udivdi3.o __moddi3.o __umoddi3.o \ checksum.o clear_page.o csum_partial_copy.o \ - clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o \ + clear_user.o strncpy_from_user.o strnlen_user.o \ flush.o 
ip_fast_csum.o do_csum.o \ memset.o strlen.o xor.o diff --git a/arch/ia64/lib/strlen_user.S b/arch/ia64/lib/strlen_user.S deleted file mode 100644 index 9d257684e733..000000000000 --- a/arch/ia64/lib/strlen_user.S +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Optimized version of the strlen_user() function - * - * Inputs: - * in0 address of buffer - * - * Outputs: - * ret0 0 in case of fault, strlen(buffer)+1 otherwise - * - * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co - * David Mosberger-Tang <davidm@hpl.hp.com> - * Stephane Eranian <eranian@hpl.hp.com> - * - * 01/19/99 S.Eranian heavily enhanced version (see details below) - * 09/24/99 S.Eranian added speculation recovery code - */ - -#include <asm/asmmacro.h> -#include <asm/export.h> - -// -// int strlen_user(char *) -// ------------------------ -// Returns: -// - length of string + 1 -// - 0 in case an exception is raised -// -// This is an enhanced version of the basic strlen_user. it includes a -// combination of compute zero index (czx), parallel comparisons, speculative -// loads and loop unroll using rotating registers. -// -// General Ideas about the algorithm: -// The goal is to look at the string in chunks of 8 bytes. -// so we need to do a few extra checks at the beginning because the -// string may not be 8-byte aligned. In this case we load the 8byte -// quantity which includes the start of the string and mask the unused -// bytes with 0xff to avoid confusing czx. -// We use speculative loads and software pipelining to hide memory -// latency and do read ahead safely. This way we defer any exception. -// -// Because we don't want the kernel to be relying on particular -// settings of the DCR register, we provide recovery code in case -// speculation fails. The recovery code is going to "redo" the work using -// only normal loads. If we still get a fault then we return an -// error (ret0=0). Otherwise we return the strlen+1 as usual. 
-// The fact that speculation may fail can be caused, for instance, by -// the DCR.dm bit being set. In this case TLB misses are deferred, i.e., -// a NaT bit will be set if the translation is not present. The normal -// load, on the other hand, will cause the translation to be inserted -// if the mapping exists. -// -// It should be noted that we execute recovery code only when we need -// to use the data that has been speculatively loaded: we don't execute -// recovery code on pure read ahead data. -// -// Remarks: -// - the cmp r0,r0 is used as a fast way to initialize a predicate -// register to 1. This is required to make sure that we get the parallel -// compare correct. -// -// - we don't use the epilogue counter to exit the loop but we need to set -// it to zero beforehand. -// -// - after the loop we must test for Nat values because neither the -// czx nor cmp instruction raise a NaT consumption fault. We must be -// careful not to look too far for a Nat for which we don't care. -// For instance we don't need to look at a NaT in val2 if the zero byte -// was in val1. -// -// - Clearly performance tuning is required. 
-// - -#define saved_pfs r11 -#define tmp r10 -#define base r16 -#define orig r17 -#define saved_pr r18 -#define src r19 -#define mask r20 -#define val r21 -#define val1 r22 -#define val2 r23 - -GLOBAL_ENTRY(__strlen_user) - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,11,0,0,8 - - .rotr v[2], w[2] // declares our 4 aliases - - extr.u tmp=in0,0,3 // tmp=least significant 3 bits - mov orig=in0 // keep trackof initial byte address - dep src=0,in0,0,3 // src=8byte-aligned in0 address - .save pr, saved_pr - mov saved_pr=pr // preserve predicates (rotation) - ;; - - .body - - ld8.s v[1]=[src],8 // load the initial 8bytes (must speculate) - shl tmp=tmp,3 // multiply by 8bits/byte - mov mask=-1 // our mask - ;; - ld8.s w[1]=[src],8 // load next 8 bytes in 2nd pipeline - cmp.eq p6,p0=r0,r0 // sets p6 (required because of // cmp.and) - sub tmp=64,tmp // how many bits to shift our mask on the right - ;; - shr.u mask=mask,tmp // zero enough bits to hold v[1] valuable part - mov ar.ec=r0 // clear epilogue counter (saved in ar.pfs) - ;; - add base=-16,src // keep track of aligned base - chk.s v[1], .recover // if already NaT, then directly skip to recover - or v[1]=v[1],mask // now we have a safe initial byte pattern - ;; -1: - ld8.s v[0]=[src],8 // speculatively load next - czx1.r val1=v[1] // search 0 byte from right - czx1.r val2=w[1] // search 0 byte from right following 8bytes - ;; - ld8.s w[0]=[src],8 // speculatively load next to next - cmp.eq.and p6,p0=8,val1 // p6 = p6 and val1==8 - cmp.eq.and p6,p0=8,val2 // p6 = p6 and mask==8 -(p6) br.wtop.dptk.few 1b // loop until p6 == 0 - ;; - // - // We must return try the recovery code iff - // val1_is_nat || (val1==8 && val2_is_nat) - // - // XXX Fixme - // - there must be a better way of doing the test - // - cmp.eq p8,p9=8,val1 // p6 = val1 had zero (disambiguate) - tnat.nz p6,p7=val1 // test NaT on val1 -(p6) br.cond.spnt .recover // jump to recovery if val1 is NaT - ;; - // - // if we come here p7 is true, 
i.e., initialized for // cmp - // - cmp.eq.and p7,p0=8,val1// val1==8? - tnat.nz.and p7,p0=val2 // test NaT if val2 -(p7) br.cond.spnt .recover // jump to recovery if val2 is NaT - ;; -(p8) mov val1=val2 // val2 contains the value -(p8) adds src=-16,src // correct position when 3 ahead -(p9) adds src=-24,src // correct position when 4 ahead - ;; - sub ret0=src,orig // distance from origin - sub tmp=7,val1 // 7=8-1 because this strlen returns strlen+1 - mov pr=saved_pr,0xffffffffffff0000 - ;; - sub ret0=ret0,tmp // length=now - back -1 - mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what - br.ret.sptk.many rp // end of normal execution - - // - // Outlined recovery code when speculation failed - // - // This time we don't use speculation and rely on the normal exception - // mechanism. that's why the loop is not as good as the previous one - // because read ahead is not possible - // - // XXX Fixme - // - today we restart from the beginning of the string instead - // of trying to continue where we left off. - // -.recover: - EX(.Lexit1, ld8 val=[base],8) // load the initial bytes - ;; - or val=val,mask // remask first bytes - cmp.eq p0,p6=r0,r0 // nullify first ld8 in loop - ;; - // - // ar.ec is still zero here - // -2: - EX(.Lexit1, (p6) ld8 val=[base],8) - ;; - czx1.r val1=val // search 0 byte from right - ;; - cmp.eq p6,p0=8,val1 // val1==8 ? 
-(p6) br.wtop.dptk.few 2b // loop until p6 == 0 - ;; - sub ret0=base,orig // distance from base - sub tmp=7,val1 // 7=8-1 because this strlen returns strlen+1 - mov pr=saved_pr,0xffffffffffff0000 - ;; - sub ret0=ret0,tmp // length=now - back -1 - mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what - br.ret.sptk.many rp // end of successful recovery code - - // - // We failed even on the normal load (called from exception handler) - // -.Lexit1: - mov ret0=0 - mov pr=saved_pr,0xffffffffffff0000 - mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what - br.ret.sptk.many rp -END(__strlen_user) -EXPORT_SYMBOL(__strlen_user) diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index 95474460b367..87cde1e4b38c 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -19,6 +19,7 @@ config M32R select HAVE_DEBUG_STACKOVERFLOW select CPU_NO_EFFICIENT_FFS select DMA_NOOP_OPS + select ARCH_NO_COHERENT_DMA_MMAP if !MMU config SBUS bool diff --git a/arch/m32r/include/asm/dma-mapping.h b/arch/m32r/include/asm/dma-mapping.h index c01d9f52d228..aff3ae8b62f7 100644 --- a/arch/m32r/include/asm/dma-mapping.h +++ b/arch/m32r/include/asm/dma-mapping.h @@ -8,8 +8,6 @@ #include <linux/dma-debug.h> #include <linux/io.h> -#define DMA_ERROR_CODE (~(dma_addr_t)0x0) - static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { return &dma_noop_ops; diff --git a/arch/m32r/include/asm/uaccess.h b/arch/m32r/include/asm/uaccess.h index 07be349c00ad..496c4716dbc8 100644 --- a/arch/m32r/include/asm/uaccess.h +++ b/arch/m32r/include/asm/uaccess.h @@ -482,8 +482,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) long __must_check strncpy_from_user(char *dst, const char __user *src, long count); -long __must_check __strncpy_from_user(char *dst, - const char __user *src, long count); /** * __clear_user: - Zero a block of memory in user space, with less checking. 
@@ -511,22 +509,6 @@ unsigned long __clear_user(void __user *mem, unsigned long len); */ unsigned long clear_user(void __user *mem, unsigned long len); -/** - * strlen_user: - Get the size of a string in user space. - * @str: The string to measure. - * - * Context: User context only. This function may sleep if pagefaults are - * enabled. - * - * Get the size of a NUL-terminated string in user space. - * - * Returns the size of the string INCLUDING the terminating NUL. - * On exception, returns 0. - * - * If there is a limit on the length of a valid string, you may wish to - * consider using strnlen_user() instead. - */ -#define strlen_user(str) strnlen_user(str, ~0UL >> 1) long strnlen_user(const char __user *str, long n); #endif /* _ASM_M32R_UACCESS_H */ diff --git a/arch/m32r/include/asm/unistd.h b/arch/m32r/include/asm/unistd.h index 59db80193454..de602533a3bd 100644 --- a/arch/m32r/include/asm/unistd.h +++ b/arch/m32r/include/asm/unistd.h @@ -18,7 +18,6 @@ #define __ARCH_WANT_SYS_FADVISE64 #define __ARCH_WANT_SYS_GETPGRP #define __ARCH_WANT_SYS_LLSEEK -#define __ARCH_WANT_SYS_OLD_GETRLIMIT /*will be unused*/ #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_CLONE #define __ARCH_WANT_SYS_FORK diff --git a/arch/m32r/kernel/m32r_ksyms.c b/arch/m32r/kernel/m32r_ksyms.c index a4d43b5cc102..68da6b800453 100644 --- a/arch/m32r/kernel/m32r_ksyms.c +++ b/arch/m32r/kernel/m32r_ksyms.c @@ -23,7 +23,6 @@ EXPORT_SYMBOL(__ioremap); EXPORT_SYMBOL(iounmap); EXPORT_SYMBOL(strncpy_from_user); -EXPORT_SYMBOL(__strncpy_from_user); EXPORT_SYMBOL(clear_user); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(strnlen_user); diff --git a/arch/m32r/lib/usercopy.c b/arch/m32r/lib/usercopy.c index b3ef2c899f96..b723b11107c7 100644 --- a/arch/m32r/lib/usercopy.c +++ b/arch/m32r/lib/usercopy.c @@ -89,14 +89,6 @@ do { \ #endif /* CONFIG_ISA_DUAL_ISSUE */ long -__strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res; - __do_strncpy_from_user(dst, src, count, res); - 
return res; -} - -long strncpy_from_user(char *dst, const char __user *src, long count) { long res = -EFAULT; diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index d140206d5d29..5abb548f0e70 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -2,6 +2,7 @@ config M68K bool default y select ARCH_MIGHT_HAVE_PC_PARPORT if ISA + select ARCH_NO_COHERENT_DMA_MMAP if !MMU select HAVE_IDE select HAVE_AOUT if MMU select HAVE_DEBUG_BUGVERBOSE diff --git a/arch/m68k/include/asm/uaccess_mm.h b/arch/m68k/include/asm/uaccess_mm.h index ef856ffeffdf..1da1e23de74c 100644 --- a/arch/m68k/include/asm/uaccess_mm.h +++ b/arch/m68k/include/asm/uaccess_mm.h @@ -378,7 +378,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) (uaccess_kernel() ? ~0UL : TASK_SIZE) extern long strncpy_from_user(char *dst, const char __user *src, long count); -extern __must_check long strlen_user(const char __user *str); extern __must_check long strnlen_user(const char __user *str, long n); unsigned long __clear_user(void __user *to, unsigned long n); diff --git a/arch/m68k/include/asm/uaccess_no.h b/arch/m68k/include/asm/uaccess_no.h index e482c3899ff1..53d7b792a43d 100644 --- a/arch/m68k/include/asm/uaccess_no.h +++ b/arch/m68k/include/asm/uaccess_no.h @@ -141,8 +141,6 @@ static inline long strnlen_user(const char *src, long n) return(strlen(src) + 1); /* DAVIDM make safer */ } -#define strlen_user(str) strnlen_user(str, 32767) - /* * Zero Userspace */ diff --git a/arch/metag/include/asm/uaccess.h b/arch/metag/include/asm/uaccess.h index 9c8fbf8fb5aa..47469e26641a 100644 --- a/arch/metag/include/asm/uaccess.h +++ b/arch/metag/include/asm/uaccess.h @@ -188,8 +188,6 @@ strncpy_from_user(char *dst, const char __user *src, long count) */ extern long __must_check strnlen_user(const char __user *src, long count); -#define strlen_user(str) strnlen_user(str, 32767) - extern unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n); extern unsigned long 
raw_copy_to_user(void __user *to, const void *from, diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 8e47121b8b8b..4ed8ebf33509 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -2,6 +2,7 @@ config MICROBLAZE def_bool y select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_NO_COHERENT_DMA_MMAP if !MMU select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT select TIMER_OF diff --git a/arch/microblaze/include/asm/dma-mapping.h b/arch/microblaze/include/asm/dma-mapping.h index 3fad5e722a66..e15cd2f76e23 100644 --- a/arch/microblaze/include/asm/dma-mapping.h +++ b/arch/microblaze/include/asm/dma-mapping.h @@ -28,8 +28,6 @@ #include <asm/io.h> #include <asm/cacheflush.h> -#define DMA_ERROR_CODE (~(dma_addr_t)0x0) - #define __dma_alloc_coherent(dev, gfp, size, handle) NULL #define __dma_free_coherent(size, addr) ((void)0) diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index 38f2c9ccef10..81f16aadbf9e 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h @@ -355,14 +355,12 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) */ extern int __strncpy_user(char *to, const char __user *from, int len); -#define __strncpy_from_user __strncpy_user - static inline long strncpy_from_user(char *dst, const char __user *src, long count) { if (!access_ok(VERIFY_READ, src, 1)) return -EFAULT; - return __strncpy_from_user(dst, src, count); + return __strncpy_user(dst, src, count); } /* diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 2828ecde133d..45bcd1cfcec0 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -364,6 +364,7 @@ config MACH_INGENIC select SYS_SUPPORTS_ZBOOT_UART16550 select DMA_NONCOHERENT select IRQ_MIPS_CPU + select PINCTRL select GPIOLIB select COMMON_CLK select GENERIC_IRQ_CHIP diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts index 
1652d8d60b1e..fd138d9978c1 100644 --- a/arch/mips/boot/dts/ingenic/ci20.dts +++ b/arch/mips/boot/dts/ingenic/ci20.dts @@ -29,18 +29,30 @@ &uart0 { status = "okay"; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_uart0>; }; &uart1 { status = "okay"; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_uart1>; }; &uart3 { status = "okay"; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_uart2>; }; &uart4 { status = "okay"; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_uart4>; }; &nemc { @@ -61,6 +73,13 @@ ingenic,nemc-tAW = <15>; ingenic,nemc-tSTRV = <100>; + /* + * Only CLE/ALE are needed for the devices that are connected, rather + * than the full address line set. + */ + pinctrl-names = "default"; + pinctrl-0 = <&pins_nemc>; + nand@1 { reg = <1>; @@ -69,6 +88,9 @@ nand-ecc-mode = "hw"; nand-on-flash-bbt; + pinctrl-names = "default"; + pinctrl-0 = <&pins_nemc_cs1>; + partitions { compatible = "fixed-partitions"; #address-cells = <2>; @@ -106,3 +128,41 @@ &bch { status = "okay"; }; + +&pinctrl { + pins_uart0: uart0 { + function = "uart0"; + groups = "uart0-data"; + bias-disable; + }; + + pins_uart1: uart1 { + function = "uart1"; + groups = "uart1-data"; + bias-disable; + }; + + pins_uart2: uart2 { + function = "uart2"; + groups = "uart2-data", "uart2-hwflow"; + bias-disable; + }; + + pins_uart4: uart4 { + function = "uart4"; + groups = "uart4-data"; + bias-disable; + }; + + pins_nemc: nemc { + function = "nemc"; + groups = "nemc-data", "nemc-cle-ale", "nemc-rd-we", "nemc-frd-fwe"; + bias-disable; + }; + + pins_nemc_cs1: nemc-cs1 { + function = "nemc-cs1"; + groups = "nemc-cs1"; + bias-disable; + }; +}; diff --git a/arch/mips/boot/dts/ingenic/jz4740.dtsi b/arch/mips/boot/dts/ingenic/jz4740.dtsi index 3e1587f1f77a..2ca7ce7481f1 100644 --- a/arch/mips/boot/dts/ingenic/jz4740.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4740.dtsi @@ -55,6 +55,74 @@ clock-names = "rtc"; }; + pinctrl: pin-controller@10010000 { + compatible = "ingenic,jz4740-pinctrl"; + reg = 
<0x10010000 0x400>; + + #address-cells = <1>; + #size-cells = <0>; + + gpa: gpio@0 { + compatible = "ingenic,jz4740-gpio"; + reg = <0>; + + gpio-controller; + gpio-ranges = <&pinctrl 0 0 32>; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <28>; + }; + + gpb: gpio@1 { + compatible = "ingenic,jz4740-gpio"; + reg = <1>; + + gpio-controller; + gpio-ranges = <&pinctrl 0 32 32>; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <27>; + }; + + gpc: gpio@2 { + compatible = "ingenic,jz4740-gpio"; + reg = <2>; + + gpio-controller; + gpio-ranges = <&pinctrl 0 64 32>; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <26>; + }; + + gpd: gpio@3 { + compatible = "ingenic,jz4740-gpio"; + reg = <3>; + + gpio-controller; + gpio-ranges = <&pinctrl 0 96 32>; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <25>; + }; + }; + uart0: serial@10030000 { compatible = "ingenic,jz4740-uart"; reg = <0x10030000 0x100>; diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi index b868b429add2..4853ef67b3ab 100644 --- a/arch/mips/boot/dts/ingenic/jz4780.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi @@ -44,6 +44,104 @@ #clock-cells = <1>; }; + pinctrl: pin-controller@10010000 { + compatible = "ingenic,jz4780-pinctrl"; + reg = <0x10010000 0x600>; + + #address-cells = <1>; + #size-cells = <0>; + + gpa: gpio@0 { + compatible = "ingenic,jz4780-gpio"; + reg = <0>; + + gpio-controller; + gpio-ranges = <&pinctrl 0 0 32>; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <17>; + }; + + gpb: gpio@1 { + compatible = "ingenic,jz4780-gpio"; + reg = <1>; + + gpio-controller; + gpio-ranges = <&pinctrl 0 32 32>; + 
#gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <16>; + }; + + gpc: gpio@2 { + compatible = "ingenic,jz4780-gpio"; + reg = <2>; + + gpio-controller; + gpio-ranges = <&pinctrl 0 64 32>; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <15>; + }; + + gpd: gpio@3 { + compatible = "ingenic,jz4780-gpio"; + reg = <3>; + + gpio-controller; + gpio-ranges = <&pinctrl 0 96 32>; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <14>; + }; + + gpe: gpio@4 { + compatible = "ingenic,jz4780-gpio"; + reg = <4>; + + gpio-controller; + gpio-ranges = <&pinctrl 0 128 32>; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <13>; + }; + + gpf: gpio@5 { + compatible = "ingenic,jz4780-gpio"; + reg = <5>; + + gpio-controller; + gpio-ranges = <&pinctrl 0 160 32>; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + + interrupt-parent = <&intc>; + interrupts = <12>; + }; + }; + uart0: serial@10030000 { compatible = "ingenic,jz4780-uart"; reg = <0x10030000 0x100>; diff --git a/arch/mips/boot/dts/ingenic/qi_lb60.dts b/arch/mips/boot/dts/ingenic/qi_lb60.dts index be1a7d3a3e1b..b715ee2ac2ee 100644 --- a/arch/mips/boot/dts/ingenic/qi_lb60.dts +++ b/arch/mips/boot/dts/ingenic/qi_lb60.dts @@ -17,3 +17,16 @@ &rtc_dev { system-power-controller; }; + +&uart0 { + pinctrl-names = "default"; + pinctrl-0 = <&pins_uart0>; +}; + +&pinctrl { + pins_uart0: uart0 { + function = "uart0"; + groups = "uart0-data"; + bias-disable; + }; +}; diff --git a/arch/mips/include/asm/mach-jz4740/gpio.h b/arch/mips/include/asm/mach-jz4740/gpio.h index 7c7708a23baa..fd847c984701 100644 --- a/arch/mips/include/asm/mach-jz4740/gpio.h +++ b/arch/mips/include/asm/mach-jz4740/gpio.h @@ -16,380 +16,9 @@ #ifndef _JZ_GPIO_H #define 
_JZ_GPIO_H -#include <linux/types.h> - -enum jz_gpio_function { - JZ_GPIO_FUNC_NONE, - JZ_GPIO_FUNC1, - JZ_GPIO_FUNC2, - JZ_GPIO_FUNC3, -}; - -/* - Usually a driver for a SoC component has to request several gpio pins and - configure them as function pins. - jz_gpio_bulk_request can be used to ease this process. - Usually one would do something like: - - static const struct jz_gpio_bulk_request i2c_pins[] = { - JZ_GPIO_BULK_PIN(I2C_SDA), - JZ_GPIO_BULK_PIN(I2C_SCK), - }; - - inside the probe function: - - ret = jz_gpio_bulk_request(i2c_pins, ARRAY_SIZE(i2c_pins)); - if (ret) { - ... - - inside the remove function: - - jz_gpio_bulk_free(i2c_pins, ARRAY_SIZE(i2c_pins)); - -*/ - -struct jz_gpio_bulk_request { - int gpio; - const char *name; - enum jz_gpio_function function; -}; - -#define JZ_GPIO_BULK_PIN(pin) { \ - .gpio = JZ_GPIO_ ## pin, \ - .name = #pin, \ - .function = JZ_GPIO_FUNC_ ## pin \ -} - -int jz_gpio_bulk_request(const struct jz_gpio_bulk_request *request, size_t num); -void jz_gpio_bulk_free(const struct jz_gpio_bulk_request *request, size_t num); -void jz_gpio_bulk_suspend(const struct jz_gpio_bulk_request *request, size_t num); -void jz_gpio_bulk_resume(const struct jz_gpio_bulk_request *request, size_t num); -void jz_gpio_enable_pullup(unsigned gpio); -void jz_gpio_disable_pullup(unsigned gpio); -int jz_gpio_set_function(int gpio, enum jz_gpio_function function); - -int jz_gpio_port_direction_input(int port, uint32_t mask); -int jz_gpio_port_direction_output(int port, uint32_t mask); -void jz_gpio_port_set_value(int port, uint32_t value, uint32_t mask); -uint32_t jz_gpio_port_get_value(int port, uint32_t mask); - #define JZ_GPIO_PORTA(x) ((x) + 32 * 0) #define JZ_GPIO_PORTB(x) ((x) + 32 * 1) #define JZ_GPIO_PORTC(x) ((x) + 32 * 2) #define JZ_GPIO_PORTD(x) ((x) + 32 * 3) -/* Port A function pins */ -#define JZ_GPIO_MEM_DATA0 JZ_GPIO_PORTA(0) -#define JZ_GPIO_MEM_DATA1 JZ_GPIO_PORTA(1) -#define JZ_GPIO_MEM_DATA2 JZ_GPIO_PORTA(2) -#define 
JZ_GPIO_MEM_DATA3 JZ_GPIO_PORTA(3) -#define JZ_GPIO_MEM_DATA4 JZ_GPIO_PORTA(4) -#define JZ_GPIO_MEM_DATA5 JZ_GPIO_PORTA(5) -#define JZ_GPIO_MEM_DATA6 JZ_GPIO_PORTA(6) -#define JZ_GPIO_MEM_DATA7 JZ_GPIO_PORTA(7) -#define JZ_GPIO_MEM_DATA8 JZ_GPIO_PORTA(8) -#define JZ_GPIO_MEM_DATA9 JZ_GPIO_PORTA(9) -#define JZ_GPIO_MEM_DATA10 JZ_GPIO_PORTA(10) -#define JZ_GPIO_MEM_DATA11 JZ_GPIO_PORTA(11) -#define JZ_GPIO_MEM_DATA12 JZ_GPIO_PORTA(12) -#define JZ_GPIO_MEM_DATA13 JZ_GPIO_PORTA(13) -#define JZ_GPIO_MEM_DATA14 JZ_GPIO_PORTA(14) -#define JZ_GPIO_MEM_DATA15 JZ_GPIO_PORTA(15) -#define JZ_GPIO_MEM_DATA16 JZ_GPIO_PORTA(16) -#define JZ_GPIO_MEM_DATA17 JZ_GPIO_PORTA(17) -#define JZ_GPIO_MEM_DATA18 JZ_GPIO_PORTA(18) -#define JZ_GPIO_MEM_DATA19 JZ_GPIO_PORTA(19) -#define JZ_GPIO_MEM_DATA20 JZ_GPIO_PORTA(20) -#define JZ_GPIO_MEM_DATA21 JZ_GPIO_PORTA(21) -#define JZ_GPIO_MEM_DATA22 JZ_GPIO_PORTA(22) -#define JZ_GPIO_MEM_DATA23 JZ_GPIO_PORTA(23) -#define JZ_GPIO_MEM_DATA24 JZ_GPIO_PORTA(24) -#define JZ_GPIO_MEM_DATA25 JZ_GPIO_PORTA(25) -#define JZ_GPIO_MEM_DATA26 JZ_GPIO_PORTA(26) -#define JZ_GPIO_MEM_DATA27 JZ_GPIO_PORTA(27) -#define JZ_GPIO_MEM_DATA28 JZ_GPIO_PORTA(28) -#define JZ_GPIO_MEM_DATA29 JZ_GPIO_PORTA(29) -#define JZ_GPIO_MEM_DATA30 JZ_GPIO_PORTA(30) -#define JZ_GPIO_MEM_DATA31 JZ_GPIO_PORTA(31) - -#define JZ_GPIO_FUNC_MEM_DATA0 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA1 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA2 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA3 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA4 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA5 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA6 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA7 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA8 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA9 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA10 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA11 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA12 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA13 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA14 
JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA15 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA16 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA17 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA18 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA19 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA20 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA21 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA22 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA23 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA24 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA25 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA26 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA27 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA28 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA29 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA30 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DATA31 JZ_GPIO_FUNC1 - -/* Port B function pins */ -#define JZ_GPIO_MEM_ADDR0 JZ_GPIO_PORTB(0) -#define JZ_GPIO_MEM_ADDR1 JZ_GPIO_PORTB(1) -#define JZ_GPIO_MEM_ADDR2 JZ_GPIO_PORTB(2) -#define JZ_GPIO_MEM_ADDR3 JZ_GPIO_PORTB(3) -#define JZ_GPIO_MEM_ADDR4 JZ_GPIO_PORTB(4) -#define JZ_GPIO_MEM_ADDR5 JZ_GPIO_PORTB(5) -#define JZ_GPIO_MEM_ADDR6 JZ_GPIO_PORTB(6) -#define JZ_GPIO_MEM_ADDR7 JZ_GPIO_PORTB(7) -#define JZ_GPIO_MEM_ADDR8 JZ_GPIO_PORTB(8) -#define JZ_GPIO_MEM_ADDR9 JZ_GPIO_PORTB(9) -#define JZ_GPIO_MEM_ADDR10 JZ_GPIO_PORTB(10) -#define JZ_GPIO_MEM_ADDR11 JZ_GPIO_PORTB(11) -#define JZ_GPIO_MEM_ADDR12 JZ_GPIO_PORTB(12) -#define JZ_GPIO_MEM_ADDR13 JZ_GPIO_PORTB(13) -#define JZ_GPIO_MEM_ADDR14 JZ_GPIO_PORTB(14) -#define JZ_GPIO_MEM_ADDR15 JZ_GPIO_PORTB(15) -#define JZ_GPIO_MEM_ADDR16 JZ_GPIO_PORTB(16) -#define JZ_GPIO_LCD_CLS JZ_GPIO_PORTB(17) -#define JZ_GPIO_LCD_SPL JZ_GPIO_PORTB(18) -#define JZ_GPIO_MEM_DCS JZ_GPIO_PORTB(19) -#define JZ_GPIO_MEM_RAS JZ_GPIO_PORTB(20) -#define JZ_GPIO_MEM_CAS JZ_GPIO_PORTB(21) -#define JZ_GPIO_MEM_SDWE JZ_GPIO_PORTB(22) -#define JZ_GPIO_MEM_CKE JZ_GPIO_PORTB(23) -#define JZ_GPIO_MEM_CKO JZ_GPIO_PORTB(24) -#define JZ_GPIO_MEM_CS0 JZ_GPIO_PORTB(25) 
-#define JZ_GPIO_MEM_CS1 JZ_GPIO_PORTB(26) -#define JZ_GPIO_MEM_CS2 JZ_GPIO_PORTB(27) -#define JZ_GPIO_MEM_CS3 JZ_GPIO_PORTB(28) -#define JZ_GPIO_MEM_RD JZ_GPIO_PORTB(29) -#define JZ_GPIO_MEM_WR JZ_GPIO_PORTB(30) -#define JZ_GPIO_MEM_WE0 JZ_GPIO_PORTB(31) - -#define JZ_GPIO_FUNC_MEM_ADDR0 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_ADDR1 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_ADDR2 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_ADDR3 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_ADDR4 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_ADDR5 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_ADDR6 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_ADDR7 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_ADDR8 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_ADDR9 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_ADDR10 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_ADDR11 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_ADDR12 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_ADDR13 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_ADDR14 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_ADDR15 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_ADDR16 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_LCD_CLS JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_LCD_SPL JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_DCS JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_RAS JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_CAS JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_SDWE JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_CKE JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_CKO JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_CS0 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_CS1 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_CS2 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_CS3 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_RD JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_WR JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_WE0 JZ_GPIO_FUNC1 - - -#define JZ_GPIO_MEM_ADDR21 JZ_GPIO_PORTB(17) -#define JZ_GPIO_MEM_ADDR22 JZ_GPIO_PORTB(18) - -#define JZ_GPIO_FUNC_MEM_ADDR21 JZ_GPIO_FUNC2 -#define JZ_GPIO_FUNC_MEM_ADDR22 JZ_GPIO_FUNC2 - -/* Port C function pins */ -#define JZ_GPIO_LCD_DATA0 JZ_GPIO_PORTC(0) -#define JZ_GPIO_LCD_DATA1 
JZ_GPIO_PORTC(1) -#define JZ_GPIO_LCD_DATA2 JZ_GPIO_PORTC(2) -#define JZ_GPIO_LCD_DATA3 JZ_GPIO_PORTC(3) -#define JZ_GPIO_LCD_DATA4 JZ_GPIO_PORTC(4) -#define JZ_GPIO_LCD_DATA5 JZ_GPIO_PORTC(5) -#define JZ_GPIO_LCD_DATA6 JZ_GPIO_PORTC(6) -#define JZ_GPIO_LCD_DATA7 JZ_GPIO_PORTC(7) -#define JZ_GPIO_LCD_DATA8 JZ_GPIO_PORTC(8) -#define JZ_GPIO_LCD_DATA9 JZ_GPIO_PORTC(9) -#define JZ_GPIO_LCD_DATA10 JZ_GPIO_PORTC(10) -#define JZ_GPIO_LCD_DATA11 JZ_GPIO_PORTC(11) -#define JZ_GPIO_LCD_DATA12 JZ_GPIO_PORTC(12) -#define JZ_GPIO_LCD_DATA13 JZ_GPIO_PORTC(13) -#define JZ_GPIO_LCD_DATA14 JZ_GPIO_PORTC(14) -#define JZ_GPIO_LCD_DATA15 JZ_GPIO_PORTC(15) -#define JZ_GPIO_LCD_DATA16 JZ_GPIO_PORTC(16) -#define JZ_GPIO_LCD_DATA17 JZ_GPIO_PORTC(17) -#define JZ_GPIO_LCD_PCLK JZ_GPIO_PORTC(18) -#define JZ_GPIO_LCD_HSYNC JZ_GPIO_PORTC(19) -#define JZ_GPIO_LCD_VSYNC JZ_GPIO_PORTC(20) -#define JZ_GPIO_LCD_DE JZ_GPIO_PORTC(21) -#define JZ_GPIO_LCD_PS JZ_GPIO_PORTC(22) -#define JZ_GPIO_LCD_REV JZ_GPIO_PORTC(23) -#define JZ_GPIO_MEM_WE1 JZ_GPIO_PORTC(24) -#define JZ_GPIO_MEM_WE2 JZ_GPIO_PORTC(25) -#define JZ_GPIO_MEM_WE3 JZ_GPIO_PORTC(26) -#define JZ_GPIO_MEM_WAIT JZ_GPIO_PORTC(27) -#define JZ_GPIO_MEM_FRE JZ_GPIO_PORTC(28) -#define JZ_GPIO_MEM_FWE JZ_GPIO_PORTC(29) - -#define JZ_GPIO_FUNC_LCD_DATA0 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_LCD_DATA1 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_LCD_DATA2 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_LCD_DATA3 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_LCD_DATA4 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_LCD_DATA5 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_LCD_DATA6 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_LCD_DATA7 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_LCD_DATA8 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_LCD_DATA9 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_LCD_DATA10 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_LCD_DATA11 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_LCD_DATA12 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_LCD_DATA13 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_LCD_DATA14 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_LCD_DATA15 
JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_LCD_DATA16 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_LCD_DATA17 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_LCD_PCLK JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_LCD_VSYNC JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_LCD_HSYNC JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_LCD_DE JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_LCD_PS JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_LCD_REV JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_WE1 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_WE2 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_WE3 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_WAIT JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_FRE JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MEM_FWE JZ_GPIO_FUNC1 - - -#define JZ_GPIO_MEM_ADDR19 JZ_GPIO_PORTB(22) -#define JZ_GPIO_MEM_ADDR20 JZ_GPIO_PORTB(23) - -#define JZ_GPIO_FUNC_MEM_ADDR19 JZ_GPIO_FUNC2 -#define JZ_GPIO_FUNC_MEM_ADDR20 JZ_GPIO_FUNC2 - -/* Port D function pins */ -#define JZ_GPIO_CIM_DATA0 JZ_GPIO_PORTD(0) -#define JZ_GPIO_CIM_DATA1 JZ_GPIO_PORTD(1) -#define JZ_GPIO_CIM_DATA2 JZ_GPIO_PORTD(2) -#define JZ_GPIO_CIM_DATA3 JZ_GPIO_PORTD(3) -#define JZ_GPIO_CIM_DATA4 JZ_GPIO_PORTD(4) -#define JZ_GPIO_CIM_DATA5 JZ_GPIO_PORTD(5) -#define JZ_GPIO_CIM_DATA6 JZ_GPIO_PORTD(6) -#define JZ_GPIO_CIM_DATA7 JZ_GPIO_PORTD(7) -#define JZ_GPIO_MSC_CMD JZ_GPIO_PORTD(8) -#define JZ_GPIO_MSC_CLK JZ_GPIO_PORTD(9) -#define JZ_GPIO_MSC_DATA0 JZ_GPIO_PORTD(10) -#define JZ_GPIO_MSC_DATA1 JZ_GPIO_PORTD(11) -#define JZ_GPIO_MSC_DATA2 JZ_GPIO_PORTD(12) -#define JZ_GPIO_MSC_DATA3 JZ_GPIO_PORTD(13) -#define JZ_GPIO_CIM_MCLK JZ_GPIO_PORTD(14) -#define JZ_GPIO_CIM_PCLK JZ_GPIO_PORTD(15) -#define JZ_GPIO_CIM_VSYNC JZ_GPIO_PORTD(16) -#define JZ_GPIO_CIM_HSYNC JZ_GPIO_PORTD(17) -#define JZ_GPIO_SPI_CLK JZ_GPIO_PORTD(18) -#define JZ_GPIO_SPI_CE0 JZ_GPIO_PORTD(19) -#define JZ_GPIO_SPI_DT JZ_GPIO_PORTD(20) -#define JZ_GPIO_SPI_DR JZ_GPIO_PORTD(21) -#define JZ_GPIO_SPI_CE1 JZ_GPIO_PORTD(22) -#define JZ_GPIO_PWM0 JZ_GPIO_PORTD(23) -#define JZ_GPIO_PWM1 JZ_GPIO_PORTD(24) -#define JZ_GPIO_PWM2 JZ_GPIO_PORTD(25) -#define 
JZ_GPIO_PWM3 JZ_GPIO_PORTD(26) -#define JZ_GPIO_PWM4 JZ_GPIO_PORTD(27) -#define JZ_GPIO_PWM5 JZ_GPIO_PORTD(28) -#define JZ_GPIO_PWM6 JZ_GPIO_PORTD(30) -#define JZ_GPIO_PWM7 JZ_GPIO_PORTD(31) - -#define JZ_GPIO_FUNC_CIM_DATA JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_CIM_DATA0 JZ_GPIO_FUNC_CIM_DATA -#define JZ_GPIO_FUNC_CIM_DATA1 JZ_GPIO_FUNC_CIM_DATA -#define JZ_GPIO_FUNC_CIM_DATA2 JZ_GPIO_FUNC_CIM_DATA -#define JZ_GPIO_FUNC_CIM_DATA3 JZ_GPIO_FUNC_CIM_DATA -#define JZ_GPIO_FUNC_CIM_DATA4 JZ_GPIO_FUNC_CIM_DATA -#define JZ_GPIO_FUNC_CIM_DATA5 JZ_GPIO_FUNC_CIM_DATA -#define JZ_GPIO_FUNC_CIM_DATA6 JZ_GPIO_FUNC_CIM_DATA -#define JZ_GPIO_FUNC_CIM_DATA7 JZ_GPIO_FUNC_CIM_DATA -#define JZ_GPIO_FUNC_MSC_CMD JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MSC_CLK JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MSC_DATA JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_MSC_DATA0 JZ_GPIO_FUNC_MSC_DATA -#define JZ_GPIO_FUNC_MSC_DATA1 JZ_GPIO_FUNC_MSC_DATA -#define JZ_GPIO_FUNC_MSC_DATA2 JZ_GPIO_FUNC_MSC_DATA -#define JZ_GPIO_FUNC_MSC_DATA3 JZ_GPIO_FUNC_MSC_DATA -#define JZ_GPIO_FUNC_CIM_MCLK JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_CIM_PCLK JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_CIM_VSYNC JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_CIM_HSYNC JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_SPI_CLK JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_SPI_CE0 JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_SPI_DT JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_SPI_DR JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_SPI_CE1 JZ_GPIO_FUNC1 - -#define JZ_GPIO_FUNC_PWM JZ_GPIO_FUNC1 -#define JZ_GPIO_FUNC_PWM0 JZ_GPIO_FUNC_PWM -#define JZ_GPIO_FUNC_PWM1 JZ_GPIO_FUNC_PWM -#define JZ_GPIO_FUNC_PWM2 JZ_GPIO_FUNC_PWM -#define JZ_GPIO_FUNC_PWM3 JZ_GPIO_FUNC_PWM -#define JZ_GPIO_FUNC_PWM4 JZ_GPIO_FUNC_PWM -#define JZ_GPIO_FUNC_PWM5 JZ_GPIO_FUNC_PWM -#define JZ_GPIO_FUNC_PWM6 JZ_GPIO_FUNC_PWM -#define JZ_GPIO_FUNC_PWM7 JZ_GPIO_FUNC_PWM - -#define JZ_GPIO_MEM_SCLK_RSTN JZ_GPIO_PORTD(18) -#define JZ_GPIO_MEM_BCLK JZ_GPIO_PORTD(19) -#define JZ_GPIO_MEM_SDATO JZ_GPIO_PORTD(20) -#define JZ_GPIO_MEM_SDATI JZ_GPIO_PORTD(21) 
-#define JZ_GPIO_MEM_SYNC JZ_GPIO_PORTD(22) -#define JZ_GPIO_I2C_SDA JZ_GPIO_PORTD(23) -#define JZ_GPIO_I2C_SCK JZ_GPIO_PORTD(24) -#define JZ_GPIO_UART0_TXD JZ_GPIO_PORTD(25) -#define JZ_GPIO_UART0_RXD JZ_GPIO_PORTD(26) -#define JZ_GPIO_MEM_ADDR17 JZ_GPIO_PORTD(27) -#define JZ_GPIO_MEM_ADDR18 JZ_GPIO_PORTD(28) -#define JZ_GPIO_UART0_CTS JZ_GPIO_PORTD(30) -#define JZ_GPIO_UART0_RTS JZ_GPIO_PORTD(31) - -#define JZ_GPIO_FUNC_MEM_SCLK_RSTN JZ_GPIO_FUNC2 -#define JZ_GPIO_FUNC_MEM_BCLK JZ_GPIO_FUNC2 -#define JZ_GPIO_FUNC_MEM_SDATO JZ_GPIO_FUNC2 -#define JZ_GPIO_FUNC_MEM_SDATI JZ_GPIO_FUNC2 -#define JZ_GPIO_FUNC_MEM_SYNC JZ_GPIO_FUNC2 -#define JZ_GPIO_FUNC_I2C_SDA JZ_GPIO_FUNC2 -#define JZ_GPIO_FUNC_I2C_SCK JZ_GPIO_FUNC2 -#define JZ_GPIO_FUNC_UART0_TXD JZ_GPIO_FUNC2 -#define JZ_GPIO_FUNC_UART0_RXD JZ_GPIO_FUNC2 -#define JZ_GPIO_FUNC_MEM_ADDR17 JZ_GPIO_FUNC2 -#define JZ_GPIO_FUNC_MEM_ADDR18 JZ_GPIO_FUNC2 -#define JZ_GPIO_FUNC_UART0_CTS JZ_GPIO_FUNC2 -#define JZ_GPIO_FUNC_UART0_RTS JZ_GPIO_FUNC2 - -#define JZ_GPIO_UART1_RXD JZ_GPIO_PORTD(30) -#define JZ_GPIO_UART1_TXD JZ_GPIO_PORTD(31) - -#define JZ_GPIO_FUNC_UART1_RXD JZ_GPIO_FUNC3 -#define JZ_GPIO_FUNC_UART1_TXD JZ_GPIO_FUNC3 - #endif diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index 99e629a590a5..9700251159b1 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h @@ -967,60 +967,6 @@ __clear_user(void __user *addr, __kernel_size_t size) __cl_size; \ }) -extern long __strncpy_from_kernel_nocheck_asm(char *__to, const char __user *__from, long __len); -extern long __strncpy_from_user_nocheck_asm(char *__to, const char __user *__from, long __len); - -/* - * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking. - * @dst: Destination address, in kernel space. This buffer must be at - * least @count bytes long. - * @src: Source address, in user space. - * @count: Maximum number of bytes to copy, including the trailing NUL. 
- * - * Copies a NUL-terminated string from userspace to kernel space. - * Caller must check the specified block with access_ok() before calling - * this function. - * - * On success, returns the length of the string (not including the trailing - * NUL). - * - * If access to userspace fails, returns -EFAULT (some data may have been - * copied). - * - * If @count is smaller than the length of the string, copies @count bytes - * and returns @count. - */ -static inline long -__strncpy_from_user(char *__to, const char __user *__from, long __len) -{ - long res; - - if (eva_kernel_access()) { - __asm__ __volatile__( - "move\t$4, %1\n\t" - "move\t$5, %2\n\t" - "move\t$6, %3\n\t" - __MODULE_JAL(__strncpy_from_kernel_nocheck_asm) - "move\t%0, $2" - : "=r" (res) - : "r" (__to), "r" (__from), "r" (__len) - : "$2", "$3", "$4", "$5", "$6", __UA_t0, "$31", "memory"); - } else { - might_fault(); - __asm__ __volatile__( - "move\t$4, %1\n\t" - "move\t$5, %2\n\t" - "move\t$6, %3\n\t" - __MODULE_JAL(__strncpy_from_user_nocheck_asm) - "move\t%0, $2" - : "=r" (res) - : "r" (__to), "r" (__from), "r" (__len) - : "$2", "$3", "$4", "$5", "$6", __UA_t0, "$31", "memory"); - } - - return res; -} - extern long __strncpy_from_kernel_asm(char *__to, const char __user *__from, long __len); extern long __strncpy_from_user_asm(char *__to, const char __user *__from, long __len); @@ -1073,82 +1019,6 @@ strncpy_from_user(char *__to, const char __user *__from, long __len) return res; } -extern long __strlen_kernel_asm(const char __user *s); -extern long __strlen_user_asm(const char __user *s); - -/* - * strlen_user: - Get the size of a string in user space. - * @str: The string to measure. - * - * Context: User context only. This function may sleep if pagefaults are - * enabled. - * - * Get the size of a NUL-terminated string in user space. - * - * Returns the size of the string INCLUDING the terminating NUL. - * On exception, returns 0. 
- * - * If there is a limit on the length of a valid string, you may wish to - * consider using strnlen_user() instead. - */ -static inline long strlen_user(const char __user *s) -{ - long res; - - if (eva_kernel_access()) { - __asm__ __volatile__( - "move\t$4, %1\n\t" - __MODULE_JAL(__strlen_kernel_asm) - "move\t%0, $2" - : "=r" (res) - : "r" (s) - : "$2", "$4", __UA_t0, "$31"); - } else { - might_fault(); - __asm__ __volatile__( - "move\t$4, %1\n\t" - __MODULE_JAL(__strlen_user_asm) - "move\t%0, $2" - : "=r" (res) - : "r" (s) - : "$2", "$4", __UA_t0, "$31"); - } - - return res; -} - -extern long __strnlen_kernel_nocheck_asm(const char __user *s, long n); -extern long __strnlen_user_nocheck_asm(const char __user *s, long n); - -/* Returns: 0 if bad, string length+1 (memory size) of string if ok */ -static inline long __strnlen_user(const char __user *s, long n) -{ - long res; - - if (eva_kernel_access()) { - __asm__ __volatile__( - "move\t$4, %1\n\t" - "move\t$5, %2\n\t" - __MODULE_JAL(__strnlen_kernel_nocheck_asm) - "move\t%0, $2" - : "=r" (res) - : "r" (s), "r" (n) - : "$2", "$4", "$5", __UA_t0, "$31"); - } else { - might_fault(); - __asm__ __volatile__( - "move\t$4, %1\n\t" - "move\t$5, %2\n\t" - __MODULE_JAL(__strnlen_user_nocheck_asm) - "move\t%0, $2" - : "=r" (res) - : "r" (s), "r" (n) - : "$2", "$4", "$5", __UA_t0, "$31"); - } - - return res; -} - extern long __strnlen_kernel_asm(const char __user *s, long n); extern long __strnlen_user_asm(const char __user *s, long n); diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h index e55813029d5a..3c09450908aa 100644 --- a/arch/mips/include/asm/unistd.h +++ b/arch/mips/include/asm/unistd.h @@ -35,7 +35,6 @@ #define __ARCH_WANT_SYS_GETPGRP #define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_NICE -#define __ARCH_WANT_SYS_OLD_GETRLIMIT #define __ARCH_WANT_SYS_OLD_UNAME #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING diff --git a/arch/mips/jz4740/Makefile 
b/arch/mips/jz4740/Makefile index 39d70bde8cfe..6b9c1f7c31c9 100644 --- a/arch/mips/jz4740/Makefile +++ b/arch/mips/jz4740/Makefile @@ -7,8 +7,6 @@ obj-y += prom.o time.o reset.o setup.o \ platform.o timer.o -obj-$(CONFIG_MACH_JZ4740) += gpio.o - CFLAGS_setup.o = -I$(src)/../../../scripts/dtc/libfdt # board specific support diff --git a/arch/mips/jz4740/board-qi_lb60.c b/arch/mips/jz4740/board-qi_lb60.c index a5bd94b95263..6d7f97552200 100644 --- a/arch/mips/jz4740/board-qi_lb60.c +++ b/arch/mips/jz4740/board-qi_lb60.c @@ -22,6 +22,8 @@ #include <linux/input/matrix_keypad.h> #include <linux/spi/spi.h> #include <linux/spi/spi_gpio.h> +#include <linux/pinctrl/machine.h> +#include <linux/pinctrl/pinconf-generic.h> #include <linux/power_supply.h> #include <linux/power/jz4740-battery.h> #include <linux/power/gpio-charger.h> @@ -159,7 +161,7 @@ static struct jz_nand_platform_data qi_lb60_nand_pdata = { static struct gpiod_lookup_table qi_lb60_nand_gpio_table = { .dev_id = "jz4740-nand.0", .table = { - GPIO_LOOKUP("Bank C", 30, "busy", 0), + GPIO_LOOKUP("GPIOC", 30, "busy", 0), { }, }, }; @@ -421,8 +423,8 @@ static struct platform_device qi_lb60_audio_device = { static struct gpiod_lookup_table qi_lb60_audio_gpio_table = { .dev_id = "qi-lb60-audio", .table = { - GPIO_LOOKUP("Bank B", 29, "snd", 0), - GPIO_LOOKUP("Bank D", 4, "amp", 0), + GPIO_LOOKUP("GPIOB", 29, "snd", 0), + GPIO_LOOKUP("GPIOD", 4, "amp", 0), { }, }, }; @@ -447,13 +449,36 @@ static struct platform_device *jz_platform_devices[] __initdata = { &qi_lb60_audio_device, }; -static void __init board_gpio_setup(void) -{ - /* We only need to enable/disable pullup here for pins used in generic - * drivers. Everything else is done by the drivers themselves. 
*/ - jz_gpio_disable_pullup(QI_LB60_GPIO_SD_VCC_EN_N); - jz_gpio_disable_pullup(QI_LB60_GPIO_SD_CD); -} +static unsigned long pin_cfg_bias_disable[] = { + PIN_CONFIG_BIAS_DISABLE, +}; + +static struct pinctrl_map pin_map[] __initdata = { + /* NAND pin configuration */ + PIN_MAP_MUX_GROUP_DEFAULT("jz4740-nand", + "10010000.jz4740-pinctrl", "nand", "nand-cs1"), + + /* fbdev pin configuration */ + PIN_MAP_MUX_GROUP("jz4740-fb", PINCTRL_STATE_DEFAULT, + "10010000.jz4740-pinctrl", "lcd", "lcd-8bit"), + PIN_MAP_MUX_GROUP("jz4740-fb", PINCTRL_STATE_SLEEP, + "10010000.jz4740-pinctrl", "lcd", "lcd-no-pins"), + + /* MMC pin configuration */ + PIN_MAP_MUX_GROUP_DEFAULT("jz4740-mmc.0", + "10010000.jz4740-pinctrl", "mmc", "mmc-1bit"), + PIN_MAP_MUX_GROUP_DEFAULT("jz4740-mmc.0", + "10010000.jz4740-pinctrl", "mmc", "mmc-4bit"), + PIN_MAP_CONFIGS_PIN_DEFAULT("jz4740-mmc.0", + "10010000.jz4740-pinctrl", "PD0", pin_cfg_bias_disable), + PIN_MAP_CONFIGS_PIN_DEFAULT("jz4740-mmc.0", + "10010000.jz4740-pinctrl", "PD2", pin_cfg_bias_disable), + + /* PWM pin configuration */ + PIN_MAP_MUX_GROUP_DEFAULT("jz4740-pwm", + "10010000.jz4740-pinctrl", "pwm4", "pwm4"), +}; + static int __init qi_lb60_init_platform_devices(void) { @@ -469,6 +494,7 @@ static int __init qi_lb60_init_platform_devices(void) ARRAY_SIZE(qi_lb60_spi_board_info)); pwm_add_table(qi_lb60_pwm_lookup, ARRAY_SIZE(qi_lb60_pwm_lookup)); + pinctrl_register_mappings(pin_map, ARRAY_SIZE(pin_map)); return platform_add_devices(jz_platform_devices, ARRAY_SIZE(jz_platform_devices)); @@ -479,8 +505,6 @@ static int __init qi_lb60_board_setup(void) { printk(KERN_INFO "Qi Hardware JZ4740 QI LB60 setup\n"); - board_gpio_setup(); - if (qi_lb60_init_platform_devices()) panic("Failed to initialize platform devices"); diff --git a/arch/mips/jz4740/gpio.c b/arch/mips/jz4740/gpio.c deleted file mode 100644 index cac1ccde2214..000000000000 --- a/arch/mips/jz4740/gpio.c +++ /dev/null @@ -1,519 +0,0 @@ -/* - * Copyright (C) 2009-2010, Lars-Peter 
Clausen <lars@metafoo.de> - * JZ4740 platform GPIO support - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include <linux/kernel.h> -#include <linux/export.h> -#include <linux/init.h> - -#include <linux/io.h> -#include <linux/gpio/driver.h> -/* FIXME: needed for gpio_request(), try to remove consumer API from driver */ -#include <linux/gpio.h> -#include <linux/delay.h> -#include <linux/interrupt.h> -#include <linux/irqchip/ingenic.h> -#include <linux/bitops.h> - -#include <linux/debugfs.h> -#include <linux/seq_file.h> - -#include <asm/mach-jz4740/base.h> -#include <asm/mach-jz4740/gpio.h> - -#define JZ4740_GPIO_BASE_A (32*0) -#define JZ4740_GPIO_BASE_B (32*1) -#define JZ4740_GPIO_BASE_C (32*2) -#define JZ4740_GPIO_BASE_D (32*3) - -#define JZ4740_GPIO_NUM_A 32 -#define JZ4740_GPIO_NUM_B 32 -#define JZ4740_GPIO_NUM_C 31 -#define JZ4740_GPIO_NUM_D 32 - -#define JZ4740_IRQ_GPIO_BASE_A (JZ4740_IRQ_GPIO(0) + JZ4740_GPIO_BASE_A) -#define JZ4740_IRQ_GPIO_BASE_B (JZ4740_IRQ_GPIO(0) + JZ4740_GPIO_BASE_B) -#define JZ4740_IRQ_GPIO_BASE_C (JZ4740_IRQ_GPIO(0) + JZ4740_GPIO_BASE_C) -#define JZ4740_IRQ_GPIO_BASE_D (JZ4740_IRQ_GPIO(0) + JZ4740_GPIO_BASE_D) - -#define JZ_REG_GPIO_PIN 0x00 -#define JZ_REG_GPIO_DATA 0x10 -#define JZ_REG_GPIO_DATA_SET 0x14 -#define JZ_REG_GPIO_DATA_CLEAR 0x18 -#define JZ_REG_GPIO_MASK 0x20 -#define JZ_REG_GPIO_MASK_SET 0x24 -#define JZ_REG_GPIO_MASK_CLEAR 0x28 -#define JZ_REG_GPIO_PULL 0x30 -#define JZ_REG_GPIO_PULL_SET 0x34 -#define JZ_REG_GPIO_PULL_CLEAR 0x38 -#define JZ_REG_GPIO_FUNC 0x40 -#define JZ_REG_GPIO_FUNC_SET 0x44 -#define 
JZ_REG_GPIO_FUNC_CLEAR 0x48 -#define JZ_REG_GPIO_SELECT 0x50 -#define JZ_REG_GPIO_SELECT_SET 0x54 -#define JZ_REG_GPIO_SELECT_CLEAR 0x58 -#define JZ_REG_GPIO_DIRECTION 0x60 -#define JZ_REG_GPIO_DIRECTION_SET 0x64 -#define JZ_REG_GPIO_DIRECTION_CLEAR 0x68 -#define JZ_REG_GPIO_TRIGGER 0x70 -#define JZ_REG_GPIO_TRIGGER_SET 0x74 -#define JZ_REG_GPIO_TRIGGER_CLEAR 0x78 -#define JZ_REG_GPIO_FLAG 0x80 -#define JZ_REG_GPIO_FLAG_CLEAR 0x14 - -#define GPIO_TO_BIT(gpio) BIT(gpio & 0x1f) -#define GPIO_TO_REG(gpio, reg) (gpio_to_jz_gpio_chip(gpio)->base + (reg)) -#define CHIP_TO_REG(chip, reg) (gpio_chip_to_jz_gpio_chip(chip)->base + (reg)) - -struct jz_gpio_chip { - unsigned int irq; - unsigned int irq_base; - uint32_t edge_trigger_both; - - void __iomem *base; - - struct gpio_chip gpio_chip; -}; - -static struct jz_gpio_chip jz4740_gpio_chips[]; - -static inline struct jz_gpio_chip *gpio_to_jz_gpio_chip(unsigned int gpio) -{ - return &jz4740_gpio_chips[gpio >> 5]; -} - -static inline struct jz_gpio_chip *gpio_chip_to_jz_gpio_chip(struct gpio_chip *gc) -{ - return gpiochip_get_data(gc); -} - -static inline struct jz_gpio_chip *irq_to_jz_gpio_chip(struct irq_data *data) -{ - struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data); - return gc->private; -} - -static inline void jz_gpio_write_bit(unsigned int gpio, unsigned int reg) -{ - writel(GPIO_TO_BIT(gpio), GPIO_TO_REG(gpio, reg)); -} - -int jz_gpio_set_function(int gpio, enum jz_gpio_function function) -{ - if (function == JZ_GPIO_FUNC_NONE) { - jz_gpio_write_bit(gpio, JZ_REG_GPIO_FUNC_CLEAR); - jz_gpio_write_bit(gpio, JZ_REG_GPIO_SELECT_CLEAR); - jz_gpio_write_bit(gpio, JZ_REG_GPIO_TRIGGER_CLEAR); - } else { - jz_gpio_write_bit(gpio, JZ_REG_GPIO_FUNC_SET); - jz_gpio_write_bit(gpio, JZ_REG_GPIO_TRIGGER_CLEAR); - switch (function) { - case JZ_GPIO_FUNC1: - jz_gpio_write_bit(gpio, JZ_REG_GPIO_SELECT_CLEAR); - break; - case JZ_GPIO_FUNC3: - jz_gpio_write_bit(gpio, JZ_REG_GPIO_TRIGGER_SET); - case JZ_GPIO_FUNC2: /* 
Falltrough */ - jz_gpio_write_bit(gpio, JZ_REG_GPIO_SELECT_SET); - break; - default: - BUG(); - break; - } - } - - return 0; -} -EXPORT_SYMBOL_GPL(jz_gpio_set_function); - -int jz_gpio_bulk_request(const struct jz_gpio_bulk_request *request, size_t num) -{ - size_t i; - int ret; - - for (i = 0; i < num; ++i, ++request) { - ret = gpio_request(request->gpio, request->name); - if (ret) - goto err; - jz_gpio_set_function(request->gpio, request->function); - } - - return 0; - -err: - for (--request; i > 0; --i, --request) { - gpio_free(request->gpio); - jz_gpio_set_function(request->gpio, JZ_GPIO_FUNC_NONE); - } - - return ret; -} -EXPORT_SYMBOL_GPL(jz_gpio_bulk_request); - -void jz_gpio_bulk_free(const struct jz_gpio_bulk_request *request, size_t num) -{ - size_t i; - - for (i = 0; i < num; ++i, ++request) { - gpio_free(request->gpio); - jz_gpio_set_function(request->gpio, JZ_GPIO_FUNC_NONE); - } - -} -EXPORT_SYMBOL_GPL(jz_gpio_bulk_free); - -void jz_gpio_bulk_suspend(const struct jz_gpio_bulk_request *request, size_t num) -{ - size_t i; - - for (i = 0; i < num; ++i, ++request) { - jz_gpio_set_function(request->gpio, JZ_GPIO_FUNC_NONE); - jz_gpio_write_bit(request->gpio, JZ_REG_GPIO_DIRECTION_CLEAR); - jz_gpio_write_bit(request->gpio, JZ_REG_GPIO_PULL_SET); - } -} -EXPORT_SYMBOL_GPL(jz_gpio_bulk_suspend); - -void jz_gpio_bulk_resume(const struct jz_gpio_bulk_request *request, size_t num) -{ - size_t i; - - for (i = 0; i < num; ++i, ++request) - jz_gpio_set_function(request->gpio, request->function); -} -EXPORT_SYMBOL_GPL(jz_gpio_bulk_resume); - -void jz_gpio_enable_pullup(unsigned gpio) -{ - jz_gpio_write_bit(gpio, JZ_REG_GPIO_PULL_CLEAR); -} -EXPORT_SYMBOL_GPL(jz_gpio_enable_pullup); - -void jz_gpio_disable_pullup(unsigned gpio) -{ - jz_gpio_write_bit(gpio, JZ_REG_GPIO_PULL_SET); -} -EXPORT_SYMBOL_GPL(jz_gpio_disable_pullup); - -static int jz_gpio_get_value(struct gpio_chip *chip, unsigned gpio) -{ - return !!(readl(CHIP_TO_REG(chip, JZ_REG_GPIO_PIN)) & BIT(gpio)); -} 
- -static void jz_gpio_set_value(struct gpio_chip *chip, unsigned gpio, int value) -{ - uint32_t __iomem *reg = CHIP_TO_REG(chip, JZ_REG_GPIO_DATA_SET); - reg += !value; - writel(BIT(gpio), reg); -} - -static int jz_gpio_direction_output(struct gpio_chip *chip, unsigned gpio, - int value) -{ - writel(BIT(gpio), CHIP_TO_REG(chip, JZ_REG_GPIO_DIRECTION_SET)); - jz_gpio_set_value(chip, gpio, value); - - return 0; -} - -static int jz_gpio_direction_input(struct gpio_chip *chip, unsigned gpio) -{ - writel(BIT(gpio), CHIP_TO_REG(chip, JZ_REG_GPIO_DIRECTION_CLEAR)); - - return 0; -} - -static int jz_gpio_to_irq(struct gpio_chip *chip, unsigned gpio) -{ - struct jz_gpio_chip *jz_gpio = gpiochip_get_data(chip); - - return jz_gpio->irq_base + gpio; -} - -int jz_gpio_port_direction_input(int port, uint32_t mask) -{ - writel(mask, GPIO_TO_REG(port, JZ_REG_GPIO_DIRECTION_CLEAR)); - - return 0; -} -EXPORT_SYMBOL(jz_gpio_port_direction_input); - -int jz_gpio_port_direction_output(int port, uint32_t mask) -{ - writel(mask, GPIO_TO_REG(port, JZ_REG_GPIO_DIRECTION_SET)); - - return 0; -} -EXPORT_SYMBOL(jz_gpio_port_direction_output); - -void jz_gpio_port_set_value(int port, uint32_t value, uint32_t mask) -{ - writel(~value & mask, GPIO_TO_REG(port, JZ_REG_GPIO_DATA_CLEAR)); - writel(value & mask, GPIO_TO_REG(port, JZ_REG_GPIO_DATA_SET)); -} -EXPORT_SYMBOL(jz_gpio_port_set_value); - -uint32_t jz_gpio_port_get_value(int port, uint32_t mask) -{ - uint32_t value = readl(GPIO_TO_REG(port, JZ_REG_GPIO_PIN)); - - return value & mask; -} -EXPORT_SYMBOL(jz_gpio_port_get_value); - -#define IRQ_TO_BIT(irq) BIT((irq - JZ4740_IRQ_GPIO(0)) & 0x1f) - -static void jz_gpio_check_trigger_both(struct jz_gpio_chip *chip, unsigned int irq) -{ - uint32_t value; - void __iomem *reg; - uint32_t mask = IRQ_TO_BIT(irq); - - if (!(chip->edge_trigger_both & mask)) - return; - - reg = chip->base; - - value = readl(chip->base + JZ_REG_GPIO_PIN); - if (value & mask) - reg += JZ_REG_GPIO_DIRECTION_CLEAR; - else - 
reg += JZ_REG_GPIO_DIRECTION_SET; - - writel(mask, reg); -} - -static void jz_gpio_irq_demux_handler(struct irq_desc *desc) -{ - uint32_t flag; - unsigned int gpio_irq; - struct jz_gpio_chip *chip = irq_desc_get_handler_data(desc); - - flag = readl(chip->base + JZ_REG_GPIO_FLAG); - if (!flag) - return; - - gpio_irq = chip->irq_base + __fls(flag); - - jz_gpio_check_trigger_both(chip, gpio_irq); - - generic_handle_irq(gpio_irq); -}; - -static inline void jz_gpio_set_irq_bit(struct irq_data *data, unsigned int reg) -{ - struct jz_gpio_chip *chip = irq_to_jz_gpio_chip(data); - writel(IRQ_TO_BIT(data->irq), chip->base + reg); -} - -static void jz_gpio_irq_unmask(struct irq_data *data) -{ - struct jz_gpio_chip *chip = irq_to_jz_gpio_chip(data); - - jz_gpio_check_trigger_both(chip, data->irq); - irq_gc_unmask_enable_reg(data); -}; - -/* TODO: Check if function is gpio */ -static unsigned int jz_gpio_irq_startup(struct irq_data *data) -{ - jz_gpio_set_irq_bit(data, JZ_REG_GPIO_SELECT_SET); - jz_gpio_irq_unmask(data); - return 0; -} - -static void jz_gpio_irq_shutdown(struct irq_data *data) -{ - irq_gc_mask_disable_reg(data); - - /* Set direction to input */ - jz_gpio_set_irq_bit(data, JZ_REG_GPIO_DIRECTION_CLEAR); - jz_gpio_set_irq_bit(data, JZ_REG_GPIO_SELECT_CLEAR); -} - -static int jz_gpio_irq_set_type(struct irq_data *data, unsigned int flow_type) -{ - struct jz_gpio_chip *chip = irq_to_jz_gpio_chip(data); - unsigned int irq = data->irq; - - if (flow_type == IRQ_TYPE_EDGE_BOTH) { - uint32_t value = readl(chip->base + JZ_REG_GPIO_PIN); - if (value & IRQ_TO_BIT(irq)) - flow_type = IRQ_TYPE_EDGE_FALLING; - else - flow_type = IRQ_TYPE_EDGE_RISING; - chip->edge_trigger_both |= IRQ_TO_BIT(irq); - } else { - chip->edge_trigger_both &= ~IRQ_TO_BIT(irq); - } - - switch (flow_type) { - case IRQ_TYPE_EDGE_RISING: - jz_gpio_set_irq_bit(data, JZ_REG_GPIO_DIRECTION_SET); - jz_gpio_set_irq_bit(data, JZ_REG_GPIO_TRIGGER_SET); - break; - case IRQ_TYPE_EDGE_FALLING: - 
jz_gpio_set_irq_bit(data, JZ_REG_GPIO_DIRECTION_CLEAR); - jz_gpio_set_irq_bit(data, JZ_REG_GPIO_TRIGGER_SET); - break; - case IRQ_TYPE_LEVEL_HIGH: - jz_gpio_set_irq_bit(data, JZ_REG_GPIO_DIRECTION_SET); - jz_gpio_set_irq_bit(data, JZ_REG_GPIO_TRIGGER_CLEAR); - break; - case IRQ_TYPE_LEVEL_LOW: - jz_gpio_set_irq_bit(data, JZ_REG_GPIO_DIRECTION_CLEAR); - jz_gpio_set_irq_bit(data, JZ_REG_GPIO_TRIGGER_CLEAR); - break; - default: - return -EINVAL; - } - - return 0; -} - -static int jz_gpio_irq_set_wake(struct irq_data *data, unsigned int on) -{ - struct jz_gpio_chip *chip = irq_to_jz_gpio_chip(data); - - irq_gc_set_wake(data, on); - irq_set_irq_wake(chip->irq, on); - - return 0; -} - -#define JZ4740_GPIO_CHIP(_bank) { \ - .irq_base = JZ4740_IRQ_GPIO_BASE_ ## _bank, \ - .gpio_chip = { \ - .label = "Bank " # _bank, \ - .owner = THIS_MODULE, \ - .set = jz_gpio_set_value, \ - .get = jz_gpio_get_value, \ - .direction_output = jz_gpio_direction_output, \ - .direction_input = jz_gpio_direction_input, \ - .to_irq = jz_gpio_to_irq, \ - .base = JZ4740_GPIO_BASE_ ## _bank, \ - .ngpio = JZ4740_GPIO_NUM_ ## _bank, \ - }, \ -} - -static struct jz_gpio_chip jz4740_gpio_chips[] = { - JZ4740_GPIO_CHIP(A), - JZ4740_GPIO_CHIP(B), - JZ4740_GPIO_CHIP(C), - JZ4740_GPIO_CHIP(D), -}; - -static void jz4740_gpio_chip_init(struct jz_gpio_chip *chip, unsigned int id) -{ - struct irq_chip_generic *gc; - struct irq_chip_type *ct; - - chip->base = ioremap(JZ4740_GPIO_BASE_ADDR + (id * 0x100), 0x100); - - chip->irq = JZ4740_IRQ_INTC_GPIO(id); - irq_set_chained_handler_and_data(chip->irq, - jz_gpio_irq_demux_handler, chip); - - gc = irq_alloc_generic_chip(chip->gpio_chip.label, 1, chip->irq_base, - chip->base, handle_level_irq); - - gc->wake_enabled = IRQ_MSK(chip->gpio_chip.ngpio); - gc->private = chip; - - ct = gc->chip_types; - ct->regs.enable = JZ_REG_GPIO_MASK_CLEAR; - ct->regs.disable = JZ_REG_GPIO_MASK_SET; - ct->regs.ack = JZ_REG_GPIO_FLAG_CLEAR; - - ct->chip.name = "GPIO"; - ct->chip.irq_mask 
= irq_gc_mask_disable_reg; - ct->chip.irq_unmask = jz_gpio_irq_unmask; - ct->chip.irq_ack = irq_gc_ack_set_bit; - ct->chip.irq_suspend = ingenic_intc_irq_suspend; - ct->chip.irq_resume = ingenic_intc_irq_resume; - ct->chip.irq_startup = jz_gpio_irq_startup; - ct->chip.irq_shutdown = jz_gpio_irq_shutdown; - ct->chip.irq_set_type = jz_gpio_irq_set_type; - ct->chip.irq_set_wake = jz_gpio_irq_set_wake; - ct->chip.flags = IRQCHIP_SET_TYPE_MASKED; - - irq_setup_generic_chip(gc, IRQ_MSK(chip->gpio_chip.ngpio), - IRQ_GC_INIT_NESTED_LOCK, 0, IRQ_NOPROBE | IRQ_LEVEL); - - gpiochip_add_data(&chip->gpio_chip, chip); -} - -static int __init jz4740_gpio_init(void) -{ - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(jz4740_gpio_chips); ++i) - jz4740_gpio_chip_init(&jz4740_gpio_chips[i], i); - - printk(KERN_INFO "JZ4740 GPIO initialized\n"); - - return 0; -} -arch_initcall(jz4740_gpio_init); - -#ifdef CONFIG_DEBUG_FS - -static inline void gpio_seq_reg(struct seq_file *s, struct jz_gpio_chip *chip, - const char *name, unsigned int reg) -{ - seq_printf(s, "\t%s: %08x\n", name, readl(chip->base + reg)); -} - -static int gpio_regs_show(struct seq_file *s, void *unused) -{ - struct jz_gpio_chip *chip = jz4740_gpio_chips; - int i; - - for (i = 0; i < ARRAY_SIZE(jz4740_gpio_chips); ++i, ++chip) { - seq_printf(s, "==GPIO %d==\n", i); - gpio_seq_reg(s, chip, "Pin", JZ_REG_GPIO_PIN); - gpio_seq_reg(s, chip, "Data", JZ_REG_GPIO_DATA); - gpio_seq_reg(s, chip, "Mask", JZ_REG_GPIO_MASK); - gpio_seq_reg(s, chip, "Pull", JZ_REG_GPIO_PULL); - gpio_seq_reg(s, chip, "Func", JZ_REG_GPIO_FUNC); - gpio_seq_reg(s, chip, "Select", JZ_REG_GPIO_SELECT); - gpio_seq_reg(s, chip, "Direction", JZ_REG_GPIO_DIRECTION); - gpio_seq_reg(s, chip, "Trigger", JZ_REG_GPIO_TRIGGER); - gpio_seq_reg(s, chip, "Flag", JZ_REG_GPIO_FLAG); - } - - return 0; -} - -static int gpio_regs_open(struct inode *inode, struct file *file) -{ - return single_open(file, gpio_regs_show, NULL); -} - -static const struct file_operations 
gpio_regs_operations = { - .open = gpio_regs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int __init gpio_debugfs_init(void) -{ - (void) debugfs_create_file("jz_regs_gpio", S_IFREG | S_IRUGO, - NULL, NULL, &gpio_regs_operations); - return 0; -} -subsys_initcall(gpio_debugfs_init); - -#endif diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c index a563759fd142..6a0d7040d882 100644 --- a/arch/mips/kvm/trap_emul.c +++ b/arch/mips/kvm/trap_emul.c @@ -1094,7 +1094,7 @@ static void kvm_trap_emul_check_requests(struct kvm_vcpu *vcpu, int cpu, struct mm_struct *mm; int i; - if (likely(!vcpu->requests)) + if (likely(!kvm_request_pending(vcpu))) return; if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c index 71d8856ade64..74805035edc8 100644 --- a/arch/mips/kvm/vz.c +++ b/arch/mips/kvm/vz.c @@ -2337,7 +2337,7 @@ static int kvm_vz_check_requests(struct kvm_vcpu *vcpu, int cpu) int ret = 0; int i; - if (!vcpu->requests) + if (!kvm_request_pending(vcpu)) return 0; if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile index 0344e575f522..a37fe3d1ee2f 100644 --- a/arch/mips/lib/Makefile +++ b/arch/mips/lib/Makefile @@ -3,7 +3,7 @@ # lib-y += bitops.o csum_partial.o delay.o memcpy.o memset.o \ - mips-atomic.o strlen_user.o strncpy_user.o \ + mips-atomic.o strncpy_user.o \ strnlen_user.o uncached.o obj-y += iomap.o diff --git a/arch/mips/lib/strlen_user.S b/arch/mips/lib/strlen_user.S deleted file mode 100644 index 40be22625bc5..000000000000 --- a/arch/mips/lib/strlen_user.S +++ /dev/null @@ -1,65 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1996, 1998, 1999, 2004 by Ralf Baechle - * Copyright (C) 1999 Silicon Graphics, Inc. 
- * Copyright (C) 2011 MIPS Technologies, Inc. - */ -#include <asm/asm.h> -#include <asm/asm-offsets.h> -#include <asm/export.h> -#include <asm/regdef.h> - -#define EX(insn,reg,addr,handler) \ -9: insn reg, addr; \ - .section __ex_table,"a"; \ - PTR 9b, handler; \ - .previous - -/* - * Return the size of a string (including the ending 0) - * - * Return 0 for error - */ - .macro __BUILD_STRLEN_ASM func -LEAF(__strlen_\func\()_asm) - LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok? - and v0, a0 - bnez v0, .Lfault\@ - - move v0, a0 -.ifeqs "\func", "kernel" -1: EX(lbu, v1, (v0), .Lfault\@) -.else -1: EX(lbue, v1, (v0), .Lfault\@) -.endif - PTR_ADDIU v0, 1 - bnez v1, 1b - PTR_SUBU v0, a0 - jr ra - END(__strlen_\func\()_asm) - -.Lfault\@: move v0, zero - jr ra - .endm - -#ifndef CONFIG_EVA - /* Set aliases */ - .global __strlen_user_asm - .set __strlen_user_asm, __strlen_kernel_asm -EXPORT_SYMBOL(__strlen_user_asm) -#endif - -__BUILD_STRLEN_ASM kernel -EXPORT_SYMBOL(__strlen_kernel_asm) - -#ifdef CONFIG_EVA - - .set push - .set eva -__BUILD_STRLEN_ASM user - .set pop -EXPORT_SYMBOL(__strlen_user_asm) -#endif diff --git a/arch/mips/lib/strncpy_user.S b/arch/mips/lib/strncpy_user.S index 5267ca800b84..acdff66bd5d2 100644 --- a/arch/mips/lib/strncpy_user.S +++ b/arch/mips/lib/strncpy_user.S @@ -35,7 +35,6 @@ LEAF(__strncpy_from_\func\()_asm) and v0, a1 bnez v0, .Lfault\@ -FEXPORT(__strncpy_from_\func\()_nocheck_asm) move t0, zero move v1, a1 .ifeqs "\func","kernel" @@ -70,16 +69,12 @@ FEXPORT(__strncpy_from_\func\()_nocheck_asm) #ifndef CONFIG_EVA /* Set aliases */ .global __strncpy_from_user_asm - .global __strncpy_from_user_nocheck_asm .set __strncpy_from_user_asm, __strncpy_from_kernel_asm - .set __strncpy_from_user_nocheck_asm, __strncpy_from_kernel_nocheck_asm EXPORT_SYMBOL(__strncpy_from_user_asm) -EXPORT_SYMBOL(__strncpy_from_user_nocheck_asm) #endif __BUILD_STRNCPY_ASM kernel EXPORT_SYMBOL(__strncpy_from_kernel_asm) -EXPORT_SYMBOL(__strncpy_from_kernel_nocheck_asm) 
#ifdef CONFIG_EVA .set push @@ -87,5 +82,4 @@ EXPORT_SYMBOL(__strncpy_from_kernel_nocheck_asm) __BUILD_STRNCPY_ASM user .set pop EXPORT_SYMBOL(__strncpy_from_user_asm) -EXPORT_SYMBOL(__strncpy_from_user_nocheck_asm) #endif diff --git a/arch/mips/lib/strnlen_user.S b/arch/mips/lib/strnlen_user.S index 860ea99fd70c..e1bacf5a3abe 100644 --- a/arch/mips/lib/strnlen_user.S +++ b/arch/mips/lib/strnlen_user.S @@ -32,7 +32,6 @@ LEAF(__strnlen_\func\()_asm) and v0, a0 bnez v0, .Lfault\@ -FEXPORT(__strnlen_\func\()_nocheck_asm) move v0, a0 PTR_ADDU a1, a0 # stop pointer 1: @@ -68,16 +67,12 @@ FEXPORT(__strnlen_\func\()_nocheck_asm) #ifndef CONFIG_EVA /* Set aliases */ .global __strnlen_user_asm - .global __strnlen_user_nocheck_asm .set __strnlen_user_asm, __strnlen_kernel_asm - .set __strnlen_user_nocheck_asm, __strnlen_kernel_nocheck_asm EXPORT_SYMBOL(__strnlen_user_asm) -EXPORT_SYMBOL(__strnlen_user_nocheck_asm) #endif __BUILD_STRNLEN_ASM kernel EXPORT_SYMBOL(__strnlen_kernel_asm) -EXPORT_SYMBOL(__strnlen_kernel_nocheck_asm) #ifdef CONFIG_EVA @@ -86,5 +81,4 @@ EXPORT_SYMBOL(__strnlen_kernel_nocheck_asm) __BUILD_STRNLEN_ASM user .set pop EXPORT_SYMBOL(__strnlen_user_asm) -EXPORT_SYMBOL(__strnlen_user_nocheck_asm) #endif diff --git a/arch/mips/loongson64/common/dma-swiotlb.c b/arch/mips/loongson64/common/dma-swiotlb.c index 178ca17a5667..34486c138206 100644 --- a/arch/mips/loongson64/common/dma-swiotlb.c +++ b/arch/mips/loongson64/common/dma-swiotlb.c @@ -75,19 +75,11 @@ static void loongson_dma_sync_sg_for_device(struct device *dev, mb(); } -static int loongson_dma_set_mask(struct device *dev, u64 mask) +static int loongson_dma_supported(struct device *dev, u64 mask) { - if (!dev->dma_mask || !dma_supported(dev, mask)) - return -EIO; - - if (mask > DMA_BIT_MASK(loongson_sysconf.dma_mask_bits)) { - *dev->dma_mask = DMA_BIT_MASK(loongson_sysconf.dma_mask_bits); - return -EIO; - } - - *dev->dma_mask = mask; - - return 0; + if (mask > 
DMA_BIT_MASK(loongson_sysconf.dma_mask_bits)) + return 0; + return swiotlb_dma_supported(dev, mask); } dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) @@ -126,8 +118,7 @@ static const struct dma_map_ops loongson_dma_map_ops = { .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, .sync_sg_for_device = loongson_dma_sync_sg_for_device, .mapping_error = swiotlb_dma_mapping_error, - .dma_supported = swiotlb_dma_supported, - .set_dma_mask = loongson_dma_set_mask + .dma_supported = loongson_dma_supported, }; void __init plat_swiotlb_setup(void) diff --git a/arch/mn10300/include/asm/uaccess.h b/arch/mn10300/include/asm/uaccess.h index c6966474827f..5af468fd1359 100644 --- a/arch/mn10300/include/asm/uaccess.h +++ b/arch/mn10300/include/asm/uaccess.h @@ -290,9 +290,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) } extern long strncpy_from_user(char *dst, const char __user *src, long count); -extern long __strncpy_from_user(char *dst, const char __user *src, long count); extern long strnlen_user(const char __user *str, long n); -#define strlen_user(str) strnlen_user(str, ~0UL >> 1) extern unsigned long clear_user(void __user *mem, unsigned long len); extern unsigned long __clear_user(void __user *mem, unsigned long len); diff --git a/arch/mn10300/kernel/mn10300_ksyms.c b/arch/mn10300/kernel/mn10300_ksyms.c index 5e9f919635f0..66fb68d0ca8a 100644 --- a/arch/mn10300/kernel/mn10300_ksyms.c +++ b/arch/mn10300/kernel/mn10300_ksyms.c @@ -23,7 +23,6 @@ EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(strncpy_from_user); -EXPORT_SYMBOL(__strncpy_from_user); EXPORT_SYMBOL(clear_user); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(strnlen_user); diff --git a/arch/mn10300/lib/usercopy.c b/arch/mn10300/lib/usercopy.c index cece1799cc32..39626912de98 100644 --- a/arch/mn10300/lib/usercopy.c +++ b/arch/mn10300/lib/usercopy.c @@ -50,14 +50,6 @@ do { \ } while (0) long -__strncpy_from_user(char *dst, const char *src, long count) -{ - long res; - 
__do_strncpy_from_user(dst, src, count, res); - return res; -} - -long strncpy_from_user(char *dst, const char *src, long count) { long res = -EFAULT; diff --git a/arch/openrisc/include/asm/dma-mapping.h b/arch/openrisc/include/asm/dma-mapping.h index 0c0075f17145..f41bd3cb76d9 100644 --- a/arch/openrisc/include/asm/dma-mapping.h +++ b/arch/openrisc/include/asm/dma-mapping.h @@ -26,8 +26,6 @@ #include <linux/kmemcheck.h> #include <linux/dma-mapping.h> -#define DMA_ERROR_CODE (~(dma_addr_t)0x0) - extern const struct dma_map_ops or1k_dma_map_ops; static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) @@ -35,11 +33,4 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return &or1k_dma_map_ops; } -#define HAVE_ARCH_DMA_SUPPORTED 1 -static inline int dma_supported(struct device *dev, u64 dma_mask) -{ - /* Support 32 bit DMA mask exclusively */ - return dma_mask == DMA_BIT_MASK(32); -} - #endif /* __ASM_OPENRISC_DMA_MAPPING_H */ diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index a557a7cd0232..bbf5c79cce7a 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -264,7 +264,6 @@ clear_user(void *addr, unsigned long size) extern long strncpy_from_user(char *dest, const char __user *src, long count); -extern __must_check long strlen_user(const char __user *str); extern __must_check long strnlen_user(const char __user *str, long n); #endif /* __ASM_OPENRISC_UACCESS_H */ diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index c3e114f67485..1fd962a07f52 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -209,7 +209,6 @@ extern long lstrnlen_user(const char __user *, long); #define user_addr_max() (~0UL) #define strnlen_user lstrnlen_user -#define strlen_user(str) lstrnlen_user(str, 0x7fffffffL) #define clear_user lclear_user #define __clear_user lclear_user diff --git 
a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h index 5f4c68daa261..7dc31c84dd37 100644 --- a/arch/parisc/include/asm/unistd.h +++ b/arch/parisc/include/asm/unistd.h @@ -156,7 +156,6 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \ #define __ARCH_WANT_SYS_GETPGRP #define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_NICE -#define __ARCH_WANT_SYS_OLD_GETRLIMIT #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h index 4f2df589ec1d..f256e1d14a14 100644 --- a/arch/powerpc/include/asm/compat.h +++ b/arch/powerpc/include/asm/compat.h @@ -109,7 +109,6 @@ struct compat_statfs { int f_spare[4]; }; -#define COMPAT_RLIM_OLD_INFINITY 0x7fffffff #define COMPAT_RLIM_INFINITY 0xffffffff typedef u32 compat_old_sigset_t; diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 181a095468e4..eaece3d3e225 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -17,10 +17,6 @@ #include <asm/io.h> #include <asm/swiotlb.h> -#ifdef CONFIG_PPC64 -#define DMA_ERROR_CODE (~(dma_addr_t)0x0) -#endif - /* Some dma direct funcs must be visible for use in other dma_ops */ extern void *__dma_direct_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, @@ -116,7 +112,6 @@ static inline void set_dma_offset(struct device *dev, dma_addr_t off) #define HAVE_ARCH_DMA_SET_MASK 1 extern int dma_set_mask(struct device *dev, u64 dma_mask); -extern int __dma_set_mask(struct device *dev, u64 dma_mask); extern u64 __dma_get_required_mask(struct device *dev); static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 8a8ce220d7d0..20febe0b7f32 100644 --- a/arch/powerpc/include/asm/iommu.h +++ 
b/arch/powerpc/include/asm/iommu.h @@ -139,6 +139,8 @@ struct scatterlist; #ifdef CONFIG_PPC64 +#define IOMMU_MAPPING_ERROR (~(dma_addr_t)0x0) + static inline void set_iommu_table_base(struct device *dev, struct iommu_table *base) { @@ -238,6 +240,8 @@ static inline int __init tce_iommu_bus_notifier_init(void) } #endif /* !CONFIG_IOMMU_API */ +int dma_iommu_mapping_error(struct device *dev, dma_addr_t dma_addr); + #else static inline void *get_iommu_table_base(struct device *dev) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 2bf35017ffc0..b8d5b8e35244 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -86,7 +86,6 @@ struct kvmppc_vcore { u16 last_cpu; u8 vcore_state; u8 in_guest; - struct kvmppc_vcore *master_vcore; struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS]; struct list_head preempt_list; spinlock_t lock; diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index b148496ffe36..7cea76f11c26 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -81,7 +81,7 @@ struct kvm_split_mode { u8 subcore_size; u8 do_nap; u8 napped[MAX_SMT_THREADS]; - struct kvmppc_vcore *master_vcs[MAX_SUBCORES]; + struct kvmppc_vcore *vc[MAX_SUBCORES]; }; /* diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 9c51ac4b8f36..8b3f1238d07f 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -35,6 +35,7 @@ #include <asm/page.h> #include <asm/cacheflush.h> #include <asm/hvcall.h> +#include <asm/mce.h> #define KVM_MAX_VCPUS NR_CPUS #define KVM_MAX_VCORES NR_CPUS @@ -52,8 +53,8 @@ #define KVM_IRQCHIP_NUM_PINS 256 /* PPC-specific vcpu->requests bit members */ -#define KVM_REQ_WATCHDOG 8 -#define KVM_REQ_EPR_EXIT 9 +#define KVM_REQ_WATCHDOG KVM_ARCH_REQ(0) +#define KVM_REQ_EPR_EXIT KVM_ARCH_REQ(1) #include 
<linux/mmu_notifier.h> @@ -267,6 +268,8 @@ struct kvm_resize_hpt; struct kvm_arch { unsigned int lpid; + unsigned int smt_mode; /* # vcpus per virtual core */ + unsigned int emul_smt_mode; /* emulated SMT mode, on P9 */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE unsigned int tlb_sets; struct kvm_hpt_info hpt; @@ -285,6 +288,7 @@ struct kvm_arch { cpumask_t need_tlb_flush; cpumask_t cpu_in_guest; u8 radix; + u8 fwnmi_enabled; pgd_t *pgtable; u64 process_table; struct dentry *debugfs_dir; @@ -566,6 +570,7 @@ struct kvm_vcpu_arch { ulong wort; ulong tid; ulong psscr; + ulong hfscr; ulong shadow_srr1; #endif u32 vrsave; /* also USPRG0 */ @@ -579,7 +584,7 @@ struct kvm_vcpu_arch { ulong mcsrr0; ulong mcsrr1; ulong mcsr; - u32 dec; + ulong dec; #ifdef CONFIG_BOOKE u32 decar; #endif @@ -710,6 +715,7 @@ struct kvm_vcpu_arch { unsigned long pending_exceptions; u8 ceded; u8 prodded; + u8 doorbell_request; u32 last_inst; struct swait_queue_head *wqp; @@ -722,6 +728,7 @@ struct kvm_vcpu_arch { int prev_cpu; bool timer_running; wait_queue_head_t cpu_run; + struct machine_check_event mce_evt; /* Valid if trap == 0x200 */ struct kvm_vcpu_arch_shared *shared; #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index e0d88c38602b..ba5fadd6f3c9 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -315,6 +315,8 @@ struct kvmppc_ops { struct irq_bypass_producer *); int (*configure_mmu)(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg); int (*get_rmmu_info)(struct kvm *kvm, struct kvm_ppc_rmmu_info *info); + int (*set_smt_mode)(struct kvm *kvm, unsigned long mode, + unsigned long flags); }; extern struct kvmppc_ops *kvmppc_hv_ops; diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 3a8d278e7421..1a9b45198c06 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ 
-103,6 +103,8 @@ #define OP_31_XOP_STBUX 247 #define OP_31_XOP_LHZX 279 #define OP_31_XOP_LHZUX 311 +#define OP_31_XOP_MSGSNDP 142 +#define OP_31_XOP_MSGCLRP 174 #define OP_31_XOP_MFSPR 339 #define OP_31_XOP_LWAX 341 #define OP_31_XOP_LHAX 343 diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 41e88d3ce36b..4cf57f2126e6 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -340,7 +340,6 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size) } extern long strncpy_from_user(char *dst, const char __user *src, long count); -extern __must_check long strlen_user(const char __user *str); extern __must_check long strnlen_user(const char __user *str, long n); #endif /* _ARCH_POWERPC_UACCESS_H */ diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 07fbeb927834..8cf8f0c96906 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -60,6 +60,12 @@ struct kvm_regs { #define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */ +/* flags for kvm_run.flags */ +#define KVM_RUN_PPC_NMI_DISP_MASK (3 << 0) +#define KVM_RUN_PPC_NMI_DISP_FULLY_RECOV (1 << 0) +#define KVM_RUN_PPC_NMI_DISP_LIMITED_RECOV (2 << 0) +#define KVM_RUN_PPC_NMI_DISP_NOT_RECOV (3 << 0) + /* * Feature bits indicate which sections of the sregs struct are valid, * both in KVM_GET_SREGS and KVM_SET_SREGS. 
On KVM_SET_SREGS, registers diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 709e23425317..ae8e89e0d083 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -485,6 +485,7 @@ int main(void) OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls); OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v); OFFSET(KVM_RADIX, kvm, arch.radix); + OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled); OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr); OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar); OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr); @@ -513,6 +514,7 @@ int main(void) OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions); OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded); OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded); + OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request); OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr); OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc); OFFSET(VCPU_SPMC, kvm_vcpu, arch.spmc); @@ -542,6 +544,7 @@ int main(void) OFFSET(VCPU_WORT, kvm_vcpu, arch.wort); OFFSET(VCPU_TID, kvm_vcpu, arch.tid); OFFSET(VCPU_PSSCR, kvm_vcpu, arch.psscr); + OFFSET(VCPU_HFSCR, kvm_vcpu, arch.hfscr); OFFSET(VCORE_ENTRY_EXIT, kvmppc_vcore, entry_exit_map); OFFSET(VCORE_IN_GUEST, kvmppc_vcore, in_guest); OFFSET(VCORE_NAPPING_THREADS, kvmppc_vcore, napping_threads); diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index fb7cbaa37658..8f7abf9baa63 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -105,6 +105,11 @@ static u64 dma_iommu_get_required_mask(struct device *dev) return mask; } +int dma_iommu_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return dma_addr == IOMMU_MAPPING_ERROR; +} + struct dma_map_ops dma_iommu_ops = { .alloc = dma_iommu_alloc_coherent, .free = dma_iommu_free_coherent, @@ -115,5 +120,6 @@ struct dma_map_ops dma_iommu_ops = { .map_page = dma_iommu_map_page, .unmap_page = dma_iommu_unmap_page, .get_required_mask = 
dma_iommu_get_required_mask, + .mapping_error = dma_iommu_mapping_error, }; EXPORT_SYMBOL(dma_iommu_ops); diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 41c749586bd2..4194bbbbdb10 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -314,18 +314,6 @@ EXPORT_SYMBOL(dma_set_coherent_mask); #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) -int __dma_set_mask(struct device *dev, u64 dma_mask) -{ - const struct dma_map_ops *dma_ops = get_dma_ops(dev); - - if ((dma_ops != NULL) && (dma_ops->set_dma_mask != NULL)) - return dma_ops->set_dma_mask(dev, dma_mask); - if (!dev->dma_mask || !dma_supported(dev, dma_mask)) - return -EIO; - *dev->dma_mask = dma_mask; - return 0; -} - int dma_set_mask(struct device *dev, u64 dma_mask) { if (ppc_md.dma_set_mask) @@ -338,7 +326,10 @@ int dma_set_mask(struct device *dev, u64 dma_mask) return phb->controller_ops.dma_set_mask(pdev, dma_mask); } - return __dma_set_mask(dev, dma_mask); + if (!dev->dma_mask || !dma_supported(dev, dma_mask)) + return -EIO; + *dev->dma_mask = dma_mask; + return 0; } EXPORT_SYMBOL(dma_set_mask); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index f2b724cd9e64..233ca3fe4754 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -198,11 +198,11 @@ static unsigned long iommu_range_alloc(struct device *dev, if (unlikely(npages == 0)) { if (printk_ratelimit()) WARN_ON(1); - return DMA_ERROR_CODE; + return IOMMU_MAPPING_ERROR; } if (should_fail_iommu(dev)) - return DMA_ERROR_CODE; + return IOMMU_MAPPING_ERROR; /* * We don't need to disable preemption here because any CPU can @@ -278,7 +278,7 @@ again: } else { /* Give up */ spin_unlock_irqrestore(&(pool->lock), flags); - return DMA_ERROR_CODE; + return IOMMU_MAPPING_ERROR; } } @@ -310,13 +310,13 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, unsigned long attrs) { unsigned long entry; - dma_addr_t ret = DMA_ERROR_CODE; + dma_addr_t ret = 
IOMMU_MAPPING_ERROR; int build_fail; entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order); - if (unlikely(entry == DMA_ERROR_CODE)) - return DMA_ERROR_CODE; + if (unlikely(entry == IOMMU_MAPPING_ERROR)) + return IOMMU_MAPPING_ERROR; entry += tbl->it_offset; /* Offset into real TCE table */ ret = entry << tbl->it_page_shift; /* Set the return dma address */ @@ -328,12 +328,12 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, /* tbl->it_ops->set() only returns non-zero for transient errors. * Clean up the table bitmap in this case and return - * DMA_ERROR_CODE. For all other errors the functionality is + * IOMMU_MAPPING_ERROR. For all other errors the functionality is * not altered. */ if (unlikely(build_fail)) { __iommu_free(tbl, ret, npages); - return DMA_ERROR_CODE; + return IOMMU_MAPPING_ERROR; } /* Flush/invalidate TLB caches if necessary */ @@ -478,7 +478,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen); /* Handle failure */ - if (unlikely(entry == DMA_ERROR_CODE)) { + if (unlikely(entry == IOMMU_MAPPING_ERROR)) { if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) dev_info(dev, "iommu_alloc failed, tbl %p " @@ -545,7 +545,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, */ if (outcount < incount) { outs = sg_next(outs); - outs->dma_address = DMA_ERROR_CODE; + outs->dma_address = IOMMU_MAPPING_ERROR; outs->dma_length = 0; } @@ -563,7 +563,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, npages = iommu_num_pages(s->dma_address, s->dma_length, IOMMU_PAGE_SIZE(tbl)); __iommu_free(tbl, vaddr, npages); - s->dma_address = DMA_ERROR_CODE; + s->dma_address = IOMMU_MAPPING_ERROR; s->dma_length = 0; } if (s == outs) @@ -777,7 +777,7 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, unsigned long mask, enum dma_data_direction direction, unsigned long attrs) { - dma_addr_t dma_handle = 
DMA_ERROR_CODE; + dma_addr_t dma_handle = IOMMU_MAPPING_ERROR; void *vaddr; unsigned long uaddr; unsigned int npages, align; @@ -797,7 +797,7 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction, mask >> tbl->it_page_shift, align, attrs); - if (dma_handle == DMA_ERROR_CODE) { + if (dma_handle == IOMMU_MAPPING_ERROR) { if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) { dev_info(dev, "iommu_alloc failed, tbl %p " @@ -869,7 +869,7 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, io_order = get_iommu_order(size, tbl); mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, mask >> tbl->it_page_shift, io_order, 0); - if (mapping == DMA_ERROR_CODE) { + if (mapping == IOMMU_MAPPING_ERROR) { free_pages((unsigned long)ret, order); return NULL; } diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 5f9eada3519b..a9bfa49f3698 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -405,6 +405,7 @@ void machine_check_print_event_info(struct machine_check_event *evt, break; } } +EXPORT_SYMBOL_GPL(machine_check_print_event_info); uint64_t get_mce_fault_addr(struct machine_check_event *evt) { diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 773b35d16a0b..0b436df746fc 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -46,6 +46,8 @@ #include <linux/of.h> #include <asm/reg.h> +#include <asm/ppc-opcode.h> +#include <asm/disassemble.h> #include <asm/cputable.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> @@ -645,6 +647,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, unsigned long stolen; unsigned long core_stolen; u64 now; + unsigned long flags; dt = vcpu->arch.dtl_ptr; vpa = vcpu->arch.vpa.pinned_addr; @@ -652,10 +655,10 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, core_stolen = vcore_stolen_time(vc, now); stolen = 
core_stolen - vcpu->arch.stolen_logged; vcpu->arch.stolen_logged = core_stolen; - spin_lock_irq(&vcpu->arch.tbacct_lock); + spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); stolen += vcpu->arch.busy_stolen; vcpu->arch.busy_stolen = 0; - spin_unlock_irq(&vcpu->arch.tbacct_lock); + spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); if (!dt || !vpa) return; memset(dt, 0, sizeof(struct dtl_entry)); @@ -675,6 +678,26 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, vcpu->arch.dtl.dirty = true; } +/* See if there is a doorbell interrupt pending for a vcpu */ +static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu) +{ + int thr; + struct kvmppc_vcore *vc; + + if (vcpu->arch.doorbell_request) + return true; + /* + * Ensure that the read of vcore->dpdes comes after the read + * of vcpu->doorbell_request. This barrier matches the + * lwsync in book3s_hv_rmhandlers.S just before the + * fast_guest_return label. + */ + smp_rmb(); + vc = vcpu->arch.vcore; + thr = vcpu->vcpu_id - vc->first_vcpuid; + return !!(vc->dpdes & (1 << thr)); +} + static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu) { if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207) @@ -926,6 +949,101 @@ static int kvmppc_emulate_debug_inst(struct kvm_run *run, } } +static void do_nothing(void *x) +{ +} + +static unsigned long kvmppc_read_dpdes(struct kvm_vcpu *vcpu) +{ + int thr, cpu, pcpu, nthreads; + struct kvm_vcpu *v; + unsigned long dpdes; + + nthreads = vcpu->kvm->arch.emul_smt_mode; + dpdes = 0; + cpu = vcpu->vcpu_id & ~(nthreads - 1); + for (thr = 0; thr < nthreads; ++thr, ++cpu) { + v = kvmppc_find_vcpu(vcpu->kvm, cpu); + if (!v) + continue; + /* + * If the vcpu is currently running on a physical cpu thread, + * interrupt it in order to pull it out of the guest briefly, + * which will update its vcore->dpdes value. 
+ */ + pcpu = READ_ONCE(v->cpu); + if (pcpu >= 0) + smp_call_function_single(pcpu, do_nothing, NULL, 1); + if (kvmppc_doorbell_pending(v)) + dpdes |= 1 << thr; + } + return dpdes; +} + +/* + * On POWER9, emulate doorbell-related instructions in order to + * give the guest the illusion of running on a multi-threaded core. + * The instructions emulated are msgsndp, msgclrp, mfspr TIR, + * and mfspr DPDES. + */ +static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu) +{ + u32 inst, rb, thr; + unsigned long arg; + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *tvcpu; + + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + return EMULATE_FAIL; + if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst) != EMULATE_DONE) + return RESUME_GUEST; + if (get_op(inst) != 31) + return EMULATE_FAIL; + rb = get_rb(inst); + thr = vcpu->vcpu_id & (kvm->arch.emul_smt_mode - 1); + switch (get_xop(inst)) { + case OP_31_XOP_MSGSNDP: + arg = kvmppc_get_gpr(vcpu, rb); + if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER) + break; + arg &= 0x3f; + if (arg >= kvm->arch.emul_smt_mode) + break; + tvcpu = kvmppc_find_vcpu(kvm, vcpu->vcpu_id - thr + arg); + if (!tvcpu) + break; + if (!tvcpu->arch.doorbell_request) { + tvcpu->arch.doorbell_request = 1; + kvmppc_fast_vcpu_kick_hv(tvcpu); + } + break; + case OP_31_XOP_MSGCLRP: + arg = kvmppc_get_gpr(vcpu, rb); + if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER) + break; + vcpu->arch.vcore->dpdes = 0; + vcpu->arch.doorbell_request = 0; + break; + case OP_31_XOP_MFSPR: + switch (get_sprn(inst)) { + case SPRN_TIR: + arg = thr; + break; + case SPRN_DPDES: + arg = kvmppc_read_dpdes(vcpu); + break; + default: + return EMULATE_FAIL; + } + kvmppc_set_gpr(vcpu, get_rt(inst), arg); + break; + default: + return EMULATE_FAIL; + } + kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4); + return RESUME_GUEST; +} + static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, struct task_struct *tsk) { @@ -971,15 +1089,20 @@ static int kvmppc_handle_exit_hv(struct kvm_run 
*run, struct kvm_vcpu *vcpu, r = RESUME_GUEST; break; case BOOK3S_INTERRUPT_MACHINE_CHECK: - /* - * Deliver a machine check interrupt to the guest. - * We have to do this, even if the host has handled the - * machine check, because machine checks use SRR0/1 and - * the interrupt might have trashed guest state in them. - */ - kvmppc_book3s_queue_irqprio(vcpu, - BOOK3S_INTERRUPT_MACHINE_CHECK); - r = RESUME_GUEST; + /* Exit to guest with KVM_EXIT_NMI as exit reason */ + run->exit_reason = KVM_EXIT_NMI; + run->hw.hardware_exit_reason = vcpu->arch.trap; + /* Clear out the old NMI status from run->flags */ + run->flags &= ~KVM_RUN_PPC_NMI_DISP_MASK; + /* Now set the NMI status */ + if (vcpu->arch.mce_evt.disposition == MCE_DISPOSITION_RECOVERED) + run->flags |= KVM_RUN_PPC_NMI_DISP_FULLY_RECOV; + else + run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV; + + r = RESUME_HOST; + /* Print the MCE event to host console. */ + machine_check_print_event_info(&vcpu->arch.mce_evt, false); break; case BOOK3S_INTERRUPT_PROGRAM: { @@ -1048,12 +1171,19 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, break; /* * This occurs if the guest (kernel or userspace), does something that - * is prohibited by HFSCR. We just generate a program interrupt to - * the guest. + * is prohibited by HFSCR. + * On POWER9, this could be a doorbell instruction that we need + * to emulate. + * Otherwise, we just generate a program interrupt to the guest. 
*/ case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: - kvmppc_core_queue_program(vcpu, SRR1_PROGILL); - r = RESUME_GUEST; + r = EMULATE_FAIL; + if ((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) + r = kvmppc_emulate_doorbell_instr(vcpu); + if (r == EMULATE_FAIL) { + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + r = RESUME_GUEST; + } break; case BOOK3S_INTERRUPT_HV_RM_HARD: r = RESUME_PASSTHROUGH; @@ -1143,6 +1273,12 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, mask = LPCR_DPFD | LPCR_ILE | LPCR_TC; if (cpu_has_feature(CPU_FTR_ARCH_207S)) mask |= LPCR_AIL; + /* + * On POWER9, allow userspace to enable large decrementer for the + * guest, whether or not the host has it enabled. + */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) + mask |= LPCR_LD; /* Broken 32-bit version of LPCR must not clear top bits */ if (preserve_top32) @@ -1611,7 +1747,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) init_swait_queue_head(&vcore->wq); vcore->preempt_tb = TB_NIL; vcore->lpcr = kvm->arch.lpcr; - vcore->first_vcpuid = core * threads_per_vcore(); + vcore->first_vcpuid = core * kvm->arch.smt_mode; vcore->kvm = kvm; INIT_LIST_HEAD(&vcore->preempt_list); @@ -1770,14 +1906,10 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, unsigned int id) { struct kvm_vcpu *vcpu; - int err = -EINVAL; + int err; int core; struct kvmppc_vcore *vcore; - core = id / threads_per_vcore(); - if (core >= KVM_MAX_VCORES) - goto out; - err = -ENOMEM; vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); if (!vcpu) @@ -1808,6 +1940,20 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, vcpu->arch.busy_preempt = TB_NIL; vcpu->arch.intr_msr = MSR_SF | MSR_ME; + /* + * Set the default HFSCR for the guest from the host value. + * This value is only used on POWER9. + * On POWER9 DD1, TM doesn't work, so we make sure to + * prevent the guest from using it. 
+ * On POWER9, we want to virtualize the doorbell facility, so we + * turn off the HFSCR bit, which causes those instructions to trap. + */ + vcpu->arch.hfscr = mfspr(SPRN_HFSCR); + if (!cpu_has_feature(CPU_FTR_TM)) + vcpu->arch.hfscr &= ~HFSCR_TM; + if (cpu_has_feature(CPU_FTR_ARCH_300)) + vcpu->arch.hfscr &= ~HFSCR_MSGP; + kvmppc_mmu_book3s_hv_init(vcpu); vcpu->arch.state = KVMPPC_VCPU_NOTREADY; @@ -1815,11 +1961,17 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, init_waitqueue_head(&vcpu->arch.cpu_run); mutex_lock(&kvm->lock); - vcore = kvm->arch.vcores[core]; - if (!vcore) { - vcore = kvmppc_vcore_create(kvm, core); - kvm->arch.vcores[core] = vcore; - kvm->arch.online_vcores++; + vcore = NULL; + err = -EINVAL; + core = id / kvm->arch.smt_mode; + if (core < KVM_MAX_VCORES) { + vcore = kvm->arch.vcores[core]; + if (!vcore) { + err = -ENOMEM; + vcore = kvmppc_vcore_create(kvm, core); + kvm->arch.vcores[core] = vcore; + kvm->arch.online_vcores++; + } } mutex_unlock(&kvm->lock); @@ -1847,6 +1999,43 @@ out: return ERR_PTR(err); } +static int kvmhv_set_smt_mode(struct kvm *kvm, unsigned long smt_mode, + unsigned long flags) +{ + int err; + int esmt = 0; + + if (flags) + return -EINVAL; + if (smt_mode > MAX_SMT_THREADS || !is_power_of_2(smt_mode)) + return -EINVAL; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + /* + * On POWER8 (or POWER7), the threading mode is "strict", + * so we pack smt_mode vcpus per vcore. + */ + if (smt_mode > threads_per_subcore) + return -EINVAL; + } else { + /* + * On POWER9, the threading mode is "loose", + * so each vcpu gets its own vcore. 
+ */ + esmt = smt_mode; + smt_mode = 1; + } + mutex_lock(&kvm->lock); + err = -EBUSY; + if (!kvm->arch.online_vcores) { + kvm->arch.smt_mode = smt_mode; + kvm->arch.emul_smt_mode = esmt; + err = 0; + } + mutex_unlock(&kvm->lock); + + return err; +} + static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa) { if (vpa->pinned_addr) @@ -1897,7 +2086,7 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu) } } -extern void __kvmppc_vcore_entry(void); +extern int __kvmppc_vcore_entry(void); static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, struct kvm_vcpu *vcpu) @@ -1962,10 +2151,6 @@ static void kvmppc_release_hwthread(int cpu) tpaca->kvm_hstate.kvm_split_mode = NULL; } -static void do_nothing(void *x) -{ -} - static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) { int i; @@ -1983,11 +2168,35 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) smp_call_function_single(cpu + i, do_nothing, NULL, 1); } +static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu) +{ + struct kvm *kvm = vcpu->kvm; + + /* + * With radix, the guest can do TLB invalidations itself, + * and it could choose to use the local form (tlbiel) if + * it is invalidating a translation that has only ever been + * used on one vcpu. However, that doesn't mean it has + * only ever been used on one physical cpu, since vcpus + * can move around between pcpus. To cope with this, when + * a vcpu moves from one pcpu to another, we need to tell + * any vcpus running on the same core as this vcpu previously + * ran to flush the TLB. The TLB is shared between threads, + * so we use a single bit in .need_tlb_flush for all 4 threads. 
+ */ + if (vcpu->arch.prev_cpu != pcpu) { + if (vcpu->arch.prev_cpu >= 0 && + cpu_first_thread_sibling(vcpu->arch.prev_cpu) != + cpu_first_thread_sibling(pcpu)) + radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu); + vcpu->arch.prev_cpu = pcpu; + } +} + static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) { int cpu; struct paca_struct *tpaca; - struct kvmppc_vcore *mvc = vc->master_vcore; struct kvm *kvm = vc->kvm; cpu = vc->pcpu; @@ -1997,36 +2206,16 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) vcpu->arch.timer_running = 0; } cpu += vcpu->arch.ptid; - vcpu->cpu = mvc->pcpu; + vcpu->cpu = vc->pcpu; vcpu->arch.thread_cpu = cpu; - - /* - * With radix, the guest can do TLB invalidations itself, - * and it could choose to use the local form (tlbiel) if - * it is invalidating a translation that has only ever been - * used on one vcpu. However, that doesn't mean it has - * only ever been used on one physical cpu, since vcpus - * can move around between pcpus. To cope with this, when - * a vcpu moves from one pcpu to another, we need to tell - * any vcpus running on the same core as this vcpu previously - * ran to flush the TLB. The TLB is shared between threads, - * so we use a single bit in .need_tlb_flush for all 4 threads. - */ - if (kvm_is_radix(kvm) && vcpu->arch.prev_cpu != cpu) { - if (vcpu->arch.prev_cpu >= 0 && - cpu_first_thread_sibling(vcpu->arch.prev_cpu) != - cpu_first_thread_sibling(cpu)) - radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu); - vcpu->arch.prev_cpu = cpu; - } cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest); } tpaca = &paca[cpu]; tpaca->kvm_hstate.kvm_vcpu = vcpu; - tpaca->kvm_hstate.ptid = cpu - mvc->pcpu; + tpaca->kvm_hstate.ptid = cpu - vc->pcpu; /* Order stores to hstate.kvm_vcpu etc. 
before store to kvm_vcore */ smp_wmb(); - tpaca->kvm_hstate.kvm_vcore = mvc; + tpaca->kvm_hstate.kvm_vcore = vc; if (cpu != smp_processor_id()) kvmppc_ipi_thread(cpu); } @@ -2155,8 +2344,7 @@ struct core_info { int max_subcore_threads; int total_threads; int subcore_threads[MAX_SUBCORES]; - struct kvm *subcore_vm[MAX_SUBCORES]; - struct list_head vcs[MAX_SUBCORES]; + struct kvmppc_vcore *vc[MAX_SUBCORES]; }; /* @@ -2167,17 +2355,12 @@ static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 }; static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc) { - int sub; - memset(cip, 0, sizeof(*cip)); cip->n_subcores = 1; cip->max_subcore_threads = vc->num_threads; cip->total_threads = vc->num_threads; cip->subcore_threads[0] = vc->num_threads; - cip->subcore_vm[0] = vc->kvm; - for (sub = 0; sub < MAX_SUBCORES; ++sub) - INIT_LIST_HEAD(&cip->vcs[sub]); - list_add_tail(&vc->preempt_list, &cip->vcs[0]); + cip->vc[0] = vc; } static bool subcore_config_ok(int n_subcores, int n_threads) @@ -2197,9 +2380,8 @@ static bool subcore_config_ok(int n_subcores, int n_threads) return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS; } -static void init_master_vcore(struct kvmppc_vcore *vc) +static void init_vcore_to_run(struct kvmppc_vcore *vc) { - vc->master_vcore = vc; vc->entry_exit_map = 0; vc->in_guest = 0; vc->napping_threads = 0; @@ -2224,9 +2406,9 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip) ++cip->n_subcores; cip->total_threads += vc->num_threads; cip->subcore_threads[sub] = vc->num_threads; - cip->subcore_vm[sub] = vc->kvm; - init_master_vcore(vc); - list_move_tail(&vc->preempt_list, &cip->vcs[sub]); + cip->vc[sub] = vc; + init_vcore_to_run(vc); + list_del_init(&vc->preempt_list); return true; } @@ -2294,6 +2476,18 @@ static void collect_piggybacks(struct core_info *cip, int target_threads) spin_unlock(&lp->lock); } +static bool recheck_signals(struct core_info *cip) +{ + int sub, i; + struct kvm_vcpu *vcpu; + 
+ for (sub = 0; sub < cip->n_subcores; ++sub) + for_each_runnable_thread(i, vcpu, cip->vc[sub]) + if (signal_pending(vcpu->arch.run_task)) + return true; + return false; +} + static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) { int still_running = 0, i; @@ -2331,7 +2525,6 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) wake_up(&vcpu->arch.cpu_run); } } - list_del_init(&vc->preempt_list); if (!is_master) { if (still_running > 0) { kvmppc_vcore_preempt(vc); @@ -2393,6 +2586,21 @@ static inline int kvmppc_set_host_core(unsigned int cpu) return 0; } +static void set_irq_happened(int trap) +{ + switch (trap) { + case BOOK3S_INTERRUPT_EXTERNAL: + local_paca->irq_happened |= PACA_IRQ_EE; + break; + case BOOK3S_INTERRUPT_H_DOORBELL: + local_paca->irq_happened |= PACA_IRQ_DBELL; + break; + case BOOK3S_INTERRUPT_HMI: + local_paca->irq_happened |= PACA_IRQ_HMI; + break; + } +} + /* * Run a set of guest threads on a physical core. * Called with vc->lock held. @@ -2403,7 +2611,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) int i; int srcu_idx; struct core_info core_info; - struct kvmppc_vcore *pvc, *vcnext; + struct kvmppc_vcore *pvc; struct kvm_split_mode split_info, *sip; int split, subcore_size, active; int sub; @@ -2412,6 +2620,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) int pcpu, thr; int target_threads; int controlled_threads; + int trap; /* * Remove from the list any threads that have a signal pending @@ -2426,7 +2635,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) /* * Initialize *vc. */ - init_master_vcore(vc); + init_vcore_to_run(vc); vc->preempt_tb = TB_NIL; /* @@ -2463,6 +2672,43 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) if (vc->num_threads < target_threads) collect_piggybacks(&core_info, target_threads); + /* + * On radix, arrange for TLB flushing if necessary. 
+ * This has to be done before disabling interrupts since + * it uses smp_call_function(). + */ + pcpu = smp_processor_id(); + if (kvm_is_radix(vc->kvm)) { + for (sub = 0; sub < core_info.n_subcores; ++sub) + for_each_runnable_thread(i, vcpu, core_info.vc[sub]) + kvmppc_prepare_radix_vcpu(vcpu, pcpu); + } + + /* + * Hard-disable interrupts, and check resched flag and signals. + * If we need to reschedule or deliver a signal, clean up + * and return without going into the guest(s). + */ + local_irq_disable(); + hard_irq_disable(); + if (lazy_irq_pending() || need_resched() || + recheck_signals(&core_info)) { + local_irq_enable(); + vc->vcore_state = VCORE_INACTIVE; + /* Unlock all except the primary vcore */ + for (sub = 1; sub < core_info.n_subcores; ++sub) { + pvc = core_info.vc[sub]; + /* Put back on to the preempted vcores list */ + kvmppc_vcore_preempt(pvc); + spin_unlock(&pvc->lock); + } + for (i = 0; i < controlled_threads; ++i) + kvmppc_release_hwthread(pcpu + i); + return; + } + + kvmppc_clear_host_core(pcpu); + /* Decide on micro-threading (split-core) mode */ subcore_size = threads_per_subcore; cmd_bit = stat_bit = 0; @@ -2486,13 +2732,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) split_info.ldbar = mfspr(SPRN_LDBAR); split_info.subcore_size = subcore_size; for (sub = 0; sub < core_info.n_subcores; ++sub) - split_info.master_vcs[sub] = - list_first_entry(&core_info.vcs[sub], - struct kvmppc_vcore, preempt_list); + split_info.vc[sub] = core_info.vc[sub]; /* order writes to split_info before kvm_split_mode pointer */ smp_wmb(); } - pcpu = smp_processor_id(); for (thr = 0; thr < controlled_threads; ++thr) paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip; @@ -2512,32 +2755,29 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) } } - kvmppc_clear_host_core(pcpu); - /* Start all the threads */ active = 0; for (sub = 0; sub < core_info.n_subcores; ++sub) { thr = subcore_thread_map[sub]; thr0_done = false; active |= 1 << thr; - 
list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) { - pvc->pcpu = pcpu + thr; - for_each_runnable_thread(i, vcpu, pvc) { - kvmppc_start_thread(vcpu, pvc); - kvmppc_create_dtl_entry(vcpu, pvc); - trace_kvm_guest_enter(vcpu); - if (!vcpu->arch.ptid) - thr0_done = true; - active |= 1 << (thr + vcpu->arch.ptid); - } - /* - * We need to start the first thread of each subcore - * even if it doesn't have a vcpu. - */ - if (pvc->master_vcore == pvc && !thr0_done) - kvmppc_start_thread(NULL, pvc); - thr += pvc->num_threads; + pvc = core_info.vc[sub]; + pvc->pcpu = pcpu + thr; + for_each_runnable_thread(i, vcpu, pvc) { + kvmppc_start_thread(vcpu, pvc); + kvmppc_create_dtl_entry(vcpu, pvc); + trace_kvm_guest_enter(vcpu); + if (!vcpu->arch.ptid) + thr0_done = true; + active |= 1 << (thr + vcpu->arch.ptid); } + /* + * We need to start the first thread of each subcore + * even if it doesn't have a vcpu. + */ + if (!thr0_done) + kvmppc_start_thread(NULL, pvc); + thr += pvc->num_threads; } /* @@ -2564,17 +2804,27 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) trace_kvmppc_run_core(vc, 0); for (sub = 0; sub < core_info.n_subcores; ++sub) - list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) - spin_unlock(&pvc->lock); + spin_unlock(&core_info.vc[sub]->lock); + + /* + * Interrupts will be enabled once we get into the guest, + * so tell lockdep that we're about to enable interrupts. 
+ */ + trace_hardirqs_on(); guest_enter(); srcu_idx = srcu_read_lock(&vc->kvm->srcu); - __kvmppc_vcore_entry(); + trap = __kvmppc_vcore_entry(); srcu_read_unlock(&vc->kvm->srcu, srcu_idx); + guest_exit(); + + trace_hardirqs_off(); + set_irq_happened(trap); + spin_lock(&vc->lock); /* prevent other vcpu threads from doing kvmppc_start_thread() now */ vc->vcore_state = VCORE_EXITING; @@ -2602,6 +2852,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) split_info.do_nap = 0; } + kvmppc_set_host_core(pcpu); + + local_irq_enable(); + /* Let secondaries go back to the offline loop */ for (i = 0; i < controlled_threads; ++i) { kvmppc_release_hwthread(pcpu + i); @@ -2610,18 +2864,15 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest); } - kvmppc_set_host_core(pcpu); - spin_unlock(&vc->lock); /* make sure updates to secondary vcpu structs are visible now */ smp_mb(); - guest_exit(); - for (sub = 0; sub < core_info.n_subcores; ++sub) - list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub], - preempt_list) - post_guest_process(pvc, pvc == vc); + for (sub = 0; sub < core_info.n_subcores; ++sub) { + pvc = core_info.vc[sub]; + post_guest_process(pvc, pvc == vc); + } spin_lock(&vc->lock); preempt_enable(); @@ -2666,6 +2917,30 @@ static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) vc->halt_poll_ns /= halt_poll_ns_shrink; } +#ifdef CONFIG_KVM_XICS +static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu) +{ + if (!xive_enabled()) + return false; + return vcpu->arch.xive_saved_state.pipr < + vcpu->arch.xive_saved_state.cppr; +} +#else +static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu) +{ + return false; +} +#endif /* CONFIG_KVM_XICS */ + +static bool kvmppc_vcpu_woken(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.pending_exceptions || vcpu->arch.prodded || + kvmppc_doorbell_pending(vcpu) || xive_interrupt_pending(vcpu)) + return true; + + return false; +} + /* * Check to 
see if any of the runnable vcpus on the vcore have pending * exceptions or are no longer ceded @@ -2676,8 +2951,7 @@ static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc) int i; for_each_runnable_thread(i, vcpu, vc) { - if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded || - vcpu->arch.prodded) + if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu)) return 1; } @@ -2819,15 +3093,14 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) */ if (!signal_pending(current)) { if (vc->vcore_state == VCORE_PIGGYBACK) { - struct kvmppc_vcore *mvc = vc->master_vcore; - if (spin_trylock(&mvc->lock)) { - if (mvc->vcore_state == VCORE_RUNNING && - !VCORE_IS_EXITING(mvc)) { + if (spin_trylock(&vc->lock)) { + if (vc->vcore_state == VCORE_RUNNING && + !VCORE_IS_EXITING(vc)) { kvmppc_create_dtl_entry(vcpu, vc); kvmppc_start_thread(vcpu, vc); trace_kvm_guest_enter(vcpu); } - spin_unlock(&mvc->lock); + spin_unlock(&vc->lock); } } else if (vc->vcore_state == VCORE_RUNNING && !VCORE_IS_EXITING(vc)) { @@ -2863,7 +3136,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) break; n_ceded = 0; for_each_runnable_thread(i, v, vc) { - if (!v->arch.pending_exceptions && !v->arch.prodded) + if (!kvmppc_vcpu_woken(v)) n_ceded += v->arch.ceded; else v->arch.ceded = 0; @@ -3519,6 +3792,19 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) kvm_hv_vm_activated(); /* + * Initialize smt_mode depending on processor. + * POWER8 and earlier have to use "strict" threading, where + * all vCPUs in a vcore have to run on the same (sub)core, + * whereas on POWER9 the threads can each run a different + * guest. 
+ */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + kvm->arch.smt_mode = threads_per_subcore; + else + kvm->arch.smt_mode = 1; + kvm->arch.emul_smt_mode = 1; + + /* * Create a debugfs directory for the VM */ snprintf(buf, sizeof(buf), "vm%d", current->pid); @@ -3947,6 +4233,7 @@ static struct kvmppc_ops kvm_ops_hv = { #endif .configure_mmu = kvmhv_configure_mmu, .get_rmmu_info = kvmhv_get_rmmu_info, + .set_smt_mode = kvmhv_set_smt_mode, }; static int kvm_init_subcore_bitmap(void) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index ee4c2558c305..90644db9d38e 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -307,7 +307,7 @@ void kvmhv_commence_exit(int trap) return; for (i = 0; i < MAX_SUBCORES; ++i) { - vc = sip->master_vcs[i]; + vc = sip->vc[i]; if (!vc) break; do { diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 404deb512844..dc54373c8780 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -61,13 +61,6 @@ BEGIN_FTR_SECTION std r3, HSTATE_DABR(r13) END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) - /* Hard-disable interrupts */ - mfmsr r10 - std r10, HSTATE_HOST_MSR(r13) - rldicl r10,r10,48,1 - rotldi r10,r10,16 - mtmsrd r10,1 - /* Save host PMU registers */ BEGIN_FTR_SECTION /* Work around P8 PMAE bug */ @@ -153,6 +146,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) * * R1 = host R1 * R2 = host R2 + * R3 = trap number on this thread * R12 = exit handler id * R13 = PACA */ diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 7ef0993214f3..c356f9a40b24 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -130,12 +130,28 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) out: /* + * For guest that supports FWNMI capability, hook the MCE event into + * vcpu structure. 
We are going to exit the guest with KVM_EXIT_NMI + * exit reason. On our way to exit we will pull this event from vcpu + * structure and print it from thread 0 of the core/subcore. + * + * For guest that does not support FWNMI capability (old QEMU): * We are now going enter guest either through machine check * interrupt (for unhandled errors) or will continue from * current HSRR0 (for handled errors) in guest. Hence * queue up the event so that we can log it from host console later. */ - machine_check_queue_event(); + if (vcpu->kvm->arch.fwnmi_enabled) { + /* + * Hook up the mce event on to vcpu structure. + * First clear the old event. + */ + memset(&vcpu->arch.mce_evt, 0, sizeof(vcpu->arch.mce_evt)); + if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) { + vcpu->arch.mce_evt = mce_evt; + } + } else + machine_check_queue_event(); return handled; } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 4888dd494604..6ea4b53f4b16 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -45,7 +45,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) #define NAPPING_NOVCPU 2 /* Stack frame offsets for kvmppc_hv_entry */ -#define SFS 144 +#define SFS 160 #define STACK_SLOT_TRAP (SFS-4) #define STACK_SLOT_TID (SFS-16) #define STACK_SLOT_PSSCR (SFS-24) @@ -54,6 +54,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) #define STACK_SLOT_CIABR (SFS-48) #define STACK_SLOT_DAWR (SFS-56) #define STACK_SLOT_DAWRX (SFS-64) +#define STACK_SLOT_HFSCR (SFS-72) /* * Call kvmppc_hv_entry in real mode. 
@@ -68,6 +69,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline) std r0, PPC_LR_STKOFF(r1) stdu r1, -112(r1) mfmsr r10 + std r10, HSTATE_HOST_MSR(r13) LOAD_REG_ADDR(r5, kvmppc_call_hv_entry) li r0,MSR_RI andc r0,r10,r0 @@ -152,20 +154,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) stb r0, HSTATE_HWTHREAD_REQ(r13) /* - * For external and machine check interrupts, we need - * to call the Linux handler to process the interrupt. - * We do that by jumping to absolute address 0x500 for - * external interrupts, or the machine_check_fwnmi label - * for machine checks (since firmware might have patched - * the vector area at 0x200). The [h]rfid at the end of the - * handler will return to the book3s_hv_interrupts.S code. - * For other interrupts we do the rfid to get back - * to the book3s_hv_interrupts.S code here. + * For external interrupts we need to call the Linux + * handler to process the interrupt. We do that by jumping + * to absolute address 0x500 for external interrupts. + * The [h]rfid at the end of the handler will return to + * the book3s_hv_interrupts.S code. For other interrupts + * we do the rfid to get back to the book3s_hv_interrupts.S + * code here. */ ld r8, 112+PPC_LR_STKOFF(r1) addi r1, r1, 112 ld r7, HSTATE_HOST_MSR(r13) + /* Return the trap number on this thread as the return value */ + mr r3, r12 + /* * If we came back from the guest via a relocation-on interrupt, * we will be in virtual mode at this point, which makes it a @@ -175,59 +178,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) andi. r0, r0, MSR_IR /* in real mode? 
*/ bne .Lvirt_return - cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK - cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL - beq 11f - cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL - beq 15f /* Invoke the H_DOORBELL handler */ - cmpwi cr2, r12, BOOK3S_INTERRUPT_HMI - beq cr2, 14f /* HMI check */ - - /* RFI into the highmem handler, or branch to interrupt handler */ + /* RFI into the highmem handler */ mfmsr r6 li r0, MSR_RI andc r6, r6, r0 mtmsrd r6, 1 /* Clear RI in MSR */ mtsrr0 r8 mtsrr1 r7 - beq cr1, 13f /* machine check */ RFI - /* On POWER7, we have external interrupts set to use HSRR0/1 */ -11: mtspr SPRN_HSRR0, r8 - mtspr SPRN_HSRR1, r7 - ba 0x500 - -13: b machine_check_fwnmi - -14: mtspr SPRN_HSRR0, r8 - mtspr SPRN_HSRR1, r7 - b hmi_exception_after_realmode - -15: mtspr SPRN_HSRR0, r8 - mtspr SPRN_HSRR1, r7 - ba 0xe80 - - /* Virtual-mode return - can't get here for HMI or machine check */ + /* Virtual-mode return */ .Lvirt_return: - cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL - beq 16f - cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL - beq 17f - andi. r0, r7, MSR_EE /* were interrupts hard-enabled? 
*/ - beq 18f - mtmsrd r7, 1 /* if so then re-enable them */ -18: mtlr r8 + mtlr r8 blr -16: mtspr SPRN_HSRR0, r8 /* jump to reloc-on external vector */ - mtspr SPRN_HSRR1, r7 - b exc_virt_0x4500_hardware_interrupt - -17: mtspr SPRN_HSRR0, r8 - mtspr SPRN_HSRR1, r7 - b exc_virt_0x4e80_h_doorbell - kvmppc_primary_no_guest: /* We handle this much like a ceded vcpu */ /* put the HDEC into the DEC, since HDEC interrupts don't wake us */ @@ -769,6 +733,8 @@ BEGIN_FTR_SECTION std r6, STACK_SLOT_PSSCR(r1) std r7, STACK_SLOT_PID(r1) std r8, STACK_SLOT_IAMR(r1) + mfspr r5, SPRN_HFSCR + std r5, STACK_SLOT_HFSCR(r1) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) BEGIN_FTR_SECTION mfspr r5, SPRN_CIABR @@ -920,8 +886,10 @@ FTR_SECTION_ELSE ld r5, VCPU_TID(r4) ld r6, VCPU_PSSCR(r4) oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */ + ld r7, VCPU_HFSCR(r4) mtspr SPRN_TIDR, r5 mtspr SPRN_PSSCR, r6 + mtspr SPRN_HFSCR, r7 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) 8: @@ -936,7 +904,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) mftb r7 subf r3,r7,r8 mtspr SPRN_DEC,r3 - stw r3,VCPU_DEC(r4) + std r3,VCPU_DEC(r4) ld r5, VCPU_SPRG0(r4) ld r6, VCPU_SPRG1(r4) @@ -1048,7 +1016,13 @@ kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */ li r0, BOOK3S_INTERRUPT_EXTERNAL bne cr1, 12f mfspr r0, SPRN_DEC - cmpwi r0, 0 +BEGIN_FTR_SECTION + /* On POWER9 check whether the guest has large decrementer enabled */ + andis. 
r8, r8, LPCR_LD@h + bne 15f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + extsw r0, r0 +15: cmpdi r0, 0 li r0, BOOK3S_INTERRUPT_DECREMENTER bge 5f @@ -1058,6 +1032,23 @@ kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */ mr r9, r4 bl kvmppc_msr_interrupt 5: +BEGIN_FTR_SECTION + b fast_guest_return +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) + /* On POWER9, check for pending doorbell requests */ + lbz r0, VCPU_DBELL_REQ(r4) + cmpwi r0, 0 + beq fast_guest_return + ld r5, HSTATE_KVM_VCORE(r13) + /* Set DPDES register so the CPU will take a doorbell interrupt */ + li r0, 1 + mtspr SPRN_DPDES, r0 + std r0, VCORE_DPDES(r5) + /* Make sure other cpus see vcore->dpdes set before dbell req clear */ + lwsync + /* Clear the pending doorbell request */ + li r0, 0 + stb r0, VCPU_DBELL_REQ(r4) /* * Required state: @@ -1232,6 +1223,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) stw r12,VCPU_TRAP(r9) + /* + * Now that we have saved away SRR0/1 and HSRR0/1, + * interrupts are recoverable in principle, so set MSR_RI. + * This becomes important for relocation-on interrupts from + * the guest, which we can get in radix mode on POWER9. + */ + li r0, MSR_RI + mtmsrd r0, 1 + #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING addi r3, r9, VCPU_TB_RMINTR mr r4, r9 @@ -1288,6 +1288,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) beq 4f b guest_exit_cont 3: + /* If it's a hypervisor facility unavailable interrupt, save HFSCR */ + cmpwi r12, BOOK3S_INTERRUPT_H_FAC_UNAVAIL + bne 14f + mfspr r3, SPRN_HFSCR + std r3, VCPU_HFSCR(r9) + b guest_exit_cont +14: /* External interrupt ? */ cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL bne+ guest_exit_cont @@ -1475,12 +1482,18 @@ mc_cont: mtspr SPRN_SPURR,r4 /* Save DEC */ + ld r3, HSTATE_KVM_VCORE(r13) mfspr r5,SPRN_DEC mftb r6 + /* On P9, if the guest has large decr enabled, don't sign extend */ +BEGIN_FTR_SECTION + ld r4, VCORE_LPCR(r3) + andis. 
r4, r4, LPCR_LD@h + bne 16f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) extsw r5,r5 - add r5,r5,r6 +16: add r5,r5,r6 /* r5 is a guest timebase value here, convert to host TB */ - ld r3,HSTATE_KVM_VCORE(r13) ld r4,VCORE_TB_OFFSET(r3) subf r5,r4,r5 std r5,VCPU_DEC_EXPIRES(r9) @@ -1525,6 +1538,9 @@ FTR_SECTION_ELSE rldicl r6, r6, 4, 50 /* r6 &= PSSCR_GUEST_VIS */ rotldi r6, r6, 60 std r6, VCPU_PSSCR(r9) + /* Restore host HFSCR value */ + ld r7, STACK_SLOT_HFSCR(r1) + mtspr SPRN_HFSCR, r7 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) /* * Restore various registers to 0, where non-zero values @@ -2402,8 +2418,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) mfspr r3, SPRN_DEC mfspr r4, SPRN_HDEC mftb r5 +BEGIN_FTR_SECTION + /* On P9 check whether the guest has large decrementer mode enabled */ + ld r6, HSTATE_KVM_VCORE(r13) + ld r6, VCORE_LPCR(r6) + andis. r6, r6, LPCR_LD@h + bne 68f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) extsw r3, r3 - EXTEND_HDEC(r4) +68: EXTEND_HDEC(r4) cmpd r3, r4 ble 67f mtspr SPRN_DEC, r4 @@ -2589,22 +2612,32 @@ machine_check_realmode: ld r9, HSTATE_KVM_VCPU(r13) li r12, BOOK3S_INTERRUPT_MACHINE_CHECK /* - * Deliver unhandled/fatal (e.g. UE) MCE errors to guest through - * machine check interrupt (set HSRR0 to 0x200). And for handled - * errors (no-fatal), just go back to guest execution with current - * HSRR0 instead of exiting guest. This new approach will inject - * machine check to guest for fatal error causing guest to crash. - * - * The old code used to return to host for unhandled errors which - * was causing guest to hang with soft lockups inside guest and - * makes it difficult to recover guest instance. + * For the guest that is FWNMI capable, deliver all the MCE errors + * (handled/unhandled) by exiting the guest with KVM_EXIT_NMI exit + * reason. This new approach injects machine check errors in guest + * address space to guest with additional information in the form + * of RTAS event, thus enabling guest kernel to suitably handle + * such errors. 
* + * For the guest that is not FWNMI capable (old QEMU) fallback + * to old behaviour for backward compatibility: + * Deliver unhandled/fatal (e.g. UE) MCE errors to guest either + * through machine check interrupt (set HSRR0 to 0x200). + * For handled errors (no-fatal), just go back to guest execution + * with current HSRR0. * if we receive machine check with MSR(RI=0) then deliver it to * guest as machine check causing guest to crash. */ ld r11, VCPU_MSR(r9) rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */ bne mc_cont /* if so, exit to host */ + /* Check if guest is capable of handling NMI exit */ + ld r10, VCPU_KVM(r9) + lbz r10, KVM_FWNMI(r10) + cmpdi r10, 1 /* FWNMI capable? */ + beq mc_cont /* if so, exit with KVM_EXIT_NMI. */ + + /* if not, fall through for backward compatibility. */ andi. r10, r11, MSR_RI /* check for unrecoverable exception */ beq 1f /* Deliver a machine check to guest */ ld r10, VCPU_PC(r9) diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index ffe1da95033a..08b200a0bbce 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1257,8 +1257,8 @@ static void xive_pre_save_scan(struct kvmppc_xive *xive) if (!xc) continue; for (j = 0; j < KVMPPC_XIVE_Q_COUNT; j++) { - if (xc->queues[i].qpage) - xive_pre_save_queue(xive, &xc->queues[i]); + if (xc->queues[j].qpage) + xive_pre_save_queue(xive, &xc->queues[j]); } } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 3eaac3809977..071b87ee682f 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -687,7 +687,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) kvmppc_core_check_exceptions(vcpu); - if (vcpu->requests) { + if (kvm_request_pending(vcpu)) { /* Exception delivery raised request; start over */ return 1; } diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index c873ffe55362..4d8b4d6cebff 100644 --- a/arch/powerpc/kvm/emulate.c +++ 
b/arch/powerpc/kvm/emulate.c @@ -39,7 +39,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) unsigned long dec_nsec; unsigned long long dec_time; - pr_debug("mtDEC: %x\n", vcpu->arch.dec); + pr_debug("mtDEC: %lx\n", vcpu->arch.dec); hrtimer_try_to_cancel(&vcpu->arch.dec_timer); #ifdef CONFIG_PPC_BOOK3S @@ -109,7 +109,7 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) case SPRN_TBWU: break; case SPRN_DEC: - vcpu->arch.dec = spr_val; + vcpu->arch.dec = (u32) spr_val; kvmppc_emulate_dec(vcpu); break; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 7f71ab5fcad1..1a75c0b5f4ca 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -55,8 +55,7 @@ EXPORT_SYMBOL_GPL(kvmppc_pr_ops); int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { - return !!(v->arch.pending_exceptions) || - v->requests; + return !!(v->arch.pending_exceptions) || kvm_request_pending(v); } int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) @@ -108,7 +107,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) */ smp_mb(); - if (vcpu->requests) { + if (kvm_request_pending(vcpu)) { /* Make sure we process requests preemptable */ local_irq_enable(); trace_kvm_check_requests(vcpu); @@ -554,13 +553,28 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE case KVM_CAP_PPC_SMT: r = 0; - if (hv_enabled) { + if (kvm) { + if (kvm->arch.emul_smt_mode > 1) + r = kvm->arch.emul_smt_mode; + else + r = kvm->arch.smt_mode; + } else if (hv_enabled) { if (cpu_has_feature(CPU_FTR_ARCH_300)) r = 1; else r = threads_per_subcore; } break; + case KVM_CAP_PPC_SMT_POSSIBLE: + r = 1; + if (hv_enabled) { + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + r = ((threads_per_subcore << 1) - 1); + else + /* P9 can emulate dbells, so allow any mode */ + r = 8 | 4 | 2 | 1; + } + break; case KVM_CAP_PPC_RMA: r = 0; break; @@ -619,6 +633,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = !!hv_enabled && 
!cpu_has_feature(CPU_FTR_ARCH_300); break; #endif +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + case KVM_CAP_PPC_FWNMI: + r = hv_enabled; + break; +#endif case KVM_CAP_PPC_HTM: r = cpu_has_feature(CPU_FTR_TM_COMP) && is_kvmppc_hv_enabled(kvm); @@ -1538,6 +1557,15 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, break; } #endif /* CONFIG_KVM_XICS */ +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + case KVM_CAP_PPC_FWNMI: + r = -EINVAL; + if (!is_kvmppc_hv_enabled(vcpu->kvm)) + break; + r = 0; + vcpu->kvm->arch.fwnmi_enabled = true; + break; +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ default: r = -EINVAL; break; @@ -1712,6 +1740,15 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, r = 0; break; } + case KVM_CAP_PPC_SMT: { + unsigned long mode = cap->args[0]; + unsigned long flags = cap->args[1]; + + r = -EINVAL; + if (kvm->arch.kvm_ops->set_smt_mode) + r = kvm->arch.kvm_ops->set_smt_mode(kvm, mode, flags); + break; + } #endif default: r = -EINVAL; diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 71b995bbcae0..29d4f96ed33e 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -644,32 +644,22 @@ static void dma_fixed_unmap_sg(struct device *dev, struct scatterlist *sg, direction, attrs); } -static int dma_fixed_dma_supported(struct device *dev, u64 mask) -{ - return mask == DMA_BIT_MASK(64); -} - -static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask); +static int dma_suported_and_switch(struct device *dev, u64 dma_mask); static const struct dma_map_ops dma_iommu_fixed_ops = { .alloc = dma_fixed_alloc_coherent, .free = dma_fixed_free_coherent, .map_sg = dma_fixed_map_sg, .unmap_sg = dma_fixed_unmap_sg, - .dma_supported = dma_fixed_dma_supported, - .set_dma_mask = dma_set_mask_and_switch, + .dma_supported = dma_suported_and_switch, .map_page = dma_fixed_map_page, .unmap_page = dma_fixed_unmap_page, + .mapping_error = dma_iommu_mapping_error, }; -static void 
cell_dma_dev_setup_fixed(struct device *dev); - static void cell_dma_dev_setup(struct device *dev) { - /* Order is important here, these are not mutually exclusive */ - if (get_dma_ops(dev) == &dma_iommu_fixed_ops) - cell_dma_dev_setup_fixed(dev); - else if (get_pci_dma_ops() == &dma_iommu_ops) + if (get_pci_dma_ops() == &dma_iommu_ops) set_iommu_table_base(dev, cell_get_iommu_table(dev)); else if (get_pci_dma_ops() == &dma_direct_ops) set_dma_offset(dev, cell_dma_direct_offset); @@ -956,38 +946,29 @@ out: return dev_addr; } -static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask) +static int dma_suported_and_switch(struct device *dev, u64 dma_mask) { - if (!dev->dma_mask || !dma_supported(dev, dma_mask)) - return -EIO; - if (dma_mask == DMA_BIT_MASK(64) && - cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR) - { + cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR) { + u64 addr = cell_iommu_get_fixed_address(dev) + + dma_iommu_fixed_base; dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n"); + dev_dbg(dev, "iommu: fixed addr = %llx\n", addr); set_dma_ops(dev, &dma_iommu_fixed_ops); - } else { + set_dma_offset(dev, addr); + return 1; + } + + if (dma_iommu_dma_supported(dev, dma_mask)) { dev_dbg(dev, "iommu: not 64-bit, using default ops\n"); set_dma_ops(dev, get_pci_dma_ops()); + cell_dma_dev_setup(dev); + return 1; } - cell_dma_dev_setup(dev); - - *dev->dma_mask = dma_mask; - return 0; } -static void cell_dma_dev_setup_fixed(struct device *dev) -{ - u64 addr; - - addr = cell_iommu_get_fixed_address(dev) + dma_iommu_fixed_base; - set_dma_offset(dev, addr); - - dev_dbg(dev, "iommu: fixed addr = %llx\n", addr); -} - static void insert_16M_pte(unsigned long addr, unsigned long *ptab, unsigned long base_pte) { @@ -1139,7 +1120,7 @@ static int __init cell_iommu_fixed_mapping_init(void) cell_iommu_setup_window(iommu, np, dbase, dsize, 0); } - dma_iommu_ops.set_dma_mask = dma_set_mask_and_switch; + dma_iommu_ops.dma_supported = dma_suported_and_switch; 
set_pci_dma_ops(&dma_iommu_ops); return 0; diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 117beb9e8786..8a47f168476b 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -519,7 +519,7 @@ static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page, { struct vio_dev *viodev = to_vio_dev(dev); struct iommu_table *tbl; - dma_addr_t ret = DMA_ERROR_CODE; + dma_addr_t ret = IOMMU_MAPPING_ERROR; tbl = get_iommu_table_base(dev); if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)))) { @@ -625,6 +625,7 @@ static const struct dma_map_ops vio_dma_mapping_ops = { .unmap_page = vio_dma_iommu_unmap_page, .dma_supported = vio_dma_iommu_dma_supported, .get_required_mask = vio_dma_get_required_mask, + .mapping_error = dma_iommu_mapping_error, }; /** diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index 0ddd37e6c29d..b9300f8aee10 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -178,7 +178,6 @@ struct compat_statfs64 { u32 f_spare[4]; }; -#define COMPAT_RLIM_OLD_INFINITY 0x7fffffff #define COMPAT_RLIM_INFINITY 0xffffffff typedef u32 compat_old_sigset_t; /* at least 32 bits */ diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h index d0441ad2a990..e508dff92535 100644 --- a/arch/s390/include/asm/ctl_reg.h +++ b/arch/s390/include/asm/ctl_reg.h @@ -59,7 +59,9 @@ union ctlreg0 { unsigned long lap : 1; /* Low-address-protection control */ unsigned long : 4; unsigned long edat : 1; /* Enhanced-DAT-enablement control */ - unsigned long : 4; + unsigned long : 2; + unsigned long iep : 1; /* Instruction-Execution-Protection */ + unsigned long : 1; unsigned long afp : 1; /* AFP-register control */ unsigned long vx : 1; /* Vector enablement control */ unsigned long : 7; diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h index 3108b8dbe266..512ad0eaa11a 
100644 --- a/arch/s390/include/asm/dma-mapping.h +++ b/arch/s390/include/asm/dma-mapping.h @@ -8,8 +8,6 @@ #include <linux/dma-debug.h> #include <linux/io.h> -#define DMA_ERROR_CODE (~(dma_addr_t) 0x0) - extern const struct dma_map_ops s390_pci_dma_ops; static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index 2f924bc30e35..dccf24ee26d3 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -41,24 +41,6 @@ /* The native architecture */ #define KEXEC_ARCH KEXEC_ARCH_S390 -/* - * Size for s390x ELF notes per CPU - * - * Seven notes plus zero note at the end: prstatus, fpregset, timer, - * tod_cmp, tod_reg, control regs, and prefix - */ -#define KEXEC_NOTE_BYTES \ - (ALIGN(sizeof(struct elf_note), 4) * 8 + \ - ALIGN(sizeof("CORE"), 4) * 7 + \ - ALIGN(sizeof(struct elf_prstatus), 4) + \ - ALIGN(sizeof(elf_fpregset_t), 4) + \ - ALIGN(sizeof(u64), 4) + \ - ALIGN(sizeof(u64), 4) + \ - ALIGN(sizeof(u32), 4) + \ - ALIGN(sizeof(u64) * 16, 4) + \ - ALIGN(sizeof(u32), 4) \ - ) - /* Provide a dummy definition to avoid build failures. 
*/ static inline void crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs) { } diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 6baae236f461..a409d5991934 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -42,9 +42,11 @@ #define KVM_HALT_POLL_NS_DEFAULT 80000 /* s390-specific vcpu->requests bit members */ -#define KVM_REQ_ENABLE_IBS 8 -#define KVM_REQ_DISABLE_IBS 9 -#define KVM_REQ_ICPT_OPEREXC 10 +#define KVM_REQ_ENABLE_IBS KVM_ARCH_REQ(0) +#define KVM_REQ_DISABLE_IBS KVM_ARCH_REQ(1) +#define KVM_REQ_ICPT_OPEREXC KVM_ARCH_REQ(2) +#define KVM_REQ_START_MIGRATION KVM_ARCH_REQ(3) +#define KVM_REQ_STOP_MIGRATION KVM_ARCH_REQ(4) #define SIGP_CTRL_C 0x80 #define SIGP_CTRL_SCN_MASK 0x3f @@ -56,7 +58,7 @@ union bsca_sigp_ctrl { __u8 r : 1; __u8 scn : 6; }; -} __packed; +}; union esca_sigp_ctrl { __u16 value; @@ -65,14 +67,14 @@ union esca_sigp_ctrl { __u8 reserved: 7; __u8 scn; }; -} __packed; +}; struct esca_entry { union esca_sigp_ctrl sigp_ctrl; __u16 reserved1[3]; __u64 sda; __u64 reserved2[6]; -} __packed; +}; struct bsca_entry { __u8 reserved0; @@ -80,7 +82,7 @@ struct bsca_entry { __u16 reserved[3]; __u64 sda; __u64 reserved2[2]; -} __attribute__((packed)); +}; union ipte_control { unsigned long val; @@ -97,7 +99,7 @@ struct bsca_block { __u64 mcn; __u64 reserved2; struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS]; -} __attribute__((packed)); +}; struct esca_block { union ipte_control ipte_control; @@ -105,7 +107,7 @@ struct esca_block { __u64 mcn[4]; __u64 reserved2[20]; struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS]; -} __packed; +}; /* * This struct is used to store some machine check info from lowcore @@ -274,7 +276,7 @@ struct kvm_s390_sie_block { struct kvm_s390_itdb { __u8 data[256]; -} __packed; +}; struct sie_page { struct kvm_s390_sie_block sie_block; @@ -282,7 +284,7 @@ struct sie_page { __u8 reserved218[1000]; /* 0x0218 */ struct kvm_s390_itdb itdb; /* 0x0600 */ __u8 
reserved700[2304]; /* 0x0700 */ -} __packed; +}; struct kvm_vcpu_stat { u64 exit_userspace; @@ -695,7 +697,7 @@ struct sie_page2 { __u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */ struct kvm_s390_crypto_cb crycb; /* 0x0800 */ u8 reserved900[0x1000 - 0x900]; /* 0x0900 */ -} __packed; +}; struct kvm_s390_vsie { struct mutex mutex; @@ -705,6 +707,12 @@ struct kvm_s390_vsie { struct page *pages[KVM_MAX_VCPUS]; }; +struct kvm_s390_migration_state { + unsigned long bitmap_size; /* in bits (number of guest pages) */ + atomic64_t dirty_pages; /* number of dirty pages */ + unsigned long *pgste_bitmap; +}; + struct kvm_arch{ void *sca; int use_esca; @@ -732,6 +740,7 @@ struct kvm_arch{ struct kvm_s390_crypto crypto; struct kvm_s390_vsie vsie; u64 epoch; + struct kvm_s390_migration_state *migration_state; /* subset of available cpu features enabled by user space */ DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); }; diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index 13623b9991d4..9d91cf3e427f 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -26,6 +26,12 @@ #define MCCK_CODE_PSW_MWP_VALID _BITUL(63 - 20) #define MCCK_CODE_PSW_IA_VALID _BITUL(63 - 23) +#define MCCK_CR14_CR_PENDING_SUB_MASK (1 << 28) +#define MCCK_CR14_RECOVERY_SUB_MASK (1 << 27) +#define MCCK_CR14_DEGRAD_SUB_MASK (1 << 26) +#define MCCK_CR14_EXT_DAMAGE_SUB_MASK (1 << 25) +#define MCCK_CR14_WARN_SUB_MASK (1 << 24) + #ifndef __ASSEMBLY__ union mci { diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 6ba0bf928909..6bc941be6921 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -64,6 +64,12 @@ static inline void syscall_get_arguments(struct task_struct *task, { unsigned long mask = -1UL; + /* + * No arguments for this syscall, there's nothing to do. 
+ */ + if (!n) + return; + BUG_ON(i + n > 6); #ifdef CONFIG_COMPAT if (test_tsk_thread_flag(task, TIF_31BIT)) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 78f3f093d143..28b528197cf5 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -276,23 +276,6 @@ static inline unsigned long strnlen_user(const char __user *src, unsigned long n return __strnlen_user(src, n); } -/** - * strlen_user: - Get the size of a string in user space. - * @str: The string to measure. - * - * Context: User context only. This function may sleep if pagefaults are - * enabled. - * - * Get the size of a NUL-terminated string in user space. - * - * Returns the size of the string INCLUDING the terminating NUL. - * On exception, returns 0. - * - * If there is a limit on the length of a valid string, you may wish to - * consider using strnlen_user() instead. - */ -#define strlen_user(str) strnlen_user(str, ~0UL) - /* * Zero Userspace */ diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 3dd2a1d308dd..69d09c39bbcd 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -28,6 +28,7 @@ #define KVM_DEV_FLIC_CLEAR_IO_IRQ 8 #define KVM_DEV_FLIC_AISM 9 #define KVM_DEV_FLIC_AIRQ_INJECT 10 +#define KVM_DEV_FLIC_AISM_ALL 11 /* * We can have up to 4*64k pending subchannels + 8 adapter interrupts, * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts. 
@@ -53,6 +54,11 @@ struct kvm_s390_ais_req { __u16 mode; }; +struct kvm_s390_ais_all { + __u8 simm; + __u8 nimm; +}; + #define KVM_S390_IO_ADAPTER_MASK 1 #define KVM_S390_IO_ADAPTER_MAP 2 #define KVM_S390_IO_ADAPTER_UNMAP 3 @@ -70,6 +76,7 @@ struct kvm_s390_io_adapter_req { #define KVM_S390_VM_TOD 1 #define KVM_S390_VM_CRYPTO 2 #define KVM_S390_VM_CPU_MODEL 3 +#define KVM_S390_VM_MIGRATION 4 /* kvm attributes for mem_ctrl */ #define KVM_S390_VM_MEM_ENABLE_CMMA 0 @@ -151,6 +158,11 @@ struct kvm_s390_vm_cpu_subfunc { #define KVM_S390_VM_CRYPTO_DISABLE_AES_KW 2 #define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW 3 +/* kvm attributes for migration mode */ +#define KVM_S390_VM_MIGRATION_STOP 0 +#define KVM_S390_VM_MIGRATION_START 1 +#define KVM_S390_VM_MIGRATION_STATUS 2 + /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { /* general purpose regs for s390 */ diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 875f8bea8c67..653cae5e1ee1 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -89,7 +89,7 @@ struct region3_table_entry_fc1 { unsigned long f : 1; /* Fetch-Protection Bit */ unsigned long fc : 1; /* Format-Control */ unsigned long p : 1; /* DAT-Protection Bit */ - unsigned long co : 1; /* Change-Recording Override */ + unsigned long iep: 1; /* Instruction-Execution-Protection */ unsigned long : 2; unsigned long i : 1; /* Region-Invalid Bit */ unsigned long cr : 1; /* Common-Region Bit */ @@ -131,7 +131,7 @@ struct segment_entry_fc1 { unsigned long f : 1; /* Fetch-Protection Bit */ unsigned long fc : 1; /* Format-Control */ unsigned long p : 1; /* DAT-Protection Bit */ - unsigned long co : 1; /* Change-Recording Override */ + unsigned long iep: 1; /* Instruction-Execution-Protection */ unsigned long : 2; unsigned long i : 1; /* Segment-Invalid Bit */ unsigned long cs : 1; /* Common-Segment Bit */ @@ -168,7 +168,8 @@ union page_table_entry { unsigned long z : 1; /* Zero Bit */ unsigned long i : 1; /* Page-Invalid Bit */ unsigned long 
p : 1; /* DAT-Protection Bit */ - unsigned long : 9; + unsigned long iep: 1; /* Instruction-Execution-Protection */ + unsigned long : 8; }; }; @@ -241,7 +242,7 @@ struct ale { unsigned long asteo : 25; /* ASN-Second-Table-Entry Origin */ unsigned long : 6; unsigned long astesn : 32; /* ASTE Sequence Number */ -} __packed; +}; struct aste { unsigned long i : 1; /* ASX-Invalid Bit */ @@ -257,7 +258,7 @@ struct aste { unsigned long ald : 32; unsigned long astesn : 32; /* .. more fields there */ -} __packed; +}; int ipte_lock_held(struct kvm_vcpu *vcpu) { @@ -485,6 +486,7 @@ enum prot_type { PROT_TYPE_KEYC = 1, PROT_TYPE_ALC = 2, PROT_TYPE_DAT = 3, + PROT_TYPE_IEP = 4, }; static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, @@ -500,6 +502,9 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, switch (code) { case PGM_PROTECTION: switch (prot) { + case PROT_TYPE_IEP: + tec->b61 = 1; + /* FALL THROUGH */ case PROT_TYPE_LA: tec->b56 = 1; break; @@ -591,6 +596,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val) * @gpa: points to where guest physical (absolute) address should be stored * @asce: effective asce * @mode: indicates the access mode to be used + * @prot: returns the type for protection exceptions * * Translate a guest virtual address into a guest absolute address by means * of dynamic address translation as specified by the architecture. 
@@ -606,19 +612,21 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val) */ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, unsigned long *gpa, const union asce asce, - enum gacc_mode mode) + enum gacc_mode mode, enum prot_type *prot) { union vaddress vaddr = {.addr = gva}; union raddress raddr = {.addr = gva}; union page_table_entry pte; int dat_protection = 0; + int iep_protection = 0; union ctlreg0 ctlreg0; unsigned long ptr; - int edat1, edat2; + int edat1, edat2, iep; ctlreg0.val = vcpu->arch.sie_block->gcr[0]; edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8); edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78); + iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130); if (asce.r) goto real_address; ptr = asce.origin * 4096; @@ -702,6 +710,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, return PGM_TRANSLATION_SPEC; if (rtte.fc && edat2) { dat_protection |= rtte.fc1.p; + iep_protection = rtte.fc1.iep; raddr.rfaa = rtte.fc1.rfaa; goto absolute_address; } @@ -729,6 +738,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, return PGM_TRANSLATION_SPEC; if (ste.fc && edat1) { dat_protection |= ste.fc1.p; + iep_protection = ste.fc1.iep; raddr.sfaa = ste.fc1.sfaa; goto absolute_address; } @@ -745,12 +755,19 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, if (pte.z) return PGM_TRANSLATION_SPEC; dat_protection |= pte.p; + iep_protection = pte.iep; raddr.pfra = pte.pfra; real_address: raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr); absolute_address: - if (mode == GACC_STORE && dat_protection) + if (mode == GACC_STORE && dat_protection) { + *prot = PROT_TYPE_DAT; return PGM_PROTECTION; + } + if (mode == GACC_IFETCH && iep_protection && iep) { + *prot = PROT_TYPE_IEP; + return PGM_PROTECTION; + } if (kvm_is_error_gpa(vcpu->kvm, raddr.addr)) return PGM_ADDRESSING; *gpa = raddr.addr; @@ -782,6 +799,7 @@ 
static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, { psw_t *psw = &vcpu->arch.sie_block->gpsw; int lap_enabled, rc = 0; + enum prot_type prot; lap_enabled = low_address_protection_enabled(vcpu, asce); while (nr_pages) { @@ -791,7 +809,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, PROT_TYPE_LA); ga &= PAGE_MASK; if (psw_bits(*psw).dat) { - rc = guest_translate(vcpu, ga, pages, asce, mode); + rc = guest_translate(vcpu, ga, pages, asce, mode, &prot); if (rc < 0) return rc; } else { @@ -800,7 +818,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, rc = PGM_ADDRESSING; } if (rc) - return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_DAT); + return trans_exc(vcpu, rc, ga, ar, mode, prot); ga += PAGE_SIZE; pages++; nr_pages--; @@ -886,6 +904,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, unsigned long *gpa, enum gacc_mode mode) { psw_t *psw = &vcpu->arch.sie_block->gpsw; + enum prot_type prot; union asce asce; int rc; @@ -900,9 +919,9 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, } if (psw_bits(*psw).dat && !asce.r) { /* Use DAT? 
*/ - rc = guest_translate(vcpu, gva, gpa, asce, mode); + rc = guest_translate(vcpu, gva, gpa, asce, mode, &prot); if (rc > 0) - return trans_exc(vcpu, rc, gva, 0, mode, PROT_TYPE_DAT); + return trans_exc(vcpu, rc, gva, 0, mode, prot); } else { *gpa = kvm_s390_real_to_abs(vcpu, gva); if (kvm_is_error_gpa(vcpu->kvm, *gpa)) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 2d120fef7d90..a619ddae610d 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -251,8 +251,13 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu) __clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask); if (psw_mchk_disabled(vcpu)) active_mask &= ~IRQ_PEND_MCHK_MASK; + /* + * Check both floating and local interrupt's cr14 because + * bit IRQ_PEND_MCHK_REP could be set in both cases. + */ if (!(vcpu->arch.sie_block->gcr[14] & - vcpu->kvm->arch.float_int.mchk.cr14)) + (vcpu->kvm->arch.float_int.mchk.cr14 | + vcpu->arch.local_int.irq.mchk.cr14))) __clear_bit(IRQ_PEND_MCHK_REP, &active_mask); /* @@ -1876,6 +1881,28 @@ out: return ret < 0 ? 
ret : n; } +static int flic_ais_mode_get_all(struct kvm *kvm, struct kvm_device_attr *attr) +{ + struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; + struct kvm_s390_ais_all ais; + + if (attr->attr < sizeof(ais)) + return -EINVAL; + + if (!test_kvm_facility(kvm, 72)) + return -ENOTSUPP; + + mutex_lock(&fi->ais_lock); + ais.simm = fi->simm; + ais.nimm = fi->nimm; + mutex_unlock(&fi->ais_lock); + + if (copy_to_user((void __user *)attr->addr, &ais, sizeof(ais))) + return -EFAULT; + + return 0; +} + static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { int r; @@ -1885,6 +1912,9 @@ static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) r = get_all_floating_irqs(dev->kvm, (u8 __user *) attr->addr, attr->attr); break; + case KVM_DEV_FLIC_AISM_ALL: + r = flic_ais_mode_get_all(dev->kvm, attr); + break; default: r = -EINVAL; } @@ -2235,6 +2265,25 @@ static int flic_inject_airq(struct kvm *kvm, struct kvm_device_attr *attr) return kvm_s390_inject_airq(kvm, adapter); } +static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr) +{ + struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; + struct kvm_s390_ais_all ais; + + if (!test_kvm_facility(kvm, 72)) + return -ENOTSUPP; + + if (copy_from_user(&ais, (void __user *)attr->addr, sizeof(ais))) + return -EFAULT; + + mutex_lock(&fi->ais_lock); + fi->simm = ais.simm; + fi->nimm = ais.nimm; + mutex_unlock(&fi->ais_lock); + + return 0; +} + static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { int r = 0; @@ -2277,6 +2326,9 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) case KVM_DEV_FLIC_AIRQ_INJECT: r = flic_inject_airq(dev->kvm, attr); break; + case KVM_DEV_FLIC_AISM_ALL: + r = flic_ais_mode_set_all(dev->kvm, attr); + break; default: r = -EINVAL; } @@ -2298,6 +2350,7 @@ static int flic_has_attr(struct kvm_device *dev, case KVM_DEV_FLIC_CLEAR_IO_IRQ: case KVM_DEV_FLIC_AISM: case 
KVM_DEV_FLIC_AIRQ_INJECT: + case KVM_DEV_FLIC_AISM_ALL: return 0; } return -ENXIO; @@ -2415,6 +2468,42 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e, return ret; } +/* + * Inject the machine check to the guest. + */ +void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu, + struct mcck_volatile_info *mcck_info) +{ + struct kvm_s390_interrupt_info inti; + struct kvm_s390_irq irq; + struct kvm_s390_mchk_info *mchk; + union mci mci; + __u64 cr14 = 0; /* upper bits are not used */ + + mci.val = mcck_info->mcic; + if (mci.sr) + cr14 |= MCCK_CR14_RECOVERY_SUB_MASK; + if (mci.dg) + cr14 |= MCCK_CR14_DEGRAD_SUB_MASK; + if (mci.w) + cr14 |= MCCK_CR14_WARN_SUB_MASK; + + mchk = mci.ck ? &inti.mchk : &irq.u.mchk; + mchk->cr14 = cr14; + mchk->mcic = mcck_info->mcic; + mchk->ext_damage_code = mcck_info->ext_damage_code; + mchk->failing_storage_address = mcck_info->failing_storage_address; + if (mci.ck) { + /* Inject the floating machine check */ + inti.type = KVM_S390_MCHK; + WARN_ON_ONCE(__inject_vm(vcpu->kvm, &inti)); + } else { + /* Inject the machine check to specified vcpu */ + irq.type = KVM_S390_MCHK; + WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); + } +} + int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b0d7de5a533d..3f2884e99ed4 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -30,6 +30,7 @@ #include <linux/vmalloc.h> #include <linux/bitmap.h> #include <linux/sched/signal.h> +#include <linux/string.h> #include <asm/asm-offsets.h> #include <asm/lowcore.h> @@ -386,6 +387,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_SKEYS: case KVM_CAP_S390_IRQ_STATE: case KVM_CAP_S390_USER_INSTR0: + case KVM_CAP_S390_CMMA_MIGRATION: case KVM_CAP_S390_AIS: r = 1; break; @@ -749,6 +751,129 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct 
kvm_device_attr *attr) return 0; } +static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req) +{ + int cx; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(cx, vcpu, kvm) + kvm_s390_sync_request(req, vcpu); +} + +/* + * Must be called with kvm->srcu held to avoid races on memslots, and with + * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration. + */ +static int kvm_s390_vm_start_migration(struct kvm *kvm) +{ + struct kvm_s390_migration_state *mgs; + struct kvm_memory_slot *ms; + /* should be the only one */ + struct kvm_memslots *slots; + unsigned long ram_pages; + int slotnr; + + /* migration mode already enabled */ + if (kvm->arch.migration_state) + return 0; + + slots = kvm_memslots(kvm); + if (!slots || !slots->used_slots) + return -EINVAL; + + mgs = kzalloc(sizeof(*mgs), GFP_KERNEL); + if (!mgs) + return -ENOMEM; + kvm->arch.migration_state = mgs; + + if (kvm->arch.use_cmma) { + /* + * Get the last slot. They should be sorted by base_gfn, so the + * last slot is also the one at the end of the address space. + * We have verified above that at least one slot is present. + */ + ms = slots->memslots + slots->used_slots - 1; + /* round up so we only use full longs */ + ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG); + /* allocate enough bytes to store all the bits */ + mgs->pgste_bitmap = vmalloc(ram_pages / 8); + if (!mgs->pgste_bitmap) { + kfree(mgs); + kvm->arch.migration_state = NULL; + return -ENOMEM; + } + + mgs->bitmap_size = ram_pages; + atomic64_set(&mgs->dirty_pages, ram_pages); + /* mark all the pages in active slots as dirty */ + for (slotnr = 0; slotnr < slots->used_slots; slotnr++) { + ms = slots->memslots + slotnr; + bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages); + } + + kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION); + } + return 0; +} + +/* + * Must be called with kvm->lock to avoid races with ourselves and + * kvm_s390_vm_start_migration. 
+ */ +static int kvm_s390_vm_stop_migration(struct kvm *kvm) +{ + struct kvm_s390_migration_state *mgs; + + /* migration mode already disabled */ + if (!kvm->arch.migration_state) + return 0; + mgs = kvm->arch.migration_state; + kvm->arch.migration_state = NULL; + + if (kvm->arch.use_cmma) { + kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION); + vfree(mgs->pgste_bitmap); + } + kfree(mgs); + return 0; +} + +static int kvm_s390_vm_set_migration(struct kvm *kvm, + struct kvm_device_attr *attr) +{ + int idx, res = -ENXIO; + + mutex_lock(&kvm->lock); + switch (attr->attr) { + case KVM_S390_VM_MIGRATION_START: + idx = srcu_read_lock(&kvm->srcu); + res = kvm_s390_vm_start_migration(kvm); + srcu_read_unlock(&kvm->srcu, idx); + break; + case KVM_S390_VM_MIGRATION_STOP: + res = kvm_s390_vm_stop_migration(kvm); + break; + default: + break; + } + mutex_unlock(&kvm->lock); + + return res; +} + +static int kvm_s390_vm_get_migration(struct kvm *kvm, + struct kvm_device_attr *attr) +{ + u64 mig = (kvm->arch.migration_state != NULL); + + if (attr->attr != KVM_S390_VM_MIGRATION_STATUS) + return -ENXIO; + + if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig))) + return -EFAULT; + return 0; +} + static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) { u8 gtod_high; @@ -1089,6 +1214,9 @@ static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) case KVM_S390_VM_CRYPTO: ret = kvm_s390_vm_set_crypto(kvm, attr); break; + case KVM_S390_VM_MIGRATION: + ret = kvm_s390_vm_set_migration(kvm, attr); + break; default: ret = -ENXIO; break; @@ -1111,6 +1239,9 @@ static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr) case KVM_S390_VM_CPU_MODEL: ret = kvm_s390_get_cpu_model(kvm, attr); break; + case KVM_S390_VM_MIGRATION: + ret = kvm_s390_vm_get_migration(kvm, attr); + break; default: ret = -ENXIO; break; @@ -1178,6 +1309,9 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) 
break; } break; + case KVM_S390_VM_MIGRATION: + ret = 0; + break; default: ret = -ENXIO; break; @@ -1285,6 +1419,182 @@ out: return r; } +/* + * Base address and length must be sent at the start of each block, therefore + * it's cheaper to send some clean data, as long as it's less than the size of + * two longs. + */ +#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *)) +/* for consistency */ +#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX) + +/* + * This function searches for the next page with dirty CMMA attributes, and + * saves the attributes in the buffer up to either the end of the buffer or + * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found; + * no trailing clean bytes are saved. + * In case no dirty bits were found, or if CMMA was not enabled or used, the + * output buffer will indicate 0 as length. + */ +static int kvm_s390_get_cmma_bits(struct kvm *kvm, + struct kvm_s390_cmma_log *args) +{ + struct kvm_s390_migration_state *s = kvm->arch.migration_state; + unsigned long bufsize, hva, pgstev, i, next, cur; + int srcu_idx, peek, r = 0, rr; + u8 *res; + + cur = args->start_gfn; + i = next = pgstev = 0; + + if (unlikely(!kvm->arch.use_cmma)) + return -ENXIO; + /* Invalid/unsupported flags were specified */ + if (args->flags & ~KVM_S390_CMMA_PEEK) + return -EINVAL; + /* Migration mode query, and we are not doing a migration */ + peek = !!(args->flags & KVM_S390_CMMA_PEEK); + if (!peek && !s) + return -EINVAL; + /* CMMA is disabled or was not used, or the buffer has length zero */ + bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); + if (!bufsize || !kvm->mm->context.use_cmma) { + memset(args, 0, sizeof(*args)); + return 0; + } + + if (!peek) { + /* We are not peeking, and there are no dirty pages */ + if (!atomic64_read(&s->dirty_pages)) { + memset(args, 0, sizeof(*args)); + return 0; + } + cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, + args->start_gfn); + if (cur >= s->bitmap_size) /* nothing found, loop back */ 
+ cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0); + if (cur >= s->bitmap_size) { /* again! (very unlikely) */ + memset(args, 0, sizeof(*args)); + return 0; + } + next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1); + } + + res = vmalloc(bufsize); + if (!res) + return -ENOMEM; + + args->start_gfn = cur; + + down_read(&kvm->mm->mmap_sem); + srcu_idx = srcu_read_lock(&kvm->srcu); + while (i < bufsize) { + hva = gfn_to_hva(kvm, cur); + if (kvm_is_error_hva(hva)) { + r = -EFAULT; + break; + } + /* decrement only if we actually flipped the bit to 0 */ + if (!peek && test_and_clear_bit(cur, s->pgste_bitmap)) + atomic64_dec(&s->dirty_pages); + r = get_pgste(kvm->mm, hva, &pgstev); + if (r < 0) + pgstev = 0; + /* save the value */ + res[i++] = (pgstev >> 24) & 0x3; + /* + * if the next bit is too far away, stop. + * if we reached the previous "next", find the next one + */ + if (!peek) { + if (next > cur + KVM_S390_MAX_BIT_DISTANCE) + break; + if (cur == next) + next = find_next_bit(s->pgste_bitmap, + s->bitmap_size, cur + 1); + /* reached the end of the bitmap or of the buffer, stop */ + if ((next >= s->bitmap_size) || + (next >= args->start_gfn + bufsize)) + break; + } + cur++; + } + srcu_read_unlock(&kvm->srcu, srcu_idx); + up_read(&kvm->mm->mmap_sem); + args->count = i; + args->remaining = s ? atomic64_read(&s->dirty_pages) : 0; + + rr = copy_to_user((void __user *)args->values, res, args->count); + if (rr) + r = -EFAULT; + + vfree(res); + return r; +} + +/* + * This function sets the CMMA attributes for the given pages. If the input + * buffer has zero length, no action is taken, otherwise the attributes are + * set and the mm->context.use_cmma flag is set. 
+ */ +static int kvm_s390_set_cmma_bits(struct kvm *kvm, + const struct kvm_s390_cmma_log *args) +{ + unsigned long hva, mask, pgstev, i; + uint8_t *bits; + int srcu_idx, r = 0; + + mask = args->mask; + + if (!kvm->arch.use_cmma) + return -ENXIO; + /* invalid/unsupported flags */ + if (args->flags != 0) + return -EINVAL; + /* Enforce sane limit on memory allocation */ + if (args->count > KVM_S390_CMMA_SIZE_MAX) + return -EINVAL; + /* Nothing to do */ + if (args->count == 0) + return 0; + + bits = vmalloc(sizeof(*bits) * args->count); + if (!bits) + return -ENOMEM; + + r = copy_from_user(bits, (void __user *)args->values, args->count); + if (r) { + r = -EFAULT; + goto out; + } + + down_read(&kvm->mm->mmap_sem); + srcu_idx = srcu_read_lock(&kvm->srcu); + for (i = 0; i < args->count; i++) { + hva = gfn_to_hva(kvm, args->start_gfn + i); + if (kvm_is_error_hva(hva)) { + r = -EFAULT; + break; + } + + pgstev = bits[i]; + pgstev = pgstev << 24; + mask &= _PGSTE_GPS_USAGE_MASK; + set_pgste_bits(kvm->mm, hva, mask, pgstev); + } + srcu_read_unlock(&kvm->srcu, srcu_idx); + up_read(&kvm->mm->mmap_sem); + + if (!kvm->mm->context.use_cmma) { + down_write(&kvm->mm->mmap_sem); + kvm->mm->context.use_cmma = 1; + up_write(&kvm->mm->mmap_sem); + } +out: + vfree(bits); + return r; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1363,6 +1673,29 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_s390_set_skeys(kvm, &args); break; } + case KVM_S390_GET_CMMA_BITS: { + struct kvm_s390_cmma_log args; + + r = -EFAULT; + if (copy_from_user(&args, argp, sizeof(args))) + break; + r = kvm_s390_get_cmma_bits(kvm, &args); + if (!r) { + r = copy_to_user(argp, &args, sizeof(args)); + if (r) + r = -EFAULT; + } + break; + } + case KVM_S390_SET_CMMA_BITS: { + struct kvm_s390_cmma_log args; + + r = -EFAULT; + if (copy_from_user(&args, argp, sizeof(args))) + break; + r = kvm_s390_set_cmma_bits(kvm, &args); + break; + } default: r = -ENOTTY; } @@ -1631,6 
+1964,10 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_s390_destroy_adapters(kvm); kvm_s390_clear_float_irqs(kvm); kvm_s390_vsie_destroy(kvm); + if (kvm->arch.migration_state) { + vfree(kvm->arch.migration_state->pgste_bitmap); + kfree(kvm->arch.migration_state); + } KVM_EVENT(3, "vm 0x%pK destroyed", kvm); } @@ -1975,7 +2312,6 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) if (!vcpu->arch.sie_block->cbrlo) return -ENOMEM; - vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI; return 0; } @@ -2439,7 +2775,7 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) { retry: kvm_s390_vcpu_request_handled(vcpu); - if (!vcpu->requests) + if (!kvm_request_pending(vcpu)) return 0; /* * We use MMU_RELOAD just to re-arm the ipte notifier for the @@ -2488,6 +2824,27 @@ retry: goto retry; } + if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) { + /* + * Disable CMMA virtualization; we will emulate the ESSA + * instruction manually, in order to provide additional + * functionalities needed for live migration. + */ + vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA; + goto retry; + } + + if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) { + /* + * Re-enable CMMA virtualization if CMMA is available and + * was used. 
+ */ + if ((vcpu->kvm->arch.use_cmma) && + (vcpu->kvm->mm->context.use_cmma)) + vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; + goto retry; + } + /* nothing to do, just clear the request */ kvm_clear_request(KVM_REQ_UNHALT, vcpu); @@ -2682,6 +3039,9 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) { + struct mcck_volatile_info *mcck_info; + struct sie_page *sie_page; + VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", vcpu->arch.sie_block->icptcode); trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); @@ -2692,6 +3052,15 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; + if (exit_reason == -EINTR) { + VCPU_EVENT(vcpu, 3, "%s", "machine check"); + sie_page = container_of(vcpu->arch.sie_block, + struct sie_page, sie_block); + mcck_info = &sie_page->mcck_info; + kvm_s390_reinject_machine_check(vcpu, mcck_info); + return 0; + } + if (vcpu->arch.sie_block->icptcode > 0) { int rc = kvm_handle_sie_intercept(vcpu); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 55f5c8457d6d..6fedc8bc7a37 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -397,4 +397,6 @@ static inline int kvm_s390_use_sca_entries(void) */ return sclp.has_sigpif; } +void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu, + struct mcck_volatile_info *mcck_info); #endif diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index e53292a89257..8a1dac793d6b 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -24,6 +24,7 @@ #include <asm/ebcdic.h> #include <asm/sysinfo.h> #include <asm/pgtable.h> +#include <asm/page-states.h> #include <asm/pgalloc.h> #include <asm/gmap.h> #include <asm/io.h> @@ -949,13 +950,72 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) return 0; } +static inline int do_essa(struct kvm_vcpu *vcpu, const int orc) +{ + 
struct kvm_s390_migration_state *ms = vcpu->kvm->arch.migration_state; + int r1, r2, nappended, entries; + unsigned long gfn, hva, res, pgstev, ptev; + unsigned long *cbrlo; + + /* + * We don't need to set SD.FPF.SK to 1 here, because if we have a + * machine check here we either handle it or crash + */ + + kvm_s390_get_regs_rre(vcpu, &r1, &r2); + gfn = vcpu->run->s.regs.gprs[r2] >> PAGE_SHIFT; + hva = gfn_to_hva(vcpu->kvm, gfn); + entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3; + + if (kvm_is_error_hva(hva)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + nappended = pgste_perform_essa(vcpu->kvm->mm, hva, orc, &ptev, &pgstev); + if (nappended < 0) { + res = orc ? 0x10 : 0; + vcpu->run->s.regs.gprs[r1] = res; /* Exception Indication */ + return 0; + } + res = (pgstev & _PGSTE_GPS_USAGE_MASK) >> 22; + /* + * Set the block-content state part of the result. 0 means resident, so + * nothing to do if the page is valid. 2 is for preserved pages + * (non-present and non-zero), and 3 for zero pages (non-present and + * zero). + */ + if (ptev & _PAGE_INVALID) { + res |= 2; + if (pgstev & _PGSTE_GPS_ZERO) + res |= 1; + } + vcpu->run->s.regs.gprs[r1] = res; + /* + * It is possible that all the normal 511 slots were full, in which case + * we will now write in the 512th slot, which is reserved for host use. + * In both cases we let the normal essa handling code process all the + * slots, including the reserved one, if needed. 
+ */ + if (nappended > 0) { + cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo & PAGE_MASK); + cbrlo[entries] = gfn << PAGE_SHIFT; + } + + if (orc) { + /* increment only if we are really flipping the bit to 1 */ + if (!test_and_set_bit(gfn, ms->pgste_bitmap)) + atomic64_inc(&ms->dirty_pages); + } + + return nappended; +} + static int handle_essa(struct kvm_vcpu *vcpu) { /* entries expected to be 1FF */ int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3; unsigned long *cbrlo; struct gmap *gmap; - int i; + int i, orc; VCPU_EVENT(vcpu, 4, "ESSA: release %d pages", entries); gmap = vcpu->arch.gmap; @@ -965,12 +1025,45 @@ static int handle_essa(struct kvm_vcpu *vcpu) if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - - if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6) + /* Check for invalid operation request code */ + orc = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28; + if (orc > ESSA_MAX) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - /* Retry the ESSA instruction */ - kvm_s390_retry_instr(vcpu); + if (likely(!vcpu->kvm->arch.migration_state)) { + /* + * CMMA is enabled in the KVM settings, but is disabled in + * the SIE block and in the mm_context, and we are not doing + * a migration. Enable CMMA in the mm_context. + * Since we need to take a write lock to write to the context + * to avoid races with storage keys handling, we check if the + * value really needs to be written to; if the value is + * already correct, we do nothing and avoid the lock. + */ + if (vcpu->kvm->mm->context.use_cmma == 0) { + down_write(&vcpu->kvm->mm->mmap_sem); + vcpu->kvm->mm->context.use_cmma = 1; + up_write(&vcpu->kvm->mm->mmap_sem); + } + /* + * If we are here, we are supposed to have CMMA enabled in + * the SIE block. Enabling CMMA works on a per-CPU basis, + * while the context use_cmma flag is per process. 
+ * It's possible that the context flag is enabled and the + * SIE flag is not, so we set the flag always; if it was + * already set, nothing changes, otherwise we enable it + * on this CPU too. + */ + vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; + /* Retry the ESSA instruction */ + kvm_s390_retry_instr(vcpu); + } else { + /* Account for the possible extra cbrl entry */ + i = do_essa(vcpu, orc); + if (i < 0) + return i; + entries += i; + } vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */ cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo); down_read(&gmap->mm->mmap_sem); diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 4719ecb9ab42..715c19c45d9a 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -26,16 +26,21 @@ struct vsie_page { struct kvm_s390_sie_block scb_s; /* 0x0000 */ + /* + * the backup info for machine check. ensure it's at + * the same offset as that in struct sie_page! + */ + struct mcck_volatile_info mcck_info; /* 0x0200 */ /* the pinned originial scb */ - struct kvm_s390_sie_block *scb_o; /* 0x0200 */ + struct kvm_s390_sie_block *scb_o; /* 0x0218 */ /* the shadow gmap in use by the vsie_page */ - struct gmap *gmap; /* 0x0208 */ + struct gmap *gmap; /* 0x0220 */ /* address of the last reported fault to guest2 */ - unsigned long fault_addr; /* 0x0210 */ - __u8 reserved[0x0700 - 0x0218]; /* 0x0218 */ + unsigned long fault_addr; /* 0x0228 */ + __u8 reserved[0x0700 - 0x0230]; /* 0x0230 */ struct kvm_s390_crypto_cb crycb; /* 0x0700 */ __u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */ -} __packed; +}; /* trigger a validity icpt for the given scb */ static int set_validity_icpt(struct kvm_s390_sie_block *scb, @@ -801,6 +806,8 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) { struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; + struct mcck_volatile_info *mcck_info; + struct sie_page *sie_page; int rc; handle_last_fault(vcpu, vsie_page); @@ 
-822,6 +829,14 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) local_irq_enable(); vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + if (rc == -EINTR) { + VCPU_EVENT(vcpu, 3, "%s", "machine check"); + sie_page = container_of(scb_s, struct sie_page, sie_block); + mcck_info = &sie_page->mcck_info; + kvm_s390_reinject_machine_check(vcpu, mcck_info); + return 0; + } + if (rc > 0) rc = 0; /* we could still have an icpt */ else if (rc == -EFAULT) diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 8eb1cc341dab..0d300ee00f4e 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -14,6 +14,8 @@ #include <linux/pci.h> #include <asm/pci_dma.h> +#define S390_MAPPING_ERROR (~(dma_addr_t) 0x0) + static struct kmem_cache *dma_region_table_cache; static struct kmem_cache *dma_page_table_cache; static int s390_iommu_strict; @@ -281,7 +283,7 @@ static dma_addr_t dma_alloc_address(struct device *dev, int size) out_error: spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); - return DMA_ERROR_CODE; + return S390_MAPPING_ERROR; } static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size) @@ -329,7 +331,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, /* This rounds up number of pages based on size and offset */ nr_pages = iommu_num_pages(pa, size, PAGE_SIZE); dma_addr = dma_alloc_address(dev, nr_pages); - if (dma_addr == DMA_ERROR_CODE) { + if (dma_addr == S390_MAPPING_ERROR) { ret = -ENOSPC; goto out_err; } @@ -352,7 +354,7 @@ out_free: out_err: zpci_err("map error:\n"); zpci_err_dma(ret, pa); - return DMA_ERROR_CODE; + return S390_MAPPING_ERROR; } static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, @@ -429,7 +431,7 @@ static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg, int ret; dma_addr_base = dma_alloc_address(dev, nr_pages); - if (dma_addr_base == DMA_ERROR_CODE) + if (dma_addr_base == S390_MAPPING_ERROR) return 
-ENOMEM; dma_addr = dma_addr_base; @@ -476,7 +478,7 @@ static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg, for (i = 1; i < nr_elements; i++) { s = sg_next(s); - s->dma_address = DMA_ERROR_CODE; + s->dma_address = S390_MAPPING_ERROR; s->dma_length = 0; if (s->offset || (size & ~PAGE_MASK) || @@ -525,6 +527,11 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, s->dma_length = 0; } } + +static int s390_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return dma_addr == S390_MAPPING_ERROR; +} int zpci_dma_init_device(struct zpci_dev *zdev) { @@ -659,6 +666,7 @@ const struct dma_map_ops s390_pci_dma_ops = { .unmap_sg = s390_dma_unmap_sg, .map_page = s390_dma_map_pages, .unmap_page = s390_dma_unmap_pages, + .mapping_error = s390_mapping_error, /* if we support direct DMA this must be conditional */ .is_phys = 0, /* dma_supported is unconditionally true without a callback */ diff --git a/arch/score/include/asm/uaccess.h b/arch/score/include/asm/uaccess.h index 916e5dbf0bfd..0ef220474d9b 100644 --- a/arch/score/include/asm/uaccess.h +++ b/arch/score/include/asm/uaccess.h @@ -359,12 +359,6 @@ static inline int strncpy_from_user(char *dst, const char *src, long len) return -EFAULT; } -extern int __strlen_user(const char *src); -static inline long strlen_user(const char __user *src) -{ - return __strlen_user(src); -} - extern int __strnlen_user(const char *str, long len); static inline long strnlen_user(const char __user *str, long len) { diff --git a/arch/score/lib/string.S b/arch/score/lib/string.S index 16efa3ad037f..e0c0318c9010 100644 --- a/arch/score/lib/string.S +++ b/arch/score/lib/string.S @@ -104,34 +104,6 @@ ENTRY(__strnlen_user) .previous .align 2 -ENTRY(__strlen_user) -0: lb r6, [r4] - mv r7, r4 - extsb r6, r6 - cmpi.c r6, 0 - mv r4, r6 - beq .L27 -.L28: -1: lb r6, [r7, 1]+ - addi r6, 1 - cmpi.c r6, 0 - bne .L28 -.L27: - br r3 - .section .fixup, "ax" - ldi r4, 0x0 - br r3 -99: - ldi r4, 0 - br r3 - .previous 
- .section __ex_table, "a" - .align 2 - .word 0b ,99b - .word 1b ,99b - .previous - - .align 2 ENTRY(__copy_tofrom_user) cmpi.c r6, 0 mv r10,r6 diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index ee086958b2b2..640a85925060 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -2,6 +2,7 @@ config SUPERH def_bool y select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_NO_COHERENT_DMA_MMAP if !MMU select HAVE_PATA_PLATFORM select CLKDEV_LOOKUP select HAVE_IDE if HAS_IOPORT_MAP diff --git a/arch/sh/include/asm/dma-mapping.h b/arch/sh/include/asm/dma-mapping.h index d99008af5f73..9b06be07db4d 100644 --- a/arch/sh/include/asm/dma-mapping.h +++ b/arch/sh/include/asm/dma-mapping.h @@ -9,8 +9,6 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return dma_ops; } -#define DMA_ERROR_CODE 0 - void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction dir); diff --git a/arch/sh/include/asm/uaccess.h b/arch/sh/include/asm/uaccess.h index 2722b61b2283..211b44920dbe 100644 --- a/arch/sh/include/asm/uaccess.h +++ b/arch/sh/include/asm/uaccess.h @@ -100,7 +100,6 @@ struct __large_struct { unsigned long buf[100]; }; extern long strncpy_from_user(char *dest, const char __user *src, long count); -extern __must_check long strlen_user(const char __user *str); extern __must_check long strnlen_user(const char __user *str, long n); /* Generic arbitrary sized copy. 
*/ diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c index 53783978162e..d18724d186f3 100644 --- a/arch/sh/kernel/ftrace.c +++ b/arch/sh/kernel/ftrace.c @@ -96,19 +96,6 @@ static int mod_code_status; /* holds return value of text write */ static void *mod_code_ip; /* holds the IP to write to */ static void *mod_code_newcode; /* holds the text to write to the IP */ -static unsigned nmi_wait_count; -static atomic_t nmi_update_count = ATOMIC_INIT(0); - -int ftrace_arch_read_dyn_info(char *buf, int size) -{ - int r; - - r = snprintf(buf, size, "%u %u", - nmi_wait_count, - atomic_read(&nmi_update_count)); - return r; -} - static void clear_mod_flag(void) { int old = atomic_read(&nmi_running); @@ -144,7 +131,6 @@ void arch_ftrace_nmi_enter(void) if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) { smp_rmb(); ftrace_mod_code(); - atomic_inc(&nmi_update_count); } /* Must have previous changes seen before executions */ smp_mb(); @@ -165,8 +151,6 @@ static void wait_for_nmi_and_set_mod_flag(void) do { cpu_relax(); } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG)); - - nmi_wait_count++; } static void wait_for_nmi(void) @@ -177,8 +161,6 @@ static void wait_for_nmi(void) do { cpu_relax(); } while (atomic_read(&nmi_running)); - - nmi_wait_count++; } static int diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h index 69cc627779f2..60bf1633d554 100644 --- a/arch/sparc/include/asm/dma-mapping.h +++ b/arch/sparc/include/asm/dma-mapping.h @@ -5,11 +5,6 @@ #include <linux/mm.h> #include <linux/dma-debug.h> -#define DMA_ERROR_CODE (~(dma_addr_t)0x0) - -#define HAVE_ARCH_DMA_SUPPORTED 1 -int dma_supported(struct device *dev, u64 mask); - static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction dir) { @@ -19,7 +14,6 @@ static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, } extern const struct dma_map_ops *dma_ops; -extern const struct 
dma_map_ops *leon_dma_ops; extern const struct dma_map_ops pci32_dma_ops; extern struct bus_type pci_bus_type; @@ -28,7 +22,7 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { #ifdef CONFIG_SPARC_LEON if (sparc_cpu_model == sparc_leon) - return leon_dma_ops; + return &pci32_dma_ops; #endif #if defined(CONFIG_SPARC32) && defined(CONFIG_PCI) if (bus == &pci_bus_type) diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h index 12ebee2d97c7..bdb1447aa1bb 100644 --- a/arch/sparc/include/asm/uaccess_32.h +++ b/arch/sparc/include/asm/uaccess_32.h @@ -277,7 +277,6 @@ static inline unsigned long clear_user(void __user *addr, unsigned long n) return n; } -__must_check long strlen_user(const char __user *str); __must_check long strnlen_user(const char __user *str, long n); #endif /* _ASM_UACCESS_H */ diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index 6096d671aa63..113d84eaa15e 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -194,7 +194,6 @@ unsigned long __must_check __clear_user(void __user *, unsigned long); #define clear_user __clear_user -__must_check long strlen_user(const char __user *str); __must_check long strnlen_user(const char __user *str, long n); struct pt_regs; diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index c63ba99ca551..fcbcc031f615 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -314,7 +314,7 @@ bad: bad_no_ctx: if (printk_ratelimit()) WARN_ON(1); - return DMA_ERROR_CODE; + return SPARC_MAPPING_ERROR; } static void strbuf_flush(struct strbuf *strbuf, struct iommu *iommu, @@ -547,7 +547,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, if (outcount < incount) { outs = sg_next(outs); - outs->dma_address = DMA_ERROR_CODE; + outs->dma_address = SPARC_MAPPING_ERROR; outs->dma_length = 0; } @@ -573,7 +573,7 @@ iommu_map_failed: 
iommu_tbl_range_free(&iommu->tbl, vaddr, npages, IOMMU_ERROR_CODE); - s->dma_address = DMA_ERROR_CODE; + s->dma_address = SPARC_MAPPING_ERROR; s->dma_length = 0; } if (s == outs) @@ -741,6 +741,26 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev, spin_unlock_irqrestore(&iommu->lock, flags); } +static int dma_4u_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return dma_addr == SPARC_MAPPING_ERROR; +} + +static int dma_4u_supported(struct device *dev, u64 device_mask) +{ + struct iommu *iommu = dev->archdata.iommu; + + if (device_mask > DMA_BIT_MASK(32)) + return 0; + if ((device_mask & iommu->dma_addr_mask) == iommu->dma_addr_mask) + return 1; +#ifdef CONFIG_PCI + if (dev_is_pci(dev)) + return pci64_dma_supported(to_pci_dev(dev), device_mask); +#endif + return 0; +} + static const struct dma_map_ops sun4u_dma_ops = { .alloc = dma_4u_alloc_coherent, .free = dma_4u_free_coherent, @@ -750,31 +770,9 @@ static const struct dma_map_ops sun4u_dma_ops = { .unmap_sg = dma_4u_unmap_sg, .sync_single_for_cpu = dma_4u_sync_single_for_cpu, .sync_sg_for_cpu = dma_4u_sync_sg_for_cpu, + .dma_supported = dma_4u_supported, + .mapping_error = dma_4u_mapping_error, }; const struct dma_map_ops *dma_ops = &sun4u_dma_ops; EXPORT_SYMBOL(dma_ops); - -int dma_supported(struct device *dev, u64 device_mask) -{ - struct iommu *iommu = dev->archdata.iommu; - u64 dma_addr_mask = iommu->dma_addr_mask; - - if (device_mask > DMA_BIT_MASK(32)) { - if (iommu->atu) - dma_addr_mask = iommu->atu->dma_addr_mask; - else - return 0; - } - - if ((device_mask & dma_addr_mask) == dma_addr_mask) - return 1; - -#ifdef CONFIG_PCI - if (dev_is_pci(dev)) - return pci64_dma_supported(to_pci_dev(dev), device_mask); -#endif - - return 0; -} -EXPORT_SYMBOL(dma_supported); diff --git a/arch/sparc/kernel/iommu_common.h b/arch/sparc/kernel/iommu_common.h index 828493329f68..5ea5c192b1d9 100644 --- a/arch/sparc/kernel/iommu_common.h +++ b/arch/sparc/kernel/iommu_common.h @@ -47,4 +47,6 @@ static inline 
int is_span_boundary(unsigned long entry, return iommu_is_span_boundary(entry, nr, shift, boundary_size); } +#define SPARC_MAPPING_ERROR (~(dma_addr_t)0x0) + #endif /* _IOMMU_COMMON_H */ diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index cf20033a1458..12894f259bea 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -401,6 +401,11 @@ static void sbus_sync_sg_for_device(struct device *dev, struct scatterlist *sg, BUG(); } +static int sbus_dma_supported(struct device *dev, u64 mask) +{ + return 0; +} + static const struct dma_map_ops sbus_dma_ops = { .alloc = sbus_alloc_coherent, .free = sbus_free_coherent, @@ -410,6 +415,7 @@ static const struct dma_map_ops sbus_dma_ops = { .unmap_sg = sbus_unmap_sg, .sync_sg_for_cpu = sbus_sync_sg_for_cpu, .sync_sg_for_device = sbus_sync_sg_for_device, + .dma_supported = sbus_dma_supported, }; static int __init sparc_register_ioport(void) @@ -637,6 +643,7 @@ static void pci32_sync_sg_for_device(struct device *device, struct scatterlist * } } +/* note: leon re-uses pci32_dma_ops */ const struct dma_map_ops pci32_dma_ops = { .alloc = pci32_alloc_coherent, .free = pci32_free_coherent, @@ -651,29 +658,9 @@ const struct dma_map_ops pci32_dma_ops = { }; EXPORT_SYMBOL(pci32_dma_ops); -/* leon re-uses pci32_dma_ops */ -const struct dma_map_ops *leon_dma_ops = &pci32_dma_ops; -EXPORT_SYMBOL(leon_dma_ops); - const struct dma_map_ops *dma_ops = &sbus_dma_ops; EXPORT_SYMBOL(dma_ops); - -/* - * Return whether the given PCI device DMA address mask can be - * supported properly. For example, if your device can only drive the - * low 24-bits during PCI bus mastering, then you would pass - * 0x00ffffff as the mask to this function. 
- */ -int dma_supported(struct device *dev, u64 mask) -{ - if (dev_is_pci(dev)) - return 1; - - return 0; -} -EXPORT_SYMBOL(dma_supported); - #ifdef CONFIG_PROC_FS static int sparc_io_proc_show(struct seq_file *m, void *v) diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 68bec7c97cb8..24f21c726dfa 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -24,6 +24,7 @@ #include "pci_impl.h" #include "iommu_common.h" +#include "kernel.h" #include "pci_sun4v.h" @@ -412,12 +413,12 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page, bad: if (printk_ratelimit()) WARN_ON(1); - return DMA_ERROR_CODE; + return SPARC_MAPPING_ERROR; iommu_map_fail: local_irq_restore(flags); iommu_tbl_range_free(tbl, bus_addr, npages, IOMMU_ERROR_CODE); - return DMA_ERROR_CODE; + return SPARC_MAPPING_ERROR; } static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr, @@ -590,7 +591,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, if (outcount < incount) { outs = sg_next(outs); - outs->dma_address = DMA_ERROR_CODE; + outs->dma_address = SPARC_MAPPING_ERROR; outs->dma_length = 0; } @@ -607,7 +608,7 @@ iommu_map_failed: iommu_tbl_range_free(tbl, vaddr, npages, IOMMU_ERROR_CODE); /* XXX demap? 
XXX */ - s->dma_address = DMA_ERROR_CODE; + s->dma_address = SPARC_MAPPING_ERROR; s->dma_length = 0; } if (s == outs) @@ -669,6 +670,26 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, local_irq_restore(flags); } +static int dma_4v_supported(struct device *dev, u64 device_mask) +{ + struct iommu *iommu = dev->archdata.iommu; + u64 dma_addr_mask; + + if (device_mask > DMA_BIT_MASK(32) && iommu->atu) + dma_addr_mask = iommu->atu->dma_addr_mask; + else + dma_addr_mask = iommu->dma_addr_mask; + + if ((device_mask & dma_addr_mask) == dma_addr_mask) + return 1; + return pci64_dma_supported(to_pci_dev(dev), device_mask); +} + +static int dma_4v_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return dma_addr == SPARC_MAPPING_ERROR; +} + static const struct dma_map_ops sun4v_dma_ops = { .alloc = dma_4v_alloc_coherent, .free = dma_4v_free_coherent, @@ -676,6 +697,8 @@ static const struct dma_map_ops sun4v_dma_ops = { .unmap_page = dma_4v_unmap_page, .map_sg = dma_4v_map_sg, .unmap_sg = dma_4v_unmap_sg, + .dma_supported = dma_4v_supported, + .mapping_error = dma_4v_mapping_error, }; static void pci_sun4v_scan_bus(struct pci_pbm_info *pbm, struct device *parent) diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h index a803f6bb4d92..d0c79c1c54b4 100644 --- a/arch/tile/include/asm/uaccess.h +++ b/arch/tile/include/asm/uaccess.h @@ -327,7 +327,6 @@ extern unsigned long raw_copy_in_user( extern long strnlen_user(const char __user *str, long n); -extern long strlen_user(const char __user *str); extern long strncpy_from_user(char *dst, const char __user *src, long); /** diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c index 569bb6dd154a..f2abedc8a080 100644 --- a/arch/tile/kernel/pci-dma.c +++ b/arch/tile/kernel/pci-dma.c @@ -317,18 +317,6 @@ static void tile_dma_sync_sg_for_device(struct device *dev, } } -static inline int -tile_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - 
return 0; -} - -static inline int -tile_dma_supported(struct device *dev, u64 mask) -{ - return 1; -} - static const struct dma_map_ops tile_default_dma_map_ops = { .alloc = tile_dma_alloc_coherent, .free = tile_dma_free_coherent, @@ -340,8 +328,6 @@ static const struct dma_map_ops tile_default_dma_map_ops = { .sync_single_for_device = tile_dma_sync_single_for_device, .sync_sg_for_cpu = tile_dma_sync_sg_for_cpu, .sync_sg_for_device = tile_dma_sync_sg_for_device, - .mapping_error = tile_dma_mapping_error, - .dma_supported = tile_dma_supported }; const struct dma_map_ops *tile_dma_map_ops = &tile_default_dma_map_ops; @@ -504,18 +490,6 @@ static void tile_pci_dma_sync_sg_for_device(struct device *dev, } } -static inline int -tile_pci_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return 0; -} - -static inline int -tile_pci_dma_supported(struct device *dev, u64 mask) -{ - return 1; -} - static const struct dma_map_ops tile_pci_default_dma_map_ops = { .alloc = tile_pci_dma_alloc_coherent, .free = tile_pci_dma_free_coherent, @@ -527,8 +501,6 @@ static const struct dma_map_ops tile_pci_default_dma_map_ops = { .sync_single_for_device = tile_pci_dma_sync_single_for_device, .sync_sg_for_cpu = tile_pci_dma_sync_sg_for_cpu, .sync_sg_for_device = tile_pci_dma_sync_sg_for_device, - .mapping_error = tile_pci_dma_mapping_error, - .dma_supported = tile_pci_dma_supported }; const struct dma_map_ops *gx_pci_dma_map_ops = &tile_pci_default_dma_map_ops; @@ -578,8 +550,6 @@ static const struct dma_map_ops pci_hybrid_dma_ops = { .sync_single_for_device = tile_pci_dma_sync_single_for_device, .sync_sg_for_cpu = tile_pci_dma_sync_sg_for_cpu, .sync_sg_for_device = tile_pci_dma_sync_sg_for_device, - .mapping_error = tile_pci_dma_mapping_error, - .dma_supported = tile_pci_dma_supported }; const struct dma_map_ops *gx_legacy_pci_dma_map_ops = &pci_swiotlb_dma_ops; diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index a9bd61820042..2c7f721eccbc 100644 --- 
a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -255,11 +255,6 @@ int clear_user_proc(void __user *buf, int size) return clear_user(buf, size); } -int strlen_user_proc(char __user *str) -{ - return strlen_user(str); -} - int cpu(void) { return current_thread_info()->cpu; diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 24118c0b4640..5343c19814b3 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -116,7 +116,6 @@ struct compat_statfs { int f_spare[4]; }; -#define COMPAT_RLIM_OLD_INFINITY 0x7fffffff #define COMPAT_RLIM_INFINITY 0xffffffff typedef u32 compat_old_sigset_t; /* at least 32 bits */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 08a0838b83fb..398c79889f5c 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -19,8 +19,6 @@ # define ISA_DMA_BIT_MASK DMA_BIT_MASK(32) #endif -#define DMA_ERROR_CODE 0 - extern int iommu_merge; extern struct device x86_dma_fallback_dev; extern int panic_on_overflow; @@ -35,9 +33,6 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp); #define arch_dma_alloc_attrs arch_dma_alloc_attrs -#define HAVE_ARCH_DMA_SUPPORTED 1 -extern int dma_supported(struct device *hwdev, u64 mask); - extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag, unsigned long attrs); diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index 793869879464..fca144a104e4 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -6,6 +6,8 @@ extern int force_iommu, no_iommu; extern int iommu_detected; extern int iommu_pass_through; +int x86_dma_supported(struct device *dev, u64 mask); + /* 10 seconds */ #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) diff --git a/arch/x86/include/asm/kvm_host.h 
b/arch/x86/include/asm/kvm_host.h index 695605eb1dfb..1588e9e3dc01 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -48,28 +48,31 @@ #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS /* x86-specific vcpu->requests bit members */ -#define KVM_REQ_MIGRATE_TIMER 8 -#define KVM_REQ_REPORT_TPR_ACCESS 9 -#define KVM_REQ_TRIPLE_FAULT 10 -#define KVM_REQ_MMU_SYNC 11 -#define KVM_REQ_CLOCK_UPDATE 12 -#define KVM_REQ_EVENT 14 -#define KVM_REQ_APF_HALT 15 -#define KVM_REQ_STEAL_UPDATE 16 -#define KVM_REQ_NMI 17 -#define KVM_REQ_PMU 18 -#define KVM_REQ_PMI 19 -#define KVM_REQ_SMI 20 -#define KVM_REQ_MASTERCLOCK_UPDATE 21 -#define KVM_REQ_MCLOCK_INPROGRESS (22 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) -#define KVM_REQ_SCAN_IOAPIC (23 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) -#define KVM_REQ_GLOBAL_CLOCK_UPDATE 24 -#define KVM_REQ_APIC_PAGE_RELOAD (25 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) -#define KVM_REQ_HV_CRASH 26 -#define KVM_REQ_IOAPIC_EOI_EXIT 27 -#define KVM_REQ_HV_RESET 28 -#define KVM_REQ_HV_EXIT 29 -#define KVM_REQ_HV_STIMER 30 +#define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0) +#define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1) +#define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2) +#define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3) +#define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4) +#define KVM_REQ_EVENT KVM_ARCH_REQ(6) +#define KVM_REQ_APF_HALT KVM_ARCH_REQ(7) +#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8) +#define KVM_REQ_NMI KVM_ARCH_REQ(9) +#define KVM_REQ_PMU KVM_ARCH_REQ(10) +#define KVM_REQ_PMI KVM_ARCH_REQ(11) +#define KVM_REQ_SMI KVM_ARCH_REQ(12) +#define KVM_REQ_MASTERCLOCK_UPDATE KVM_ARCH_REQ(13) +#define KVM_REQ_MCLOCK_INPROGRESS \ + KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_SCAN_IOAPIC \ + KVM_ARCH_REQ_FLAGS(15, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_GLOBAL_CLOCK_UPDATE KVM_ARCH_REQ(16) +#define KVM_REQ_APIC_PAGE_RELOAD \ + KVM_ARCH_REQ_FLAGS(17, KVM_REQUEST_WAIT | 
KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_HV_CRASH KVM_ARCH_REQ(18) +#define KVM_REQ_IOAPIC_EOI_EXIT KVM_ARCH_REQ(19) +#define KVM_REQ_HV_RESET KVM_ARCH_REQ(20) +#define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21) +#define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22) #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ @@ -254,7 +257,8 @@ union kvm_mmu_page_role { unsigned cr0_wp:1; unsigned smep_andnot_wp:1; unsigned smap_andnot_wp:1; - unsigned :8; + unsigned ad_disabled:1; + unsigned :7; /* * This is left at the top of the word so that diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index d406894cd9a2..5573c75f8e4c 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -426,6 +426,8 @@ #define MSR_IA32_TSC_ADJUST 0x0000003b #define MSR_IA32_BNDCFGS 0x00000d90 +#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc + #define MSR_IA32_XSS 0x00000da0 #define FEATURE_CONTROL_LOCKED (1<<0) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index a059aac9e937..476ea27f490b 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -565,7 +565,6 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n); extern __must_check long strncpy_from_user(char *dst, const char __user *src, long count); -extern __must_check long strlen_user(const char __user *str); extern __must_check long strnlen_user(const char __user *str, long n); unsigned long __must_check clear_user(void __user *mem, unsigned long len); diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index f6d20f6cca12..11071fcd630e 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -43,6 +43,7 @@ #include <asm/page.h> #include <asm/pgtable.h> +#include <asm/smap.h> #include <xen/interface/xen.h> #include <xen/interface/sched.h> @@ -50,6 +51,8 @@ #include <xen/interface/platform.h> #include 
<xen/interface/xen-mca.h> +struct xen_dm_op_buf; + /* * The hypercall asms have to meet several constraints: * - Work on 32- and 64-bit. @@ -214,10 +217,12 @@ privcmd_call(unsigned call, __HYPERCALL_DECLS; __HYPERCALL_5ARG(a1, a2, a3, a4, a5); + stac(); asm volatile("call *%[call]" : __HYPERCALL_5PARAM : [call] "a" (&hypercall_page[call]) : __HYPERCALL_CLOBBER5); + clac(); return (long)__res; } @@ -474,9 +479,13 @@ HYPERVISOR_xenpmu_op(unsigned int op, void *arg) static inline int HYPERVISOR_dm_op( - domid_t dom, unsigned int nr_bufs, void *bufs) + domid_t dom, unsigned int nr_bufs, struct xen_dm_op_buf *bufs) { - return _hypercall3(int, dm_op, dom, nr_bufs, bufs); + int ret; + stac(); + ret = _hypercall3(int, dm_op, dom, nr_bufs, bufs); + clac(); + return ret; } static inline void diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 815dd63f49d0..cc0e8bc0ea3f 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -704,6 +704,7 @@ static const struct dma_map_ops gart_dma_ops = { .alloc = gart_alloc_coherent, .free = gart_free_coherent, .mapping_error = gart_mapping_error, + .dma_supported = x86_dma_supported, }; static void gart_iommu_shutdown(void) diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index fda7867046d0..5286a4a92cf7 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -50,6 +50,8 @@ #include <asm/x86_init.h> #include <asm/iommu_table.h> +#define CALGARY_MAPPING_ERROR 0 + #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT int use_calgary __read_mostly = 1; #else @@ -252,7 +254,7 @@ static unsigned long iommu_range_alloc(struct device *dev, if (panic_on_overflow) panic("Calgary: fix the allocator.\n"); else - return DMA_ERROR_CODE; + return CALGARY_MAPPING_ERROR; } } @@ -272,10 +274,10 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, entry = iommu_range_alloc(dev, tbl, npages); - if (unlikely(entry == 
DMA_ERROR_CODE)) { + if (unlikely(entry == CALGARY_MAPPING_ERROR)) { pr_warn("failed to allocate %u pages in iommu %p\n", npages, tbl); - return DMA_ERROR_CODE; + return CALGARY_MAPPING_ERROR; } /* set the return dma address */ @@ -295,7 +297,7 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, unsigned long flags; /* were we called with bad_dma_address? */ - badend = DMA_ERROR_CODE + (EMERGENCY_PAGES * PAGE_SIZE); + badend = CALGARY_MAPPING_ERROR + (EMERGENCY_PAGES * PAGE_SIZE); if (unlikely(dma_addr < badend)) { WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " "address 0x%Lx\n", dma_addr); @@ -380,7 +382,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE); entry = iommu_range_alloc(dev, tbl, npages); - if (entry == DMA_ERROR_CODE) { + if (entry == CALGARY_MAPPING_ERROR) { /* makes sure unmap knows to stop */ s->dma_length = 0; goto error; @@ -398,7 +400,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, error: calgary_unmap_sg(dev, sg, nelems, dir, 0); for_each_sg(sg, s, nelems, i) { - sg->dma_address = DMA_ERROR_CODE; + sg->dma_address = CALGARY_MAPPING_ERROR; sg->dma_length = 0; } return 0; @@ -453,7 +455,7 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, /* set up tces to cover the allocated range */ mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); - if (mapping == DMA_ERROR_CODE) + if (mapping == CALGARY_MAPPING_ERROR) goto free; *dma_handle = mapping; return ret; @@ -478,6 +480,11 @@ static void calgary_free_coherent(struct device *dev, size_t size, free_pages((unsigned long)vaddr, get_order(size)); } +static int calgary_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return dma_addr == CALGARY_MAPPING_ERROR; +} + static const struct dma_map_ops calgary_dma_ops = { .alloc = calgary_alloc_coherent, .free = calgary_free_coherent, @@ -485,6 +492,8 @@ static const struct 
dma_map_ops calgary_dma_ops = { .unmap_sg = calgary_unmap_sg, .map_page = calgary_map_page, .unmap_page = calgary_unmap_page, + .mapping_error = calgary_mapping_error, + .dma_supported = x86_dma_supported, }; static inline void __iomem * busno_to_bbar(unsigned char num) @@ -732,7 +741,7 @@ static void __init calgary_reserve_regions(struct pci_dev *dev) struct iommu_table *tbl = pci_iommu(dev->bus); /* reserve EMERGENCY_PAGES from bad_dma_address and up */ - iommu_range_reserve(tbl, DMA_ERROR_CODE, EMERGENCY_PAGES); + iommu_range_reserve(tbl, CALGARY_MAPPING_ERROR, EMERGENCY_PAGES); /* avoid the BIOS/VGA first 640KB-1MB region */ /* for CalIOC2 - avoid the entire first MB */ diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 3a216ec869cd..5e16d3f29594 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -213,10 +213,8 @@ static __init int iommu_setup(char *p) } early_param("iommu", iommu_setup); -int dma_supported(struct device *dev, u64 mask) +int x86_dma_supported(struct device *dev, u64 mask) { - const struct dma_map_ops *ops = get_dma_ops(dev); - #ifdef CONFIG_PCI if (mask > 0xffffffff && forbid_dac > 0) { dev_info(dev, "PCI: Disallowing DAC for device\n"); @@ -224,9 +222,6 @@ int dma_supported(struct device *dev, u64 mask) } #endif - if (ops->dma_supported) - return ops->dma_supported(dev, mask); - /* Copied from i386. Doesn't make much sense, because it will only work for pci_alloc_coherent. The caller just has to use GFP_DMA in this case. 
*/ @@ -252,7 +247,6 @@ int dma_supported(struct device *dev, u64 mask) return 1; } -EXPORT_SYMBOL(dma_supported); static int __init pci_iommu_init(void) { diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index a88952ef371c..a6d404087fe3 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -11,6 +11,8 @@ #include <asm/iommu.h> #include <asm/dma.h> +#define NOMMU_MAPPING_ERROR 0 + static int check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) { @@ -33,7 +35,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page, dma_addr_t bus = page_to_phys(page) + offset; WARN_ON(size == 0); if (!check_addr("map_single", dev, bus, size)) - return DMA_ERROR_CODE; + return NOMMU_MAPPING_ERROR; flush_write_buffers(); return bus; } @@ -88,6 +90,11 @@ static void nommu_sync_sg_for_device(struct device *dev, flush_write_buffers(); } +static int nommu_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return dma_addr == NOMMU_MAPPING_ERROR; +} + const struct dma_map_ops nommu_dma_ops = { .alloc = dma_generic_alloc_coherent, .free = dma_generic_free_coherent, @@ -96,4 +103,6 @@ const struct dma_map_ops nommu_dma_ops = { .sync_single_for_device = nommu_sync_single_for_device, .sync_sg_for_device = nommu_sync_sg_for_device, .is_phys = 1, + .mapping_error = nommu_mapping_error, + .dma_supported = x86_dma_supported, }; diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index a6fd40aade7c..da6728383052 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -144,6 +144,14 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu) return best && (best->ebx & bit(X86_FEATURE_RTM)); } +static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->ebx & bit(X86_FEATURE_MPX)); +} + static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; 
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 80890dee66ce..fb0055953fbc 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -900,7 +900,7 @@ static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, if (rc != X86EMUL_CONTINUE) \ goto done; \ ctxt->_eip += sizeof(_type); \ - _x = *(_type __aligned(1) *) ctxt->fetch.ptr; \ + memcpy(&_x, ctxt->fetch.ptr, sizeof(_type)); \ ctxt->fetch.ptr += sizeof(_type); \ _x; \ }) @@ -3942,6 +3942,25 @@ static int check_fxsr(struct x86_emulate_ctxt *ctxt) } /* + * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but does save + * and restore MXCSR. + */ +static size_t __fxstate_size(int nregs) +{ + return offsetof(struct fxregs_state, xmm_space[0]) + nregs * 16; +} + +static inline size_t fxstate_size(struct x86_emulate_ctxt *ctxt) +{ + bool cr4_osfxsr; + if (ctxt->mode == X86EMUL_MODE_PROT64) + return __fxstate_size(16); + + cr4_osfxsr = ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR; + return __fxstate_size(cr4_osfxsr ? 
8 : 0); +} + +/* * FXSAVE and FXRSTOR have 4 different formats depending on execution mode, * 1) 16 bit mode * 2) 32 bit mode @@ -3962,7 +3981,6 @@ static int check_fxsr(struct x86_emulate_ctxt *ctxt) static int em_fxsave(struct x86_emulate_ctxt *ctxt) { struct fxregs_state fx_state; - size_t size; int rc; rc = check_fxsr(ctxt); @@ -3978,68 +3996,42 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; - if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR) - size = offsetof(struct fxregs_state, xmm_space[8 * 16/4]); - else - size = offsetof(struct fxregs_state, xmm_space[0]); - - return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state, size); -} - -static int fxrstor_fixup(struct x86_emulate_ctxt *ctxt, - struct fxregs_state *new) -{ - int rc = X86EMUL_CONTINUE; - struct fxregs_state old; - - rc = asm_safe("fxsave %[fx]", , [fx] "+m"(old)); - if (rc != X86EMUL_CONTINUE) - return rc; - - /* - * 64 bit host will restore XMM 8-15, which is not correct on non-64 - * bit guests. Load the current values in order to preserve 64 bit - * XMMs after fxrstor. - */ -#ifdef CONFIG_X86_64 - /* XXX: accessing XMM 8-15 very awkwardly */ - memcpy(&new->xmm_space[8 * 16/4], &old.xmm_space[8 * 16/4], 8 * 16); -#endif - - /* - * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but - * does save and restore MXCSR. 
- */ - if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR)) - memcpy(new->xmm_space, old.xmm_space, 8 * 16); - - return rc; + return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state, + fxstate_size(ctxt)); } static int em_fxrstor(struct x86_emulate_ctxt *ctxt) { struct fxregs_state fx_state; int rc; + size_t size; rc = check_fxsr(ctxt); if (rc != X86EMUL_CONTINUE) return rc; - rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, 512); - if (rc != X86EMUL_CONTINUE) - return rc; + ctxt->ops->get_fpu(ctxt); - if (fx_state.mxcsr >> 16) - return emulate_gp(ctxt, 0); + size = fxstate_size(ctxt); + if (size < __fxstate_size(16)) { + rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state)); + if (rc != X86EMUL_CONTINUE) + goto out; + } - ctxt->ops->get_fpu(ctxt); + rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size); + if (rc != X86EMUL_CONTINUE) + goto out; - if (ctxt->mode < X86EMUL_MODE_PROT64) - rc = fxrstor_fixup(ctxt, &fx_state); + if (fx_state.mxcsr >> 16) { + rc = emulate_gp(ctxt, 0); + goto out; + } if (rc == X86EMUL_CONTINUE) rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state)); +out: ctxt->ops->put_fpu(ctxt); return rc; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index d24c8742d9b0..2819d4c123eb 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1495,6 +1495,7 @@ EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use); static void cancel_hv_timer(struct kvm_lapic *apic) { + WARN_ON(!apic->lapic_timer.hv_timer_in_use); preempt_disable(); kvm_x86_ops->cancel_hv_timer(apic->vcpu); apic->lapic_timer.hv_timer_in_use = false; @@ -1503,25 +1504,56 @@ static void cancel_hv_timer(struct kvm_lapic *apic) static bool start_hv_timer(struct kvm_lapic *apic) { - u64 tscdeadline = apic->lapic_timer.tscdeadline; + struct kvm_timer *ktimer = &apic->lapic_timer; + int r; - if ((atomic_read(&apic->lapic_timer.pending) && - !apic_lvtt_period(apic)) || - kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) { - if 
(apic->lapic_timer.hv_timer_in_use) - cancel_hv_timer(apic); - } else { - apic->lapic_timer.hv_timer_in_use = true; - hrtimer_cancel(&apic->lapic_timer.timer); + if (!kvm_x86_ops->set_hv_timer) + return false; + + if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending)) + return false; - /* In case the sw timer triggered in the window */ - if (atomic_read(&apic->lapic_timer.pending) && - !apic_lvtt_period(apic)) - cancel_hv_timer(apic); + r = kvm_x86_ops->set_hv_timer(apic->vcpu, ktimer->tscdeadline); + if (r < 0) + return false; + + ktimer->hv_timer_in_use = true; + hrtimer_cancel(&ktimer->timer); + + /* + * Also recheck ktimer->pending, in case the sw timer triggered in + * the window. For periodic timer, leave the hv timer running for + * simplicity, and the deadline will be recomputed on the next vmexit. + */ + if (!apic_lvtt_period(apic) && (r || atomic_read(&ktimer->pending))) { + if (r) + apic_timer_expired(apic); + return false; } - trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, - apic->lapic_timer.hv_timer_in_use); - return apic->lapic_timer.hv_timer_in_use; + + trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, true); + return true; +} + +static void start_sw_timer(struct kvm_lapic *apic) +{ + struct kvm_timer *ktimer = &apic->lapic_timer; + if (apic->lapic_timer.hv_timer_in_use) + cancel_hv_timer(apic); + if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending)) + return; + + if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) + start_sw_period(apic); + else if (apic_lvtt_tscdeadline(apic)) + start_sw_tscdeadline(apic); + trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false); +} + +static void restart_apic_timer(struct kvm_lapic *apic) +{ + if (!start_hv_timer(apic)) + start_sw_timer(apic); } void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) @@ -1535,19 +1567,14 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) if (apic_lvtt_period(apic) && apic->lapic_timer.period) { advance_periodic_target_expiration(apic); - if 
(!start_hv_timer(apic)) - start_sw_period(apic); + restart_apic_timer(apic); } } EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer); void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu) { - struct kvm_lapic *apic = vcpu->arch.apic; - - WARN_ON(apic->lapic_timer.hv_timer_in_use); - - start_hv_timer(apic); + restart_apic_timer(vcpu->arch.apic); } EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer); @@ -1556,33 +1583,28 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu) struct kvm_lapic *apic = vcpu->arch.apic; /* Possibly the TSC deadline timer is not enabled yet */ - if (!apic->lapic_timer.hv_timer_in_use) - return; - - cancel_hv_timer(apic); + if (apic->lapic_timer.hv_timer_in_use) + start_sw_timer(apic); +} +EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer); - if (atomic_read(&apic->lapic_timer.pending)) - return; +void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->arch.apic; - if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) - start_sw_period(apic); - else if (apic_lvtt_tscdeadline(apic)) - start_sw_tscdeadline(apic); + WARN_ON(!apic->lapic_timer.hv_timer_in_use); + restart_apic_timer(apic); } -EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer); static void start_apic_timer(struct kvm_lapic *apic) { atomic_set(&apic->lapic_timer.pending, 0); - if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { - if (set_target_expiration(apic) && - !(kvm_x86_ops->set_hv_timer && start_hv_timer(apic))) - start_sw_period(apic); - } else if (apic_lvtt_tscdeadline(apic)) { - if (!(kvm_x86_ops->set_hv_timer && start_hv_timer(apic))) - start_sw_tscdeadline(apic); - } + if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) + && !set_target_expiration(apic)) + return; + + restart_apic_timer(apic); } static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) @@ -1813,16 +1835,6 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu) * LAPIC interface *---------------------------------------------------------------------- */ -u64 
kvm_get_lapic_target_expiration_tsc(struct kvm_vcpu *vcpu) -{ - struct kvm_lapic *apic = vcpu->arch.apic; - - if (!lapic_in_kernel(vcpu)) - return 0; - - return apic->lapic_timer.tscdeadline; -} - u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index bcbe811f3b97..29caa2c3dff9 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -87,7 +87,6 @@ int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); -u64 kvm_get_lapic_target_expiration_tsc(struct kvm_vcpu *vcpu); u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data); @@ -216,4 +215,5 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu); void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu); void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu); bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu); +void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu); #endif diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index cb8225969255..aafd399cf8c6 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -183,13 +183,13 @@ static u64 __read_mostly shadow_user_mask; static u64 __read_mostly shadow_accessed_mask; static u64 __read_mostly shadow_dirty_mask; static u64 __read_mostly shadow_mmio_mask; +static u64 __read_mostly shadow_mmio_value; static u64 __read_mostly shadow_present_mask; /* - * The mask/value to distinguish a PTE that has been marked not-present for - * access tracking purposes. - * The mask would be either 0 if access tracking is disabled, or - * SPTE_SPECIAL_MASK|VMX_EPT_RWX_MASK if access tracking is enabled. + * SPTEs used by MMUs without A/D bits are marked with shadow_acc_track_value. 
+ * Non-present SPTEs with shadow_acc_track_value set are in place for access + * tracking. */ static u64 __read_mostly shadow_acc_track_mask; static const u64 shadow_acc_track_value = SPTE_SPECIAL_MASK; @@ -207,16 +207,40 @@ static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIF static void mmu_spte_set(u64 *sptep, u64 spte); static void mmu_free_roots(struct kvm_vcpu *vcpu); -void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask) +void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value) { + BUG_ON((mmio_mask & mmio_value) != mmio_value); + shadow_mmio_value = mmio_value | SPTE_SPECIAL_MASK; shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK; } EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); +static inline bool sp_ad_disabled(struct kvm_mmu_page *sp) +{ + return sp->role.ad_disabled; +} + +static inline bool spte_ad_enabled(u64 spte) +{ + MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value); + return !(spte & shadow_acc_track_value); +} + +static inline u64 spte_shadow_accessed_mask(u64 spte) +{ + MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value); + return spte_ad_enabled(spte) ? shadow_accessed_mask : 0; +} + +static inline u64 spte_shadow_dirty_mask(u64 spte) +{ + MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value); + return spte_ad_enabled(spte) ? shadow_dirty_mask : 0; +} + static inline bool is_access_track_spte(u64 spte) { - /* Always false if shadow_acc_track_mask is zero. 
*/ - return (spte & shadow_acc_track_mask) == shadow_acc_track_value; + return !spte_ad_enabled(spte) && (spte & shadow_acc_track_mask) == 0; } /* @@ -270,7 +294,7 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, u64 mask = generation_mmio_spte_mask(gen); access &= ACC_WRITE_MASK | ACC_USER_MASK; - mask |= shadow_mmio_mask | access | gfn << PAGE_SHIFT; + mask |= shadow_mmio_value | access | gfn << PAGE_SHIFT; trace_mark_mmio_spte(sptep, gfn, access, gen); mmu_spte_set(sptep, mask); @@ -278,7 +302,7 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, static bool is_mmio_spte(u64 spte) { - return (spte & shadow_mmio_mask) == shadow_mmio_mask; + return (spte & shadow_mmio_mask) == shadow_mmio_value; } static gfn_t get_mmio_spte_gfn(u64 spte) @@ -315,12 +339,20 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte) return likely(kvm_gen == spte_gen); } +/* + * Sets the shadow PTE masks used by the MMU. + * + * Assumptions: + * - Setting either @accessed_mask or @dirty_mask requires setting both + * - At least one of @accessed_mask or @acc_track_mask must be set + */ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask, u64 acc_track_mask) { - if (acc_track_mask != 0) - acc_track_mask |= SPTE_SPECIAL_MASK; + BUG_ON(!dirty_mask != !accessed_mask); + BUG_ON(!accessed_mask && !acc_track_mask); + BUG_ON(acc_track_mask & shadow_acc_track_value); shadow_user_mask = user_mask; shadow_accessed_mask = accessed_mask; @@ -329,7 +361,6 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, shadow_x_mask = x_mask; shadow_present_mask = p_mask; shadow_acc_track_mask = acc_track_mask; - WARN_ON(shadow_accessed_mask != 0 && shadow_acc_track_mask != 0); } EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); @@ -549,7 +580,7 @@ static bool spte_has_volatile_bits(u64 spte) is_access_track_spte(spte)) return true; - if (shadow_accessed_mask) { + if (spte_ad_enabled(spte)) { if 
((spte & shadow_accessed_mask) == 0 || (is_writable_pte(spte) && (spte & shadow_dirty_mask) == 0)) return true; @@ -560,14 +591,17 @@ static bool spte_has_volatile_bits(u64 spte) static bool is_accessed_spte(u64 spte) { - return shadow_accessed_mask ? spte & shadow_accessed_mask - : !is_access_track_spte(spte); + u64 accessed_mask = spte_shadow_accessed_mask(spte); + + return accessed_mask ? spte & accessed_mask + : !is_access_track_spte(spte); } static bool is_dirty_spte(u64 spte) { - return shadow_dirty_mask ? spte & shadow_dirty_mask - : spte & PT_WRITABLE_MASK; + u64 dirty_mask = spte_shadow_dirty_mask(spte); + + return dirty_mask ? spte & dirty_mask : spte & PT_WRITABLE_MASK; } /* Rules for using mmu_spte_set: @@ -707,10 +741,10 @@ static u64 mmu_spte_get_lockless(u64 *sptep) static u64 mark_spte_for_access_track(u64 spte) { - if (shadow_accessed_mask != 0) + if (spte_ad_enabled(spte)) return spte & ~shadow_accessed_mask; - if (shadow_acc_track_mask == 0 || is_access_track_spte(spte)) + if (is_access_track_spte(spte)) return spte; /* @@ -729,7 +763,6 @@ static u64 mark_spte_for_access_track(u64 spte) spte |= (spte & shadow_acc_track_saved_bits_mask) << shadow_acc_track_saved_bits_shift; spte &= ~shadow_acc_track_mask; - spte |= shadow_acc_track_value; return spte; } @@ -741,6 +774,7 @@ static u64 restore_acc_track_spte(u64 spte) u64 saved_bits = (spte >> shadow_acc_track_saved_bits_shift) & shadow_acc_track_saved_bits_mask; + WARN_ON_ONCE(spte_ad_enabled(spte)); WARN_ON_ONCE(!is_access_track_spte(spte)); new_spte &= ~shadow_acc_track_mask; @@ -759,7 +793,7 @@ static bool mmu_spte_age(u64 *sptep) if (!is_accessed_spte(spte)) return false; - if (shadow_accessed_mask) { + if (spte_ad_enabled(spte)) { clear_bit((ffs(shadow_accessed_mask) - 1), (unsigned long *)sptep); } else { @@ -1390,6 +1424,22 @@ static bool spte_clear_dirty(u64 *sptep) return mmu_spte_update(sptep, spte); } +static bool wrprot_ad_disabled_spte(u64 *sptep) +{ + bool was_writable = 
test_and_clear_bit(PT_WRITABLE_SHIFT, + (unsigned long *)sptep); + if (was_writable) + kvm_set_pfn_dirty(spte_to_pfn(*sptep)); + + return was_writable; +} + +/* + * Gets the GFN ready for another round of dirty logging by clearing the + * - D bit on ad-enabled SPTEs, and + * - W bit on ad-disabled SPTEs. + * Returns true iff any D or W bits were cleared. + */ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) { u64 *sptep; @@ -1397,7 +1447,10 @@ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) bool flush = false; for_each_rmap_spte(rmap_head, &iter, sptep) - flush |= spte_clear_dirty(sptep); + if (spte_ad_enabled(*sptep)) + flush |= spte_clear_dirty(sptep); + else + flush |= wrprot_ad_disabled_spte(sptep); return flush; } @@ -1420,7 +1473,8 @@ static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) bool flush = false; for_each_rmap_spte(rmap_head, &iter, sptep) - flush |= spte_set_dirty(sptep); + if (spte_ad_enabled(*sptep)) + flush |= spte_set_dirty(sptep); return flush; } @@ -1452,7 +1506,8 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, } /** - * kvm_mmu_clear_dirty_pt_masked - clear MMU D-bit for PT level pages + * kvm_mmu_clear_dirty_pt_masked - clear MMU D-bit for PT level pages, or write + * protect the page if the D-bit isn't supported. * @kvm: kvm instance * @slot: slot to clear D-bit * @gfn_offset: start of the BITS_PER_LONG pages we care about @@ -1766,18 +1821,9 @@ static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, u64 *sptep; struct rmap_iterator iter; - /* - * If there's no access bit in the secondary pte set by the hardware and - * fast access tracking is also not enabled, it's up to gup-fast/gup to - * set the access bit in the primary pte or in the page structure. 
- */ - if (!shadow_accessed_mask && !shadow_acc_track_mask) - goto out; - for_each_rmap_spte(rmap_head, &iter, sptep) if (is_accessed_spte(*sptep)) return 1; -out: return 0; } @@ -1798,18 +1844,6 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) { - /* - * In case of absence of EPT Access and Dirty Bits supports, - * emulate the accessed bit for EPT, by checking if this page has - * an EPT mapping, and clearing it if it does. On the next access, - * a new EPT mapping will be established. - * This has some overhead, but not as much as the cost of swapping - * out actively used pages or breaking up actively used hugepages. - */ - if (!shadow_accessed_mask && !shadow_acc_track_mask) - return kvm_handle_hva_range(kvm, start, end, 0, - kvm_unmap_rmapp); - return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp); } @@ -2398,7 +2432,12 @@ static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep, BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK); spte = __pa(sp->spt) | shadow_present_mask | PT_WRITABLE_MASK | - shadow_user_mask | shadow_x_mask | shadow_accessed_mask; + shadow_user_mask | shadow_x_mask; + + if (sp_ad_disabled(sp)) + spte |= shadow_acc_track_value; + else + spte |= shadow_accessed_mask; mmu_spte_set(sptep, spte); @@ -2666,10 +2705,15 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, { u64 spte = 0; int ret = 0; + struct kvm_mmu_page *sp; if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access)) return 0; + sp = page_header(__pa(sptep)); + if (sp_ad_disabled(sp)) + spte |= shadow_acc_track_value; + /* * For the EPT case, shadow_present_mask is 0 if hardware * supports exec-only page table entries. 
In that case, @@ -2678,7 +2722,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, */ spte |= shadow_present_mask; if (!speculative) - spte |= shadow_accessed_mask; + spte |= spte_shadow_accessed_mask(spte); if (pte_access & ACC_EXEC_MASK) spte |= shadow_x_mask; @@ -2735,7 +2779,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, if (pte_access & ACC_WRITE_MASK) { kvm_vcpu_mark_page_dirty(vcpu, gfn); - spte |= shadow_dirty_mask; + spte |= spte_shadow_dirty_mask(spte); } if (speculative) @@ -2877,16 +2921,16 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) { struct kvm_mmu_page *sp; + sp = page_header(__pa(sptep)); + /* - * Since it's no accessed bit on EPT, it's no way to - * distinguish between actually accessed translations - * and prefetched, so disable pte prefetch if EPT is - * enabled. + * Without accessed bits, there's no way to distinguish between + * actually accessed translations and prefetched, so disable pte + * prefetch if accessed bits aren't available. 
*/ - if (!shadow_accessed_mask) + if (sp_ad_disabled(sp)) return; - sp = page_header(__pa(sptep)); if (sp->role.level > PT_PAGE_TABLE_LEVEL) return; @@ -4290,6 +4334,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) context->base_role.word = 0; context->base_role.smm = is_smm(vcpu); + context->base_role.ad_disabled = (shadow_accessed_mask == 0); context->page_fault = tdp_page_fault; context->sync_page = nonpaging_sync_page; context->invlpg = nonpaging_invlpg; @@ -4377,6 +4422,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, context->root_level = context->shadow_root_level; context->root_hpa = INVALID_PAGE; context->direct_map = false; + context->base_role.ad_disabled = !accessed_dirty; update_permission_bitmask(vcpu, context, true); update_pkru_bitmask(vcpu, context, true); @@ -4636,6 +4682,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, mask.smep_andnot_wp = 1; mask.smap_andnot_wp = 1; mask.smm = 1; + mask.ad_disabled = 1; /* * If we don't have indirect shadow pages, it means no page is diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 330bf3a811fb..a276834950c1 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -51,7 +51,7 @@ static inline u64 rsvd_bits(int s, int e) return ((1ULL << (e - s + 1)) - 1) << s; } -void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask); +void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value); void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context); diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 5a24b846a1cb..8b97a6cba8d1 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -30,8 +30,9 @@ \ role.word = __entry->role; \ \ - trace_seq_printf(p, "sp gen %lx gfn %llx %u%s q%u%s %s%s" \ - " %snxe root %u %s%c", __entry->mmu_valid_gen, \ + trace_seq_printf(p, "sp gen %lx gfn %llx l%u%s q%u%s %s%s" \ + " %snxe %sad root %u %s%c", \ + __entry->mmu_valid_gen, \ __entry->gfn, role.level, \ role.cr4_pae ? 
" pae" : "", \ role.quadrant, \ @@ -39,6 +40,7 @@ access_str[role.access], \ role.invalid ? " invalid" : "", \ role.nxe ? "" : "!", \ + role.ad_disabled ? "!" : "", \ __entry->root_count, \ __entry->unsync ? "unsync" : "sync", 0); \ saved_ptr; \ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 33460fcdeef9..905ea6052517 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -190,6 +190,7 @@ struct vcpu_svm { struct nested_state nested; bool nmi_singlestep; + u64 nmi_singlestep_guest_rflags; unsigned int3_injected; unsigned long int3_rip; @@ -964,6 +965,18 @@ static void svm_disable_lbrv(struct vcpu_svm *svm) set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0); } +static void disable_nmi_singlestep(struct vcpu_svm *svm) +{ + svm->nmi_singlestep = false; + if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) { + /* Clear our flags if they were not set by the guest */ + if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF)) + svm->vmcb->save.rflags &= ~X86_EFLAGS_TF; + if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF)) + svm->vmcb->save.rflags &= ~X86_EFLAGS_RF; + } +} + /* Note: * This hash table is used to map VM_ID to a struct kvm_arch, * when handling AMD IOMMU GALOG notification to schedule in @@ -1713,11 +1726,24 @@ static void svm_vcpu_unblocking(struct kvm_vcpu *vcpu) static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) { - return to_svm(vcpu)->vmcb->save.rflags; + struct vcpu_svm *svm = to_svm(vcpu); + unsigned long rflags = svm->vmcb->save.rflags; + + if (svm->nmi_singlestep) { + /* Hide our flags if they were not set by the guest */ + if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF)) + rflags &= ~X86_EFLAGS_TF; + if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF)) + rflags &= ~X86_EFLAGS_RF; + } + return rflags; } static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { + if (to_svm(vcpu)->nmi_singlestep) + rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); + /* * Any change of EFLAGS.VM is 
accompanied by a reload of SS * (caused by either a task switch or an inter-privilege IRET), @@ -2112,10 +2138,7 @@ static int db_interception(struct vcpu_svm *svm) } if (svm->nmi_singlestep) { - svm->nmi_singlestep = false; - if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) - svm->vmcb->save.rflags &= - ~(X86_EFLAGS_TF | X86_EFLAGS_RF); + disable_nmi_singlestep(svm); } if (svm->vcpu.guest_debug & @@ -2370,8 +2393,8 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu) static int nested_svm_check_permissions(struct vcpu_svm *svm) { - if (!(svm->vcpu.arch.efer & EFER_SVME) - || !is_paging(&svm->vcpu)) { + if (!(svm->vcpu.arch.efer & EFER_SVME) || + !is_paging(&svm->vcpu)) { kvm_queue_exception(&svm->vcpu, UD_VECTOR); return 1; } @@ -2381,7 +2404,7 @@ static int nested_svm_check_permissions(struct vcpu_svm *svm) return 1; } - return 0; + return 0; } static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, @@ -2534,6 +2557,31 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) return (value & mask) ? 
NESTED_EXIT_DONE : NESTED_EXIT_HOST; } +/* DB exceptions for our internal use must not cause vmexit */ +static int nested_svm_intercept_db(struct vcpu_svm *svm) +{ + unsigned long dr6; + + /* if we're not singlestepping, it's not ours */ + if (!svm->nmi_singlestep) + return NESTED_EXIT_DONE; + + /* if it's not a singlestep exception, it's not ours */ + if (kvm_get_dr(&svm->vcpu, 6, &dr6)) + return NESTED_EXIT_DONE; + if (!(dr6 & DR6_BS)) + return NESTED_EXIT_DONE; + + /* if the guest is singlestepping, it should get the vmexit */ + if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) { + disable_nmi_singlestep(svm); + return NESTED_EXIT_DONE; + } + + /* it's ours, the nested hypervisor must not see this one */ + return NESTED_EXIT_HOST; +} + static int nested_svm_exit_special(struct vcpu_svm *svm) { u32 exit_code = svm->vmcb->control.exit_code; @@ -2589,8 +2637,12 @@ static int nested_svm_intercept(struct vcpu_svm *svm) } case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: { u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); - if (svm->nested.intercept_exceptions & excp_bits) - vmexit = NESTED_EXIT_DONE; + if (svm->nested.intercept_exceptions & excp_bits) { + if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR) + vmexit = nested_svm_intercept_db(svm); + else + vmexit = NESTED_EXIT_DONE; + } /* async page fault always cause vmexit */ else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) && svm->apf_reason != 0) @@ -4627,10 +4679,17 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) == HF_NMI_MASK) return; /* IRET will cause a vm exit */ + if ((svm->vcpu.arch.hflags & HF_GIF_MASK) == 0) + return; /* STGI will cause a vm exit */ + + if (svm->nested.exit_required) + return; /* we're not going to run the guest yet */ + /* * Something prevents NMI from been injected. 
Single step over possible * problem (IRET or exception injection or interrupt shadow) */ + svm->nmi_singlestep_guest_rflags = svm_get_rflags(vcpu); svm->nmi_singlestep = true; svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); } @@ -4771,6 +4830,22 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(svm->nested.exit_required)) return; + /* + * Disable singlestep if we're injecting an interrupt/exception. + * We don't want our modified rflags to be pushed on the stack where + * we might not be able to easily reset them if we disabled NMI + * singlestep later. + */ + if (svm->nmi_singlestep && svm->vmcb->control.event_inj) { + /* + * Event injection happens before external interrupts cause a + * vmexit and interrupts are disabled here, so smp_send_reschedule + * is enough to force an immediate vmexit. + */ + disable_nmi_singlestep(svm); + smp_send_reschedule(vcpu->cpu); + } + pre_svm_run(svm); sync_lapic_to_cr8(vcpu); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6dcc4873e435..f76efad248ab 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -913,8 +913,9 @@ static void nested_release_page_clean(struct page *page) kvm_release_page_clean(page); } +static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu); static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); -static u64 construct_eptp(unsigned long root_hpa); +static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa); static bool vmx_xsaves_supported(void); static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); static void vmx_set_segment(struct kvm_vcpu *vcpu, @@ -2772,7 +2773,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) if (enable_ept_ad_bits) { vmx->nested.nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_PML; - vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT; + vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT; } } else vmx->nested.nested_vmx_ept_caps = 0; @@ -3198,7 +3199,8 @@ static int vmx_get_msr(struct 
kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); break; case MSR_IA32_BNDCFGS: - if (!kvm_mpx_supported()) + if (!kvm_mpx_supported() || + (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu))) return 1; msr_info->data = vmcs_read64(GUEST_BNDCFGS); break; @@ -3280,7 +3282,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vmcs_writel(GUEST_SYSENTER_ESP, data); break; case MSR_IA32_BNDCFGS: - if (!kvm_mpx_supported()) + if (!kvm_mpx_supported() || + (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu))) + return 1; + if (is_noncanonical_address(data & PAGE_MASK) || + (data & MSR_IA32_BNDCFGS_RSVD)) return 1; vmcs_write64(GUEST_BNDCFGS, data); break; @@ -4013,7 +4019,7 @@ static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid) if (enable_ept) { if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) return; - ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); + ept_sync_context(construct_eptp(vcpu, vcpu->arch.mmu.root_hpa)); } else { vpid_sync_context(vpid); } @@ -4188,14 +4194,15 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) vmx->emulation_required = emulation_required(vcpu); } -static u64 construct_eptp(unsigned long root_hpa) +static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa) { u64 eptp; /* TODO write the value reading from MSR */ eptp = VMX_EPT_DEFAULT_MT | VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT; - if (enable_ept_ad_bits) + if (enable_ept_ad_bits && + (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu))) eptp |= VMX_EPT_AD_ENABLE_BIT; eptp |= (root_hpa & PAGE_MASK); @@ -4209,7 +4216,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) guest_cr3 = cr3; if (enable_ept) { - eptp = construct_eptp(cr3); + eptp = construct_eptp(vcpu, cr3); vmcs_write64(EPT_POINTER, eptp); if (is_paging(vcpu) || is_guest_mode(vcpu)) guest_cr3 = kvm_read_cr3(vcpu); @@ -5170,7 +5177,8 @@ static void ept_set_mmio_spte_mask(void) * EPT 
Misconfigurations can be generated if the value of bits 2:0 * of an EPT paging-structure entry is 110b (write/execute). */ - kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE); + kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK, + VMX_EPT_MISCONFIG_WX_VALUE); } #define VMX_XSS_EXIT_BITMAP 0 @@ -6220,17 +6228,6 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - if (is_guest_mode(vcpu) - && !(exit_qualification & EPT_VIOLATION_GVA_TRANSLATED)) { - /* - * Fix up exit_qualification according to whether guest - * page table accesses are reads or writes. - */ - u64 eptp = nested_ept_get_cr3(vcpu); - if (!(eptp & VMX_EPT_AD_ENABLE_BIT)) - exit_qualification &= ~EPT_VIOLATION_ACC_WRITE; - } - /* * EPT violation happened while executing iret from NMI, * "blocked by NMI" bit has to be set before next VM entry. @@ -6453,7 +6450,7 @@ void vmx_enable_tdp(void) enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull, 0ull, VMX_EPT_EXECUTABLE_MASK, cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK, - enable_ept_ad_bits ? 
0ull : VMX_EPT_RWX_MASK); + VMX_EPT_RWX_MASK); ept_set_mmio_spte_mask(); kvm_enable_tdp(); @@ -6557,7 +6554,6 @@ static __init int hardware_setup(void) vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); - vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true); memcpy(vmx_msr_bitmap_legacy_x2apic_apicv, vmx_msr_bitmap_legacy, PAGE_SIZE); @@ -7661,7 +7657,10 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) unsigned long type, types; gva_t gva; struct x86_exception e; - int vpid; + struct { + u64 vpid; + u64 gla; + } operand; if (!(vmx->nested.nested_vmx_secondary_ctls_high & SECONDARY_EXEC_ENABLE_VPID) || @@ -7691,17 +7690,28 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), vmx_instruction_info, false, &gva)) return 1; - if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vpid, - sizeof(u32), &e)) { + if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand, + sizeof(operand), &e)) { kvm_inject_page_fault(vcpu, &e); return 1; } + if (operand.vpid >> 16) { + nested_vmx_failValid(vcpu, + VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); + return kvm_skip_emulated_instruction(vcpu); + } switch (type) { case VMX_VPID_EXTENT_INDIVIDUAL_ADDR: + if (is_noncanonical_address(operand.gla)) { + nested_vmx_failValid(vcpu, + VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); + return kvm_skip_emulated_instruction(vcpu); + } + /* fall through */ case VMX_VPID_EXTENT_SINGLE_CONTEXT: case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL: - if (!vpid) { + if (!operand.vpid) { nested_vmx_failValid(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); return kvm_skip_emulated_instruction(vcpu); @@ -9394,6 +9404,11 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, vmcs12->guest_physical_address = fault->address; } +static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu) +{ + return 
nested_ept_get_cr3(vcpu) & VMX_EPT_AD_ENABLE_BIT; +} + /* Callbacks for nested_ept_init_mmu_context: */ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu) @@ -9404,18 +9419,18 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu) static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) { - u64 eptp; + bool wants_ad; WARN_ON(mmu_is_nested(vcpu)); - eptp = nested_ept_get_cr3(vcpu); - if ((eptp & VMX_EPT_AD_ENABLE_BIT) && !enable_ept_ad_bits) + wants_ad = nested_ept_ad_enabled(vcpu); + if (wants_ad && !enable_ept_ad_bits) return 1; kvm_mmu_unload(vcpu); kvm_init_shadow_ept_mmu(vcpu, to_vmx(vcpu)->nested.nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT, - eptp & VMX_EPT_AD_ENABLE_BIT); + wants_ad); vcpu->arch.mmu.set_cr3 = vmx_set_cr3; vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3; vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; @@ -10728,8 +10743,7 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3); } - if (nested_cpu_has_ept(vmcs12)) - vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS); + vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS); if (nested_cpu_has_vid(vmcs12)) vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS); @@ -10754,8 +10768,6 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); if (kvm_mpx_supported()) vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); - if (nested_cpu_has_xsaves(vmcs12)) - vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP); } /* @@ -11152,7 +11164,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc) vmx->hv_deadline_tsc = tscl + delta_tsc; vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL, PIN_BASED_VMX_PREEMPTION_TIMER); - return 0; + + return delta_tsc == 0; } static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 
0e846f0cb83b..6c7266f7766d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2841,10 +2841,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_vcpu_write_tsc_offset(vcpu, offset); vcpu->arch.tsc_catchup = 1; } - if (kvm_lapic_hv_timer_in_use(vcpu) && - kvm_x86_ops->set_hv_timer(vcpu, - kvm_get_lapic_target_expiration_tsc(vcpu))) - kvm_lapic_switch_to_sw_timer(vcpu); + + if (kvm_lapic_hv_timer_in_use(vcpu)) + kvm_lapic_restart_hv_timer(vcpu); + /* * On a host with synchronized TSC, there is no need to update * kvmclock on vcpu->cpu migration @@ -6011,7 +6011,7 @@ static void kvm_set_mmio_spte_mask(void) mask &= ~1ull; #endif - kvm_mmu_set_mmio_spte_mask(mask); + kvm_mmu_set_mmio_spte_mask(mask, mask); } #ifdef CONFIG_X86_64 @@ -6733,7 +6733,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) bool req_immediate_exit = false; - if (vcpu->requests) { + if (kvm_request_pending(vcpu)) { if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) kvm_mmu_unload(vcpu); if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) @@ -6897,7 +6897,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_x86_ops->sync_pir_to_irr(vcpu); } - if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests + if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) || need_resched() || signal_pending(current)) { vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c index ec008e800b45..53d600217973 100644 --- a/arch/x86/pci/sta2x11-fixup.c +++ b/arch/x86/pci/sta2x11-fixup.c @@ -26,6 +26,7 @@ #include <linux/pci_ids.h> #include <linux/export.h> #include <linux/list.h> +#include <asm/iommu.h> #define STA2X11_SWIOTLB_SIZE (4*1024*1024) extern int swiotlb_late_init_with_default_size(size_t default_size); @@ -191,7 +192,7 @@ static const struct dma_map_ops sta2x11_dma_ops = { .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, .sync_sg_for_device = swiotlb_sync_sg_for_device, .mapping_error = swiotlb_dma_mapping_error, - 
.dma_supported = NULL, /* FIXME: we should use this instead! */ + .dma_supported = x86_dma_supported, }; /* At setup time, we use our own ops if the device is a ConneXt one */ diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a5ffcbb20cc0..0e7ef69e8531 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -106,15 +106,83 @@ int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int), return rc >= 0 ? 0 : rc; } -static void clamp_max_cpus(void) +static int xen_vcpu_setup_restore(int cpu) { -#ifdef CONFIG_SMP - if (setup_max_cpus > MAX_VIRT_CPUS) - setup_max_cpus = MAX_VIRT_CPUS; -#endif + int rc = 0; + + /* Any per_cpu(xen_vcpu) is stale, so reset it */ + xen_vcpu_info_reset(cpu); + + /* + * For PVH and PVHVM, setup online VCPUs only. The rest will + * be handled by hotplug. + */ + if (xen_pv_domain() || + (xen_hvm_domain() && cpu_online(cpu))) { + rc = xen_vcpu_setup(cpu); + } + + return rc; +} + +/* + * On restore, set the vcpu placement up again. + * If it fails, then we're in a bad state, since + * we can't back out from using it... + */ +void xen_vcpu_restore(void) +{ + int cpu, rc; + + for_each_possible_cpu(cpu) { + bool other_cpu = (cpu != smp_processor_id()); + bool is_up; + + if (xen_vcpu_nr(cpu) == XEN_VCPU_ID_INVALID) + continue; + + /* Only Xen 4.5 and higher support this. */ + is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, + xen_vcpu_nr(cpu), NULL) > 0; + + if (other_cpu && is_up && + HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL)) + BUG(); + + if (xen_pv_domain() || xen_feature(XENFEAT_hvm_safe_pvclock)) + xen_setup_runstate_info(cpu); + + rc = xen_vcpu_setup_restore(cpu); + if (rc) + pr_emerg_once("vcpu restore failed for cpu=%d err=%d. " + "System will hang.\n", cpu, rc); + /* + * In case xen_vcpu_setup_restore() fails, do not bring up the + * VCPU. This helps us avoid the resulting OOPS when the VCPU + * accesses pvclock_vcpu_time via xen_vcpu (which is NULL.) 
+ * Note that this does not improve the situation much -- now the + * VM hangs instead of OOPSing -- with the VCPUs that did not + * fail, spinning in stop_machine(), waiting for the failed + * VCPUs to come up. + */ + if (other_cpu && is_up && (rc == 0) && + HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL)) + BUG(); + } } -void xen_vcpu_setup(int cpu) +void xen_vcpu_info_reset(int cpu) +{ + if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS) { + per_cpu(xen_vcpu, cpu) = + &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)]; + } else { + /* Set to NULL so that if somebody accesses it we get an OOPS */ + per_cpu(xen_vcpu, cpu) = NULL; + } +} + +int xen_vcpu_setup(int cpu) { struct vcpu_register_vcpu_info info; int err; @@ -123,11 +191,11 @@ void xen_vcpu_setup(int cpu) BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); /* - * This path is called twice on PVHVM - first during bootup via - * smp_init -> xen_hvm_cpu_notify, and then if the VCPU is being - * hotplugged: cpu_up -> xen_hvm_cpu_notify. - * As we can only do the VCPUOP_register_vcpu_info once lets - * not over-write its result. + * This path is called on PVHVM at bootup (xen_hvm_smp_prepare_boot_cpu) + * and at restore (xen_vcpu_restore). Also called for hotplugged + * VCPUs (cpu_init -> xen_hvm_cpu_prepare_hvm). + * However, the hypercall can only be done once (see below) so if a VCPU + * is offlined and comes back online then let's not redo the hypercall. * * For PV it is called during restore (xen_vcpu_restore) and bootup * (xen_setup_vcpu_info_placement). 
The hotplug mechanism does not @@ -135,42 +203,44 @@ void xen_vcpu_setup(int cpu) */ if (xen_hvm_domain()) { if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu)) - return; + return 0; } - if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS) - per_cpu(xen_vcpu, cpu) = - &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)]; - if (!xen_have_vcpu_info_placement) { - if (cpu >= MAX_VIRT_CPUS) - clamp_max_cpus(); - return; + if (xen_have_vcpu_info_placement) { + vcpup = &per_cpu(xen_vcpu_info, cpu); + info.mfn = arbitrary_virt_to_mfn(vcpup); + info.offset = offset_in_page(vcpup); + + /* + * Check to see if the hypervisor will put the vcpu_info + * structure where we want it, which allows direct access via + * a percpu-variable. + * N.B. This hypercall can _only_ be called once per CPU. + * Subsequent calls will error out with -EINVAL. This is due to + * the fact that hypervisor has no unregister variant and this + * hypercall does not allow to over-write info.mfn and + * info.offset. + */ + err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, + xen_vcpu_nr(cpu), &info); + + if (err) { + pr_warn_once("register_vcpu_info failed: cpu=%d err=%d\n", + cpu, err); + xen_have_vcpu_info_placement = 0; + } else { + /* + * This cpu is using the registered vcpu info, even if + * later ones fail to. + */ + per_cpu(xen_vcpu, cpu) = vcpup; + } } - vcpup = &per_cpu(xen_vcpu_info, cpu); - info.mfn = arbitrary_virt_to_mfn(vcpup); - info.offset = offset_in_page(vcpup); - - /* Check to see if the hypervisor will put the vcpu_info - structure where we want it, which allows direct access via - a percpu-variable. - N.B. This hypercall can _only_ be called once per CPU. Subsequent - calls will error out with -EINVAL. This is due to the fact that - hypervisor has no unregister variant and this hypercall does not - allow to over-write info.mfn and info.offset. 
- */ - err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu), - &info); + if (!xen_have_vcpu_info_placement) + xen_vcpu_info_reset(cpu); - if (err) { - printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err); - xen_have_vcpu_info_placement = 0; - clamp_max_cpus(); - } else { - /* This cpu is using the registered vcpu info, even if - later ones fail to. */ - per_cpu(xen_vcpu, cpu) = vcpup; - } + return ((per_cpu(xen_vcpu, cpu) == NULL) ? -ENODEV : 0); } void xen_reboot(int reason) diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index a6d014f47e52..87d791356ea9 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -1,5 +1,6 @@ #include <linux/cpu.h> #include <linux/kexec.h> +#include <linux/memblock.h> #include <xen/features.h> #include <xen/events.h> @@ -10,9 +11,11 @@ #include <asm/reboot.h> #include <asm/setup.h> #include <asm/hypervisor.h> +#include <asm/e820/api.h> #include <asm/xen/cpuid.h> #include <asm/xen/hypervisor.h> +#include <asm/xen/page.h> #include "xen-ops.h" #include "mmu.h" @@ -20,37 +23,34 @@ void __ref xen_hvm_init_shared_info(void) { - int cpu; struct xen_add_to_physmap xatp; - static struct shared_info *shared_info_page; + u64 pa; + + if (HYPERVISOR_shared_info == &xen_dummy_shared_info) { + /* + * Search for a free page starting at 4kB physical address. + * Low memory is preferred to avoid an EPT large page split up + * by the mapping. + * Starting below X86_RESERVE_LOW (usually 64kB) is fine as + * the BIOS used for HVM guests is well behaved and won't + * clobber memory other than the first 4kB. 
+ */ + for (pa = PAGE_SIZE; + !e820__mapped_all(pa, pa + PAGE_SIZE, E820_TYPE_RAM) || + memblock_is_reserved(pa); + pa += PAGE_SIZE) + ; + + memblock_reserve(pa, PAGE_SIZE); + HYPERVISOR_shared_info = __va(pa); + } - if (!shared_info_page) - shared_info_page = (struct shared_info *) - extend_brk(PAGE_SIZE, PAGE_SIZE); xatp.domid = DOMID_SELF; xatp.idx = 0; xatp.space = XENMAPSPACE_shared_info; - xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; + xatp.gpfn = virt_to_pfn(HYPERVISOR_shared_info); if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) BUG(); - - HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; - - /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info - * page, we use it in the event channel upcall and in some pvclock - * related functions. We don't need the vcpu_info placement - * optimizations because we don't use any pv_mmu or pv_irq op on - * HVM. - * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is - * online but xen_hvm_init_shared_info is run at resume time too and - * in that case multiple vcpus might be online. */ - for_each_online_cpu(cpu) { - /* Leave it to be NULL. 
*/ - if (xen_vcpu_nr(cpu) >= MAX_VIRT_CPUS) - continue; - per_cpu(xen_vcpu, cpu) = - &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)]; - } } static void __init init_hvm_pv_info(void) @@ -106,7 +106,7 @@ static void xen_hvm_crash_shutdown(struct pt_regs *regs) static int xen_cpu_up_prepare_hvm(unsigned int cpu) { - int rc; + int rc = 0; /* * This can happen if CPU was offlined earlier and @@ -121,7 +121,9 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu) per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu); else per_cpu(xen_vcpu_id, cpu) = cpu; - xen_vcpu_setup(cpu); + rc = xen_vcpu_setup(cpu); + if (rc) + return rc; if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock)) xen_setup_timer(cpu); @@ -130,9 +132,8 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu) if (rc) { WARN(1, "xen_smp_intr_init() for CPU %d failed: %d\n", cpu, rc); - return rc; } - return 0; + return rc; } static int xen_cpu_dead_hvm(unsigned int cpu) @@ -154,6 +155,13 @@ static void __init xen_hvm_guest_init(void) xen_hvm_init_shared_info(); + /* + * xen_vcpu is a pointer to the vcpu_info struct in the shared_info + * page, we use it in the event channel upcall and in some pvclock + * related functions. + */ + xen_vcpu_info_reset(0); + xen_panic_handler_init(); if (xen_feature(XENFEAT_hvm_callback_vector)) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index f33eef4ebd12..811e4ddb3f37 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -89,8 +89,6 @@ void *xen_initial_gdt; -RESERVE_BRK(shared_info_page_brk, PAGE_SIZE); - static int xen_cpu_up_prepare_pv(unsigned int cpu); static int xen_cpu_dead_pv(unsigned int cpu); @@ -107,35 +105,6 @@ struct tls_descs { */ static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc); -/* - * On restore, set the vcpu placement up again. - * If it fails, then we're in a bad state, since - * we can't back out from using it... 
- */ -void xen_vcpu_restore(void) -{ - int cpu; - - for_each_possible_cpu(cpu) { - bool other_cpu = (cpu != smp_processor_id()); - bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu), - NULL); - - if (other_cpu && is_up && - HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL)) - BUG(); - - xen_setup_runstate_info(cpu); - - if (xen_have_vcpu_info_placement) - xen_vcpu_setup(cpu); - - if (other_cpu && is_up && - HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL)) - BUG(); - } -} - static void __init xen_banner(void) { unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL); @@ -960,30 +929,43 @@ void xen_setup_shared_info(void) HYPERVISOR_shared_info = (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); -#ifndef CONFIG_SMP - /* In UP this is as good a place as any to set up shared info */ - xen_setup_vcpu_info_placement(); -#endif - xen_setup_mfn_list_list(); - /* - * Now that shared info is set up we can start using routines that - * point to pvclock area. - */ - if (system_state == SYSTEM_BOOTING) + if (system_state == SYSTEM_BOOTING) { +#ifndef CONFIG_SMP + /* + * In UP this is as good a place as any to set up shared info. + * Limit this to boot only, at restore vcpu setup is done via + * xen_vcpu_restore(). + */ + xen_setup_vcpu_info_placement(); +#endif + /* + * Now that shared info is set up we can start using routines + * that point to pvclock area. + */ xen_init_time_ops(); + } } /* This is called once we have the cpu_possible_mask */ -void xen_setup_vcpu_info_placement(void) +void __ref xen_setup_vcpu_info_placement(void) { int cpu; for_each_possible_cpu(cpu) { /* Set up direct vCPU id mapping for PV guests. */ per_cpu(xen_vcpu_id, cpu) = cpu; - xen_vcpu_setup(cpu); + + /* + * xen_vcpu_setup(cpu) can fail -- in which case it + * falls back to the shared_info version for cpus + * where xen_vcpu_nr(cpu) < MAX_VIRT_CPUS. + * + * xen_cpu_up_prepare_pv() handles the rest by failing + * them in hotplug. 
+ */ + (void) xen_vcpu_setup(cpu); } /* @@ -1332,9 +1314,17 @@ asmlinkage __visible void __init xen_start_kernel(void) */ acpi_numa = -1; #endif - /* Don't do the full vcpu_info placement stuff until we have a - possible map and a non-dummy shared_info. */ - per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; + /* Let's presume PV guests always boot on vCPU with id 0. */ + per_cpu(xen_vcpu_id, 0) = 0; + + /* + * Setup xen_vcpu early because start_kernel needs it for + * local_irq_disable(), irqs_disabled(). + * + * Don't do the full vcpu_info placement stuff until we have + * the cpu_possible_mask and a non-dummy shared_info. + */ + xen_vcpu_info_reset(0); WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv)); @@ -1431,9 +1421,7 @@ asmlinkage __visible void __init xen_start_kernel(void) #endif xen_raw_console_write("about to get started...\n"); - /* Let's presume PV guests always boot on vCPU with id 0. */ - per_cpu(xen_vcpu_id, 0) = 0; - + /* We need this for printk timestamps */ xen_setup_runstate_info(0); xen_efi_init(); @@ -1451,6 +1439,9 @@ static int xen_cpu_up_prepare_pv(unsigned int cpu) { int rc; + if (per_cpu(xen_vcpu, cpu) == NULL) + return -ENODEV; + xen_setup_timer(cpu); rc = xen_smp_intr_init(cpu); diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index 42b08f8fc2ca..37c6056a7bba 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -18,20 +18,6 @@ int xen_swiotlb __read_mostly; -static const struct dma_map_ops xen_swiotlb_dma_ops = { - .alloc = xen_swiotlb_alloc_coherent, - .free = xen_swiotlb_free_coherent, - .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu, - .sync_single_for_device = xen_swiotlb_sync_single_for_device, - .sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu, - .sync_sg_for_device = xen_swiotlb_sync_sg_for_device, - .map_sg = xen_swiotlb_map_sg_attrs, - .unmap_sg = xen_swiotlb_unmap_sg_attrs, - .map_page = xen_swiotlb_map_page, - .unmap_page = 
xen_swiotlb_unmap_page, - .dma_supported = xen_swiotlb_dma_supported, -}; - /* * pci_xen_swiotlb_detect - set xen_swiotlb to 1 if necessary * diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index a5bf7c451435..c81046323ebc 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -499,7 +499,7 @@ static unsigned long __init xen_foreach_remap_area(unsigned long nr_pages, void __init xen_remap_memory(void) { unsigned long buf = (unsigned long)&xen_remap_buf; - unsigned long mfn_save, mfn, pfn; + unsigned long mfn_save, pfn; unsigned long remapped = 0; unsigned int i; unsigned long pfn_s = ~0UL; @@ -515,8 +515,7 @@ void __init xen_remap_memory(void) pfn = xen_remap_buf.target_pfn; for (i = 0; i < xen_remap_buf.size; i++) { - mfn = xen_remap_buf.mfns[i]; - xen_update_mem_tables(pfn, mfn); + xen_update_mem_tables(pfn, xen_remap_buf.mfns[i]); remapped++; pfn++; } @@ -530,8 +529,6 @@ void __init xen_remap_memory(void) pfn_s = xen_remap_buf.target_pfn; len = xen_remap_buf.size; } - - mfn = xen_remap_mfn; xen_remap_mfn = xen_remap_buf.next_area_mfn; } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 82ac611f2fc1..e7f02eb73727 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -1,4 +1,5 @@ #include <linux/smp.h> +#include <linux/cpu.h> #include <linux/slab.h> #include <linux/cpumask.h> #include <linux/percpu.h> @@ -114,6 +115,36 @@ int xen_smp_intr_init(unsigned int cpu) return rc; } +void __init xen_smp_cpus_done(unsigned int max_cpus) +{ + int cpu, rc, count = 0; + + if (xen_hvm_domain()) + native_smp_cpus_done(max_cpus); + + if (xen_have_vcpu_info_placement) + return; + + for_each_online_cpu(cpu) { + if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS) + continue; + + rc = cpu_down(cpu); + + if (rc == 0) { + /* + * Reset vcpu_info so this cpu cannot be onlined again. 
+ */ + xen_vcpu_info_reset(cpu); + count++; + } else { + pr_warn("%s: failed to bring CPU %d down, error %d\n", + __func__, cpu, rc); + } + } + WARN(count, "%s: brought %d CPUs offline\n", __func__, count); +} + void xen_smp_send_reschedule(int cpu) { xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h index 8ebb6acca64a..87d3c76cba37 100644 --- a/arch/x86/xen/smp.h +++ b/arch/x86/xen/smp.h @@ -14,6 +14,8 @@ extern void xen_smp_intr_free(unsigned int cpu); int xen_smp_intr_init_pv(unsigned int cpu); void xen_smp_intr_free_pv(unsigned int cpu); +void xen_smp_cpus_done(unsigned int max_cpus); + void xen_smp_send_reschedule(int cpu); void xen_smp_send_call_function_ipi(const struct cpumask *mask); void xen_smp_send_call_function_single_ipi(int cpu); diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c index f18561bbf5c9..fd60abedf658 100644 --- a/arch/x86/xen/smp_hvm.c +++ b/arch/x86/xen/smp_hvm.c @@ -12,7 +12,8 @@ static void __init xen_hvm_smp_prepare_boot_cpu(void) native_smp_prepare_boot_cpu(); /* - * Setup vcpu_info for boot CPU. + * Setup vcpu_info for boot CPU. Secondary CPUs get their vcpu_info + * in xen_cpu_up_prepare_hvm(). 
*/ xen_vcpu_setup(0); @@ -27,10 +28,20 @@ static void __init xen_hvm_smp_prepare_boot_cpu(void) static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) { + int cpu; + native_smp_prepare_cpus(max_cpus); WARN_ON(xen_smp_intr_init(0)); xen_init_lock_cpu(0); + + for_each_possible_cpu(cpu) { + if (cpu == 0) + continue; + + /* Set default vcpu_id to make sure that we don't use cpu-0's */ + per_cpu(xen_vcpu_id, cpu) = XEN_VCPU_ID_INVALID; + } } #ifdef CONFIG_HOTPLUG_CPU @@ -60,4 +71,5 @@ void __init xen_hvm_smp_init(void) smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi; smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi; smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu; + smp_ops.smp_cpus_done = xen_smp_cpus_done; } diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index aae32535f4ec..1ea598e5f030 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -371,10 +371,6 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle) return 0; } -static void xen_pv_smp_cpus_done(unsigned int max_cpus) -{ -} - #ifdef CONFIG_HOTPLUG_CPU static int xen_pv_cpu_disable(void) { @@ -469,7 +465,7 @@ static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id) static const struct smp_ops xen_smp_ops __initconst = { .smp_prepare_boot_cpu = xen_pv_smp_prepare_boot_cpu, .smp_prepare_cpus = xen_pv_smp_prepare_cpus, - .smp_cpus_done = xen_pv_smp_cpus_done, + .smp_cpus_done = xen_smp_cpus_done, .cpu_up = xen_pv_cpu_up, .cpu_die = xen_pv_cpu_die, diff --git a/arch/x86/xen/suspend_hvm.c b/arch/x86/xen/suspend_hvm.c index 01afcadde50a..484999416d8b 100644 --- a/arch/x86/xen/suspend_hvm.c +++ b/arch/x86/xen/suspend_hvm.c @@ -8,15 +8,10 @@ void xen_hvm_post_suspend(int suspend_cancelled) { - int cpu; - - if (!suspend_cancelled) + if (!suspend_cancelled) { xen_hvm_init_shared_info(); + xen_vcpu_restore(); + } xen_callback_vector(); xen_unplug_emulated_devices(); - if (xen_feature(XENFEAT_hvm_safe_pvclock)) { - 
for_each_online_cpu(cpu) { - xen_setup_runstate_info(cpu); - } - } } diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9a440a42c618..0d5004477db6 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -78,7 +78,8 @@ bool xen_vcpu_stolen(int vcpu); extern int xen_have_vcpu_info_placement; -void xen_vcpu_setup(int cpu); +int xen_vcpu_setup(int cpu); +void xen_vcpu_info_reset(int cpu); void xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index f4126cf997a4..7ad6d77b2f22 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -3,6 +3,7 @@ config ZONE_DMA config XTENSA def_bool y + select ARCH_NO_COHERENT_DMA_MMAP if !MMU select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT diff --git a/arch/xtensa/include/asm/dma-mapping.h b/arch/xtensa/include/asm/dma-mapping.h index c6140fa8c0be..269738dc9d1d 100644 --- a/arch/xtensa/include/asm/dma-mapping.h +++ b/arch/xtensa/include/asm/dma-mapping.h @@ -16,8 +16,6 @@ #include <linux/mm.h> #include <linux/scatterlist.h> -#define DMA_ERROR_CODE (~(dma_addr_t)0x0) - extern const struct dma_map_ops xtensa_dma_map_ops; static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h index 2e7bac0d4b2c..b8f152b6aaa5 100644 --- a/arch/xtensa/include/asm/uaccess.h +++ b/arch/xtensa/include/asm/uaccess.h @@ -278,19 +278,15 @@ clear_user(void *addr, unsigned long size) extern long __strncpy_user(char *, const char *, long); -#define __strncpy_from_user __strncpy_user static inline long strncpy_from_user(char *dst, const char *src, long count) { if (access_ok(VERIFY_READ, src, 1)) - return __strncpy_from_user(dst, src, count); + return __strncpy_user(dst, src, count); return -EFAULT; } - -#define strlen_user(str) strnlen_user((str), TASK_SIZE - 1) - /* * Return the size of a string (including the ending 0!) 
*/ |