From d2db7773ba864df6b4e19643dfc54838550d8049 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 26 Sep 2018 17:32:37 +0100 Subject: kvm: arm/arm64: Fix stage2_flush_memslot for 4 level page table So far we have only supported 3 level page table with fixed IPA of 40bits, where PUD is folded. With 4 level page tables, we need to check if the PUD entry is valid or not. Fix stage2_flush_memslot() to do this check, before walking down the table. Acked-by: Christoffer Dall Acked-by: Marc Zyngier Reviewed-by: Eric Auger Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index ed162a6c57c5..ee7ce8fa4a12 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -412,7 +412,8 @@ static void stage2_flush_memslot(struct kvm *kvm, pgd = kvm->arch.pgd + stage2_pgd_index(addr); do { next = stage2_pgd_addr_end(addr, end); - stage2_flush_puds(kvm, pgd, addr, next); + if (!stage2_pgd_none(*pgd)) + stage2_flush_puds(kvm, pgd, addr, next); } while (pgd++, addr = next, addr != end); } -- cgit v1.2.3 From 7788a28062aca211979ecd2c334e06e2ef5281cc Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 26 Sep 2018 17:32:38 +0100 Subject: kvm: arm/arm64: Remove spurious WARN_ON On a 4-level page table pgd entry can be empty, unlike a 3-level page table. Remove the spurious WARN_ON() in stage_get_pud(). Acked-by: Christoffer Dall Acked-by: Marc Zyngier Reviewed-by: Eric Auger Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index ee7ce8fa4a12..4a285d760ce0 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1005,7 +1005,7 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache pud_t *pud; pgd = kvm->arch.pgd + stage2_pgd_index(addr); - if (WARN_ON(stage2_pgd_none(*pgd))) { + if (stage2_pgd_none(*pgd)) { if (!cache) return NULL; pud = mmu_memory_cache_alloc(cache); -- cgit v1.2.3 From 5b6c6742b5350a6fb5c631fb99a6bc046a62739c Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 26 Sep 2018 17:32:42 +0100 Subject: kvm: arm/arm64: Allow arch specific configurations for VM Allow the arch backends to perform VM specific initialisation. This will be later used to handle IPA size configuration and per-VM VTCR configuration on arm64. Cc: Marc Zyngier Cc: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_host.h | 7 +++++++ arch/arm64/include/asm/kvm_host.h | 2 ++ arch/arm64/kvm/reset.c | 7 +++++++ virt/kvm/arm/arm.c | 5 +++-- 4 files changed, 19 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 3ad482d2f1eb..72d46418e1ef 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -354,4 +354,11 @@ static inline void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) {} struct kvm *kvm_arch_alloc_vm(void); void kvm_arch_free_vm(struct kvm *kvm); +static inline int kvm_arm_config_vm(struct kvm *kvm, unsigned long type) +{ + if (type) + return -EINVAL; + return 0; +} + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 3d6d7336f871..b04280ae1be0 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -513,4 +513,6 @@ void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu); struct kvm *kvm_arch_alloc_vm(void); void kvm_arch_free_vm(struct kvm *kvm); +int kvm_arm_config_vm(struct kvm *kvm, unsigned long type); + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index e37c78bbe1ca..b0c07dab5cb3 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -133,3 +133,10 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) /* Reset timer */ return kvm_timer_vcpu_reset(vcpu); } + +int kvm_arm_config_vm(struct kvm *kvm, unsigned long type) +{ + if (type) + return -EINVAL; + return 0; +} diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index c92053bc3f96..327d0fd28380 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -120,8 +120,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int ret, cpu; - if (type) - return -EINVAL; + ret = kvm_arm_config_vm(kvm, type); + if (ret) + return ret; kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran)); if (!kvm->arch.last_vcpu_ran) -- cgit v1.2.3 From e55cac5bf2a9cc86b57a9533d6b9e5005bc19b5c Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 26 Sep 2018 17:32:44 +0100 Subject: kvm: arm/arm64: Prepare for VM specific stage2 translations Right now the stage2 page table for a VM is hard coded, assuming an IPA of 40bits. As we are about to add support for per VM IPA, prepare the stage2 page table helpers to accept the kvm instance to make the right decision for the VM. No functional changes. Adds stage2_pgd_size(kvm) to replace S2_PGD_SIZE. Also, moves some of the definitions in arm32 to align with the arm64. Also drop the _AC() specifier constants wherever possible. Cc: Christoffer Dall Acked-by: Marc Zyngier Reviewed-by: Eric Auger Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_arm.h | 3 +- arch/arm/include/asm/kvm_mmu.h | 13 +-- arch/arm/include/asm/stage2_pgtable.h | 54 +++++++----- arch/arm64/include/asm/kvm_mmu.h | 7 +- arch/arm64/include/asm/stage2_pgtable-nopmd.h | 18 ++-- arch/arm64/include/asm/stage2_pgtable-nopud.h | 16 ++-- arch/arm64/include/asm/stage2_pgtable.h | 58 +++++++------ virt/kvm/arm/arm.c | 2 +- virt/kvm/arm/mmu.c | 119 +++++++++++++------------- virt/kvm/arm/vgic/vgic-kvm-device.c | 2 +- 10 files changed, 158 insertions(+), 134 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 3ab8b3781bfe..c3f1f9b304b7 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -133,8 +133,7 @@ * space. */ #define KVM_PHYS_SHIFT (40) -#define KVM_PHYS_SIZE (_AC(1, ULL) << KVM_PHYS_SHIFT) -#define KVM_PHYS_MASK (KVM_PHYS_SIZE - _AC(1, ULL)) + #define PTRS_PER_S2_PGD (_AC(1, ULL) << (KVM_PHYS_SHIFT - 30)) /* Virtualization Translation Control Register (VTCR) bits */ diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 265ea9cf7df7..12ae5fbbcf01 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -35,16 +35,12 @@ addr; \ }) -/* - * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels. - */ -#define KVM_MMU_CACHE_MIN_PAGES 2 - #ifndef __ASSEMBLY__ #include #include #include +#include #include #include #include @@ -52,6 +48,13 @@ /* Ensure compatibility with arm64 */ #define VA_BITS 32 +#define kvm_phys_shift(kvm) KVM_PHYS_SHIFT +#define kvm_phys_size(kvm) (1ULL << kvm_phys_shift(kvm)) +#define kvm_phys_mask(kvm) (kvm_phys_size(kvm) - 1ULL) +#define kvm_vttbr_baddr_mask(kvm) VTTBR_BADDR_MASK + +#define stage2_pgd_size(kvm) (PTRS_PER_S2_PGD * sizeof(pgd_t)) + int create_hyp_mappings(void *from, void *to, pgprot_t prot); int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, void __iomem **kaddr, diff --git a/arch/arm/include/asm/stage2_pgtable.h b/arch/arm/include/asm/stage2_pgtable.h index 460d616bb2d6..f6a7ea805232 100644 --- a/arch/arm/include/asm/stage2_pgtable.h +++ b/arch/arm/include/asm/stage2_pgtable.h @@ -19,43 +19,53 @@ #ifndef __ARM_S2_PGTABLE_H_ #define __ARM_S2_PGTABLE_H_ -#define stage2_pgd_none(pgd) pgd_none(pgd) -#define stage2_pgd_clear(pgd) pgd_clear(pgd) -#define stage2_pgd_present(pgd) pgd_present(pgd) -#define stage2_pgd_populate(pgd, pud) pgd_populate(NULL, pgd, pud) -#define stage2_pud_offset(pgd, address) pud_offset(pgd, address) -#define stage2_pud_free(pud) pud_free(NULL, pud) - -#define stage2_pud_none(pud) pud_none(pud) -#define stage2_pud_clear(pud) pud_clear(pud) -#define stage2_pud_present(pud) pud_present(pud) -#define stage2_pud_populate(pud, pmd) pud_populate(NULL, pud, pmd) -#define stage2_pmd_offset(pud, address) pmd_offset(pud, address) -#define stage2_pmd_free(pmd) pmd_free(NULL, pmd) - -#define stage2_pud_huge(pud) pud_huge(pud) +/* + * kvm_mmu_cache_min_pages() is the number of pages required + * to install a stage-2 translation. We pre-allocate the entry + * level table at VM creation. Since we have a 3 level page-table, + * we need only two pages to add a new mapping. + */ +#define kvm_mmu_cache_min_pages(kvm) 2 + +#define stage2_pgd_none(kvm, pgd) pgd_none(pgd) +#define stage2_pgd_clear(kvm, pgd) pgd_clear(pgd) +#define stage2_pgd_present(kvm, pgd) pgd_present(pgd) +#define stage2_pgd_populate(kvm, pgd, pud) pgd_populate(NULL, pgd, pud) +#define stage2_pud_offset(kvm, pgd, address) pud_offset(pgd, address) +#define stage2_pud_free(kvm, pud) pud_free(NULL, pud) + +#define stage2_pud_none(kvm, pud) pud_none(pud) +#define stage2_pud_clear(kvm, pud) pud_clear(pud) +#define stage2_pud_present(kvm, pud) pud_present(pud) +#define stage2_pud_populate(kvm, pud, pmd) pud_populate(NULL, pud, pmd) +#define stage2_pmd_offset(kvm, pud, address) pmd_offset(pud, address) +#define stage2_pmd_free(kvm, pmd) pmd_free(NULL, pmd) + +#define stage2_pud_huge(kvm, pud) pud_huge(pud) /* Open coded p*d_addr_end that can deal with 64bit addresses */ -static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end) +static inline phys_addr_t +stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) { phys_addr_t boundary = (addr + PGDIR_SIZE) & PGDIR_MASK; return (boundary - 1 < end - 1) ? boundary : end; } -#define stage2_pud_addr_end(addr, end) (end) +#define stage2_pud_addr_end(kvm, addr, end) (end) -static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end) +static inline phys_addr_t +stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) { phys_addr_t boundary = (addr + PMD_SIZE) & PMD_MASK; return (boundary - 1 < end - 1) ? boundary : end; } -#define stage2_pgd_index(addr) pgd_index(addr) +#define stage2_pgd_index(kvm, addr) pgd_index(addr) -#define stage2_pte_table_empty(ptep) kvm_page_empty(ptep) -#define stage2_pmd_table_empty(pmdp) kvm_page_empty(pmdp) -#define stage2_pud_table_empty(pudp) false +#define stage2_pte_table_empty(kvm, ptep) kvm_page_empty(ptep) +#define stage2_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp) +#define stage2_pud_table_empty(kvm, pudp) false #endif /* __ARM_S2_PGTABLE_H_ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index d6fff7de5539..3a032066e52c 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -141,8 +141,11 @@ static inline unsigned long __kern_hyp_va(unsigned long v) * We currently only support a 40bit IPA. */ #define KVM_PHYS_SHIFT (40) -#define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT) -#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL) + +#define kvm_phys_shift(kvm) KVM_PHYS_SHIFT +#define kvm_phys_size(kvm) (_AC(1, ULL) << kvm_phys_shift(kvm)) +#define kvm_phys_mask(kvm) (kvm_phys_size(kvm) - _AC(1, ULL)) +#define kvm_vttbr_baddr_mask(kvm) VTTBR_BADDR_MASK #include diff --git a/arch/arm64/include/asm/stage2_pgtable-nopmd.h b/arch/arm64/include/asm/stage2_pgtable-nopmd.h index 2656a0fd05a6..0280dedbf75f 100644 --- a/arch/arm64/include/asm/stage2_pgtable-nopmd.h +++ b/arch/arm64/include/asm/stage2_pgtable-nopmd.h @@ -26,17 +26,17 @@ #define S2_PMD_SIZE (1UL << S2_PMD_SHIFT) #define S2_PMD_MASK (~(S2_PMD_SIZE-1)) -#define stage2_pud_none(pud) (0) -#define stage2_pud_present(pud) (1) -#define stage2_pud_clear(pud) do { } while (0) -#define stage2_pud_populate(pud, pmd) do { } while (0) -#define stage2_pmd_offset(pud, address) ((pmd_t *)(pud)) +#define stage2_pud_none(kvm, pud) (0) +#define stage2_pud_present(kvm, pud) (1) +#define stage2_pud_clear(kvm, pud) do { } while (0) +#define stage2_pud_populate(kvm, pud, pmd) do { } while (0) +#define stage2_pmd_offset(kvm, pud, address) ((pmd_t *)(pud)) -#define stage2_pmd_free(pmd) do { } while (0) +#define stage2_pmd_free(kvm, pmd) do { } while (0) -#define stage2_pmd_addr_end(addr, end) (end) +#define stage2_pmd_addr_end(kvm, addr, end) (end) -#define stage2_pud_huge(pud) (0) -#define stage2_pmd_table_empty(pmdp) (0) +#define stage2_pud_huge(kvm, pud) (0) +#define stage2_pmd_table_empty(kvm, pmdp) (0) #endif diff --git a/arch/arm64/include/asm/stage2_pgtable-nopud.h b/arch/arm64/include/asm/stage2_pgtable-nopud.h index 5ee87b54ebf3..cd6304e203be 100644 --- a/arch/arm64/include/asm/stage2_pgtable-nopud.h +++ b/arch/arm64/include/asm/stage2_pgtable-nopud.h @@ -24,16 +24,16 @@ #define S2_PUD_SIZE (_AC(1, UL) << S2_PUD_SHIFT) #define S2_PUD_MASK (~(S2_PUD_SIZE-1)) -#define stage2_pgd_none(pgd) (0) -#define stage2_pgd_present(pgd) (1) -#define stage2_pgd_clear(pgd) do { } while (0) -#define stage2_pgd_populate(pgd, pud) do { } while (0) +#define stage2_pgd_none(kvm, pgd) (0) +#define stage2_pgd_present(kvm, pgd) (1) +#define stage2_pgd_clear(kvm, pgd) do { } while (0) +#define stage2_pgd_populate(kvm, pgd, pud) do { } while (0) -#define stage2_pud_offset(pgd, address) ((pud_t *)(pgd)) +#define stage2_pud_offset(kvm, pgd, address) ((pud_t *)(pgd)) -#define stage2_pud_free(x) do { } while (0) +#define stage2_pud_free(kvm, x) do { } while (0) -#define stage2_pud_addr_end(addr, end) (end) -#define stage2_pud_table_empty(pmdp) (0) +#define stage2_pud_addr_end(kvm, addr, end) (end) +#define stage2_pud_table_empty(kvm, pmdp) (0) #endif diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h index 8b68099348e5..11891612be14 100644 --- a/arch/arm64/include/asm/stage2_pgtable.h +++ b/arch/arm64/include/asm/stage2_pgtable.h @@ -55,7 +55,7 @@ /* S2_PGDIR_SHIFT is the size mapped by top-level stage2 entry */ #define S2_PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - STAGE2_PGTABLE_LEVELS) -#define S2_PGDIR_SIZE (_AC(1, UL) << S2_PGDIR_SHIFT) +#define S2_PGDIR_SIZE (1UL << S2_PGDIR_SHIFT) #define S2_PGDIR_MASK (~(S2_PGDIR_SIZE - 1)) /* @@ -65,28 +65,30 @@ #define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - S2_PGDIR_SHIFT)) /* - * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation - * levels in addition to the PGD. + * kvm_mmmu_cache_min_pages() is the number of pages required to install + * a stage-2 translation. We pre-allocate the entry level page table at + * the VM creation. */ -#define KVM_MMU_CACHE_MIN_PAGES (STAGE2_PGTABLE_LEVELS - 1) +#define kvm_mmu_cache_min_pages(kvm) (STAGE2_PGTABLE_LEVELS - 1) #if STAGE2_PGTABLE_LEVELS > 3 #define S2_PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1) -#define S2_PUD_SIZE (_AC(1, UL) << S2_PUD_SHIFT) +#define S2_PUD_SIZE (1UL << S2_PUD_SHIFT) #define S2_PUD_MASK (~(S2_PUD_SIZE - 1)) -#define stage2_pgd_none(pgd) pgd_none(pgd) -#define stage2_pgd_clear(pgd) pgd_clear(pgd) -#define stage2_pgd_present(pgd) pgd_present(pgd) -#define stage2_pgd_populate(pgd, pud) pgd_populate(NULL, pgd, pud) -#define stage2_pud_offset(pgd, address) pud_offset(pgd, address) -#define stage2_pud_free(pud) pud_free(NULL, pud) +#define stage2_pgd_none(kvm, pgd) pgd_none(pgd) +#define stage2_pgd_clear(kvm, pgd) pgd_clear(pgd) +#define stage2_pgd_present(kvm, pgd) pgd_present(pgd) +#define stage2_pgd_populate(kvm, pgd, pud) pgd_populate(NULL, pgd, pud) +#define stage2_pud_offset(kvm, pgd, address) pud_offset(pgd, address) +#define stage2_pud_free(kvm, pud) pud_free(NULL, pud) -#define stage2_pud_table_empty(pudp) kvm_page_empty(pudp) +#define stage2_pud_table_empty(kvm, pudp) kvm_page_empty(pudp) -static inline phys_addr_t stage2_pud_addr_end(phys_addr_t addr, phys_addr_t end) +static inline phys_addr_t +stage2_pud_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) { phys_addr_t boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK; @@ -99,20 +101,21 @@ static inline phys_addr_t stage2_pud_addr_end(phys_addr_t addr, phys_addr_t end) #if STAGE2_PGTABLE_LEVELS > 2 #define S2_PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2) -#define S2_PMD_SIZE (_AC(1, UL) << S2_PMD_SHIFT) +#define S2_PMD_SIZE (1UL << S2_PMD_SHIFT) #define S2_PMD_MASK (~(S2_PMD_SIZE - 1)) -#define stage2_pud_none(pud) pud_none(pud) -#define stage2_pud_clear(pud) pud_clear(pud) -#define stage2_pud_present(pud) pud_present(pud) -#define stage2_pud_populate(pud, pmd) pud_populate(NULL, pud, pmd) -#define stage2_pmd_offset(pud, address) pmd_offset(pud, address) -#define stage2_pmd_free(pmd) pmd_free(NULL, pmd) +#define stage2_pud_none(kvm, pud) pud_none(pud) +#define stage2_pud_clear(kvm, pud) pud_clear(pud) +#define stage2_pud_present(kvm, pud) pud_present(pud) +#define stage2_pud_populate(kvm, pud, pmd) pud_populate(NULL, pud, pmd) +#define stage2_pmd_offset(kvm, pud, address) pmd_offset(pud, address) +#define stage2_pmd_free(kvm, pmd) pmd_free(NULL, pmd) -#define stage2_pud_huge(pud) pud_huge(pud) -#define stage2_pmd_table_empty(pmdp) kvm_page_empty(pmdp) +#define stage2_pud_huge(kvm, pud) pud_huge(pud) +#define stage2_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp) -static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end) +static inline phys_addr_t +stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) { phys_addr_t boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK; @@ -121,7 +124,7 @@ static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end) #endif /* STAGE2_PGTABLE_LEVELS > 2 */ -#define stage2_pte_table_empty(ptep) kvm_page_empty(ptep) +#define stage2_pte_table_empty(kvm, ptep) kvm_page_empty(ptep) #if STAGE2_PGTABLE_LEVELS == 2 #include @@ -129,10 +132,13 @@ static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end) #include #endif +#define stage2_pgd_size(kvm) (PTRS_PER_S2_PGD * sizeof(pgd_t)) -#define stage2_pgd_index(addr) (((addr) >> S2_PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1)) +#define stage2_pgd_index(kvm, addr) \ + (((addr) >> S2_PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1)) -static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end) +static inline phys_addr_t +stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) { phys_addr_t boundary = (addr + S2_PGDIR_SIZE) & S2_PGDIR_MASK; diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 327d0fd28380..43e716bc3f08 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -545,7 +545,7 @@ static void update_vttbr(struct kvm *kvm) /* update vttbr to be used with the new vmid */ pgd_phys = virt_to_phys(kvm->arch.pgd); - BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); + BUG_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm)); vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid; diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 4a285d760ce0..7e477b3cae5b 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -45,7 +45,6 @@ static phys_addr_t hyp_idmap_vector; static unsigned long io_map_base; -#define S2_PGD_SIZE (PTRS_PER_S2_PGD * sizeof(pgd_t)) #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) #define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0) @@ -150,20 +149,20 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) { - pud_t *pud_table __maybe_unused = stage2_pud_offset(pgd, 0UL); - stage2_pgd_clear(pgd); + pud_t *pud_table __maybe_unused = stage2_pud_offset(kvm, pgd, 0UL); + stage2_pgd_clear(kvm, pgd); kvm_tlb_flush_vmid_ipa(kvm, addr); - stage2_pud_free(pud_table); + stage2_pud_free(kvm, pud_table); put_page(virt_to_page(pgd)); } static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) { - pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(pud, 0); - VM_BUG_ON(stage2_pud_huge(*pud)); - stage2_pud_clear(pud); + pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(kvm, pud, 0); + VM_BUG_ON(stage2_pud_huge(kvm, *pud)); + stage2_pud_clear(kvm, pud); kvm_tlb_flush_vmid_ipa(kvm, addr); - stage2_pmd_free(pmd_table); + stage2_pmd_free(kvm, pmd_table); put_page(virt_to_page(pud)); } @@ -252,7 +251,7 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd, } } while (pte++, addr += PAGE_SIZE, addr != end); - if (stage2_pte_table_empty(start_pte)) + if (stage2_pte_table_empty(kvm, start_pte)) clear_stage2_pmd_entry(kvm, pmd, start_addr); } @@ -262,9 +261,9 @@ static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud, phys_addr_t next, start_addr = addr; pmd_t *pmd, *start_pmd; - start_pmd = pmd = stage2_pmd_offset(pud, addr); + start_pmd = pmd = stage2_pmd_offset(kvm, pud, addr); do { - next = stage2_pmd_addr_end(addr, end); + next = stage2_pmd_addr_end(kvm, addr, end); if (!pmd_none(*pmd)) { if (pmd_thp_or_huge(*pmd)) { pmd_t old_pmd = *pmd; @@ -281,7 +280,7 @@ static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud, } } while (pmd++, addr = next, addr != end); - if (stage2_pmd_table_empty(start_pmd)) + if (stage2_pmd_table_empty(kvm, start_pmd)) clear_stage2_pud_entry(kvm, pud, start_addr); } @@ -291,14 +290,14 @@ static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd, phys_addr_t next, start_addr = addr; pud_t *pud, *start_pud; - start_pud = pud = stage2_pud_offset(pgd, addr); + start_pud = pud = stage2_pud_offset(kvm, pgd, addr); do { - next = stage2_pud_addr_end(addr, end); - if (!stage2_pud_none(*pud)) { - if (stage2_pud_huge(*pud)) { + next = stage2_pud_addr_end(kvm, addr, end); + if (!stage2_pud_none(kvm, *pud)) { + if (stage2_pud_huge(kvm, *pud)) { pud_t old_pud = *pud; - stage2_pud_clear(pud); + stage2_pud_clear(kvm, pud); kvm_tlb_flush_vmid_ipa(kvm, addr); kvm_flush_dcache_pud(old_pud); put_page(virt_to_page(pud)); @@ -308,7 +307,7 @@ static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd, } } while (pud++, addr = next, addr != end); - if (stage2_pud_table_empty(start_pud)) + if (stage2_pud_table_empty(kvm, start_pud)) clear_stage2_pgd_entry(kvm, pgd, start_addr); } @@ -332,7 +331,7 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) assert_spin_locked(&kvm->mmu_lock); WARN_ON(size & ~PAGE_MASK); - pgd = kvm->arch.pgd + stage2_pgd_index(addr); + pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); do { /* * Make sure the page table is still active, as another thread @@ -341,8 +340,8 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) */ if (!READ_ONCE(kvm->arch.pgd)) break; - next = stage2_pgd_addr_end(addr, end); - if (!stage2_pgd_none(*pgd)) + next = stage2_pgd_addr_end(kvm, addr, end); + if (!stage2_pgd_none(kvm, *pgd)) unmap_stage2_puds(kvm, pgd, addr, next); /* * If the range is too large, release the kvm->mmu_lock @@ -371,9 +370,9 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, pmd_t *pmd; phys_addr_t next; - pmd = stage2_pmd_offset(pud, addr); + pmd = stage2_pmd_offset(kvm, pud, addr); do { - next = stage2_pmd_addr_end(addr, end); + next = stage2_pmd_addr_end(kvm, addr, end); if (!pmd_none(*pmd)) { if (pmd_thp_or_huge(*pmd)) kvm_flush_dcache_pmd(*pmd); @@ -389,11 +388,11 @@ static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd, pud_t *pud; phys_addr_t next; - pud = stage2_pud_offset(pgd, addr); + pud = stage2_pud_offset(kvm, pgd, addr); do { - next = stage2_pud_addr_end(addr, end); - if (!stage2_pud_none(*pud)) { - if (stage2_pud_huge(*pud)) + next = stage2_pud_addr_end(kvm, addr, end); + if (!stage2_pud_none(kvm, *pud)) { + if (stage2_pud_huge(kvm, *pud)) kvm_flush_dcache_pud(*pud); else stage2_flush_pmds(kvm, pud, addr, next); @@ -409,10 +408,10 @@ static void stage2_flush_memslot(struct kvm *kvm, phys_addr_t next; pgd_t *pgd; - pgd = kvm->arch.pgd + stage2_pgd_index(addr); + pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); do { - next = stage2_pgd_addr_end(addr, end); - if (!stage2_pgd_none(*pgd)) + next = stage2_pgd_addr_end(kvm, addr, end); + if (!stage2_pgd_none(kvm, *pgd)) stage2_flush_puds(kvm, pgd, addr, next); } while (pgd++, addr = next, addr != end); } @@ -898,7 +897,7 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) } /* Allocate the HW PGD, making sure that each page gets its own refcount */ - pgd = alloc_pages_exact(S2_PGD_SIZE, GFP_KERNEL | __GFP_ZERO); + pgd = alloc_pages_exact(stage2_pgd_size(kvm), GFP_KERNEL | __GFP_ZERO); if (!pgd) return -ENOMEM; @@ -987,7 +986,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm) spin_lock(&kvm->mmu_lock); if (kvm->arch.pgd) { - unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); + unmap_stage2_range(kvm, 0, kvm_phys_size(kvm)); pgd = READ_ONCE(kvm->arch.pgd); kvm->arch.pgd = NULL; } @@ -995,7 +994,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm) /* Free the HW pgd, one page at a time */ if (pgd) - free_pages_exact(pgd, S2_PGD_SIZE); + free_pages_exact(pgd, stage2_pgd_size(kvm)); } static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, @@ -1004,16 +1003,16 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache pgd_t *pgd; pud_t *pud; - pgd = kvm->arch.pgd + stage2_pgd_index(addr); - if (stage2_pgd_none(*pgd)) { + pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + if (stage2_pgd_none(kvm, *pgd)) { if (!cache) return NULL; pud = mmu_memory_cache_alloc(cache); - stage2_pgd_populate(pgd, pud); + stage2_pgd_populate(kvm, pgd, pud); get_page(virt_to_page(pgd)); } - return stage2_pud_offset(pgd, addr); + return stage2_pud_offset(kvm, pgd, addr); } static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, @@ -1026,15 +1025,15 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache if (!pud) return NULL; - if (stage2_pud_none(*pud)) { + if (stage2_pud_none(kvm, *pud)) { if (!cache) return NULL; pmd = mmu_memory_cache_alloc(cache); - stage2_pud_populate(pud, pmd); + stage2_pud_populate(kvm, pud, pmd); get_page(virt_to_page(pud)); } - return stage2_pmd_offset(pud, addr); + return stage2_pmd_offset(kvm, pud, addr); } static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache @@ -1208,8 +1207,9 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, if (writable) pte = kvm_s2pte_mkwrite(pte); - ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES, - KVM_NR_MEM_OBJS); + ret = mmu_topup_memory_cache(&cache, + kvm_mmu_cache_min_pages(kvm), + KVM_NR_MEM_OBJS); if (ret) goto out; spin_lock(&kvm->mmu_lock); @@ -1297,19 +1297,21 @@ static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end) /** * stage2_wp_pmds - write protect PUD range + * kvm: kvm instance for the VM * @pud: pointer to pud entry * @addr: range start address * @end: range end address */ -static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end) +static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud, + phys_addr_t addr, phys_addr_t end) { pmd_t *pmd; phys_addr_t next; - pmd = stage2_pmd_offset(pud, addr); + pmd = stage2_pmd_offset(kvm, pud, addr); do { - next = stage2_pmd_addr_end(addr, end); + next = stage2_pmd_addr_end(kvm, addr, end); if (!pmd_none(*pmd)) { if (pmd_thp_or_huge(*pmd)) { if (!kvm_s2pmd_readonly(pmd)) @@ -1329,18 +1331,19 @@ static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end) * * Process PUD entries, for a huge PUD we cause a panic. */ -static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end) +static void stage2_wp_puds(struct kvm *kvm, pgd_t *pgd, + phys_addr_t addr, phys_addr_t end) { pud_t *pud; phys_addr_t next; - pud = stage2_pud_offset(pgd, addr); + pud = stage2_pud_offset(kvm, pgd, addr); do { - next = stage2_pud_addr_end(addr, end); - if (!stage2_pud_none(*pud)) { + next = stage2_pud_addr_end(kvm, addr, end); + if (!stage2_pud_none(kvm, *pud)) { /* TODO:PUD not supported, revisit later if supported */ - BUG_ON(stage2_pud_huge(*pud)); - stage2_wp_pmds(pud, addr, next); + BUG_ON(stage2_pud_huge(kvm, *pud)); + stage2_wp_pmds(kvm, pud, addr, next); } } while (pud++, addr = next, addr != end); } @@ -1356,7 +1359,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) pgd_t *pgd; phys_addr_t next; - pgd = kvm->arch.pgd + stage2_pgd_index(addr); + pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); do { /* * Release kvm_mmu_lock periodically if the memory region is @@ -1370,9 +1373,9 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) cond_resched_lock(&kvm->mmu_lock); if (!READ_ONCE(kvm->arch.pgd)) break; - next = stage2_pgd_addr_end(addr, end); - if (stage2_pgd_present(*pgd)) - stage2_wp_puds(pgd, addr, next); + next = stage2_pgd_addr_end(kvm, addr, end); + if (stage2_pgd_present(kvm, *pgd)) + stage2_wp_puds(kvm, pgd, addr, next); } while (pgd++, addr = next, addr != end); } @@ -1521,7 +1524,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, up_read(¤t->mm->mmap_sem); /* We need minimum second+third level pages */ - ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES, + ret = mmu_topup_memory_cache(memcache, kvm_mmu_cache_min_pages(kvm), KVM_NR_MEM_OBJS); if (ret) return ret; @@ -1764,7 +1767,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) } /* Userspace should not be able to register out-of-bounds IPAs */ - VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE); + VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm)); if (fault_status == FSC_ACCESS) { handle_access_fault(vcpu, fault_ipa); @@ -2063,7 +2066,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, * space addressable by the KVM guest IPA space. */ if (memslot->base_gfn + memslot->npages >= - (KVM_PHYS_SIZE >> PAGE_SHIFT)) + (kvm_phys_size(kvm) >> PAGE_SHIFT)) return -EFAULT; down_read(¤t->mm->mmap_sem); diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c index 6ada2432e37c..114dce9f4bf5 100644 --- a/virt/kvm/arm/vgic/vgic-kvm-device.c +++ b/virt/kvm/arm/vgic/vgic-kvm-device.c @@ -25,7 +25,7 @@ int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, phys_addr_t addr, phys_addr_t alignment) { - if (addr & ~KVM_PHYS_MASK) + if (addr & ~kvm_phys_mask(kvm)) return -E2BIG; if (!IS_ALIGNED(addr, alignment)) -- cgit v1.2.3 From 8ad50c8985d805923f52a80698010a0a5123c07d Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Wed, 26 Sep 2018 17:32:50 +0100 Subject: vgic: Add support for 52bit guest physical address Add support for handling 52bit guest physical address to the VGIC layer. So far we have limited the guest physical address to 48bits, by explicitly masking the upper bits. This patch removes the restriction. We do not have to check if the host supports 52bit as the gpa is always validated during an access. (e.g, kvm_{read/write}_guest, kvm_is_visible_gfn()). Also, the ITS table save-restore is also not affected with the enhancement. The DTE entries already store the bits[51:8] of the ITT_addr (with a 256byte alignment). Cc: Marc Zyngier Cc: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Kristina Martsenko [ Macro clean ups, fix PROPBASER and PENDBASER accesses ] Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 5 +++++ virt/kvm/arm/vgic/vgic-its.c | 36 ++++++++++-------------------------- virt/kvm/arm/vgic/vgic-mmio-v3.c | 2 -- 3 files changed, 15 insertions(+), 28 deletions(-) (limited to 'virt') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 8bdbb5f29494..74b0aa9c7499 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -357,6 +357,8 @@ #define GITS_CBASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt) #define GITS_CBASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWb) +#define GITS_CBASER_ADDRESS(cbaser) ((cbaser) & GENMASK_ULL(51, 12)) + #define GITS_BASER_NR_REGS 8 #define GITS_BASER_VALID (1ULL << 63) @@ -388,6 +390,9 @@ #define GITS_BASER_ENTRY_SIZE_MASK GENMASK_ULL(52, 48) #define GITS_BASER_PHYS_52_to_48(phys) \ (((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12) +#define GITS_BASER_ADDR_48_to_52(baser) \ + (((baser) & GENMASK_ULL(47, 16)) | (((baser) >> 12) & 0xf) << 48) + #define GITS_BASER_SHAREABILITY_SHIFT (10) #define GITS_BASER_InnerShareable \ GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 12502251727e..eb2a390a6c86 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -241,13 +241,6 @@ static struct its_ite *find_ite(struct vgic_its *its, u32 device_id, list_for_each_entry(dev, &(its)->device_list, dev_list) \ list_for_each_entry(ite, &(dev)->itt_head, ite_list) -/* - * We only implement 48 bits of PA at the moment, although the ITS - * supports more. Let's be restrictive here. - */ -#define BASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 16)) -#define CBASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 12)) - #define GIC_LPI_OFFSET 8192 #define VITS_TYPER_IDBITS 16 @@ -759,6 +752,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, { int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; u64 indirect_ptr, type = GITS_BASER_TYPE(baser); + phys_addr_t base = GITS_BASER_ADDR_48_to_52(baser); int esz = GITS_BASER_ENTRY_SIZE(baser); int index; gfn_t gfn; @@ -783,7 +777,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, if (id >= (l1_tbl_size / esz)) return false; - addr = BASER_ADDRESS(baser) + id * esz; + addr = base + id * esz; gfn = addr >> PAGE_SHIFT; if (eaddr) @@ -798,7 +792,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, /* Each 1st level entry is represented by a 64-bit value. */ if (kvm_read_guest_lock(its->dev->kvm, - BASER_ADDRESS(baser) + index * sizeof(indirect_ptr), + base + index * sizeof(indirect_ptr), &indirect_ptr, sizeof(indirect_ptr))) return false; @@ -808,11 +802,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, if (!(indirect_ptr & BIT_ULL(63))) return false; - /* - * Mask the guest physical address and calculate the frame number. - * Any address beyond our supported 48 bits of PA will be caught - * by the actual check in the final step. - */ + /* Mask the guest physical address and calculate the frame number. */ indirect_ptr &= GENMASK_ULL(51, 16); /* Find the address of the actual entry */ @@ -1304,9 +1294,6 @@ static u64 vgic_sanitise_its_baser(u64 reg) GITS_BASER_OUTER_CACHEABILITY_SHIFT, vgic_sanitise_outer_cacheability); - /* Bits 15:12 contain bits 51:48 of the PA, which we don't support. */ - reg &= ~GENMASK_ULL(15, 12); - /* We support only one (ITS) page size: 64K */ reg = (reg & ~GITS_BASER_PAGE_SIZE_MASK) | GITS_BASER_PAGE_SIZE_64K; @@ -1325,11 +1312,8 @@ static u64 vgic_sanitise_its_cbaser(u64 reg) GITS_CBASER_OUTER_CACHEABILITY_SHIFT, vgic_sanitise_outer_cacheability); - /* - * Sanitise the physical address to be 64k aligned. - * Also limit the physical addresses to 48 bits. - */ - reg &= ~(GENMASK_ULL(51, 48) | GENMASK_ULL(15, 12)); + /* Sanitise the physical address to be 64k aligned. */ + reg &= ~GENMASK_ULL(15, 12); return reg; } @@ -1375,7 +1359,7 @@ static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its) if (!its->enabled) return; - cbaser = CBASER_ADDRESS(its->cbaser); + cbaser = GITS_CBASER_ADDRESS(its->cbaser); while (its->cwriter != its->creadr) { int ret = kvm_read_guest_lock(kvm, cbaser + its->creadr, @@ -2233,7 +2217,7 @@ static int vgic_its_restore_device_tables(struct vgic_its *its) if (!(baser & GITS_BASER_VALID)) return 0; - l1_gpa = BASER_ADDRESS(baser); + l1_gpa = GITS_BASER_ADDR_48_to_52(baser); if (baser & GITS_BASER_INDIRECT) { l1_esz = GITS_LVL1_ENTRY_SIZE; @@ -2305,7 +2289,7 @@ static int vgic_its_save_collection_table(struct vgic_its *its) { const struct vgic_its_abi *abi = vgic_its_get_abi(its); u64 baser = its->baser_coll_table; - gpa_t gpa = BASER_ADDRESS(baser); + gpa_t gpa = GITS_BASER_ADDR_48_to_52(baser); struct its_collection *collection; u64 val; size_t max_size, filled = 0; @@ -2354,7 +2338,7 @@ static int vgic_its_restore_collection_table(struct vgic_its *its) if (!(baser & GITS_BASER_VALID)) return 0; - gpa = BASER_ADDRESS(baser); + gpa = GITS_BASER_ADDR_48_to_52(baser); max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index a2a175b08b17..b3d1f0985117 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -364,7 +364,6 @@ static u64 vgic_sanitise_pendbaser(u64 reg) vgic_sanitise_outer_cacheability); reg &= ~PENDBASER_RES0_MASK; - reg &= ~GENMASK_ULL(51, 48); return reg; } @@ -382,7 +381,6 @@ static u64 vgic_sanitise_propbaser(u64 reg) vgic_sanitise_outer_cacheability); reg &= ~PROPBASER_RES0_MASK; - reg &= ~GENMASK_ULL(51, 48); return reg; } -- cgit v1.2.3 From 0f62f0e95be29200ab2ab98ca870e22c9b148dfa Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 26 Sep 2018 17:32:52 +0100 Subject: kvm: arm64: Set a limit on the IPA size So far we have restricted the IPA size of the VM to the default value (40bits). Now that we can manage the IPA size per VM and support dynamic stage2 page tables, we can allow VMs to have larger IPA. This patch introduces a the maximum IPA size supported on the host. This is decided by the following factors : 1) Maximum PARange supported by the CPUs - This can be inferred from the system wide safe value. 2) Maximum PA size supported by the host kernel (48 vs 52) 3) Number of levels in the host page table (as we base our stage2 tables on the host table helpers). Since the stage2 page table code is dependent on the stage1 page table, we always ensure that : Number of Levels at Stage1 >= Number of Levels at Stage2 So we limit the IPA to make sure that the above condition is satisfied. This will affect the following combinations of VA_BITS and IPA for different page sizes. Host configuration | Unsupported IPA ranges 39bit VA, 4K | [44, 48] 36bit VA, 16K | [41, 48] 42bit VA, 64K | [47, 52] Supporting the above combinations need independent stage2 page table manipulation code, which would need substantial changes. We could purse the solution independently and switch the page table code once we have it ready. Cc: Catalin Marinas Cc: Marc Zyngier Cc: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_mmu.h | 2 ++ arch/arm64/include/asm/kvm_host.h | 12 +++-------- arch/arm64/kvm/reset.c | 43 +++++++++++++++++++++++++++++++++++++++ virt/kvm/arm/arm.c | 2 ++ 4 files changed, 50 insertions(+), 9 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 12ae5fbbcf01..5ad1a54f98dc 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -358,6 +358,8 @@ static inline int hyp_map_aux_data(void) #define kvm_phys_to_vttbr(addr) (addr) +static inline void kvm_set_ipa_limit(void) {} + #endif /* !__ASSEMBLY__ */ #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 5ecd457bce7d..f008f8866b2a 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -442,15 +442,7 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); -static inline void __cpu_init_stage2(void) -{ - u32 ps; - - /* Sanity check for minimum IPA size support */ - ps = id_aa64mmfr0_parange_to_phys_shift(read_sysreg(id_aa64mmfr0_el1) & 0x7); - WARN_ONCE(ps < 40, - "PARange is %d bits, unsupported configuration!", ps); -} +static inline void __cpu_init_stage2(void) {} /* Guest/host FPSIMD coordination helpers */ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); @@ -513,6 +505,8 @@ static inline int kvm_arm_have_ssbd(void) void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu); void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu); +void kvm_set_ipa_limit(void); + #define __KVM_HAVE_ARCH_VM_ALLOC struct kvm *kvm_arch_alloc_vm(void); void kvm_arch_free_vm(struct kvm *kvm); diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 2bf41e007390..96b3f50101bc 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -34,6 +34,9 @@ #include #include +/* Maximum phys_shift supported for any VM on this host */ +static u32 kvm_ipa_limit; + /* * ARMv8 Reset Values */ @@ -135,6 +138,46 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) return kvm_timer_vcpu_reset(vcpu); } +void kvm_set_ipa_limit(void) +{ + unsigned int ipa_max, pa_max, va_max, parange; + + parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 0x7; + pa_max = id_aa64mmfr0_parange_to_phys_shift(parange); + + /* Clamp the IPA limit to the PA size supported by the kernel */ + ipa_max = (pa_max > PHYS_MASK_SHIFT) ? PHYS_MASK_SHIFT : pa_max; + /* + * Since our stage2 table is dependent on the stage1 page table code, + * we must always honor the following condition: + * + * Number of levels in Stage1 >= Number of levels in Stage2. + * + * So clamp the ipa limit further down to limit the number of levels. + * Since we can concatenate upto 16 tables at entry level, we could + * go upto 4bits above the maximum VA addressible with the current + * number of levels. + */ + va_max = PGDIR_SHIFT + PAGE_SHIFT - 3; + va_max += 4; + + if (va_max < ipa_max) + ipa_max = va_max; + + /* + * If the final limit is lower than the real physical address + * limit of the CPUs, report the reason. + */ + if (ipa_max < pa_max) + pr_info("kvm: Limiting the IPA size due to kernel %s Address limit\n", + (va_max < pa_max) ? "Virtual" : "Physical"); + + WARN(ipa_max < KVM_PHYS_SHIFT, + "KVM IPA limit (%d bit) is smaller than default size\n", ipa_max); + kvm_ipa_limit = ipa_max; + kvm_info("IPA Size Limit: %dbits\n", kvm_ipa_limit); +} + /* * Configure the VTCR_EL2 for this VM. The VTCR value is common * across all the physical CPUs on the system. We use system wide diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 43e716bc3f08..631f9a3ad99a 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1413,6 +1413,8 @@ static int init_common_resources(void) kvm_vmid_bits = kvm_get_vmid_bits(); kvm_info("%d-bit VMID\n", kvm_vmid_bits); + kvm_set_ipa_limit(); + return 0; } -- cgit v1.2.3 From bca607ebc76af9540e4aad5b2241a7323354be43 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 1 Oct 2018 13:40:36 +0100 Subject: KVM: arm/arm64: Rename kvm_arm_config_vm to kvm_arm_setup_stage2 VM tends to be a very overloaded term in KVM, so let's keep it to describe the virtual machine. For the virtual memory setup, let's use the "stage2" suffix. Reviewed-by: Eric Auger Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_host.h | 6 +++++- arch/arm64/include/asm/kvm_arm.h | 2 +- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/kvm/reset.c | 2 +- virt/kvm/arm/arm.c | 2 +- 5 files changed, 9 insertions(+), 5 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 72d46418e1ef..b45af481ccf7 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -354,8 +354,12 @@ static inline void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) {} struct kvm *kvm_arch_alloc_vm(void); void kvm_arch_free_vm(struct kvm *kvm); -static inline int kvm_arm_config_vm(struct kvm *kvm, unsigned long type) +static inline int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) { + /* + * On 32bit ARM, VMs get a static 40bit IPA stage2 setup, + * so any non-zero value used as type is illegal. + */ if (type) return -EINVAL; return 0; diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index f1330284720d..6e324d1f1231 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -133,7 +133,7 @@ * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are * not known to exist and will break with this configuration. * - * The VTCR_EL2 is configured per VM and is initialised in kvm_arm_config_vm(). + * The VTCR_EL2 is configured per VM and is initialised in kvm_arm_setup_stage2(). * * Note that when using 4K pages, we concatenate two first level page tables * together. With 16K pages, we concatenate 16 first level page tables. diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f008f8866b2a..376a5b695467 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -511,6 +511,6 @@ void kvm_set_ipa_limit(void); struct kvm *kvm_arch_alloc_vm(void); void kvm_arch_free_vm(struct kvm *kvm); -int kvm_arm_config_vm(struct kvm *kvm, unsigned long type); +int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type); #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 95f28d5950e0..aa806d582552 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -189,7 +189,7 @@ void kvm_set_ipa_limit(void) * all CPUs, as it is safe to run with or without the feature and * the bit is RES0 on CPUs that don't support it. */ -int kvm_arm_config_vm(struct kvm *kvm, unsigned long type) +int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) { u64 vtcr = VTCR_EL2_FLAGS; u32 parange, phys_shift; diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 631f9a3ad99a..91c464c9cd21 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -120,7 +120,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int ret, cpu; - ret = kvm_arm_config_vm(kvm, type); + ret = kvm_arm_setup_stage2(kvm, type); if (ret) return ret; -- cgit v1.2.3 From 9d47bb0d9ea8528373b4c6f9bca6c7f402900297 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 1 Oct 2018 13:41:32 +0100 Subject: KVM: arm64: Drop __cpu_init_stage2 on the VHE path __cpu_init_stage2 doesn't do anything anymore on arm64, and is totally non-sensical if running VHE (as VHE is 64bit only). Reviewed-by: Eric Auger Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- virt/kvm/arm/arm.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 91c464c9cd21..4ce99bb223bc 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1310,16 +1310,10 @@ static void cpu_hyp_reinit(void) { cpu_hyp_reset(); - if (is_kernel_in_hyp_mode()) { - /* - * __cpu_init_stage2() is safe to call even if the PM - * event was cancelled before the CPU was reset. - */ - __cpu_init_stage2(); + if (is_kernel_in_hyp_mode()) kvm_timer_init_vhe(); - } else { + else cpu_init_hyp_mode(NULL); - } if (vgic_present) kvm_vgic_init_cpu_hardware(); -- cgit v1.2.3 From fd2ef358282c849c193aa36dadbf4f07f7dcd29b Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Mon, 1 Oct 2018 16:54:35 +0100 Subject: KVM: arm/arm64: Ensure only THP is candidate for adjustment PageTransCompoundMap() returns true for hugetlbfs and THP hugepages. This behaviour incorrectly leads to stage 2 faults for unsupported hugepage sizes (e.g., 64K hugepage with 4K pages) to be treated as THP faults. Tighten the check to filter out hugetlbfs pages. This also leads to consistently mapping all unsupported hugepage sizes as PTE level entries at stage 2. Signed-off-by: Punit Agrawal Reviewed-by: Suzuki Poulose Cc: Christoffer Dall Cc: Marc Zyngier Cc: stable@vger.kernel.org # v4.13+ Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 7e477b3cae5b..c23a1b323aad 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1231,8 +1231,14 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap) { kvm_pfn_t pfn = *pfnp; gfn_t gfn = *ipap >> PAGE_SHIFT; + struct page *page = pfn_to_page(pfn); - if (PageTransCompoundMap(pfn_to_page(pfn))) { + /* + * PageTransCompoungMap() returns true for THP and + * hugetlbfs. Make sure the adjustment is done only for THP + * pages. + */ + if (!PageHuge(page) && PageTransCompoundMap(page)) { unsigned long mask; /* * The address we faulted on is backed by a transparent huge -- cgit v1.2.3 From a812297c4fd9c2c9337b451ad8d66083c5b24ceb Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 22 Aug 2018 12:18:29 +0200 Subject: KVM: x86: hyperv: optimize 'all cpus' case in kvm_hv_flush_tlb() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can use 'NULL' to represent 'all cpus' case in kvm_make_vcpus_request_mask() and avoid building vCPU mask with all vCPUs. Suggested-by: Radim Krčmář Signed-off-by: Vitaly Kuznetsov Reviewed-by: Roman Kagan Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 42 +++++++++++++++++++++++------------------- virt/kvm/kvm_main.c | 6 ++---- 2 files changed, 25 insertions(+), 23 deletions(-) (limited to 'virt') diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 0cd597b0f754..b45ce136be2f 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1325,35 +1325,39 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa, cpumask_clear(&hv_current->tlb_lush); + if (all_cpus) { + kvm_make_vcpus_request_mask(kvm, + KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP, + NULL, &hv_current->tlb_lush); + goto ret_success; + } + kvm_for_each_vcpu(i, vcpu, kvm) { struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv; int bank = hv->vp_index / 64, sbank = 0; - if (!all_cpus) { - /* Banks >64 can't be represented */ - if (bank >= 64) - continue; - - /* Non-ex hypercalls can only address first 64 vCPUs */ - if (!ex && bank) - continue; + /* Banks >64 can't be represented */ + if (bank >= 64) + continue; - if (ex) { - /* - * Check is the bank of this vCPU is in sparse - * set and get the sparse bank number. - */ - sbank = get_sparse_bank_no(valid_bank_mask, - bank); + /* Non-ex hypercalls can only address first 64 vCPUs */ + if (!ex && bank) + continue; - if (sbank < 0) - continue; - } + if (ex) { + /* + * Check is the bank of this vCPU is in sparse + * set and get the sparse bank number. + */ + sbank = get_sparse_bank_no(valid_bank_mask, bank); - if (!(sparse_banks[sbank] & BIT_ULL(hv->vp_index % 64))) + if (sbank < 0) continue; } + if (!(sparse_banks[sbank] & BIT_ULL(hv->vp_index % 64))) + continue; + /* * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we * can't analyze it here, flush TLB regardless of the specified diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f986e31fa68c..587e1a0a8715 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -219,7 +219,7 @@ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, me = get_cpu(); kvm_for_each_vcpu(i, vcpu, kvm) { - if (!test_bit(i, vcpu_bitmap)) + if (vcpu_bitmap && !test_bit(i, vcpu_bitmap)) continue; kvm_make_request(req, vcpu); @@ -243,12 +243,10 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) { cpumask_var_t cpus; bool called; - static unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)] - = {[0 ... BITS_TO_LONGS(KVM_MAX_VCPUS)-1] = ULONG_MAX}; zalloc_cpumask_var(&cpus, GFP_ATOMIC); - called = kvm_make_vcpus_request_mask(kvm, req, vcpu_bitmap, cpus); + called = kvm_make_vcpus_request_mask(kvm, req, NULL, cpus); free_cpumask_var(cpus); return called; -- cgit v1.2.3 From 31fc4f95dddc4ebc9f9596a2720662e15e5d444e Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 22 Aug 2018 21:57:11 +0800 Subject: KVM: leverage change to adjust slots->used_slots in update_memslots() update_memslots() is only called by __kvm_set_memory_region(), in which "change" is calculated and indicates how to adjust slots->used_slots * increase by one if it is KVM_MR_CREATE * decrease by one if it is KVM_MR_DELETE * not change for others This patch adjusts slots->used_slots in update_memslots() based on "change" value instead of re-calculate those states again. Signed-off-by: Wei Yang Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 587e1a0a8715..acc951cc2663 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -805,20 +805,25 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) * sorted array and known changed memslot position. */ static void update_memslots(struct kvm_memslots *slots, - struct kvm_memory_slot *new) + struct kvm_memory_slot *new, + enum kvm_mr_change change) { int id = new->id; int i = slots->id_to_index[id]; struct kvm_memory_slot *mslots = slots->memslots; WARN_ON(mslots[i].id != id); - if (!new->npages) { - WARN_ON(!mslots[i].npages); - if (mslots[i].npages) - slots->used_slots--; - } else { - if (!mslots[i].npages) - slots->used_slots++; + switch (change) { + case KVM_MR_CREATE: + slots->used_slots++; + WARN_ON(mslots[i].npages || !new->npages); + break; + case KVM_MR_DELETE: + slots->used_slots--; + WARN_ON(new->npages || !mslots[i].npages); + break; + default: + break; } while (i < KVM_MEM_SLOTS_NUM - 1 && @@ -1054,7 +1059,7 @@ int __kvm_set_memory_region(struct kvm *kvm, memset(&new.arch, 0, sizeof(new.arch)); } - update_memslots(slots, &new); + update_memslots(slots, &new, change); old_memslots = install_new_memslots(kvm, as_id, slots); kvm_arch_commit_memory_region(kvm, mem, &old, &new, change); -- cgit v1.2.3 From 0804c849f1df0992d39a37c4fc259f7f8b16f385 Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Sun, 14 Oct 2018 07:09:55 +0800 Subject: kvm/x86 : add coalesced pio support Coalesced pio is based on coalesced mmio and can be used for some port like rtc port, pci-host config port and so on. Specially in case of rtc as coalesced pio, some versions of windows guest access rtc frequently because of rtc as system tick. guest access rtc like this: write register index to 0x70, then write or read data from 0x71. writing 0x70 port is just as index and do nothing else. So we can use coalesced pio to handle this scene to reduce VM-EXIT time. When starting and closing a virtual machine, it will access pci-host config port frequently. So setting these port as coalesced pio can reduce startup and shutdown time. without my patch, get the vm-exit time of accessing rtc 0x70 and piix 0xcf8 using perf tools: (guest OS : windows 7 64bit) IO Port Access Samples Samples% Time% Min Time Max Time Avg time 0x70:POUT 86 30.99% 74.59% 9us 29us 10.75us (+- 3.41%) 0xcf8:POUT 1119 2.60% 2.12% 2.79us 56.83us 3.41us (+- 2.23%) with my patch IO Port Access Samples Samples% Time% Min Time Max Time Avg time 0x70:POUT 106 32.02% 29.47% 0us 10us 1.57us (+- 7.38%) 0xcf8:POUT 1065 1.67% 0.28% 0.41us 65.44us 0.66us (+- 10.55%) Signed-off-by: Peng Hao Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 16 ++++++++++------ include/uapi/linux/kvm.h | 11 +++++++++-- virt/kvm/coalesced_mmio.c | 12 +++++++++--- virt/kvm/kvm_main.c | 2 ++ 4 files changed, 30 insertions(+), 11 deletions(-) (limited to 'virt') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 10d48eb67da9..70f9c8bb1840 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3683,27 +3683,31 @@ the definition of struct kvm_nested_state, see KVM_GET_NESTED_STATE. 4.116 KVM_(UN)REGISTER_COALESCED_MMIO -Capability: KVM_CAP_COALESCED_MMIO +Capability: KVM_CAP_COALESCED_MMIO (for coalesced mmio) + KVM_CAP_COALESCED_PIO (for coalesced pio) Architectures: all Type: vm ioctl Parameters: struct kvm_coalesced_mmio_zone Returns: 0 on success, < 0 on error -Coalesced mmio is a performance optimization that defers hardware +Coalesced I/O is a performance optimization that defers hardware register write emulation so that userspace exits are avoided. It is typically used to reduce the overhead of emulating frequently accessed hardware registers. -When a hardware register is configured for coalesced mmio, write accesses +When a hardware register is configured for coalesced I/O, write accesses do not exit to userspace and their value is recorded in a ring buffer that is shared between kernel and userspace. -Coalesced mmio is used if one or more write accesses to a hardware +Coalesced I/O is used if one or more write accesses to a hardware register can be deferred until a read or a write to another hardware register on the same device. This last access will cause a vmexit and userspace will process accesses from the ring buffer before emulating -it. That will avoid exiting to userspace on repeated writes to the -first register. +it. That will avoid exiting to userspace on repeated writes. + +Coalesced pio is based on coalesced mmio. There is little difference +between coalesced mmio and pio except that coalesced pio records accesses +to I/O ports. 5. The kvm_run structure ------------------------ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 7785678caedb..97780a0277fe 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -420,13 +420,19 @@ struct kvm_run { struct kvm_coalesced_mmio_zone { __u64 addr; __u32 size; - __u32 pad; + union { + __u32 pad; + __u32 pio; + }; }; struct kvm_coalesced_mmio { __u64 phys_addr; __u32 len; - __u32 pad; + union { + __u32 pad; + __u32 pio; + }; __u8 data[8]; }; @@ -956,6 +962,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_MSR_PLATFORM_INFO 159 #define KVM_CAP_PPC_NESTED_HV 160 #define KVM_CAP_HYPERV_SEND_IPI 161 +#define KVM_CAP_COALESCED_PIO 162 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 9e65feb6fa58..3710342cf6ad 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -83,6 +83,7 @@ static int coalesced_mmio_write(struct kvm_vcpu *vcpu, ring->coalesced_mmio[ring->last].phys_addr = addr; ring->coalesced_mmio[ring->last].len = len; memcpy(ring->coalesced_mmio[ring->last].data, val, len); + ring->coalesced_mmio[ring->last].pio = dev->zone.pio; smp_wmb(); ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX; spin_unlock(&dev->kvm->ring_lock); @@ -140,6 +141,9 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, int ret; struct kvm_coalesced_mmio_dev *dev; + if (zone->pio != 1 && zone->pio != 0) + return -EINVAL; + dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); if (!dev) return -ENOMEM; @@ -149,8 +153,9 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, dev->zone = *zone; mutex_lock(&kvm->slots_lock); - ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, zone->addr, - zone->size, &dev->dev); + ret = kvm_io_bus_register_dev(kvm, + zone->pio ? KVM_PIO_BUS : KVM_MMIO_BUS, + zone->addr, zone->size, &dev->dev); if (ret < 0) goto out_free_dev; list_add_tail(&dev->list, &kvm->coalesced_zones); @@ -174,7 +179,8 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, list_for_each_entry_safe(dev, tmp, &kvm->coalesced_zones, list) if (coalesced_mmio_in_range(dev, zone->addr, zone->size)) { - kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dev->dev); + kvm_io_bus_unregister_dev(kvm, + zone->pio ? KVM_PIO_BUS : KVM_MMIO_BUS, &dev->dev); kvm_iodevice_destructor(&dev->dev); } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index acc951cc2663..067b71abae00 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2949,6 +2949,8 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) #ifdef CONFIG_KVM_MMIO case KVM_CAP_COALESCED_MMIO: return KVM_COALESCED_MMIO_PAGE_OFFSET; + case KVM_CAP_COALESCED_PIO: + return 1; #endif #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING case KVM_CAP_IRQ_ROUTING: -- cgit v1.2.3 From 970c0d4b94efcba5610b37f0118cb1a2286ba962 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 9 Oct 2018 10:41:15 +0800 Subject: KVM: refine the comment of function gfn_to_hva_memslot_prot() The original comment is little hard to understand. No functional change, just amend the comment a little. Signed-off-by: Wei Yang Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 067b71abae00..786ade1843a2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1314,8 +1314,12 @@ unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn) EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_hva); /* - * If writable is set to false, the hva returned by this function is only - * allowed to be read. + * Return the hva of a @gfn and the R/W attribute if possible. + * + * @slot: the kvm_memory_slot which contains @gfn + * @gfn: the gfn to be translated + * @writable: used to return the read/write attribute of the @slot if the hva + * is valid and @writable is not NULL */ unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn, bool *writable) -- cgit v1.2.3 From da5a3ce66b8bb51b0ea8a89f42aac153903f90fb Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 17 Oct 2018 17:42:10 +0100 Subject: KVM: arm64: Fix caching of host MDCR_EL2 value At boot time, KVM stashes the host MDCR_EL2 value, but only does this when the kernel is not running in hyp mode (i.e. is non-VHE). In these cases, the stashed value of MDCR_EL2.HPMN happens to be zero, which can lead to CONSTRAINED UNPREDICTABLE behaviour. Since we use this value to derive the MDCR_EL2 value when switching to/from a guest, after a guest have been run, the performance counters do not behave as expected. This has been observed to result in accesses via PMXEVTYPER_EL0 and PMXEVCNTR_EL0 not affecting the relevant counters, resulting in events not being counted. In these cases, only the fixed-purpose cycle counter appears to work as expected. Fix this by always stashing the host MDCR_EL2 value, regardless of VHE. Cc: Christopher Dall Cc: James Morse Cc: Will Deacon Cc: stable@vger.kernel.org Fixes: 1e947bad0b63b351 ("arm64: KVM: Skip HYP setup when already running in HYP") Tested-by: Robin Murphy Signed-off-by: Mark Rutland Signed-off-by: Marc Zyngier --- virt/kvm/arm/arm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 4ce99bb223bc..4c5ff06b18f9 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1296,8 +1296,6 @@ static void cpu_init_hyp_mode(void *dummy) __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); __cpu_init_stage2(); - - kvm_arm_init_debug(); } static void cpu_hyp_reset(void) @@ -1315,6 +1313,8 @@ static void cpu_hyp_reinit(void) else cpu_init_hyp_mode(NULL); + kvm_arm_init_debug(); + if (vgic_present) kvm_vgic_init_cpu_hardware(); } -- cgit v1.2.3 From 375bdd3b5d4f7cf146f0df1488b4671b141dd799 Mon Sep 17 00:00:00 2001 From: Dongjiu Geng Date: Sat, 13 Oct 2018 00:12:48 +0800 Subject: arm/arm64: KVM: Rename function kvm_arch_dev_ioctl_check_extension() Rename kvm_arch_dev_ioctl_check_extension() to kvm_arch_vm_ioctl_check_extension(), because it does not have any relationship with device. Renaming this function can make code readable. Cc: James Morse Reviewed-by: Suzuki K Poulose Signed-off-by: Dongjiu Geng Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_host.h | 2 +- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/kvm/reset.c | 4 ++-- virt/kvm/arm/arm.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index b45af481ccf7..5ca5d9af0c26 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -273,7 +273,7 @@ static inline void __cpu_init_stage2(void) kvm_call_hyp(__init_stage2_translation); } -static inline int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) +static inline int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) { return 0; } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 376a5b695467..f84052f306af 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -53,7 +53,7 @@ DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); -int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext); +int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext); void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start); struct kvm_arch { diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index aa806d582552..337d2fbc2f06 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -59,12 +59,12 @@ static bool cpu_has_32bit_el1(void) } /** - * kvm_arch_dev_ioctl_check_extension + * kvm_arch_vm_ioctl_check_extension * * We currently assume that the number of HW registers is uniform * across all CPUs (see cpuinfo_sanity_check). */ -int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) +int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 4c5ff06b18f9..7c9d7b51ce89 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -241,7 +241,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 1; break; default: - r = kvm_arch_dev_ioctl_check_extension(kvm, ext); + r = kvm_arch_vm_ioctl_check_extension(kvm, ext); break; } return r; -- cgit v1.2.3 From 58bf437ff64eac8aca606e42d7e4623e40b61fa1 Mon Sep 17 00:00:00 2001 From: Dongjiu Geng Date: Sat, 13 Oct 2018 00:12:49 +0800 Subject: arm/arm64: KVM: Enable 32 bits kvm vcpu events support The commit 539aee0edb9f ("KVM: arm64: Share the parts of get/set events useful to 32bit") shares the get/set events helper for arm64 and arm32, but forgot to share the cap extension code. User space will check whether KVM supports vcpu events by checking the KVM_CAP_VCPU_EVENTS extension Acked-by: James Morse Reviewed-by : Suzuki K Poulose Signed-off-by: Dongjiu Geng Signed-off-by: Marc Zyngier --- arch/arm64/kvm/reset.c | 1 - virt/kvm/arm/arm.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 337d2fbc2f06..b72a3dd56204 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -86,7 +86,6 @@ int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_VCPU_ATTRIBUTES: - case KVM_CAP_VCPU_EVENTS: r = 1; break; case KVM_CAP_ARM_VM_IPA_SIZE: diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 7c9d7b51ce89..11b98b2b0486 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -213,6 +213,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_READONLY_MEM: case KVM_CAP_MP_STATE: case KVM_CAP_IMMEDIATE_EXIT: + case KVM_CAP_VCPU_EVENTS: r = 1; break; case KVM_CAP_ARM_SET_DEVICE_ADDR: -- cgit v1.2.3