diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-06 18:38:31 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-06 18:38:31 -0700 |
commit | c136b84393d4e340e1b53fc7f737dd5827b19ee5 (patch) | |
tree | 985a1bdfafe7ec5ce2d3c738f601cad3998d8ce9 /arch/s390/kvm/kvm-s390.c | |
parent | e0f25a3f2d052e36ff67a9b4db835c3e27e950d8 (diff) | |
parent | 1372324b328cd5dabaef5e345e37ad48c63df2a9 (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
"PPC:
- Better machine check handling for HV KVM
- Ability to support guests with threads=2, 4 or 8 on POWER9
- Fix for a race that could cause delayed recognition of signals
- Fix for a bug where POWER9 guests could sleep with interrupts pending.
ARM:
- VCPU request overhaul
- allow timer and PMU to have their interrupt number selected from userspace
- workaround for Cavium erratum 30115
- handling of memory poisonning
- the usual crop of fixes and cleanups
s390:
- initial machine check forwarding
- migration support for the CMMA page hinting information
- cleanups and fixes
x86:
- nested VMX bugfixes and improvements
- more reliable NMI window detection on AMD
- APIC timer optimizations
Generic:
- VCPU request overhaul + documentation of common code patterns
- kvm_stat improvements"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (124 commits)
Update my email address
kvm: vmx: allow host to access guest MSR_IA32_BNDCFGS
x86: kvm: mmu: use ept a/d in vmcs02 iff used in vmcs12
kvm: x86: mmu: allow A/D bits to be disabled in an mmu
x86: kvm: mmu: make spte mmio mask more explicit
x86: kvm: mmu: dead code thanks to access tracking
KVM: PPC: Book3S: Fix typo in XICS-on-XIVE state saving code
KVM: PPC: Book3S HV: Close race with testing for signals on guest entry
KVM: PPC: Book3S HV: Simplify dynamic micro-threading code
KVM: x86: remove ignored type attribute
KVM: LAPIC: Fix lapic timer injection delay
KVM: lapic: reorganize restart_apic_timer
KVM: lapic: reorganize start_hv_timer
kvm: nVMX: Check memory operand to INVVPID
KVM: s390: Inject machine check into the nested guest
KVM: s390: Inject machine check into the guest
tools/kvm_stat: add new interactive command 'b'
tools/kvm_stat: add new command line switch '-i'
tools/kvm_stat: fix error on interactive command 'g'
KVM: SVM: suppress unnecessary NMI singlestep on GIF=0 and nested exit
...
Diffstat (limited to 'arch/s390/kvm/kvm-s390.c')
-rw-r--r-- | arch/s390/kvm/kvm-s390.c | 373 |
1 files changed, 371 insertions, 2 deletions
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b0d7de5a533d..3f2884e99ed4 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -30,6 +30,7 @@ #include <linux/vmalloc.h> #include <linux/bitmap.h> #include <linux/sched/signal.h> +#include <linux/string.h> #include <asm/asm-offsets.h> #include <asm/lowcore.h> @@ -386,6 +387,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_SKEYS: case KVM_CAP_S390_IRQ_STATE: case KVM_CAP_S390_USER_INSTR0: + case KVM_CAP_S390_CMMA_MIGRATION: case KVM_CAP_S390_AIS: r = 1; break; @@ -749,6 +751,129 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr) return 0; } +static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req) +{ + int cx; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(cx, vcpu, kvm) + kvm_s390_sync_request(req, vcpu); +} + +/* + * Must be called with kvm->srcu held to avoid races on memslots, and with + * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration. + */ +static int kvm_s390_vm_start_migration(struct kvm *kvm) +{ + struct kvm_s390_migration_state *mgs; + struct kvm_memory_slot *ms; + /* should be the only one */ + struct kvm_memslots *slots; + unsigned long ram_pages; + int slotnr; + + /* migration mode already enabled */ + if (kvm->arch.migration_state) + return 0; + + slots = kvm_memslots(kvm); + if (!slots || !slots->used_slots) + return -EINVAL; + + mgs = kzalloc(sizeof(*mgs), GFP_KERNEL); + if (!mgs) + return -ENOMEM; + kvm->arch.migration_state = mgs; + + if (kvm->arch.use_cmma) { + /* + * Get the last slot. They should be sorted by base_gfn, so the + * last slot is also the one at the end of the address space. + * We have verified above that at least one slot is present. + */ + ms = slots->memslots + slots->used_slots - 1; + /* round up so we only use full longs */ + ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG); + /* allocate enough bytes to store all the bits */ + mgs->pgste_bitmap = vmalloc(ram_pages / 8); + if (!mgs->pgste_bitmap) { + kfree(mgs); + kvm->arch.migration_state = NULL; + return -ENOMEM; + } + + mgs->bitmap_size = ram_pages; + atomic64_set(&mgs->dirty_pages, ram_pages); + /* mark all the pages in active slots as dirty */ + for (slotnr = 0; slotnr < slots->used_slots; slotnr++) { + ms = slots->memslots + slotnr; + bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages); + } + + kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION); + } + return 0; +} + +/* + * Must be called with kvm->lock to avoid races with ourselves and + * kvm_s390_vm_start_migration. + */ +static int kvm_s390_vm_stop_migration(struct kvm *kvm) +{ + struct kvm_s390_migration_state *mgs; + + /* migration mode already disabled */ + if (!kvm->arch.migration_state) + return 0; + mgs = kvm->arch.migration_state; + kvm->arch.migration_state = NULL; + + if (kvm->arch.use_cmma) { + kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION); + vfree(mgs->pgste_bitmap); + } + kfree(mgs); + return 0; +} + +static int kvm_s390_vm_set_migration(struct kvm *kvm, + struct kvm_device_attr *attr) +{ + int idx, res = -ENXIO; + + mutex_lock(&kvm->lock); + switch (attr->attr) { + case KVM_S390_VM_MIGRATION_START: + idx = srcu_read_lock(&kvm->srcu); + res = kvm_s390_vm_start_migration(kvm); + srcu_read_unlock(&kvm->srcu, idx); + break; + case KVM_S390_VM_MIGRATION_STOP: + res = kvm_s390_vm_stop_migration(kvm); + break; + default: + break; + } + mutex_unlock(&kvm->lock); + + return res; +} + +static int kvm_s390_vm_get_migration(struct kvm *kvm, + struct kvm_device_attr *attr) +{ + u64 mig = (kvm->arch.migration_state != NULL); + + if (attr->attr != KVM_S390_VM_MIGRATION_STATUS) + return -ENXIO; + + if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig))) + return -EFAULT; + return 0; +} + static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) { u8 gtod_high; @@ -1089,6 +1214,9 @@ static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) case KVM_S390_VM_CRYPTO: ret = kvm_s390_vm_set_crypto(kvm, attr); break; + case KVM_S390_VM_MIGRATION: + ret = kvm_s390_vm_set_migration(kvm, attr); + break; default: ret = -ENXIO; break; @@ -1111,6 +1239,9 @@ static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr) case KVM_S390_VM_CPU_MODEL: ret = kvm_s390_get_cpu_model(kvm, attr); break; + case KVM_S390_VM_MIGRATION: + ret = kvm_s390_vm_get_migration(kvm, attr); + break; default: ret = -ENXIO; break; @@ -1178,6 +1309,9 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) break; } break; + case KVM_S390_VM_MIGRATION: + ret = 0; + break; default: ret = -ENXIO; break; @@ -1285,6 +1419,182 @@ out: return r; } +/* + * Base address and length must be sent at the start of each block, therefore + * it's cheaper to send some clean data, as long as it's less than the size of + * two longs. + */ +#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *)) +/* for consistency */ +#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX) + +/* + * This function searches for the next page with dirty CMMA attributes, and + * saves the attributes in the buffer up to either the end of the buffer or + * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found; + * no trailing clean bytes are saved. + * In case no dirty bits were found, or if CMMA was not enabled or used, the + * output buffer will indicate 0 as length. + */ +static int kvm_s390_get_cmma_bits(struct kvm *kvm, + struct kvm_s390_cmma_log *args) +{ + struct kvm_s390_migration_state *s = kvm->arch.migration_state; + unsigned long bufsize, hva, pgstev, i, next, cur; + int srcu_idx, peek, r = 0, rr; + u8 *res; + + cur = args->start_gfn; + i = next = pgstev = 0; + + if (unlikely(!kvm->arch.use_cmma)) + return -ENXIO; + /* Invalid/unsupported flags were specified */ + if (args->flags & ~KVM_S390_CMMA_PEEK) + return -EINVAL; + /* Migration mode query, and we are not doing a migration */ + peek = !!(args->flags & KVM_S390_CMMA_PEEK); + if (!peek && !s) + return -EINVAL; + /* CMMA is disabled or was not used, or the buffer has length zero */ + bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); + if (!bufsize || !kvm->mm->context.use_cmma) { + memset(args, 0, sizeof(*args)); + return 0; + } + + if (!peek) { + /* We are not peeking, and there are no dirty pages */ + if (!atomic64_read(&s->dirty_pages)) { + memset(args, 0, sizeof(*args)); + return 0; + } + cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, + args->start_gfn); + if (cur >= s->bitmap_size) /* nothing found, loop back */ + cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0); + if (cur >= s->bitmap_size) { /* again! (very unlikely) */ + memset(args, 0, sizeof(*args)); + return 0; + } + next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1); + } + + res = vmalloc(bufsize); + if (!res) + return -ENOMEM; + + args->start_gfn = cur; + + down_read(&kvm->mm->mmap_sem); + srcu_idx = srcu_read_lock(&kvm->srcu); + while (i < bufsize) { + hva = gfn_to_hva(kvm, cur); + if (kvm_is_error_hva(hva)) { + r = -EFAULT; + break; + } + /* decrement only if we actually flipped the bit to 0 */ + if (!peek && test_and_clear_bit(cur, s->pgste_bitmap)) + atomic64_dec(&s->dirty_pages); + r = get_pgste(kvm->mm, hva, &pgstev); + if (r < 0) + pgstev = 0; + /* save the value */ + res[i++] = (pgstev >> 24) & 0x3; + /* + * if the next bit is too far away, stop. + * if we reached the previous "next", find the next one + */ + if (!peek) { + if (next > cur + KVM_S390_MAX_BIT_DISTANCE) + break; + if (cur == next) + next = find_next_bit(s->pgste_bitmap, + s->bitmap_size, cur + 1); + /* reached the end of the bitmap or of the buffer, stop */ + if ((next >= s->bitmap_size) || + (next >= args->start_gfn + bufsize)) + break; + } + cur++; + } + srcu_read_unlock(&kvm->srcu, srcu_idx); + up_read(&kvm->mm->mmap_sem); + args->count = i; + args->remaining = s ? atomic64_read(&s->dirty_pages) : 0; + + rr = copy_to_user((void __user *)args->values, res, args->count); + if (rr) + r = -EFAULT; + + vfree(res); + return r; +} + +/* + * This function sets the CMMA attributes for the given pages. If the input + * buffer has zero length, no action is taken, otherwise the attributes are + * set and the mm->context.use_cmma flag is set. + */ +static int kvm_s390_set_cmma_bits(struct kvm *kvm, + const struct kvm_s390_cmma_log *args) +{ + unsigned long hva, mask, pgstev, i; + uint8_t *bits; + int srcu_idx, r = 0; + + mask = args->mask; + + if (!kvm->arch.use_cmma) + return -ENXIO; + /* invalid/unsupported flags */ + if (args->flags != 0) + return -EINVAL; + /* Enforce sane limit on memory allocation */ + if (args->count > KVM_S390_CMMA_SIZE_MAX) + return -EINVAL; + /* Nothing to do */ + if (args->count == 0) + return 0; + + bits = vmalloc(sizeof(*bits) * args->count); + if (!bits) + return -ENOMEM; + + r = copy_from_user(bits, (void __user *)args->values, args->count); + if (r) { + r = -EFAULT; + goto out; + } + + down_read(&kvm->mm->mmap_sem); + srcu_idx = srcu_read_lock(&kvm->srcu); + for (i = 0; i < args->count; i++) { + hva = gfn_to_hva(kvm, args->start_gfn + i); + if (kvm_is_error_hva(hva)) { + r = -EFAULT; + break; + } + + pgstev = bits[i]; + pgstev = pgstev << 24; + mask &= _PGSTE_GPS_USAGE_MASK; + set_pgste_bits(kvm->mm, hva, mask, pgstev); + } + srcu_read_unlock(&kvm->srcu, srcu_idx); + up_read(&kvm->mm->mmap_sem); + + if (!kvm->mm->context.use_cmma) { + down_write(&kvm->mm->mmap_sem); + kvm->mm->context.use_cmma = 1; + up_write(&kvm->mm->mmap_sem); + } +out: + vfree(bits); + return r; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1363,6 +1673,29 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_s390_set_skeys(kvm, &args); break; } + case KVM_S390_GET_CMMA_BITS: { + struct kvm_s390_cmma_log args; + + r = -EFAULT; + if (copy_from_user(&args, argp, sizeof(args))) + break; + r = kvm_s390_get_cmma_bits(kvm, &args); + if (!r) { + r = copy_to_user(argp, &args, sizeof(args)); + if (r) + r = -EFAULT; + } + break; + } + case KVM_S390_SET_CMMA_BITS: { + struct kvm_s390_cmma_log args; + + r = -EFAULT; + if (copy_from_user(&args, argp, sizeof(args))) + break; + r = kvm_s390_set_cmma_bits(kvm, &args); + break; + } default: r = -ENOTTY; } @@ -1631,6 +1964,10 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_s390_destroy_adapters(kvm); kvm_s390_clear_float_irqs(kvm); kvm_s390_vsie_destroy(kvm); + if (kvm->arch.migration_state) { + vfree(kvm->arch.migration_state->pgste_bitmap); + kfree(kvm->arch.migration_state); + } KVM_EVENT(3, "vm 0x%pK destroyed", kvm); } @@ -1975,7 +2312,6 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) if (!vcpu->arch.sie_block->cbrlo) return -ENOMEM; - vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI; return 0; } @@ -2439,7 +2775,7 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) { retry: kvm_s390_vcpu_request_handled(vcpu); - if (!vcpu->requests) + if (!kvm_request_pending(vcpu)) return 0; /* * We use MMU_RELOAD just to re-arm the ipte notifier for the @@ -2488,6 +2824,27 @@ retry: goto retry; } + if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) { + /* + * Disable CMMA virtualization; we will emulate the ESSA + * instruction manually, in order to provide additional + * functionalities needed for live migration. + */ + vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA; + goto retry; + } + + if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) { + /* + * Re-enable CMMA virtualization if CMMA is available and + * was used. + */ + if ((vcpu->kvm->arch.use_cmma) && + (vcpu->kvm->mm->context.use_cmma)) + vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; + goto retry; + } + /* nothing to do, just clear the request */ kvm_clear_request(KVM_REQ_UNHALT, vcpu); @@ -2682,6 +3039,9 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) { + struct mcck_volatile_info *mcck_info; + struct sie_page *sie_page; + VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", vcpu->arch.sie_block->icptcode); trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); @@ -2692,6 +3052,15 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; + if (exit_reason == -EINTR) { + VCPU_EVENT(vcpu, 3, "%s", "machine check"); + sie_page = container_of(vcpu->arch.sie_block, + struct sie_page, sie_block); + mcck_info = &sie_page->mcck_info; + kvm_s390_reinject_machine_check(vcpu, mcck_info); + return 0; + } + if (vcpu->arch.sie_block->icptcode > 0) { int rc = kvm_handle_sie_intercept(vcpu); |