diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2016-09-20 16:15:05 +0200 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2016-11-03 12:27:51 +0100 |
commit | ad3610919e6f6a4d815c5bf163556b950d6aea56 (patch) | |
tree | 5a2a849d3f1020797701d8a3c993bb4bf342684b /arch/x86 | |
parent | 1b07304c587d4fe572ea50bdefaa6047dc9a6d1a (diff) |
kvm: x86: avoid atomic operations on APICv vmentry
On some benchmarks (e.g. netperf with ioeventfd disabled), APICv
posted interrupts turn out to be slower than interrupt injection via
KVM_REQ_EVENT.
This patch slightly optimizes the IRR update, avoiding expensive atomic
operations in the common case where PI.ON=0 at vmentry or the PIR vector
is mostly zero. This saves at least 20 cycles (1%) per vmexit, as
measured by kvm-unit-tests' inl_from_qemu test (20 runs):
| enable_apicv=1 | enable_apicv=0
| mean stdev | mean stdev
----------|-----------------|------------------
before | 5826 32.65 | 5765 47.09
after | 5809 43.42 | 5777 77.02
Of course, any change in the right column is just placebo effect. :)
The savings are bigger if interrupts are frequent.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/kvm/lapic.c | 6 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 14 |
2 files changed, 17 insertions, 3 deletions
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 6acd76b7e71d..890f218ddd7a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -342,9 +342,11 @@ void __kvm_apic_update_irr(u32 *pir, void *regs) u32 i, pir_val; for (i = 0; i <= 7; i++) { - pir_val = xchg(&pir[i], 0); - if (pir_val) + pir_val = READ_ONCE(pir[i]); + if (pir_val) { + pir_val = xchg(&pir[i], 0); *((u32 *)(regs + APIC_IRR + i * 0x10)) |= pir_val; + } } } EXPORT_SYMBOL_GPL(__kvm_apic_update_irr); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index dcb840e8bfe3..a91a5b01f38e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -520,6 +520,12 @@ static inline void pi_set_sn(struct pi_desc *pi_desc) (unsigned long *)&pi_desc->control); } +static inline void pi_clear_on(struct pi_desc *pi_desc) +{ + clear_bit(POSTED_INTR_ON, + (unsigned long *)&pi_desc->control); +} + static inline int pi_test_on(struct pi_desc *pi_desc) { return test_bit(POSTED_INTR_ON, @@ -4780,9 +4786,15 @@ static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - if (!pi_test_and_clear_on(&vmx->pi_desc)) + if (!pi_test_on(&vmx->pi_desc)) return; + pi_clear_on(&vmx->pi_desc); + /* + * IOMMU can write to PIR.ON, so the barrier matters even on UP. + * But on x86 this is just a compiler barrier anyway. + */ + smp_mb__after_atomic(); kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); } |