diff options
author | Jonathan Corbet <corbet@lwn.net> | 2019-07-22 13:42:10 -0600 |
---|---|---|
committer | Jonathan Corbet <corbet@lwn.net> | 2019-07-22 13:42:10 -0600 |
commit | e27a24210aa17b8a0cd462865130fe73afd7e001 (patch) | |
tree | b2d7b8311d280244fad3166792e8f447081a8bc9 /Documentation/virtual/kvm | |
parent | 224d5fd43d250f850d64fb6d668114aff29d7022 (diff) | |
parent | 5f9e832c137075045d15cd6899ab0505cfb2ca4b (diff) |
Merge tag 'v5.3-rc1' into docs-next
Pull in all of the massive docs changes from elsewhere.
Diffstat (limited to 'Documentation/virtual/kvm')
-rw-r--r-- | Documentation/virtual/kvm/api.txt | 81 | ||||
-rw-r--r-- | Documentation/virtual/kvm/arm/psci.txt | 31 | ||||
-rw-r--r-- | Documentation/virtual/kvm/cpuid.rst | 107 | ||||
-rw-r--r-- | Documentation/virtual/kvm/cpuid.txt | 83 | ||||
-rw-r--r-- | Documentation/virtual/kvm/hypercalls.txt | 11 | ||||
-rw-r--r-- | Documentation/virtual/kvm/index.rst | 11 | ||||
-rw-r--r-- | Documentation/virtual/kvm/locking.txt | 4 | ||||
-rw-r--r-- | Documentation/virtual/kvm/msr.txt | 9 |
8 files changed, 235 insertions, 102 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 68984c284c40..e54a3f51ddc5 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1079,7 +1079,7 @@ yet and must be cleared on entry. 4.35 KVM_SET_USER_MEMORY_REGION -Capability: KVM_CAP_USER_MEM +Capability: KVM_CAP_USER_MEMORY Architectures: all Type: vm ioctl Parameters: struct kvm_userspace_memory_region (in) @@ -3857,43 +3857,59 @@ Type: vcpu ioctl Parameters: struct kvm_nested_state (in/out) Returns: 0 on success, -1 on error Errors: - E2BIG: the total state size (including the fixed-size part of struct - kvm_nested_state) exceeds the value of 'size' specified by + E2BIG: the total state size exceeds the value of 'size' specified by the user; the size required will be written into size. struct kvm_nested_state { __u16 flags; __u16 format; __u32 size; + union { - struct kvm_vmx_nested_state vmx; - struct kvm_svm_nested_state svm; + struct kvm_vmx_nested_state_hdr vmx; + struct kvm_svm_nested_state_hdr svm; + + /* Pad the header to 128 bytes. */ __u8 pad[120]; - }; - __u8 data[0]; + } hdr; + + union { + struct kvm_vmx_nested_state_data vmx[0]; + struct kvm_svm_nested_state_data svm[0]; + } data; }; #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 +#define KVM_STATE_NESTED_EVMCS 0x00000004 + +#define KVM_STATE_NESTED_FORMAT_VMX 0 +#define KVM_STATE_NESTED_FORMAT_SVM 1 -#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001 -#define KVM_STATE_NESTED_SMM_VMXON 0x00000002 +#define KVM_STATE_NESTED_VMX_VMCS_SIZE 0x1000 -struct kvm_vmx_nested_state { +#define KVM_STATE_NESTED_VMX_SMM_GUEST_MODE 0x00000001 +#define KVM_STATE_NESTED_VMX_SMM_VMXON 0x00000002 + +struct kvm_vmx_nested_state_hdr { __u64 vmxon_pa; - __u64 vmcs_pa; + __u64 vmcs12_pa; struct { __u16 flags; } smm; }; +struct kvm_vmx_nested_state_data { + __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; + __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; +}; + This ioctl copies the vcpu's nested virtualization state from the kernel to userspace. -The maximum size of the state, including the fixed-size part of struct -kvm_nested_state, can be retrieved by passing KVM_CAP_NESTED_STATE to -the KVM_CHECK_EXTENSION ioctl(). +The maximum size of the state can be retrieved by passing KVM_CAP_NESTED_STATE +to the KVM_CHECK_EXTENSION ioctl(). 4.115 KVM_SET_NESTED_STATE @@ -3903,8 +3919,8 @@ Type: vcpu ioctl Parameters: struct kvm_nested_state (in) Returns: 0 on success, -1 on error -This copies the vcpu's kvm_nested_state struct from userspace to the kernel. For -the definition of struct kvm_nested_state, see KVM_GET_NESTED_STATE. +This copies the vcpu's kvm_nested_state struct from userspace to the kernel. +For the definition of struct kvm_nested_state, see KVM_GET_NESTED_STATE. 4.116 KVM_(UN)REGISTER_COALESCED_MMIO @@ -4065,6 +4081,37 @@ KVM_ARM_VCPU_FINALIZE call. See KVM_ARM_VCPU_INIT for details of vcpu features that require finalization using this ioctl. +4.120 KVM_SET_PMU_EVENT_FILTER + +Capability: KVM_CAP_PMU_EVENT_FILTER +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_pmu_event_filter (in) +Returns: 0 on success, -1 on error + +struct kvm_pmu_event_filter { + __u32 action; + __u32 nevents; + __u32 fixed_counter_bitmap; + __u32 flags; + __u32 pad[4]; + __u64 events[0]; +}; + +This ioctl restricts the set of PMU events that the guest can program. +The argument holds a list of events which will be allowed or denied. +The eventsel+umask of each event the guest attempts to program is compared +against the events field to determine whether the guest should have access. +The events field only controls general purpose counters; fixed purpose +counters are controlled by the fixed_counter_bitmap. + +No flags are defined yet, the field must be zero. + +Valid values for 'action': +#define KVM_PMU_EVENT_ALLOW 0 +#define KVM_PMU_EVENT_DENY 1 + + 5. The kvm_run structure ------------------------ @@ -4893,6 +4940,8 @@ Valid bits in args[0] are #define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) #define KVM_X86_DISABLE_EXITS_HLT (1 << 1) +#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) +#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3) Enabling this capability on a VM provides userspace with a way to no longer intercept some instructions for improved latency in some diff --git a/Documentation/virtual/kvm/arm/psci.txt b/Documentation/virtual/kvm/arm/psci.txt index aafdab887b04..559586fc9d37 100644 --- a/Documentation/virtual/kvm/arm/psci.txt +++ b/Documentation/virtual/kvm/arm/psci.txt @@ -28,3 +28,34 @@ The following register is defined: - Allows any PSCI version implemented by KVM and compatible with v0.2 to be set with SET_ONE_REG - Affects the whole VM (even if the register view is per-vcpu) + +* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: + Holds the state of the firmware support to mitigate CVE-2017-5715, as + offered by KVM to the guest via a HVC call. The workaround is described + under SMCCC_ARCH_WORKAROUND_1 in [1]. + Accepted values are: + KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL: KVM does not offer + firmware support for the workaround. The mitigation status for the + guest is unknown. + KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL: The workaround HVC call is + available to the guest and required for the mitigation. + KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED: The workaround HVC call + is available to the guest, but it is not needed on this VCPU. + +* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: + Holds the state of the firmware support to mitigate CVE-2018-3639, as + offered by KVM to the guest via a HVC call. The workaround is described + under SMCCC_ARCH_WORKAROUND_2 in [1]. + Accepted values are: + KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL: A workaround is not + available. KVM does not offer firmware support for the workaround. + KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN: The workaround state is + unknown. KVM does not offer firmware support for the workaround. + KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: The workaround is available, + and can be disabled by a vCPU. If + KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED is set, it is active for + this vCPU. + KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: The workaround is + always active on this vCPU or it is not needed. + +[1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf diff --git a/Documentation/virtual/kvm/cpuid.rst b/Documentation/virtual/kvm/cpuid.rst new file mode 100644 index 000000000000..01b081f6e7ea --- /dev/null +++ b/Documentation/virtual/kvm/cpuid.rst @@ -0,0 +1,107 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============== +KVM CPUID bits +============== + +:Author: Glauber Costa <glommer@gmail.com> + +A guest running on a kvm host, can check some of its features using +cpuid. This is not always guaranteed to work, since userspace can +mask-out some, or even all KVM-related cpuid features before launching +a guest. + +KVM cpuid functions are: + +function: KVM_CPUID_SIGNATURE (0x40000000) + +returns:: + + eax = 0x40000001 + ebx = 0x4b4d564b + ecx = 0x564b4d56 + edx = 0x4d + +Note that this value in ebx, ecx and edx corresponds to the string "KVMKVMKVM". +The value in eax corresponds to the maximum cpuid function present in this leaf, +and will be updated if more functions are added in the future. +Note also that old hosts set eax value to 0x0. This should +be interpreted as if the value was 0x40000001. +This function queries the presence of KVM cpuid leafs. + +function: define KVM_CPUID_FEATURES (0x40000001) + +returns:: + + ebx, ecx + eax = an OR'ed group of (1 << flag) + +where ``flag`` is defined as below: + +================================= =========== ================================ +flag value meaning +================================= =========== ================================ +KVM_FEATURE_CLOCKSOURCE 0 kvmclock available at msrs + 0x11 and 0x12 + +KVM_FEATURE_NOP_IO_DELAY 1 not necessary to perform delays + on PIO operations + +KVM_FEATURE_MMU_OP 2 deprecated + +KVM_FEATURE_CLOCKSOURCE2 3 kvmclock available at msrs + + 0x4b564d00 and 0x4b564d01 +KVM_FEATURE_ASYNC_PF 4 async pf can be enabled by + writing to msr 0x4b564d02 + +KVM_FEATURE_STEAL_TIME 5 steal time can be enabled by + writing to msr 0x4b564d03 + +KVM_FEATURE_PV_EOI 6 paravirtualized end of interrupt + handler can be enabled by + writing to msr 0x4b564d04 + +KVM_FEATURE_PV_UNHAULT 7 guest checks this feature bit + before enabling paravirtualized + spinlock support + +KVM_FEATURE_PV_TLB_FLUSH 9 guest checks this feature bit + before enabling paravirtualized + tlb flush + +KVM_FEATURE_ASYNC_PF_VMEXIT 10 paravirtualized async PF VM EXIT + can be enabled by setting bit 2 + when writing to msr 0x4b564d02 + +KVM_FEATURE_PV_SEND_IPI 11 guest checks this feature bit + before enabling paravirtualized + sebd IPIs + +KVM_FEATURE_PV_POLL_CONTROL 12 host-side polling on HLT can + be disabled by writing + to msr 0x4b564d05. + +KVM_FEATURE_PV_SCHED_YIELD 13 guest checks this feature bit + before using paravirtualized + sched yield. + +KVM_FEATURE_CLOCSOURCE_STABLE_BIT 24 host will warn if no guest-side + per-cpu warps are expeced in + kvmclock +================================= =========== ================================ + +:: + + edx = an OR'ed group of (1 << flag) + +Where ``flag`` here is defined as below: + +================== ============ ================================= +flag value meaning +================== ============ ================================= +KVM_HINTS_REALTIME 0 guest checks this feature bit to + determine that vCPUs are never + preempted for an unlimited time + allowing optimizations +================== ============ ================================= diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt deleted file mode 100644 index 97ca1940a0dc..000000000000 --- a/Documentation/virtual/kvm/cpuid.txt +++ /dev/null @@ -1,83 +0,0 @@ -KVM CPUID bits -Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010 -===================================================== - -A guest running on a kvm host, can check some of its features using -cpuid. This is not always guaranteed to work, since userspace can -mask-out some, or even all KVM-related cpuid features before launching -a guest. - -KVM cpuid functions are: - -function: KVM_CPUID_SIGNATURE (0x40000000) -returns : eax = 0x40000001, - ebx = 0x4b4d564b, - ecx = 0x564b4d56, - edx = 0x4d. -Note that this value in ebx, ecx and edx corresponds to the string "KVMKVMKVM". -The value in eax corresponds to the maximum cpuid function present in this leaf, -and will be updated if more functions are added in the future. -Note also that old hosts set eax value to 0x0. This should -be interpreted as if the value was 0x40000001. -This function queries the presence of KVM cpuid leafs. - - -function: define KVM_CPUID_FEATURES (0x40000001) -returns : ebx, ecx - eax = an OR'ed group of (1 << flag), where each flags is: - - -flag || value || meaning -============================================================================= -KVM_FEATURE_CLOCKSOURCE || 0 || kvmclock available at msrs - || || 0x11 and 0x12. ------------------------------------------------------------------------------- -KVM_FEATURE_NOP_IO_DELAY || 1 || not necessary to perform delays - || || on PIO operations. ------------------------------------------------------------------------------- -KVM_FEATURE_MMU_OP || 2 || deprecated. ------------------------------------------------------------------------------- -KVM_FEATURE_CLOCKSOURCE2 || 3 || kvmclock available at msrs - || || 0x4b564d00 and 0x4b564d01 ------------------------------------------------------------------------------- -KVM_FEATURE_ASYNC_PF || 4 || async pf can be enabled by - || || writing to msr 0x4b564d02 ------------------------------------------------------------------------------- -KVM_FEATURE_STEAL_TIME || 5 || steal time can be enabled by - || || writing to msr 0x4b564d03. ------------------------------------------------------------------------------- -KVM_FEATURE_PV_EOI || 6 || paravirtualized end of interrupt - || || handler can be enabled by writing - || || to msr 0x4b564d04. ------------------------------------------------------------------------------- -KVM_FEATURE_PV_UNHALT || 7 || guest checks this feature bit - || || before enabling paravirtualized - || || spinlock support. ------------------------------------------------------------------------------- -KVM_FEATURE_PV_TLB_FLUSH || 9 || guest checks this feature bit - || || before enabling paravirtualized - || || tlb flush. ------------------------------------------------------------------------------- -KVM_FEATURE_ASYNC_PF_VMEXIT || 10 || paravirtualized async PF VM exit - || || can be enabled by setting bit 2 - || || when writing to msr 0x4b564d02 ------------------------------------------------------------------------------- -KVM_FEATURE_PV_SEND_IPI || 11 || guest checks this feature bit - || || before using paravirtualized - || || send IPIs. ------------------------------------------------------------------------------- -KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side - || || per-cpu warps are expected in - || || kvmclock. ------------------------------------------------------------------------------- - - edx = an OR'ed group of (1 << flag), where each flags is: - - -flag || value || meaning -================================================================================== -KVM_HINTS_REALTIME || 0 || guest checks this feature bit to - || || determine that vCPUs are never - || || preempted for an unlimited time, - || || allowing optimizations ----------------------------------------------------------------------------------- diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt index da24c138c8d1..da210651f714 100644 --- a/Documentation/virtual/kvm/hypercalls.txt +++ b/Documentation/virtual/kvm/hypercalls.txt @@ -141,3 +141,14 @@ a0 corresponds to the APIC ID in the third argument (a2), bit 1 corresponds to the APIC ID a2+1, and so on. Returns the number of CPUs to which the IPIs were delivered successfully. + +7. KVM_HC_SCHED_YIELD +------------------------ +Architecture: x86 +Status: active +Purpose: Hypercall used to yield if the IPI target vCPU is preempted + +a0: destination APIC ID + +Usage example: When sending a call-function IPI-many to vCPUs, yield if +any of the IPI target vCPUs was preempted. diff --git a/Documentation/virtual/kvm/index.rst b/Documentation/virtual/kvm/index.rst new file mode 100644 index 000000000000..0b206a06f5be --- /dev/null +++ b/Documentation/virtual/kvm/index.rst @@ -0,0 +1,11 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=== +KVM +=== + +.. toctree:: + :maxdepth: 2 + + amd-memory-encryption + cpuid diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt index 1bb8bcaf8497..635cd6eaf714 100644 --- a/Documentation/virtual/kvm/locking.txt +++ b/Documentation/virtual/kvm/locking.txt @@ -15,8 +15,6 @@ The acquisition orders for mutexes are as follows: On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock. -For spinlocks, kvm_lock is taken outside kvm->mmu_lock. - Everything else is a leaf: no other lock is taken inside the critical sections. @@ -169,7 +167,7 @@ which time it will be set using the Dirty tracking mechanism described above. ------------ Name: kvm_lock -Type: spinlock_t +Type: mutex Arch: any Protects: - vm_list diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt index f3f0d57ced8e..df1f4338b3ca 100644 --- a/Documentation/virtual/kvm/msr.txt +++ b/Documentation/virtual/kvm/msr.txt @@ -273,3 +273,12 @@ MSR_KVM_EOI_EN: 0x4b564d04 guest must both read the least significant bit in the memory area and clear it using a single CPU instruction, such as test and clear, or compare and exchange. + +MSR_KVM_POLL_CONTROL: 0x4b564d05 + Control host-side polling. + + data: Bit 0 enables (1) or disables (0) host-side HLT polling logic. + + KVM guests can request the host not to poll on HLT, for example if + they are performing polling themselves. + |