diff options
-rw-r--r-- | Documentation/virt/kvm/api.rst | 36 | ||||
-rw-r--r-- | include/linux/kvm_host.h | 19 | ||||
-rw-r--r-- | include/uapi/linux/kvm.h | 13 | ||||
-rw-r--r-- | virt/kvm/Kconfig | 4 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 216 |
5 files changed, 288 insertions, 0 deletions
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 27d945d5b4e4..081ef09d3148 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6214,6 +6214,42 @@ superset of the features supported by the system. See KVM_SET_USER_MEMORY_REGION. +4.141 KVM_SET_MEMORY_ATTRIBUTES +------------------------------- + +:Capability: KVM_CAP_MEMORY_ATTRIBUTES +:Architectures: x86 +:Type: vm ioctl +:Parameters: struct kvm_memory_attributes (in) +:Returns: 0 on success, <0 on error + +KVM_SET_MEMORY_ATTRIBUTES allows userspace to set memory attributes for a range +of guest physical memory. + +:: + + struct kvm_memory_attributes { + __u64 address; + __u64 size; + __u64 attributes; + __u64 flags; + }; + + #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) + +The address and size must be page aligned. The supported attributes can be +retrieved via ioctl(KVM_CHECK_EXTENSION) on KVM_CAP_MEMORY_ATTRIBUTES. If +executed on a VM, KVM_CAP_MEMORY_ATTRIBUTES precisely returns the attributes +supported by that VM. If executed at system scope, KVM_CAP_MEMORY_ATTRIBUTES +returns all attributes supported by KVM. The only attribute defined at this +time is KVM_MEMORY_ATTRIBUTE_PRIVATE, which marks the associated gfn as being +guest private memory. + +Note, there is no "get" API. Userspace is responsible for explicitly tracking +the state of a gfn/page as needed. + +The "flags" field is reserved for future extensions and must be '0'. + 5. The kvm_run structure ======================== diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 96aa930536b1..68a144cb7dbc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -256,6 +256,7 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER union kvm_mmu_notifier_arg { pte_t pte; + unsigned long attributes; }; struct kvm_gfn_range { @@ -807,6 +808,10 @@ struct kvm { #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER struct notifier_block pm_notifier; #endif +#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES + /* Protected by slots_locks (for writes) and RCU (for reads) */ + struct xarray mem_attr_array; +#endif char stats_id[KVM_STATS_NAME_SIZE]; }; @@ -2338,4 +2343,18 @@ static inline void kvm_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, vcpu->run->memory_fault.flags = 0; } +#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES +static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn) +{ + return xa_to_value(xa_load(&kvm->mem_attr_array, gfn)); +} + +bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end, + unsigned long attrs); +bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, + struct kvm_gfn_range *range); +bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, + struct kvm_gfn_range *range); +#endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */ + #endif diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 59010a685007..e8d167e54980 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1220,6 +1220,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 #define KVM_CAP_USER_MEMORY2 231 #define KVM_CAP_MEMORY_FAULT_INFO 232 +#define KVM_CAP_MEMORY_ATTRIBUTES 233 #ifdef KVM_CAP_IRQ_ROUTING @@ -2288,4 +2289,16 @@ struct kvm_s390_zpci_op { /* flags for kvm_s390_zpci_op->u.reg_aen.flags */ #define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) +/* Available with KVM_CAP_MEMORY_ATTRIBUTES */ +#define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd2, struct kvm_memory_attributes) + +struct kvm_memory_attributes { + __u64 address; + __u64 size; + __u64 attributes; + __u64 flags; +}; + +#define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) + #endif /* __LINUX_KVM_H */ diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index ecae2914c97e..5bd7fcaf9089 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -96,3 +96,7 @@ config KVM_GENERIC_HARDWARE_ENABLING config KVM_GENERIC_MMU_NOTIFIER select MMU_NOTIFIER bool + +config KVM_GENERIC_MEMORY_ATTRIBUTES + select KVM_GENERIC_MMU_NOTIFIER + bool diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7f3291dec7a6..f1a575d39b3b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1211,6 +1211,9 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) spin_lock_init(&kvm->mn_invalidate_lock); rcuwait_init(&kvm->mn_memslots_update_rcuwait); xa_init(&kvm->vcpu_array); +#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES + xa_init(&kvm->mem_attr_array); +#endif INIT_LIST_HEAD(&kvm->gpc_list); spin_lock_init(&kvm->gpc_lock); @@ -1391,6 +1394,9 @@ static void kvm_destroy_vm(struct kvm *kvm) } cleanup_srcu_struct(&kvm->irq_srcu); cleanup_srcu_struct(&kvm->srcu); +#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES + xa_destroy(&kvm->mem_attr_array); +#endif kvm_arch_free_vm(kvm); preempt_notifier_dec(); hardware_disable_all(); @@ -2397,6 +2403,200 @@ static int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, } #endif /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ +#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES +/* + * Returns true if _all_ gfns in the range [@start, @end) have attributes + * matching @attrs. + */ +bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end, + unsigned long attrs) +{ + XA_STATE(xas, &kvm->mem_attr_array, start); + unsigned long index; + bool has_attrs; + void *entry; + + rcu_read_lock(); + + if (!attrs) { + has_attrs = !xas_find(&xas, end - 1); + goto out; + } + + has_attrs = true; + for (index = start; index < end; index++) { + do { + entry = xas_next(&xas); + } while (xas_retry(&xas, entry)); + + if (xas.xa_index != index || xa_to_value(entry) != attrs) { + has_attrs = false; + break; + } + } + +out: + rcu_read_unlock(); + return has_attrs; +} + +static u64 kvm_supported_mem_attributes(struct kvm *kvm) +{ + if (!kvm) + return KVM_MEMORY_ATTRIBUTE_PRIVATE; + + return 0; +} + +static __always_inline void kvm_handle_gfn_range(struct kvm *kvm, + struct kvm_mmu_notifier_range *range) +{ + struct kvm_gfn_range gfn_range; + struct kvm_memory_slot *slot; + struct kvm_memslots *slots; + struct kvm_memslot_iter iter; + bool found_memslot = false; + bool ret = false; + int i; + + gfn_range.arg = range->arg; + gfn_range.may_block = range->may_block; + + for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { + slots = __kvm_memslots(kvm, i); + + kvm_for_each_memslot_in_gfn_range(&iter, slots, range->start, range->end) { + slot = iter.slot; + gfn_range.slot = slot; + + gfn_range.start = max(range->start, slot->base_gfn); + gfn_range.end = min(range->end, slot->base_gfn + slot->npages); + if (gfn_range.start >= gfn_range.end) + continue; + + if (!found_memslot) { + found_memslot = true; + KVM_MMU_LOCK(kvm); + if (!IS_KVM_NULL_FN(range->on_lock)) + range->on_lock(kvm); + } + + ret |= range->handler(kvm, &gfn_range); + } + } + + if (range->flush_on_ret && ret) + kvm_flush_remote_tlbs(kvm); + + if (found_memslot) + KVM_MMU_UNLOCK(kvm); +} + +static bool kvm_pre_set_memory_attributes(struct kvm *kvm, + struct kvm_gfn_range *range) +{ + /* + * Unconditionally add the range to the invalidation set, regardless of + * whether or not the arch callback actually needs to zap SPTEs. E.g. + * if KVM supports RWX attributes in the future and the attributes are + * going from R=>RW, zapping isn't strictly necessary. Unconditionally + * adding the range allows KVM to require that MMU invalidations add at + * least one range between begin() and end(), e.g. allows KVM to detect + * bugs where the add() is missed. Relaxing the rule *might* be safe, + * but it's not obvious that allowing new mappings while the attributes + * are in flux is desirable or worth the complexity. + */ + kvm_mmu_invalidate_range_add(kvm, range->start, range->end); + + return kvm_arch_pre_set_memory_attributes(kvm, range); +} + +/* Set @attributes for the gfn range [@start, @end). */ +static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end, + unsigned long attributes) +{ + struct kvm_mmu_notifier_range pre_set_range = { + .start = start, + .end = end, + .handler = kvm_pre_set_memory_attributes, + .on_lock = kvm_mmu_invalidate_begin, + .flush_on_ret = true, + .may_block = true, + }; + struct kvm_mmu_notifier_range post_set_range = { + .start = start, + .end = end, + .arg.attributes = attributes, + .handler = kvm_arch_post_set_memory_attributes, + .on_lock = kvm_mmu_invalidate_end, + .may_block = true, + }; + unsigned long i; + void *entry; + int r = 0; + + entry = attributes ? xa_mk_value(attributes) : NULL; + + mutex_lock(&kvm->slots_lock); + + /* Nothing to do if the entire range as the desired attributes. */ + if (kvm_range_has_memory_attributes(kvm, start, end, attributes)) + goto out_unlock; + + /* + * Reserve memory ahead of time to avoid having to deal with failures + * partway through setting the new attributes. + */ + for (i = start; i < end; i++) { + r = xa_reserve(&kvm->mem_attr_array, i, GFP_KERNEL_ACCOUNT); + if (r) + goto out_unlock; + } + + kvm_handle_gfn_range(kvm, &pre_set_range); + + for (i = start; i < end; i++) { + r = xa_err(xa_store(&kvm->mem_attr_array, i, entry, + GFP_KERNEL_ACCOUNT)); + KVM_BUG_ON(r, kvm); + } + + kvm_handle_gfn_range(kvm, &post_set_range); + +out_unlock: + mutex_unlock(&kvm->slots_lock); + + return r; +} +static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm, + struct kvm_memory_attributes *attrs) +{ + gfn_t start, end; + + /* flags is currently not used. */ + if (attrs->flags) + return -EINVAL; + if (attrs->attributes & ~kvm_supported_mem_attributes(kvm)) + return -EINVAL; + if (attrs->size == 0 || attrs->address + attrs->size < attrs->address) + return -EINVAL; + if (!PAGE_ALIGNED(attrs->address) || !PAGE_ALIGNED(attrs->size)) + return -EINVAL; + + start = attrs->address >> PAGE_SHIFT; + end = (attrs->address + attrs->size) >> PAGE_SHIFT; + + /* + * xarray tracks data using "unsigned long", and as a result so does + * KVM. For simplicity, supports generic attributes only on 64-bit + * architectures. + */ + BUILD_BUG_ON(sizeof(attrs->attributes) != sizeof(unsigned long)); + + return kvm_vm_set_mem_attributes(kvm, start, end, attrs->attributes); +} +#endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */ + struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) { return __gfn_to_memslot(kvm_memslots(kvm), gfn); @@ -4641,6 +4841,10 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) case KVM_CAP_BINARY_STATS_FD: case KVM_CAP_SYSTEM_EVENT_DATA: return 1; +#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES + case KVM_CAP_MEMORY_ATTRIBUTES: + return kvm_supported_mem_attributes(kvm); +#endif default: break; } @@ -5034,6 +5238,18 @@ static long kvm_vm_ioctl(struct file *filp, break; } #endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */ +#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES + case KVM_SET_MEMORY_ATTRIBUTES: { + struct kvm_memory_attributes attrs; + + r = -EFAULT; + if (copy_from_user(&attrs, argp, sizeof(attrs))) + goto out; + + r = kvm_vm_ioctl_set_mem_attributes(kvm, &attrs); + break; + } +#endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */ case KVM_CREATE_DEVICE: { struct kvm_create_device cd; |