diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2023-07-01 07:00:11 -0400 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2023-07-01 07:00:11 -0400 |
commit | a443e2609c01479c7c0c3367059d7b9f2e8a6697 (patch) | |
tree | 90c9c2e6acfdf539523301f239fbac69dff4ed0f | |
parent | 6995e2de6891c724bfeb2db33d7b87775f913ad1 (diff) | |
parent | db54dfc9f71cd2df7afd1e88535ef6099cb0333e (diff) |
Merge tag 'kvm-s390-next-6.5-1' of https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD
* New uvdevice secret API
* New CMM selftest
* cmm fix
* diag 9c racy access of target cpu fix
-rw-r--r-- | arch/s390/boot/uv.c | 4 | ||||
-rw-r--r-- | arch/s390/include/asm/uv.h | 32 | ||||
-rw-r--r-- | arch/s390/include/uapi/asm/uvdevice.h | 53 | ||||
-rw-r--r-- | arch/s390/kernel/uv.c | 108 | ||||
-rw-r--r-- | arch/s390/kvm/diag.c | 8 | ||||
-rw-r--r-- | arch/s390/kvm/kvm-s390.c | 4 | ||||
-rw-r--r-- | arch/s390/kvm/vsie.c | 6 | ||||
-rw-r--r-- | drivers/s390/char/Kconfig | 2 | ||||
-rw-r--r-- | drivers/s390/char/uvdevice.c | 231 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/Makefile | 1 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/s390x/cmma_test.c | 700 |
11 files changed, 1100 insertions, 49 deletions
diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c index 0a077c0a2056..1e66d2cbb096 100644 --- a/arch/s390/boot/uv.c +++ b/arch/s390/boot/uv.c @@ -47,6 +47,10 @@ void uv_query_info(void) uv_info.conf_dump_finalize_len = uvcb.conf_dump_finalize_len; uv_info.supp_att_req_hdr_ver = uvcb.supp_att_req_hdr_ver; uv_info.supp_att_pflags = uvcb.supp_att_pflags; + uv_info.supp_add_secret_req_ver = uvcb.supp_add_secret_req_ver; + uv_info.supp_add_secret_pcf = uvcb.supp_add_secret_pcf; + uv_info.supp_secret_types = uvcb.supp_secret_types; + uv_info.max_secrets = uvcb.max_secrets; } #ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index 28a9ad57b6f1..d6bb2f4f78d1 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -58,6 +58,9 @@ #define UVC_CMD_SET_SHARED_ACCESS 0x1000 #define UVC_CMD_REMOVE_SHARED_ACCESS 0x1001 #define UVC_CMD_RETR_ATTEST 0x1020 +#define UVC_CMD_ADD_SECRET 0x1031 +#define UVC_CMD_LIST_SECRETS 0x1033 +#define UVC_CMD_LOCK_SECRETS 0x1034 /* Bits in installed uv calls */ enum uv_cmds_inst { @@ -88,6 +91,9 @@ enum uv_cmds_inst { BIT_UVC_CMD_DUMP_CPU = 26, BIT_UVC_CMD_DUMP_COMPLETE = 27, BIT_UVC_CMD_RETR_ATTEST = 28, + BIT_UVC_CMD_ADD_SECRET = 29, + BIT_UVC_CMD_LIST_SECRETS = 30, + BIT_UVC_CMD_LOCK_SECRETS = 31, }; enum uv_feat_ind { @@ -117,7 +123,7 @@ struct uv_cb_qui { u32 reserved70[3]; /* 0x0070 */ u32 max_num_sec_conf; /* 0x007c */ u64 max_guest_stor_addr; /* 0x0080 */ - u8 reserved88[158 - 136]; /* 0x0088 */ + u8 reserved88[0x9e - 0x88]; /* 0x0088 */ u16 max_guest_cpu_id; /* 0x009e */ u64 uv_feature_indications; /* 0x00a0 */ u64 reserveda8; /* 0x00a8 */ @@ -129,7 +135,12 @@ struct uv_cb_qui { u64 reservedd8; /* 0x00d8 */ u64 supp_att_req_hdr_ver; /* 0x00e0 */ u64 supp_att_pflags; /* 0x00e8 */ - u8 reservedf0[256 - 240]; /* 0x00f0 */ + u64 reservedf0; /* 0x00f0 */ + u64 supp_add_secret_req_ver; /* 0x00f8 */ + u64 supp_add_secret_pcf; /* 0x0100 */ + u64 supp_secret_types; /* 0x0180 */ + u16 max_secrets; /* 0x0110 */ + u8 reserved112[0x120 - 0x112]; /* 0x0112 */ } __packed __aligned(8); /* Initialize Ultravisor */ @@ -292,6 +303,19 @@ struct uv_cb_dump_complete { u64 reserved30[5]; } __packed __aligned(8); +/* + * A common UV call struct for pv guests that contains a single address + * Examples: + * Add Secret + * List Secrets + */ +struct uv_cb_guest_addr { + struct uv_cb_header header; + u64 reserved08[3]; + u64 addr; + u64 reserved28[4]; +} __packed __aligned(8); + static inline int __uv_call(unsigned long r1, unsigned long r2) { int cc; @@ -365,6 +389,10 @@ struct uv_info { unsigned long conf_dump_finalize_len; unsigned long supp_att_req_hdr_ver; unsigned long supp_att_pflags; + unsigned long supp_add_secret_req_ver; + unsigned long supp_add_secret_pcf; + unsigned long supp_secret_types; + unsigned short max_secrets; }; extern struct uv_info uv_info; diff --git a/arch/s390/include/uapi/asm/uvdevice.h b/arch/s390/include/uapi/asm/uvdevice.h index 10a5ac918e02..b9c2f14a6af3 100644 --- a/arch/s390/include/uapi/asm/uvdevice.h +++ b/arch/s390/include/uapi/asm/uvdevice.h @@ -32,6 +32,33 @@ struct uvio_attest { __u16 reserved136; /* 0x0136 */ }; +/** + * uvio_uvdev_info - Information of supported functions + * @supp_uvio_cmds - supported IOCTLs by this device + * @supp_uv_cmds - supported UVCs corresponding to the IOCTL + * + * UVIO request to get information about supported request types by this + * uvdevice and the Ultravisor. Everything is output. Bits are in LSB0 + * ordering. If the bit is set in both, @supp_uvio_cmds and @supp_uv_cmds, the + * uvdevice and the Ultravisor support that call. + * + * Note that bit 0 (UVIO_IOCTL_UVDEV_INFO_NR) is always zero for `supp_uv_cmds` + * as there is no corresponding UV-call. + */ +struct uvio_uvdev_info { + /* + * If bit `n` is set, this device supports the IOCTL with nr `n`. + */ + __u64 supp_uvio_cmds; + /* + * If bit `n` is set, the Ultravisor(UV) supports the UV-call + * corresponding to the IOCTL with nr `n` in the calling contextx (host + * or guest). The value is only valid if the corresponding bit in + * @supp_uvio_cmds is set as well. + */ + __u64 supp_uv_cmds; +}; + /* * The following max values define an upper length for the IOCTL in/out buffers. * However, they do not represent the maximum the Ultravisor allows which is @@ -42,10 +69,34 @@ struct uvio_attest { #define UVIO_ATT_ARCB_MAX_LEN 0x100000 #define UVIO_ATT_MEASUREMENT_MAX_LEN 0x8000 #define UVIO_ATT_ADDITIONAL_MAX_LEN 0x8000 +#define UVIO_ADD_SECRET_MAX_LEN 0x100000 +#define UVIO_LIST_SECRETS_LEN 0x1000 #define UVIO_DEVICE_NAME "uv" #define UVIO_TYPE_UVC 'u' -#define UVIO_IOCTL_ATT _IOWR(UVIO_TYPE_UVC, 0x01, struct uvio_ioctl_cb) +enum UVIO_IOCTL_NR { + UVIO_IOCTL_UVDEV_INFO_NR = 0x00, + UVIO_IOCTL_ATT_NR, + UVIO_IOCTL_ADD_SECRET_NR, + UVIO_IOCTL_LIST_SECRETS_NR, + UVIO_IOCTL_LOCK_SECRETS_NR, + /* must be the last entry */ + UVIO_IOCTL_NUM_IOCTLS +}; + +#define UVIO_IOCTL(nr) _IOWR(UVIO_TYPE_UVC, nr, struct uvio_ioctl_cb) +#define UVIO_IOCTL_UVDEV_INFO UVIO_IOCTL(UVIO_IOCTL_UVDEV_INFO_NR) +#define UVIO_IOCTL_ATT UVIO_IOCTL(UVIO_IOCTL_ATT_NR) +#define UVIO_IOCTL_ADD_SECRET UVIO_IOCTL(UVIO_IOCTL_ADD_SECRET_NR) +#define UVIO_IOCTL_LIST_SECRETS UVIO_IOCTL(UVIO_IOCTL_LIST_SECRETS_NR) +#define UVIO_IOCTL_LOCK_SECRETS UVIO_IOCTL(UVIO_IOCTL_LOCK_SECRETS_NR) + +#define UVIO_SUPP_CALL(nr) (1ULL << (nr)) +#define UVIO_SUPP_UDEV_INFO UVIO_SUPP_CALL(UVIO_IOCTL_UDEV_INFO_NR) +#define UVIO_SUPP_ATT UVIO_SUPP_CALL(UVIO_IOCTL_ATT_NR) +#define UVIO_SUPP_ADD_SECRET UVIO_SUPP_CALL(UVIO_IOCTL_ADD_SECRET_NR) +#define UVIO_SUPP_LIST_SECRETS UVIO_SUPP_CALL(UVIO_IOCTL_LIST_SECRETS_NR) +#define UVIO_SUPP_LOCK_SECRETS UVIO_SUPP_CALL(UVIO_IOCTL_LOCK_SECRETS_NR) #endif /* __S390_ASM_UVDEVICE_H */ diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index cb2ee06df286..273a0281a189 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -23,12 +23,20 @@ int __bootdata_preserved(prot_virt_guest); #endif +/* + * uv_info contains both host and guest information but it's currently only + * expected to be used within modules if it's the KVM module or for + * any PV guest module. + * + * The kernel itself will write these values once in uv_query_info() + * and then make some of them readable via a sysfs interface. + */ struct uv_info __bootdata_preserved(uv_info); +EXPORT_SYMBOL(uv_info); #if IS_ENABLED(CONFIG_KVM) int __bootdata_preserved(prot_virt_host); EXPORT_SYMBOL(prot_virt_host); -EXPORT_SYMBOL(uv_info); static int __init uv_init(phys_addr_t stor_base, unsigned long stor_len) { @@ -460,13 +468,13 @@ EXPORT_SYMBOL_GPL(arch_make_page_accessible); #if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM) static ssize_t uv_query_facilities(struct kobject *kobj, - struct kobj_attribute *attr, char *page) + struct kobj_attribute *attr, char *buf) { - return scnprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n", - uv_info.inst_calls_list[0], - uv_info.inst_calls_list[1], - uv_info.inst_calls_list[2], - uv_info.inst_calls_list[3]); + return sysfs_emit(buf, "%lx\n%lx\n%lx\n%lx\n", + uv_info.inst_calls_list[0], + uv_info.inst_calls_list[1], + uv_info.inst_calls_list[2], + uv_info.inst_calls_list[3]); } static struct kobj_attribute uv_query_facilities_attr = @@ -491,30 +499,27 @@ static struct kobj_attribute uv_query_supp_se_hdr_pcf_attr = __ATTR(supp_se_hdr_pcf, 0444, uv_query_supp_se_hdr_pcf, NULL); static ssize_t uv_query_dump_cpu_len(struct kobject *kobj, - struct kobj_attribute *attr, char *page) + struct kobj_attribute *attr, char *buf) { - return scnprintf(page, PAGE_SIZE, "%lx\n", - uv_info.guest_cpu_stor_len); + return sysfs_emit(buf, "%lx\n", uv_info.guest_cpu_stor_len); } static struct kobj_attribute uv_query_dump_cpu_len_attr = __ATTR(uv_query_dump_cpu_len, 0444, uv_query_dump_cpu_len, NULL); static ssize_t uv_query_dump_storage_state_len(struct kobject *kobj, - struct kobj_attribute *attr, char *page) + struct kobj_attribute *attr, char *buf) { - return scnprintf(page, PAGE_SIZE, "%lx\n", - uv_info.conf_dump_storage_state_len); + return sysfs_emit(buf, "%lx\n", uv_info.conf_dump_storage_state_len); } static struct kobj_attribute uv_query_dump_storage_state_len_attr = __ATTR(dump_storage_state_len, 0444, uv_query_dump_storage_state_len, NULL); static ssize_t uv_query_dump_finalize_len(struct kobject *kobj, - struct kobj_attribute *attr, char *page) + struct kobj_attribute *attr, char *buf) { - return scnprintf(page, PAGE_SIZE, "%lx\n", - uv_info.conf_dump_finalize_len); + return sysfs_emit(buf, "%lx\n", uv_info.conf_dump_finalize_len); } static struct kobj_attribute uv_query_dump_finalize_len_attr = @@ -530,53 +535,86 @@ static struct kobj_attribute uv_query_feature_indications_attr = __ATTR(feature_indications, 0444, uv_query_feature_indications, NULL); static ssize_t uv_query_max_guest_cpus(struct kobject *kobj, - struct kobj_attribute *attr, char *page) + struct kobj_attribute *attr, char *buf) { - return scnprintf(page, PAGE_SIZE, "%d\n", - uv_info.max_guest_cpu_id + 1); + return sysfs_emit(buf, "%d\n", uv_info.max_guest_cpu_id + 1); } static struct kobj_attribute uv_query_max_guest_cpus_attr = __ATTR(max_cpus, 0444, uv_query_max_guest_cpus, NULL); static ssize_t uv_query_max_guest_vms(struct kobject *kobj, - struct kobj_attribute *attr, char *page) + struct kobj_attribute *attr, char *buf) { - return scnprintf(page, PAGE_SIZE, "%d\n", - uv_info.max_num_sec_conf); + return sysfs_emit(buf, "%d\n", uv_info.max_num_sec_conf); } static struct kobj_attribute uv_query_max_guest_vms_attr = __ATTR(max_guests, 0444, uv_query_max_guest_vms, NULL); static ssize_t uv_query_max_guest_addr(struct kobject *kobj, - struct kobj_attribute *attr, char *page) + struct kobj_attribute *attr, char *buf) { - return scnprintf(page, PAGE_SIZE, "%lx\n", - uv_info.max_sec_stor_addr); + return sysfs_emit(buf, "%lx\n", uv_info.max_sec_stor_addr); } static struct kobj_attribute uv_query_max_guest_addr_attr = __ATTR(max_address, 0444, uv_query_max_guest_addr, NULL); static ssize_t uv_query_supp_att_req_hdr_ver(struct kobject *kobj, - struct kobj_attribute *attr, char *page) + struct kobj_attribute *attr, char *buf) { - return scnprintf(page, PAGE_SIZE, "%lx\n", uv_info.supp_att_req_hdr_ver); + return sysfs_emit(buf, "%lx\n", uv_info.supp_att_req_hdr_ver); } static struct kobj_attribute uv_query_supp_att_req_hdr_ver_attr = __ATTR(supp_att_req_hdr_ver, 0444, uv_query_supp_att_req_hdr_ver, NULL); static ssize_t uv_query_supp_att_pflags(struct kobject *kobj, - struct kobj_attribute *attr, char *page) + struct kobj_attribute *attr, char *buf) { - return scnprintf(page, PAGE_SIZE, "%lx\n", uv_info.supp_att_pflags); + return sysfs_emit(buf, "%lx\n", uv_info.supp_att_pflags); } static struct kobj_attribute uv_query_supp_att_pflags_attr = __ATTR(supp_att_pflags, 0444, uv_query_supp_att_pflags, NULL); +static ssize_t uv_query_supp_add_secret_req_ver(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.supp_add_secret_req_ver); +} + +static struct kobj_attribute uv_query_supp_add_secret_req_ver_attr = + __ATTR(supp_add_secret_req_ver, 0444, uv_query_supp_add_secret_req_ver, NULL); + +static ssize_t uv_query_supp_add_secret_pcf(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.supp_add_secret_pcf); +} + +static struct kobj_attribute uv_query_supp_add_secret_pcf_attr = + __ATTR(supp_add_secret_pcf, 0444, uv_query_supp_add_secret_pcf, NULL); + +static ssize_t uv_query_supp_secret_types(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.supp_secret_types); +} + +static struct kobj_attribute uv_query_supp_secret_types_attr = + __ATTR(supp_secret_types, 0444, uv_query_supp_secret_types, NULL); + +static ssize_t uv_query_max_secrets(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%d\n", uv_info.max_secrets); +} + +static struct kobj_attribute uv_query_max_secrets_attr = + __ATTR(max_secrets, 0444, uv_query_max_secrets, NULL); + static struct attribute *uv_query_attrs[] = { &uv_query_facilities_attr.attr, &uv_query_feature_indications_attr.attr, @@ -590,6 +628,10 @@ static struct attribute *uv_query_attrs[] = { &uv_query_dump_cpu_len_attr.attr, &uv_query_supp_att_req_hdr_ver_attr.attr, &uv_query_supp_att_pflags_attr.attr, + &uv_query_supp_add_secret_req_ver_attr.attr, + &uv_query_supp_add_secret_pcf_attr.attr, + &uv_query_supp_secret_types_attr.attr, + &uv_query_max_secrets_attr.attr, NULL, }; @@ -598,18 +640,18 @@ static struct attribute_group uv_query_attr_group = { }; static ssize_t uv_is_prot_virt_guest(struct kobject *kobj, - struct kobj_attribute *attr, char *page) + struct kobj_attribute *attr, char *buf) { int val = 0; #ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST val = prot_virt_guest; #endif - return scnprintf(page, PAGE_SIZE, "%d\n", val); + return sysfs_emit(buf, "%d\n", val); } static ssize_t uv_is_prot_virt_host(struct kobject *kobj, - struct kobj_attribute *attr, char *page) + struct kobj_attribute *attr, char *buf) { int val = 0; @@ -617,7 +659,7 @@ static ssize_t uv_is_prot_virt_host(struct kobject *kobj, val = prot_virt_host; #endif - return scnprintf(page, PAGE_SIZE, "%d\n", val); + return sysfs_emit(buf, "%d\n", val); } static struct kobj_attribute uv_prot_virt_guest = diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 807fa9da1e72..3c65b8258ae6 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -166,6 +166,7 @@ static int diag9c_forwarding_overrun(void) static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) { struct kvm_vcpu *tcpu; + int tcpu_cpu; int tid; tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; @@ -181,14 +182,15 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) goto no_yield; /* target guest VCPU already running */ - if (READ_ONCE(tcpu->cpu) >= 0) { + tcpu_cpu = READ_ONCE(tcpu->cpu); + if (tcpu_cpu >= 0) { if (!diag9c_forwarding_hz || diag9c_forwarding_overrun()) goto no_yield; /* target host CPU already running */ - if (!vcpu_is_preempted(tcpu->cpu)) + if (!vcpu_is_preempted(tcpu_cpu)) goto no_yield; - smp_yield_cpu(tcpu->cpu); + smp_yield_cpu(tcpu_cpu); VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: yield forwarded", tid); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 17b81659cdb2..670019696464 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2156,6 +2156,10 @@ static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots, ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]); ofs = 0; } + + if (cur_gfn < ms->base_gfn) + ofs = 0; + ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs); while (ofs >= ms->npages && (mnode = rb_next(mnode))) { ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]); diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 8d6b765abf29..0333ee482eb8 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -177,7 +177,8 @@ static int setup_apcb00(struct kvm_vcpu *vcpu, unsigned long *apcb_s, sizeof(struct kvm_s390_apcb0))) return -EFAULT; - bitmap_and(apcb_s, apcb_s, apcb_h, sizeof(struct kvm_s390_apcb0)); + bitmap_and(apcb_s, apcb_s, apcb_h, + BITS_PER_BYTE * sizeof(struct kvm_s390_apcb0)); return 0; } @@ -203,7 +204,8 @@ static int setup_apcb11(struct kvm_vcpu *vcpu, unsigned long *apcb_s, sizeof(struct kvm_s390_apcb1))) return -EFAULT; - bitmap_and(apcb_s, apcb_s, apcb_h, sizeof(struct kvm_s390_apcb1)); + bitmap_and(apcb_s, apcb_s, apcb_h, + BITS_PER_BYTE * sizeof(struct kvm_s390_apcb1)); return 0; } diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig index 80c4e5101c97..8a03af5ee5b3 100644 --- a/drivers/s390/char/Kconfig +++ b/drivers/s390/char/Kconfig @@ -96,7 +96,7 @@ config SCLP_OFB config S390_UV_UAPI def_tristate m prompt "Ultravisor userspace API" - depends on S390 + depends on S390 && (KVM || PROTECTED_VIRTUALIZATION_GUEST) help Selecting exposes parts of the UV interface to userspace by providing a misc character device at /dev/uv. diff --git a/drivers/s390/char/uvdevice.c b/drivers/s390/char/uvdevice.c index 1d40457c7b10..144cd2e03590 100644 --- a/drivers/s390/char/uvdevice.c +++ b/drivers/s390/char/uvdevice.c @@ -32,6 +32,55 @@ #include <asm/uvdevice.h> #include <asm/uv.h> +#define BIT_UVIO_INTERNAL U32_MAX +/* Mapping from IOCTL-nr to UVC-bit */ +static const u32 ioctl_nr_to_uvc_bit[] __initconst = { + [UVIO_IOCTL_UVDEV_INFO_NR] = BIT_UVIO_INTERNAL, + [UVIO_IOCTL_ATT_NR] = BIT_UVC_CMD_RETR_ATTEST, + [UVIO_IOCTL_ADD_SECRET_NR] = BIT_UVC_CMD_ADD_SECRET, + [UVIO_IOCTL_LIST_SECRETS_NR] = BIT_UVC_CMD_LIST_SECRETS, + [UVIO_IOCTL_LOCK_SECRETS_NR] = BIT_UVC_CMD_LOCK_SECRETS, +}; + +static_assert(ARRAY_SIZE(ioctl_nr_to_uvc_bit) == UVIO_IOCTL_NUM_IOCTLS); + +static struct uvio_uvdev_info uvdev_info = { + .supp_uvio_cmds = GENMASK_ULL(UVIO_IOCTL_NUM_IOCTLS - 1, 0), +}; + +static void __init set_supp_uv_cmds(unsigned long *supp_uv_cmds) +{ + int i; + + for (i = 0; i < UVIO_IOCTL_NUM_IOCTLS; i++) { + if (ioctl_nr_to_uvc_bit[i] == BIT_UVIO_INTERNAL) + continue; + if (!test_bit_inv(ioctl_nr_to_uvc_bit[i], uv_info.inst_calls_list)) + continue; + __set_bit(i, supp_uv_cmds); + } +} + +/** + * uvio_uvdev_info() - get information about the uvdevice + * + * @uv_ioctl: ioctl control block + * + * Lists all IOCTLs that are supported by this uvdevice + */ +static int uvio_uvdev_info(struct uvio_ioctl_cb *uv_ioctl) +{ + void __user *user_buf_arg = (void __user *)uv_ioctl->argument_addr; + + if (uv_ioctl->argument_len < sizeof(uvdev_info)) + return -EINVAL; + if (copy_to_user(user_buf_arg, &uvdev_info, sizeof(uvdev_info))) + return -EFAULT; + + uv_ioctl->uv_rc = UVC_RC_EXECUTED; + return 0; +} + static int uvio_build_uvcb_attest(struct uv_cb_attest *uvcb_attest, u8 *arcb, u8 *meas, u8 *add_data, struct uvio_attest *uvio_attest) { @@ -185,8 +234,161 @@ out: return ret; } -static int uvio_copy_and_check_ioctl(struct uvio_ioctl_cb *ioctl, void __user *argp) +/** uvio_add_secret() - perform an Add Secret UVC + * + * @uv_ioctl: ioctl control block + * + * uvio_add_secret() performs the Add Secret Ultravisor Call. + * + * The given userspace argument address and size are verified to be + * valid but every other check is made by the Ultravisor + * (UV). Therefore UV errors won't result in a negative return + * value. The request is then copied to kernelspace, the UV-call is + * performed and the results are copied back to userspace. + * + * The argument has to point to an Add Secret Request Control Block + * which is an encrypted and cryptographically verified request that + * inserts a protected guest's secrets into the Ultravisor for later + * use. + * + * If the Add Secret UV facility is not present, UV will return + * invalid command rc. This won't be fenced in the driver and does not + * result in a negative return value. + * + * Context: might sleep + * + * Return: 0 on success or a negative error code on error. + */ +static int uvio_add_secret(struct uvio_ioctl_cb *uv_ioctl) { + void __user *user_buf_arg = (void __user *)uv_ioctl->argument_addr; + struct uv_cb_guest_addr uvcb = { + .header.len = sizeof(uvcb), + .header.cmd = UVC_CMD_ADD_SECRET, + }; + void *asrcb = NULL; + int ret; + + if (uv_ioctl->argument_len > UVIO_ADD_SECRET_MAX_LEN) + return -EINVAL; + if (uv_ioctl->argument_len == 0) + return -EINVAL; + + asrcb = kvzalloc(uv_ioctl->argument_len, GFP_KERNEL); + if (!asrcb) + return -ENOMEM; + + ret = -EFAULT; + if (copy_from_user(asrcb, user_buf_arg, uv_ioctl->argument_len)) + goto out; + + ret = 0; + uvcb.addr = (u64)asrcb; + uv_call_sched(0, (u64)&uvcb); + uv_ioctl->uv_rc = uvcb.header.rc; + uv_ioctl->uv_rrc = uvcb.header.rrc; + +out: + kvfree(asrcb); + return ret; +} + +/** uvio_list_secrets() - perform a List Secret UVC + * @uv_ioctl: ioctl control block + * + * uvio_list_secrets() performs the List Secret Ultravisor Call. It verifies + * that the given userspace argument address is valid and its size is sane. + * Every other check is made by the Ultravisor (UV) and won't result in a + * negative return value. It builds the request, performs the UV-call, and + * copies the result to userspace. + * + * The argument specifies the location for the result of the UV-Call. + * + * If the List Secrets UV facility is not present, UV will return invalid + * command rc. This won't be fenced in the driver and does not result in a + * negative return value. + * + * Context: might sleep + * + * Return: 0 on success or a negative error code on error. + */ +static int uvio_list_secrets(struct uvio_ioctl_cb *uv_ioctl) +{ + void __user *user_buf_arg = (void __user *)uv_ioctl->argument_addr; + struct uv_cb_guest_addr uvcb = { + .header.len = sizeof(uvcb), + .header.cmd = UVC_CMD_LIST_SECRETS, + }; + void *secrets = NULL; + int ret = 0; + + if (uv_ioctl->argument_len != UVIO_LIST_SECRETS_LEN) + return -EINVAL; + + secrets = kvzalloc(UVIO_LIST_SECRETS_LEN, GFP_KERNEL); + if (!secrets) + return -ENOMEM; + + uvcb.addr = (u64)secrets; + uv_call_sched(0, (u64)&uvcb); + uv_ioctl->uv_rc = uvcb.header.rc; + uv_ioctl->uv_rrc = uvcb.header.rrc; + + if (copy_to_user(user_buf_arg, secrets, UVIO_LIST_SECRETS_LEN)) + ret = -EFAULT; + + kvfree(secrets); + return ret; +} + +/** uvio_lock_secrets() - perform a Lock Secret Store UVC + * @uv_ioctl: ioctl control block + * + * uvio_lock_secrets() performs the Lock Secret Store Ultravisor Call. It + * performs the UV-call and copies the return codes to the ioctl control block. + * After this call was dispatched successfully every following Add Secret UVC + * and Lock Secrets UVC will fail with return code 0x102. + * + * The argument address and size must be 0. + * + * If the Lock Secrets UV facility is not present, UV will return invalid + * command rc. This won't be fenced in the driver and does not result in a + * negative return value. + * + * Context: might sleep + * + * Return: 0 on success or a negative error code on error. + */ +static int uvio_lock_secrets(struct uvio_ioctl_cb *ioctl) +{ + struct uv_cb_nodata uvcb = { + .header.len = sizeof(uvcb), + .header.cmd = UVC_CMD_LOCK_SECRETS, + }; + + if (ioctl->argument_addr || ioctl->argument_len) + return -EINVAL; + + uv_call(0, (u64)&uvcb); + ioctl->uv_rc = uvcb.header.rc; + ioctl->uv_rrc = uvcb.header.rrc; + + return 0; +} + +static int uvio_copy_and_check_ioctl(struct uvio_ioctl_cb *ioctl, void __user *argp, + unsigned long cmd) +{ + u8 nr = _IOC_NR(cmd); + + if (_IOC_DIR(cmd) != (_IOC_READ | _IOC_WRITE)) + return -ENOIOCTLCMD; + if (_IOC_TYPE(cmd) != UVIO_TYPE_UVC) + return -ENOIOCTLCMD; + if (nr >= UVIO_IOCTL_NUM_IOCTLS) + return -ENOIOCTLCMD; + if (_IOC_SIZE(cmd) != sizeof(*ioctl)) + return -ENOIOCTLCMD; if (copy_from_user(ioctl, argp, sizeof(*ioctl))) return -EFAULT; if (ioctl->flags != 0) @@ -194,7 +396,7 @@ static int uvio_copy_and_check_ioctl(struct uvio_ioctl_cb *ioctl, void __user *a if (memchr_inv(ioctl->reserved14, 0, sizeof(ioctl->reserved14))) return -EINVAL; - return 0; + return nr; } /* @@ -205,14 +407,28 @@ static long uvio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) void __user *argp = (void __user *)arg; struct uvio_ioctl_cb uv_ioctl = { }; long ret; + int nr; - switch (cmd) { - case UVIO_IOCTL_ATT: - ret = uvio_copy_and_check_ioctl(&uv_ioctl, argp); - if (ret) - return ret; + nr = uvio_copy_and_check_ioctl(&uv_ioctl, argp, cmd); + if (nr < 0) + return nr; + + switch (nr) { + case UVIO_IOCTL_UVDEV_INFO_NR: + ret = uvio_uvdev_info(&uv_ioctl); + break; + case UVIO_IOCTL_ATT_NR: ret = uvio_attestation(&uv_ioctl); break; + case UVIO_IOCTL_ADD_SECRET_NR: + ret = uvio_add_secret(&uv_ioctl); + break; + case UVIO_IOCTL_LIST_SECRETS_NR: + ret = uvio_list_secrets(&uv_ioctl); + break; + case UVIO_IOCTL_LOCK_SECRETS_NR: + ret = uvio_lock_secrets(&uv_ioctl); + break; default: ret = -ENOIOCTLCMD; break; @@ -245,6 +461,7 @@ static void __exit uvio_dev_exit(void) static int __init uvio_dev_init(void) { + set_supp_uv_cmds((unsigned long *)&uvdev_info.supp_uv_cmds); return misc_register(&uvio_dev_miscdev); } diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 4761b768b773..74e0a44f9216 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -164,6 +164,7 @@ TEST_GEN_PROGS_s390x = s390x/memop TEST_GEN_PROGS_s390x += s390x/resets TEST_GEN_PROGS_s390x += s390x/sync_regs_test TEST_GEN_PROGS_s390x += s390x/tprot +TEST_GEN_PROGS_s390x += s390x/cmma_test TEST_GEN_PROGS_s390x += demand_paging_test TEST_GEN_PROGS_s390x += dirty_log_test TEST_GEN_PROGS_s390x += kvm_create_max_vcpus diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390x/cmma_test.c new file mode 100644 index 000000000000..1d73e78e8fa7 --- /dev/null +++ b/tools/testing/selftests/kvm/s390x/cmma_test.c @@ -0,0 +1,700 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test for s390x CMMA migration + * + * Copyright IBM Corp. 2023 + * + * Authors: + * Nico Boehr <nrb@linux.ibm.com> + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "kselftest.h" + +#define MAIN_PAGE_COUNT 512 + +#define TEST_DATA_PAGE_COUNT 512 +#define TEST_DATA_MEMSLOT 1 +#define TEST_DATA_START_GFN 4096 + +#define TEST_DATA_TWO_PAGE_COUNT 256 +#define TEST_DATA_TWO_MEMSLOT 2 +#define TEST_DATA_TWO_START_GFN 8192 + +static char cmma_value_buf[MAIN_PAGE_COUNT + TEST_DATA_PAGE_COUNT]; + +/** + * Dirty CMMA attributes of exactly one page in the TEST_DATA memslot, + * so use_cmma goes on and the CMMA related ioctls do something. + */ +static void guest_do_one_essa(void) +{ + asm volatile( + /* load TEST_DATA_START_GFN into r1 */ + " llilf 1,%[start_gfn]\n" + /* calculate the address from the gfn */ + " sllg 1,1,12(0)\n" + /* set the first page in TEST_DATA memslot to STABLE */ + " .insn rrf,0xb9ab0000,2,1,1,0\n" + /* hypercall */ + " diag 0,0,0x501\n" + "0: j 0b" + : + : [start_gfn] "L"(TEST_DATA_START_GFN) + : "r1", "r2", "memory", "cc" + ); +} + +/** + * Touch CMMA attributes of all pages in TEST_DATA memslot. Set them to stable + * state. + */ +static void guest_dirty_test_data(void) +{ + asm volatile( + /* r1 = TEST_DATA_START_GFN */ + " xgr 1,1\n" + " llilf 1,%[start_gfn]\n" + /* r5 = TEST_DATA_PAGE_COUNT */ + " lghi 5,%[page_count]\n" + /* r5 += r1 */ + "2: agfr 5,1\n" + /* r2 = r1 << 12 */ + "1: sllg 2,1,12(0)\n" + /* essa(r4, r2, SET_STABLE) */ + " .insn rrf,0xb9ab0000,4,2,1,0\n" + /* i++ */ + " agfi 1,1\n" + /* if r1 < r5 goto 1 */ + " cgrjl 1,5,1b\n" + /* hypercall */ + " diag 0,0,0x501\n" + "0: j 0b" + : + : [start_gfn] "L"(TEST_DATA_START_GFN), + [page_count] "L"(TEST_DATA_PAGE_COUNT) + : + /* the counter in our loop over the pages */ + "r1", + /* the calculated page physical address */ + "r2", + /* ESSA output register */ + "r4", + /* last page */ + "r5", + "cc", "memory" + ); +} + +static struct kvm_vm *create_vm(void) +{ + return ____vm_create(VM_MODE_DEFAULT); +} + +static void create_main_memslot(struct kvm_vm *vm) +{ + int i; + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, MAIN_PAGE_COUNT, 0); + /* set the array of memslots to zero like __vm_create does */ + for (i = 0; i < NR_MEM_REGIONS; i++) + vm->memslots[i] = 0; +} + +static void create_test_memslot(struct kvm_vm *vm) +{ + vm_userspace_mem_region_add(vm, + VM_MEM_SRC_ANONYMOUS, + TEST_DATA_START_GFN << vm->page_shift, + TEST_DATA_MEMSLOT, + TEST_DATA_PAGE_COUNT, + 0 + ); + vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT; +} + +static void create_memslots(struct kvm_vm *vm) +{ + /* + * Our VM has the following memory layout: + * +------+---------------------------+ + * | GFN | Memslot | + * +------+---------------------------+ + * | 0 | | + * | ... | MAIN (Code, Stack, ...) | + * | 511 | | + * +------+---------------------------+ + * | 4096 | | + * | ... | TEST_DATA | + * | 4607 | | + * +------+---------------------------+ + */ + create_main_memslot(vm); + create_test_memslot(vm); +} + +static void finish_vm_setup(struct kvm_vm *vm) +{ + struct userspace_mem_region *slot0; + + kvm_vm_elf_load(vm, program_invocation_name); + + slot0 = memslot2region(vm, 0); + ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size); + + kvm_arch_vm_post_create(vm); +} + +static struct kvm_vm *create_vm_two_memslots(void) +{ + struct kvm_vm *vm; + + vm = create_vm(); + + create_memslots(vm); + + finish_vm_setup(vm); + + return vm; +} + +static void enable_cmma(struct kvm_vm *vm) +{ + int r; + + r = __kvm_device_attr_set(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA, NULL); + TEST_ASSERT(!r, "enabling cmma failed r=%d errno=%d", r, errno); +} + +static void enable_dirty_tracking(struct kvm_vm *vm) +{ + vm_mem_region_set_flags(vm, 0, KVM_MEM_LOG_DIRTY_PAGES); + vm_mem_region_set_flags(vm, TEST_DATA_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES); +} + +static int __enable_migration_mode(struct kvm_vm *vm) +{ + return __kvm_device_attr_set(vm->fd, + KVM_S390_VM_MIGRATION, + KVM_S390_VM_MIGRATION_START, + NULL + ); +} + +static void enable_migration_mode(struct kvm_vm *vm) +{ + int r = __enable_migration_mode(vm); + + TEST_ASSERT(!r, "enabling migration mode failed r=%d errno=%d", r, errno); +} + +static bool is_migration_mode_on(struct kvm_vm *vm) +{ + u64 out; + int r; + + r = __kvm_device_attr_get(vm->fd, + KVM_S390_VM_MIGRATION, + KVM_S390_VM_MIGRATION_STATUS, + &out + ); + TEST_ASSERT(!r, "getting migration mode status failed r=%d errno=%d", r, errno); + return out; +} + +static int vm_get_cmma_bits(struct kvm_vm *vm, u64 flags, int *errno_out) +{ + struct kvm_s390_cmma_log args; + int rc; + + errno = 0; + + args = (struct kvm_s390_cmma_log){ + .start_gfn = 0, + .count = sizeof(cmma_value_buf), + .flags = flags, + .values = (__u64)&cmma_value_buf[0] + }; + rc = __vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); + + *errno_out = errno; + return rc; +} + +static void test_get_cmma_basic(void) +{ + struct kvm_vm *vm = create_vm_two_memslots(); + struct kvm_vcpu *vcpu; + int rc, errno_out; + + /* GET_CMMA_BITS without CMMA enabled should fail */ + rc = vm_get_cmma_bits(vm, 0, &errno_out); + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_out, ENXIO); + + enable_cmma(vm); + vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa); + + vcpu_run(vcpu); + + /* GET_CMMA_BITS without migration mode and without peeking should fail */ + rc = vm_get_cmma_bits(vm, 0, &errno_out); + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_out, EINVAL); + + /* GET_CMMA_BITS without migration mode and with peeking should work */ + rc = vm_get_cmma_bits(vm, KVM_S390_CMMA_PEEK, &errno_out); + ASSERT_EQ(rc, 0); + ASSERT_EQ(errno_out, 0); + + enable_dirty_tracking(vm); + enable_migration_mode(vm); + + /* GET_CMMA_BITS with invalid flags */ + rc = vm_get_cmma_bits(vm, 0xfeedc0fe, &errno_out); + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_out, EINVAL); + + kvm_vm_free(vm); +} + +static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu) +{ + ASSERT_EQ(vcpu->run->exit_reason, 13); + ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4); + ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300); + ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000); +} + +static void test_migration_mode(void) +{ + struct kvm_vm *vm = create_vm(); + struct kvm_vcpu *vcpu; + u64 orig_psw; + int rc; + + /* enabling migration mode on a VM without memory should fail */ + rc = __enable_migration_mode(vm); + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno, EINVAL); + TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off"); + errno = 0; + + create_memslots(vm); + finish_vm_setup(vm); + + enable_cmma(vm); + vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa); + orig_psw = vcpu->run->psw_addr; + + /* + * Execute one essa instruction in the guest. Otherwise the guest will + * not have use_cmm enabled and GET_CMMA_BITS will return no pages. + */ + vcpu_run(vcpu); + assert_exit_was_hypercall(vcpu); + + /* migration mode when memslots have dirty tracking off should fail */ + rc = __enable_migration_mode(vm); + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno, EINVAL); + TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off"); + errno = 0; + + /* enable dirty tracking */ + enable_dirty_tracking(vm); + + /* enabling migration mode should work now */ + rc = __enable_migration_mode(vm); + ASSERT_EQ(rc, 0); + TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on"); + errno = 0; + + /* execute another ESSA instruction to see this goes fine */ + vcpu->run->psw_addr = orig_psw; + vcpu_run(vcpu); + assert_exit_was_hypercall(vcpu); + + /* + * With migration mode on, create a new memslot with dirty tracking off. + * This should turn off migration mode. + */ + TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on"); + vm_userspace_mem_region_add(vm, + VM_MEM_SRC_ANONYMOUS, + TEST_DATA_TWO_START_GFN << vm->page_shift, + TEST_DATA_TWO_MEMSLOT, + TEST_DATA_TWO_PAGE_COUNT, + 0 + ); + TEST_ASSERT(!is_migration_mode_on(vm), + "creating memslot without dirty tracking turns off migration mode" + ); + + /* ESSA instructions should still execute fine */ + vcpu->run->psw_addr = orig_psw; + vcpu_run(vcpu); + assert_exit_was_hypercall(vcpu); + + /* + * Turn on dirty tracking on the new memslot. + * It should be possible to turn migration mode back on again. + */ + vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES); + rc = __enable_migration_mode(vm); + ASSERT_EQ(rc, 0); + TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on"); + errno = 0; + + /* + * Turn off dirty tracking again, this time with just a flag change. + * Again, migration mode should turn off. + */ + TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on"); + vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, 0); + TEST_ASSERT(!is_migration_mode_on(vm), + "disabling dirty tracking should turn off migration mode" + ); + + /* ESSA instructions should still execute fine */ + vcpu->run->psw_addr = orig_psw; + vcpu_run(vcpu); + assert_exit_was_hypercall(vcpu); + + kvm_vm_free(vm); +} + +/** + * Given a VM with the MAIN and TEST_DATA memslot, assert that both slots have + * CMMA attributes of all pages in both memslots and nothing more dirty. + * This has the useful side effect of ensuring nothing is CMMA dirty after this + * function. + */ +static void assert_all_slots_cmma_dirty(struct kvm_vm *vm) +{ + struct kvm_s390_cmma_log args; + + /* + * First iteration - everything should be dirty. + * Start at the main memslot... + */ + args = (struct kvm_s390_cmma_log){ + .start_gfn = 0, + .count = sizeof(cmma_value_buf), + .flags = 0, + .values = (__u64)&cmma_value_buf[0] + }; + memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); + vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); + ASSERT_EQ(args.count, MAIN_PAGE_COUNT); + ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT); + ASSERT_EQ(args.start_gfn, 0); + + /* ...and then - after a hole - the TEST_DATA memslot should follow */ + args = (struct kvm_s390_cmma_log){ + .start_gfn = MAIN_PAGE_COUNT, + .count = sizeof(cmma_value_buf), + .flags = 0, + .values = (__u64)&cmma_value_buf[0] + }; + memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); + vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); + ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT); + ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN); + ASSERT_EQ(args.remaining, 0); + + /* ...and nothing else should be there */ + args = (struct kvm_s390_cmma_log){ + .start_gfn = TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT, + .count = sizeof(cmma_value_buf), + .flags = 0, + .values = (__u64)&cmma_value_buf[0] + }; + memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); + vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); + ASSERT_EQ(args.count, 0); + ASSERT_EQ(args.start_gfn, 0); + ASSERT_EQ(args.remaining, 0); +} + +/** + * Given a VM, assert no pages are CMMA dirty. + */ +static void assert_no_pages_cmma_dirty(struct kvm_vm *vm) +{ + struct kvm_s390_cmma_log args; + + /* If we start from GFN 0 again, nothing should be dirty. */ + args = (struct kvm_s390_cmma_log){ + .start_gfn = 0, + .count = sizeof(cmma_value_buf), + .flags = 0, + .values = (__u64)&cmma_value_buf[0] + }; + memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); + vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); + if (args.count || args.remaining || args.start_gfn) + TEST_FAIL("pages are still dirty start_gfn=0x%llx count=%u remaining=%llu", + args.start_gfn, + args.count, + args.remaining + ); +} + +static void test_get_inital_dirty(void) +{ + struct kvm_vm *vm = create_vm_two_memslots(); + struct kvm_vcpu *vcpu; + + enable_cmma(vm); + vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa); + + /* + * Execute one essa instruction in the guest. Otherwise the guest will + * not have use_cmm enabled and GET_CMMA_BITS will return no pages. + */ + vcpu_run(vcpu); + assert_exit_was_hypercall(vcpu); + + enable_dirty_tracking(vm); + enable_migration_mode(vm); + + assert_all_slots_cmma_dirty(vm); + + /* Start from the beginning again and make sure nothing else is dirty */ + assert_no_pages_cmma_dirty(vm); + + kvm_vm_free(vm); +} + +static void query_cmma_range(struct kvm_vm *vm, + u64 start_gfn, u64 gfn_count, + struct kvm_s390_cmma_log *res_out) +{ + *res_out = (struct kvm_s390_cmma_log){ + .start_gfn = start_gfn, + .count = gfn_count, + .flags = 0, + .values = (__u64)&cmma_value_buf[0] + }; + memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); + vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, res_out); +} + +/** + * Assert the given cmma_log struct that was executed by query_cmma_range() + * indicates the first dirty gfn is at first_dirty_gfn and contains exactly + * dirty_gfn_count CMMA values. + */ +static void assert_cmma_dirty(u64 first_dirty_gfn, + u64 dirty_gfn_count, + const struct kvm_s390_cmma_log *res) +{ + ASSERT_EQ(res->start_gfn, first_dirty_gfn); + ASSERT_EQ(res->count, dirty_gfn_count); + for (size_t i = 0; i < dirty_gfn_count; i++) + ASSERT_EQ(cmma_value_buf[0], 0x0); /* stable state */ + ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */ +} + +static void test_get_skip_holes(void) +{ + size_t gfn_offset; + struct kvm_vm *vm = create_vm_two_memslots(); + struct kvm_s390_cmma_log log; + struct kvm_vcpu *vcpu; + u64 orig_psw; + + enable_cmma(vm); + vcpu = vm_vcpu_add(vm, 1, guest_dirty_test_data); + + orig_psw = vcpu->run->psw_addr; + + /* + * Execute some essa instructions in the guest. Otherwise the guest will + * not have use_cmm enabled and GET_CMMA_BITS will return no pages. + */ + vcpu_run(vcpu); + assert_exit_was_hypercall(vcpu); + + enable_dirty_tracking(vm); + enable_migration_mode(vm); + + /* un-dirty all pages */ + assert_all_slots_cmma_dirty(vm); + + /* Then, dirty just the TEST_DATA memslot */ + vcpu->run->psw_addr = orig_psw; + vcpu_run(vcpu); + + gfn_offset = TEST_DATA_START_GFN; + /** + * Query CMMA attributes of one page, starting at page 0. Since the + * main memslot was not touched by the VM, this should yield the first + * page of the TEST_DATA memslot. + * The dirty bitmap should now look like this: + * 0: not dirty + * [0x1, 0x200): dirty + */ + query_cmma_range(vm, 0, 1, &log); + assert_cmma_dirty(gfn_offset, 1, &log); + gfn_offset++; + + /** + * Query CMMA attributes of 32 (0x20) pages past the end of the TEST_DATA + * memslot. This should wrap back to the beginning of the TEST_DATA + * memslot, page 1. + * The dirty bitmap should now look like this: + * [0, 0x21): not dirty + * [0x21, 0x200): dirty + */ + query_cmma_range(vm, TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT, 0x20, &log); + assert_cmma_dirty(gfn_offset, 0x20, &log); + gfn_offset += 0x20; + + /* Skip 32 pages */ + gfn_offset += 0x20; + + /** + * After skipping 32 pages, query the next 32 (0x20) pages. + * The dirty bitmap should now look like this: + * [0, 0x21): not dirty + * [0x21, 0x41): dirty + * [0x41, 0x61): not dirty + * [0x61, 0x200): dirty + */ + query_cmma_range(vm, gfn_offset, 0x20, &log); + assert_cmma_dirty(gfn_offset, 0x20, &log); + gfn_offset += 0x20; + + /** + * Query 1 page from the beginning of the TEST_DATA memslot. This should + * yield page 0x21. + * The dirty bitmap should now look like this: + * [0, 0x22): not dirty + * [0x22, 0x41): dirty + * [0x41, 0x61): not dirty + * [0x61, 0x200): dirty + */ + query_cmma_range(vm, TEST_DATA_START_GFN, 1, &log); + assert_cmma_dirty(TEST_DATA_START_GFN + 0x21, 1, &log); + gfn_offset++; + + /** + * Query 15 (0xF) pages from page 0x23 in TEST_DATA memslot. + * This should yield pages [0x23, 0x33). + * The dirty bitmap should now look like this: + * [0, 0x22): not dirty + * 0x22: dirty + * [0x23, 0x33): not dirty + * [0x33, 0x41): dirty + * [0x41, 0x61): not dirty + * [0x61, 0x200): dirty + */ + gfn_offset = TEST_DATA_START_GFN + 0x23; + query_cmma_range(vm, gfn_offset, 15, &log); + assert_cmma_dirty(gfn_offset, 15, &log); + + /** + * Query 17 (0x11) pages from page 0x22 in TEST_DATA memslot. + * This should yield page [0x22, 0x33) + * The dirty bitmap should now look like this: + * [0, 0x33): not dirty + * [0x33, 0x41): dirty + * [0x41, 0x61): not dirty + * [0x61, 0x200): dirty + */ + gfn_offset = TEST_DATA_START_GFN + 0x22; + query_cmma_range(vm, gfn_offset, 17, &log); + assert_cmma_dirty(gfn_offset, 17, &log); + + /** + * Query 25 (0x19) pages from page 0x40 in TEST_DATA memslot. + * This should yield page 0x40 and nothing more, since there are more + * than 16 non-dirty pages after page 0x40. + * The dirty bitmap should now look like this: + * [0, 0x33): not dirty + * [0x33, 0x40): dirty + * [0x40, 0x61): not dirty + * [0x61, 0x200): dirty + */ + gfn_offset = TEST_DATA_START_GFN + 0x40; + query_cmma_range(vm, gfn_offset, 25, &log); + assert_cmma_dirty(gfn_offset, 1, &log); + + /** + * Query pages [0x33, 0x40). + * The dirty bitmap should now look like this: + * [0, 0x61): not dirty + * [0x61, 0x200): dirty + */ + gfn_offset = TEST_DATA_START_GFN + 0x33; + query_cmma_range(vm, gfn_offset, 0x40 - 0x33, &log); + assert_cmma_dirty(gfn_offset, 0x40 - 0x33, &log); + + /** + * Query the remaining pages [0x61, 0x200). + */ + gfn_offset = TEST_DATA_START_GFN; + query_cmma_range(vm, gfn_offset, TEST_DATA_PAGE_COUNT - 0x61, &log); + assert_cmma_dirty(TEST_DATA_START_GFN + 0x61, TEST_DATA_PAGE_COUNT - 0x61, &log); + + assert_no_pages_cmma_dirty(vm); +} + +struct testdef { + const char *name; + void (*test)(void); +} testlist[] = { + { "migration mode and dirty tracking", test_migration_mode }, + { "GET_CMMA_BITS: basic calls", test_get_cmma_basic }, + { "GET_CMMA_BITS: all pages are dirty initally", test_get_inital_dirty }, + { "GET_CMMA_BITS: holes are skipped", test_get_skip_holes }, +}; + +/** + * The kernel may support CMMA, but the machine may not (i.e. if running as + * guest-3). + * + * In this case, the CMMA capabilities are all there, but the CMMA-related + * ioctls fail. To find out whether the machine supports CMMA, create a + * temporary VM and then query the CMMA feature of the VM. + */ +static int machine_has_cmma(void) +{ + struct kvm_vm *vm = create_vm(); + int r; + + r = !__kvm_has_device_attr(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA); + kvm_vm_free(vm); + + return r; +} + +int main(int argc, char *argv[]) +{ + int idx; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_CMMA_MIGRATION)); + TEST_REQUIRE(machine_has_cmma()); + + ksft_print_header(); + + ksft_set_plan(ARRAY_SIZE(testlist)); + + for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { + testlist[idx].test(); + ksft_test_result_pass("%s\n", testlist[idx].name); + } + + ksft_finished(); /* Print results and exit() accordingly */ +} |