From 4ed3d478c63dc65a02eba774c35116618ea5ff10 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Tue, 9 May 2017 14:27:35 +0100 Subject: i386: rewrite way CPUID index is validated Change the nested if statements into a flat format, to make it clearer what validation / capping is being performed on different CPUID index values. NB this changes behaviour when "index > env->cpuid_xlevel2". This won't have any guest-visible effect because there is no CPUID[0xC0000001] feature supported by TCG, and KVM code will never call cpu_x86_cpuid() with such an index value. Reviewed-by: Eduardo Habkost Signed-off-by: Daniel P. Berrange Message-Id: <20170509132736.10071-2-berrange@redhat.com> Signed-off-by: Eduardo Habkost --- target/i386/cpu.c | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 7e87031fad..e3182b296f 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -2635,28 +2635,23 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, X86CPU *cpu = x86_env_get_cpu(env); CPUState *cs = CPU(cpu); uint32_t pkg_offset; + uint32_t limit; - /* test if maximum index reached */ - if (index & 0x80000000) { - if (index > env->cpuid_xlevel) { - if (env->cpuid_xlevel2 > 0) { - /* Handle the Centaur's CPUID instruction. */ - if (index > env->cpuid_xlevel2) { - index = env->cpuid_xlevel2; - } else if (index < 0xC0000000) { - index = env->cpuid_xlevel; - } - } else { - /* Intel documentation states that invalid EAX input will - * return the same information as EAX=cpuid_level - * (Intel SDM Vol. 
2A - Instruction Set Reference - CPUID) - */ - index = env->cpuid_level; - } - } + /* Calculate & apply limits for different index ranges */ + if (index >= 0xC0000000) { + limit = env->cpuid_xlevel2; + } else if (index >= 0x80000000) { + limit = env->cpuid_xlevel; } else { - if (index > env->cpuid_level) - index = env->cpuid_level; + limit = env->cpuid_level; + } + + if (index > limit) { + /* Intel documentation states that invalid EAX input will + * return the same information as EAX=cpuid_level + * (Intel SDM Vol. 2A - Instruction Set Reference - CPUID) + */ + index = env->cpuid_level; } switch(index) { -- cgit v1.2.3 From 0f203430dd88cc6270310956ace58aca639edb59 Mon Sep 17 00:00:00 2001 From: He Chen Date: Thu, 27 Apr 2017 10:35:58 +0800 Subject: numa: Allow setting NUMA distance for different NUMA nodes This patch is going to add SLIT table support in QEMU, and provides additional option `dist` for command `-numa` to allow the user to set vNUMA distances on the QEMU command line. With this patch, when a user wants to create a guest that contains several vNUMA nodes and also wants to set distance among those nodes, the QEMU command would look like: ``` -numa node,nodeid=0,cpus=0 \ -numa node,nodeid=1,cpus=1 \ -numa node,nodeid=2,cpus=2 \ -numa node,nodeid=3,cpus=3 \ -numa dist,src=0,dst=1,val=21 \ -numa dist,src=0,dst=2,val=31 \ -numa dist,src=0,dst=3,val=41 \ -numa dist,src=1,dst=2,val=21 \ -numa dist,src=1,dst=3,val=31 \ -numa dist,src=2,dst=3,val=21 \ ``` Signed-off-by: He Chen Message-Id: <1493260558-20728-1-git-send-email-he.chen@linux.intel.com> Reviewed-by: Igor Mammedov Reviewed-by: Andrew Jones Signed-off-by: Eduardo Habkost --- hw/acpi/aml-build.c | 26 +++++++++ hw/i386/acpi-build.c | 4 ++ include/hw/acpi/aml-build.h | 1 + include/sysemu/numa.h | 2 + include/sysemu/sysemu.h | 4 ++ numa.c | 137 +++++++++++++++++++++++++++++++++++++++++++- qapi-schema.json | 30 +++++++++- qemu-options.hx | 16 +++++- 8 files changed, 215 insertions(+), 5 deletions(-) diff --git 
a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index c6f2032dec..be496c817c 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -24,6 +24,7 @@ #include "hw/acpi/aml-build.h" #include "qemu/bswap.h" #include "qemu/bitops.h" +#include "sysemu/numa.h" static GArray *build_alloc_array(void) { @@ -1609,3 +1610,28 @@ void build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base, numamem->base_addr = cpu_to_le64(base); numamem->range_length = cpu_to_le64(len); } + +/* + * ACPI spec 5.2.17 System Locality Distance Information Table + * (Revision 2.0 or later) + */ +void build_slit(GArray *table_data, BIOSLinker *linker) +{ + int slit_start, i, j; + slit_start = table_data->len; + + acpi_data_push(table_data, sizeof(AcpiTableHeader)); + + build_append_int_noprefix(table_data, nb_numa_nodes, 8); + for (i = 0; i < nb_numa_nodes; i++) { + for (j = 0; j < nb_numa_nodes; j++) { + assert(numa_info[i].distance[j]); + build_append_int_noprefix(table_data, numa_info[i].distance[j], 1); + } + } + + build_header(linker, table_data, + (void *)(table_data->data + slit_start), + "SLIT", + table_data->len - slit_start, 1, NULL, NULL); +} diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 1d8c645ed3..c7cc45cc4b 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2707,6 +2707,10 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) if (pcms->numa_nodes) { acpi_add_table(table_offsets, tables_blob); build_srat(tables_blob, tables->linker, machine); + if (have_numa_distance) { + acpi_add_table(table_offsets, tables_blob); + build_slit(tables_blob, tables->linker); + } } if (acpi_get_mcfg(&mcfg)) { acpi_add_table(table_offsets, tables_blob); diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h index 00c21f160c..329a0d0c90 100644 --- a/include/hw/acpi/aml-build.h +++ b/include/hw/acpi/aml-build.h @@ -389,4 +389,5 @@ GCC_FMT_ATTR(2, 3); void build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base, uint64_t len, int 
node, MemoryAffinityFlags flags); +void build_slit(GArray *table_data, BIOSLinker *linker); #endif diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h index 8f09dcf918..0ea1bc086e 100644 --- a/include/sysemu/numa.h +++ b/include/sysemu/numa.h @@ -8,6 +8,7 @@ #include "hw/boards.h" extern int nb_numa_nodes; /* Number of NUMA nodes */ +extern bool have_numa_distance; struct numa_addr_range { ram_addr_t mem_start; @@ -21,6 +22,7 @@ typedef struct node_info { struct HostMemoryBackend *node_memdev; bool present; QLIST_HEAD(, numa_addr_range) addr; /* List to store address ranges */ + uint8_t distance[MAX_NODES]; } NodeInfo; extern NodeInfo numa_info[MAX_NODES]; diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 15656b7c36..be9e22c955 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -166,6 +166,10 @@ extern int mem_prealloc; #define MAX_NODES 128 #define NUMA_NODE_UNASSIGNED MAX_NODES +#define NUMA_DISTANCE_MIN 10 +#define NUMA_DISTANCE_DEFAULT 20 +#define NUMA_DISTANCE_MAX 254 +#define NUMA_DISTANCE_UNREACHABLE 255 #define MAX_OPTION_ROMS 16 typedef struct QEMUOptionRom { diff --git a/numa.c b/numa.c index 6fc2393ddd..2b3fc69915 100644 --- a/numa.c +++ b/numa.c @@ -51,6 +51,7 @@ static int max_numa_nodeid; /* Highest specified NUMA node ID, plus one. 
* For all nodes, nodeid < max_numa_nodeid */ int nb_numa_nodes; +bool have_numa_distance; NodeInfo numa_info[MAX_NODES]; void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node) @@ -140,7 +141,7 @@ uint32_t numa_get_node(ram_addr_t addr, Error **errp) return -1; } -static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) +static void parse_numa_node(NumaNodeOptions *node, QemuOpts *opts, Error **errp) { uint16_t nodenr; uint16List *cpus = NULL; @@ -212,6 +213,43 @@ static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1); } +static void parse_numa_distance(NumaDistOptions *dist, Error **errp) +{ + uint16_t src = dist->src; + uint16_t dst = dist->dst; + uint8_t val = dist->val; + + if (src >= MAX_NODES || dst >= MAX_NODES) { + error_setg(errp, + "Invalid node %" PRIu16 + ", max possible could be %" PRIu16, + MAX(src, dst), MAX_NODES); + return; + } + + if (!numa_info[src].present || !numa_info[dst].present) { + error_setg(errp, "Source/Destination NUMA node is missing. 
" + "Please use '-numa node' option to declare it first."); + return; + } + + if (val < NUMA_DISTANCE_MIN) { + error_setg(errp, "NUMA distance (%" PRIu8 ") is invalid, " + "it shouldn't be less than %d.", + val, NUMA_DISTANCE_MIN); + return; + } + + if (src == dst && val != NUMA_DISTANCE_MIN) { + error_setg(errp, "Local distance of node %d should be %d.", + src, NUMA_DISTANCE_MIN); + return; + } + + numa_info[src].distance[dst] = val; + have_numa_distance = true; +} + static int parse_numa(void *opaque, QemuOpts *opts, Error **errp) { NumaOptions *object = NULL; @@ -229,12 +267,18 @@ static int parse_numa(void *opaque, QemuOpts *opts, Error **errp) switch (object->type) { case NUMA_OPTIONS_TYPE_NODE: - numa_node_parse(&object->u.node, opts, &err); + parse_numa_node(&object->u.node, opts, &err); if (err) { goto end; } nb_numa_nodes++; break; + case NUMA_OPTIONS_TYPE_DIST: + parse_numa_distance(&object->u.dist, &err); + if (err) { + goto end; + } + break; default: abort(); } @@ -294,6 +338,75 @@ static void validate_numa_cpus(void) g_free(seen_cpus); } +/* If all node pair distances are symmetric, then only distances + * in one direction are enough. If there is even one asymmetric + * pair, though, then all distances must be provided. The + * distance from a node to itself is always NUMA_DISTANCE_MIN, + * so providing it is never necessary. 
+ */ +static void validate_numa_distance(void) +{ + int src, dst; + bool is_asymmetrical = false; + + for (src = 0; src < nb_numa_nodes; src++) { + for (dst = src; dst < nb_numa_nodes; dst++) { + if (numa_info[src].distance[dst] == 0 && + numa_info[dst].distance[src] == 0) { + if (src != dst) { + error_report("The distance between node %d and %d is " + "missing, at least one distance value " + "between each nodes should be provided.", + src, dst); + exit(EXIT_FAILURE); + } + } + + if (numa_info[src].distance[dst] != 0 && + numa_info[dst].distance[src] != 0 && + numa_info[src].distance[dst] != + numa_info[dst].distance[src]) { + is_asymmetrical = true; + } + } + } + + if (is_asymmetrical) { + for (src = 0; src < nb_numa_nodes; src++) { + for (dst = 0; dst < nb_numa_nodes; dst++) { + if (src != dst && numa_info[src].distance[dst] == 0) { + error_report("At least one asymmetrical pair of " + "distances is given, please provide distances " + "for both directions of all node pairs."); + exit(EXIT_FAILURE); + } + } + } + } +} + +static void complete_init_numa_distance(void) +{ + int src, dst; + + /* Fixup NUMA distance by symmetric policy because if it is an + * asymmetric distance table, it should be a complete table and + * there would not be any missing distance except local node, which + * is verified by validate_numa_distance above. + */ + for (src = 0; src < nb_numa_nodes; src++) { + for (dst = 0; dst < nb_numa_nodes; dst++) { + if (numa_info[src].distance[dst] == 0) { + if (src == dst) { + numa_info[src].distance[dst] = NUMA_DISTANCE_MIN; + } else { + numa_info[src].distance[dst] = numa_info[dst].distance[src]; + } + } + } + } +} + void parse_numa_opts(MachineClass *mc) { int i; @@ -390,6 +503,26 @@ void parse_numa_opts(MachineClass *mc) } validate_numa_cpus(); + + /* QEMU needs at least all unique node pair distances to build + * the whole NUMA distance table. QEMU treats the distance table + * as symmetric by default, i.e. distance A->B == distance B->A. 
+ * Thus, QEMU is able to complete the distance table + * initialization even though only distance A->B is provided and + * distance B->A is not. QEMU knows the distance of a node to + * itself is always 10, so A->A distances may be omitted. When + * the distances of two nodes of a pair differ, i.e. distance + * A->B != distance B->A, then that means the distance table is + * asymmetric. In this case, the distances for both directions + * of all node pairs are required. + */ + if (have_numa_distance) { + /* Validate enough NUMA distance information was provided. */ + validate_numa_distance(); + + /* Validation succeeded, now fill in any missing distances. */ + complete_init_numa_distance(); + } } else { numa_set_mem_node_id(0, ram_size, 0); } diff --git a/qapi-schema.json b/qapi-schema.json index 5728b7f363..f4eef33a44 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -5682,10 +5682,14 @@ ## # @NumaOptionsType: # +# @node: NUMA nodes configuration +# +# @dist: NUMA distance configuration (since 2.10) +# # Since: 2.1 ## { 'enum': 'NumaOptionsType', - 'data': [ 'node' ] } + 'data': [ 'node', 'dist' ] } ## # @NumaOptions: @@ -5698,7 +5702,8 @@ 'base': { 'type': 'NumaOptionsType' }, 'discriminator': 'type', 'data': { - 'node': 'NumaNodeOptions' }} + 'node': 'NumaNodeOptions', + 'dist': 'NumaDistOptions' }} ## # @NumaNodeOptions: @@ -5726,6 +5731,27 @@ '*mem': 'size', '*memdev': 'str' }} +## +# @NumaDistOptions: +# +# Set the distance between 2 NUMA nodes. +# +# @src: source NUMA node. +# +# @dst: destination NUMA node. +# +# @val: NUMA distance from source node to destination node. +# When a node is unreachable from another node, set the distance +# between them to 255. 
+# +# Since: 2.10 +## +{ 'struct': 'NumaDistOptions', + 'data': { + 'src': 'uint16', + 'dst': 'uint16', + 'val': 'uint8' }} + ## # @HostMemPolicy: # diff --git a/qemu-options.hx b/qemu-options.hx index 70c0ded12e..e10c1454d1 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -139,12 +139,15 @@ ETEXI DEF("numa", HAS_ARG, QEMU_OPTION_numa, "-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n" - "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n", QEMU_ARCH_ALL) + "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n" + "-numa dist,src=source,dst=destination,val=distance\n", QEMU_ARCH_ALL) STEXI @item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}] @itemx -numa node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}] +@itemx -numa dist,src=@var{source},dst=@var{destination},val=@var{distance} @findex -numa Define a NUMA node and assign RAM and VCPUs to it. +Set the NUMA distance from a source node to a destination node. @var{firstcpu} and @var{lastcpu} are CPU indexes. Each @samp{cpus} option represent a contiguous range of CPU indexes @@ -167,6 +170,17 @@ split equally between them. @samp{mem} and @samp{memdev} are mutually exclusive. Furthermore, if one node uses @samp{memdev}, all of them have to use it. +@var{source} and @var{destination} are NUMA node IDs. +@var{distance} is the NUMA distance from @var{source} to @var{destination}. +The distance from a node to itself is always 10. If any pair of nodes is +given a distance, then all pairs must be given distances. Although, when +distances are only given in one direction for each pair of nodes, then +the distances in the opposite directions are assumed to be the same. If, +however, an asymmetrical pair of distances is given for even one node +pair, then all node pairs must be provided distance values for both +directions, even when they are symmetrical. 
When a node is unreachable +from another node, set the pair's distance to 255. + Note that the -@option{numa} option doesn't allocate any of the specified resources, it just assigns existing resources to NUMA nodes. This means that one still has to use the @option{-m}, -- cgit v1.2.3 From 3bfe57165b4bf86a431099078df422f54598f5c6 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Tue, 2 May 2017 18:29:55 +0200 Subject: numa: equally distribute memory on nodes When there are more nodes than available memory to put the minimum allowed memory by node, all the memory is put on the last node. This is because we put (ram_size / nb_numa_nodes) & ~((1 << mc->numa_mem_align_shift) - 1); on each node, and in this case the value is 0. This is particularly true with pseries, as the memory must be aligned to 256MB. To avoid this problem, this patch uses an error diffusion algorithm [1] to distribute equally the memory on nodes. We introduce numa_auto_assign_ram() function in MachineClass to keep compatibility between machine type versions. The legacy function is used with pseries-2.9, pc-q35-2.9 and pc-i440fx-2.9 (and previous), the new one with all others. 
Example: qemu-system-ppc64 -S -nographic -nodefaults -monitor stdio -m 1G -smp 8 \ -numa node -numa node -numa node \ -numa node -numa node -numa node Before: (qemu) info numa 6 nodes node 0 cpus: 0 6 node 0 size: 0 MB node 1 cpus: 1 7 node 1 size: 0 MB node 2 cpus: 2 node 2 size: 0 MB node 3 cpus: 3 node 3 size: 0 MB node 4 cpus: 4 node 4 size: 0 MB node 5 cpus: 5 node 5 size: 1024 MB After: (qemu) info numa 6 nodes node 0 cpus: 0 6 node 0 size: 0 MB node 1 cpus: 1 7 node 1 size: 256 MB node 2 cpus: 2 node 2 size: 0 MB node 3 cpus: 3 node 3 size: 256 MB node 4 cpus: 4 node 4 size: 256 MB node 5 cpus: 5 node 5 size: 256 MB [1] https://en.wikipedia.org/wiki/Error_diffusion Signed-off-by: Laurent Vivier Message-Id: <20170502162955.1610-2-lvivier@redhat.com> Reviewed-by: Eduardo Habkost [ehabkost: s/ram_size/size/ at numa_default_auto_assign_ram()] Signed-off-by: Eduardo Habkost --- hw/core/machine.c | 2 ++ hw/i386/pc_piix.c | 2 ++ hw/i386/pc_q35.c | 2 ++ hw/ppc/spapr.c | 1 + include/hw/boards.h | 2 ++ include/qemu/typedefs.h | 1 + include/sysemu/numa.h | 9 +++++++-- numa.c | 49 ++++++++++++++++++++++++++++++++++++++----------- 8 files changed, 55 insertions(+), 13 deletions(-) diff --git a/hw/core/machine.c b/hw/core/machine.c index ada9eea483..2482c630c1 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -17,6 +17,7 @@ #include "qapi/visitor.h" #include "hw/sysbus.h" #include "sysemu/sysemu.h" +#include "sysemu/numa.h" #include "qemu/error-report.h" #include "qemu/cutils.h" @@ -400,6 +401,7 @@ static void machine_class_init(ObjectClass *oc, void *data) * On Linux, each node's border has to be 8MB aligned */ mc->numa_mem_align_shift = 23; + mc->numa_auto_assign_ram = numa_default_auto_assign_ram; object_class_property_add_str(oc, "accel", machine_get_accel, machine_set_accel, &error_abort); diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 9f102aa388..d468b963fb 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -54,6 +54,7 @@ #endif #include 
"migration/migration.h" #include "kvm_i386.h" +#include "sysemu/numa.h" #define MAX_IDE_BUS 2 @@ -442,6 +443,7 @@ static void pc_i440fx_2_9_machine_options(MachineClass *m) pc_i440fx_machine_options(m); m->alias = "pc"; m->is_default = 1; + m->numa_auto_assign_ram = numa_legacy_auto_assign_ram; } DEFINE_I440FX_MACHINE(v2_9, "pc-i440fx-2.9", NULL, diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index dd792a8547..66303a78cf 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -47,6 +47,7 @@ #include "hw/usb.h" #include "qemu/error-report.h" #include "migration/migration.h" +#include "sysemu/numa.h" /* ICH9 AHCI has 6 ports */ #define MAX_SATA_PORTS 6 @@ -305,6 +306,7 @@ static void pc_q35_2_9_machine_options(MachineClass *m) { pc_q35_machine_options(m); m->alias = "q35"; + m->numa_auto_assign_ram = numa_legacy_auto_assign_ram; } DEFINE_Q35_MACHINE(v2_9, "pc-q35-2.9", NULL, diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 80d12d005c..bdc31ce56c 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -3242,6 +3242,7 @@ static void spapr_machine_2_9_class_options(MachineClass *mc) { spapr_machine_2_10_class_options(mc); SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_9); + mc->numa_auto_assign_ram = numa_legacy_auto_assign_ram; } DEFINE_SPAPR_MACHINE(2_9, "2.9", false); diff --git a/include/hw/boards.h b/include/hw/boards.h index 31d9c72fb0..99458eb859 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -136,6 +136,8 @@ struct MachineClass { int minimum_page_bits; bool has_hotpluggable_cpus; int numa_mem_align_shift; + void (*numa_auto_assign_ram)(MachineClass *mc, NodeInfo *nodes, + int nb_nodes, ram_addr_t size); HotplugHandler *(*get_hotplug_handler)(MachineState *machine, DeviceState *dev); diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index f08d327aec..7d8505730c 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -97,5 +97,6 @@ typedef struct SSIBus SSIBus; typedef struct uWireSlave uWireSlave; typedef struct VirtIODevice 
VirtIODevice; typedef struct Visitor Visitor; +typedef struct node_info NodeInfo; #endif /* QEMU_TYPEDEFS_H */ diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h index 0ea1bc086e..70e56214e5 100644 --- a/include/sysemu/numa.h +++ b/include/sysemu/numa.h @@ -16,14 +16,14 @@ struct numa_addr_range { QLIST_ENTRY(numa_addr_range) entry; }; -typedef struct node_info { +struct node_info { uint64_t node_mem; unsigned long *node_cpu; struct HostMemoryBackend *node_memdev; bool present; QLIST_HEAD(, numa_addr_range) addr; /* List to store address ranges */ uint8_t distance[MAX_NODES]; -} NodeInfo; +}; extern NodeInfo numa_info[MAX_NODES]; void parse_numa_opts(MachineClass *mc); @@ -33,6 +33,11 @@ extern QemuOptsList qemu_numa_opts; void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node); void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node); uint32_t numa_get_node(ram_addr_t addr, Error **errp); +void numa_legacy_auto_assign_ram(MachineClass *mc, NodeInfo *nodes, + int nb_nodes, ram_addr_t size); +void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes, + int nb_nodes, ram_addr_t size); + /* on success returns node index in numa_info, * on failure returns nb_numa_nodes */ diff --git a/numa.c b/numa.c index 2b3fc69915..d753687dec 100644 --- a/numa.c +++ b/numa.c @@ -407,6 +407,42 @@ static void complete_init_numa_distance(void) } } +void numa_legacy_auto_assign_ram(MachineClass *mc, NodeInfo *nodes, + int nb_nodes, ram_addr_t size) +{ + int i; + uint64_t usedmem = 0; + + /* Align each node according to the alignment + * requirements of the machine class + */ + + for (i = 0; i < nb_nodes - 1; i++) { + nodes[i].node_mem = (size / nb_nodes) & + ~((1 << mc->numa_mem_align_shift) - 1); + usedmem += nodes[i].node_mem; + } + nodes[i].node_mem = size - usedmem; +} + +void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes, + int nb_nodes, ram_addr_t size) +{ + int i; + uint64_t usedmem = 0, node_mem; + uint64_t 
granularity = size / nb_nodes; + uint64_t propagate = 0; + + for (i = 0; i < nb_nodes - 1; i++) { + node_mem = (granularity + propagate) & + ~((1 << mc->numa_mem_align_shift) - 1); + propagate = granularity + propagate - node_mem; + nodes[i].node_mem = node_mem; + usedmem += node_mem; + } + nodes[i].node_mem = size - usedmem; +} + void parse_numa_opts(MachineClass *mc) { int i; @@ -449,17 +485,8 @@ void parse_numa_opts(MachineClass *mc) } } if (i == nb_numa_nodes) { - uint64_t usedmem = 0; - - /* Align each node according to the alignment - * requirements of the machine class - */ - for (i = 0; i < nb_numa_nodes - 1; i++) { - numa_info[i].node_mem = (ram_size / nb_numa_nodes) & - ~((1 << mc->numa_mem_align_shift) - 1); - usedmem += numa_info[i].node_mem; - } - numa_info[i].node_mem = ram_size - usedmem; + assert(mc->numa_auto_assign_ram); + mc->numa_auto_assign_ram(mc, numa_info, nb_numa_nodes, ram_size); } numa_total = 0; -- cgit v1.2.3 From fda4096fca83dcdc72e0fc0e4a1ae6e7724fb5e0 Mon Sep 17 00:00:00 2001 From: He Chen Date: Wed, 3 May 2017 17:17:16 +0800 Subject: tests: acpi: extend cphp and memhp testcase with numa distance check Signed-off-by: He Chen Message-Id: <1493803036-4048-1-git-send-email-he.chen@linux.intel.com> Reviewed-by: Igor Mammedov [ehabkost: regenerated tests/acpi-tst-data, included SLIT table] Signed-off-by: Eduardo Habkost --- tests/acpi-test-data/pc/SLIT.cphp | Bin 0 -> 48 bytes tests/acpi-test-data/pc/SLIT.memhp | Bin 0 -> 48 bytes tests/acpi-test-data/pc/SRAT.memhp | Bin 224 -> 264 bytes tests/acpi-test-data/q35/SLIT.cphp | Bin 0 -> 48 bytes tests/acpi-test-data/q35/SLIT.memhp | Bin 0 -> 48 bytes tests/acpi-test-data/q35/SRAT.memhp | Bin 224 -> 264 bytes tests/bios-tables-test.c | 16 ++++++++++++---- 7 files changed, 12 insertions(+), 4 deletions(-) create mode 100644 tests/acpi-test-data/pc/SLIT.cphp create mode 100644 tests/acpi-test-data/pc/SLIT.memhp create mode 100644 tests/acpi-test-data/q35/SLIT.cphp create mode 100644 
tests/acpi-test-data/q35/SLIT.memhp diff --git a/tests/acpi-test-data/pc/SLIT.cphp b/tests/acpi-test-data/pc/SLIT.cphp new file mode 100644 index 0000000000..74ec3b4b46 Binary files /dev/null and b/tests/acpi-test-data/pc/SLIT.cphp differ diff --git a/tests/acpi-test-data/pc/SLIT.memhp b/tests/acpi-test-data/pc/SLIT.memhp new file mode 100644 index 0000000000..74ec3b4b46 Binary files /dev/null and b/tests/acpi-test-data/pc/SLIT.memhp differ diff --git a/tests/acpi-test-data/pc/SRAT.memhp b/tests/acpi-test-data/pc/SRAT.memhp index 66ce9a8981..a7dddf7760 100644 Binary files a/tests/acpi-test-data/pc/SRAT.memhp and b/tests/acpi-test-data/pc/SRAT.memhp differ diff --git a/tests/acpi-test-data/q35/SLIT.cphp b/tests/acpi-test-data/q35/SLIT.cphp new file mode 100644 index 0000000000..74ec3b4b46 Binary files /dev/null and b/tests/acpi-test-data/q35/SLIT.cphp differ diff --git a/tests/acpi-test-data/q35/SLIT.memhp b/tests/acpi-test-data/q35/SLIT.memhp new file mode 100644 index 0000000000..74ec3b4b46 Binary files /dev/null and b/tests/acpi-test-data/q35/SLIT.memhp differ diff --git a/tests/acpi-test-data/q35/SRAT.memhp b/tests/acpi-test-data/q35/SRAT.memhp index 66ce9a8981..a7dddf7760 100644 Binary files a/tests/acpi-test-data/q35/SRAT.memhp and b/tests/acpi-test-data/q35/SRAT.memhp differ diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c index 9c96a67053..4e5c65a022 100644 --- a/tests/bios-tables-test.c +++ b/tests/bios-tables-test.c @@ -723,7 +723,8 @@ static void test_acpi_piix4_tcg_cphp(void) data.machine = MACHINE_PC; data.variant = ".cphp"; test_acpi_one("-smp 2,cores=3,sockets=2,maxcpus=6" - " -numa node -numa node", + " -numa node -numa node" + " -numa dist,src=0,dst=1,val=21", &data); free_test_data(&data); } @@ -736,7 +737,8 @@ static void test_acpi_q35_tcg_cphp(void) data.machine = MACHINE_Q35; data.variant = ".cphp"; test_acpi_one(" -smp 2,cores=3,sockets=2,maxcpus=6" - " -numa node -numa node", + " -numa node -numa node" + " -numa 
dist,src=0,dst=1,val=21", &data); free_test_data(&data); } @@ -785,7 +787,10 @@ static void test_acpi_q35_tcg_memhp(void) memset(&data, 0, sizeof(data)); data.machine = MACHINE_Q35; data.variant = ".memhp"; - test_acpi_one(" -m 128,slots=3,maxmem=1G -numa node", &data); + test_acpi_one(" -m 128,slots=3,maxmem=1G" + " -numa node -numa node" + " -numa dist,src=0,dst=1,val=21", + &data); free_test_data(&data); } @@ -796,7 +801,10 @@ static void test_acpi_piix4_tcg_memhp(void) memset(&data, 0, sizeof(data)); data.machine = MACHINE_PC; data.variant = ".memhp"; - test_acpi_one(" -m 128,slots=3,maxmem=1G -numa node", &data); + test_acpi_one(" -m 128,slots=3,maxmem=1G" + " -numa node -numa node" + " -numa dist,src=0,dst=1,val=21", + &data); free_test_data(&data); } -- cgit v1.2.3 From 63baf8bf014fa00f0a22ce7e47f9b3ddbe891cd9 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 3 May 2017 14:56:55 +0200 Subject: tests: add CPUs to numa node mapping test Signed-off-by: Igor Mammedov Reviewed-by: David Gibson Message-Id: <1493816238-33120-2-git-send-email-imammedo@redhat.com> Reviewed-by: Eduardo Habkost Signed-off-by: Eduardo Habkost --- tests/Makefile.include | 5 +++ tests/numa-test.c | 106 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 tests/numa-test.c diff --git a/tests/Makefile.include b/tests/Makefile.include index 31931c0d77..16ff8f399f 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -260,6 +260,7 @@ check-qtest-i386-y += tests/test-filter-mirror$(EXESUF) check-qtest-i386-y += tests/test-filter-redirector$(EXESUF) check-qtest-i386-y += tests/postcopy-test$(EXESUF) check-qtest-i386-y += tests/test-x86-cpuid-compat$(EXESUF) +check-qtest-i386-y += tests/numa-test$(EXESUF) check-qtest-x86_64-y += $(check-qtest-i386-y) gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y)) @@ -300,6 +301,7 @@ check-qtest-ppc64-y 
+= tests/test-netfilter$(EXESUF) check-qtest-ppc64-y += tests/test-filter-mirror$(EXESUF) check-qtest-ppc64-y += tests/test-filter-redirector$(EXESUF) check-qtest-ppc64-y += tests/display-vga-test$(EXESUF) +check-qtest-ppc64-y += tests/numa-test$(EXESUF) check-qtest-ppc64-$(CONFIG_EVENTFD) += tests/ivshmem-test$(EXESUF) check-qtest-sh4-y = tests/endianness-test$(EXESUF) @@ -324,6 +326,8 @@ gcov-files-arm-y += arm-softmmu/hw/block/virtio-blk.c check-qtest-arm-y += tests/test-arm-mptimer$(EXESUF) gcov-files-arm-y += hw/timer/arm_mptimer.c +check-qtest-aarch64-y = tests/numa-test$(EXESUF) + check-qtest-microblazeel-y = $(check-qtest-microblaze-y) check-qtest-xtensaeb-y = $(check-qtest-xtensa-y) @@ -753,6 +757,7 @@ tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o contrib/libvhost-use tests/test-uuid$(EXESUF): tests/test-uuid.o $(test-util-obj-y) tests/test-arm-mptimer$(EXESUF): tests/test-arm-mptimer.o tests/test-qapi-util$(EXESUF): tests/test-qapi-util.o $(test-util-obj-y) +tests/numa-test$(EXESUF): tests/numa-test.o tests/migration/stress$(EXESUF): tests/migration/stress.o $(call quiet-command, $(LINKPROG) -static -O3 $(PTHREAD_LIB) -o $@ $< ,"LINK","$(TARGET_DIR)$@") diff --git a/tests/numa-test.c b/tests/numa-test.c new file mode 100644 index 0000000000..f5da0c845b --- /dev/null +++ b/tests/numa-test.c @@ -0,0 +1,106 @@ +/* + * NUMA configuration test cases + * + * Copyright (c) 2017 Red Hat Inc. + * Authors: + * Igor Mammedov + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "libqtest.h" + +static char *make_cli(const char *generic_cli, const char *test_cli) +{ + return g_strdup_printf("%s %s", generic_cli ? 
generic_cli : "", test_cli); +} + +static char *hmp_info_numa(void) +{ + QDict *resp; + char *s; + + resp = qmp("{ 'execute': 'human-monitor-command', 'arguments': " + "{ 'command-line': 'info numa '} }"); + g_assert(resp); + g_assert(qdict_haskey(resp, "return")); + s = g_strdup(qdict_get_str(resp, "return")); + g_assert(s); + QDECREF(resp); + return s; +} + +static void test_mon_explicit(const void *data) +{ + char *s; + char *cli; + + cli = make_cli(data, "-smp 8 " + "-numa node,nodeid=0,cpus=0-3 " + "-numa node,nodeid=1,cpus=4-7 "); + qtest_start(cli); + + s = hmp_info_numa(); + g_assert(strstr(s, "node 0 cpus: 0 1 2 3")); + g_assert(strstr(s, "node 1 cpus: 4 5 6 7")); + g_free(s); + + qtest_end(); + g_free(cli); +} + +static void test_mon_default(const void *data) +{ + char *s; + char *cli; + + cli = make_cli(data, "-smp 8 -numa node -numa node"); + qtest_start(cli); + + s = hmp_info_numa(); + g_assert(strstr(s, "node 0 cpus: 0 2 4 6")); + g_assert(strstr(s, "node 1 cpus: 1 3 5 7")); + g_free(s); + + qtest_end(); + g_free(cli); +} + +static void test_mon_partial(const void *data) +{ + char *s; + char *cli; + + cli = make_cli(data, "-smp 8 " + "-numa node,nodeid=0,cpus=0-1 " + "-numa node,nodeid=1,cpus=4-5 "); + qtest_start(cli); + + s = hmp_info_numa(); + g_assert(strstr(s, "node 0 cpus: 0 1 2 3 6 7")); + g_assert(strstr(s, "node 1 cpus: 4 5")); + g_free(s); + + qtest_end(); + g_free(cli); +} + +int main(int argc, char **argv) +{ + const char *args = NULL; + const char *arch = qtest_get_arch(); + + if (strcmp(arch, "aarch64") == 0) { + args = "-machine virt"; + } + + g_test_init(&argc, &argv, NULL); + + qtest_add_data_func("/numa/mon/default", args, test_mon_default); + qtest_add_data_func("/numa/mon/cpus/explicit", args, test_mon_explicit); + qtest_add_data_func("/numa/mon/cpus/partial", args, test_mon_partial); + + return g_test_run(); +} -- cgit v1.2.3 From 46de5913b6779b5cf656f62f945409be00f45b94 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 3 
May 2017 14:56:56 +0200 Subject: hw/arm/virt: extract mp-affinity calculation in separate function Signed-off-by: Igor Mammedov Message-Id: <1493816238-33120-3-git-send-email-imammedo@redhat.com> Reviewed-by: Andrew Jones Signed-off-by: Eduardo Habkost --- hw/arm/virt.c | 43 ++++++++++++++++++++++++++----------------- target/arm/cpu.c | 12 +++++++++--- target/arm/cpu.h | 2 ++ 3 files changed, 37 insertions(+), 20 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 5f62a0321e..61ae43762a 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1194,6 +1194,29 @@ void virt_machine_done(Notifier *notifier, void *data) virt_build_smbios(vms); } +static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) +{ + uint8_t clustersz = ARM_DEFAULT_CPUS_PER_CLUSTER; + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); + + if (!vmc->disallow_affinity_adjustment) { + /* Adjust MPIDR like 64-bit KVM hosts, which incorporate the + * GIC's target-list limitations. 32-bit KVM hosts currently + * always create clusters of 4 CPUs, but that is expected to + * change when they gain support for gicv3. When KVM is enabled + * it will override the changes we make here, therefore our + * purposes are to make TCG consistent (with 64-bit KVM hosts) + * and to improve SGI efficiency. 
+ */ + if (vms->gic_version == 3) { + clustersz = GICV3_TARGETLIST_BITS; + } else { + clustersz = GIC_TARGETLIST_BITS; + } + } + return arm_cpu_mp_affinity(idx, clustersz); +} + static void machvirt_init(MachineState *machine) { VirtMachineState *vms = VIRT_MACHINE(machine); @@ -1210,7 +1233,6 @@ static void machvirt_init(MachineState *machine) CPUClass *cc; Error *err = NULL; bool firmware_loaded = bios_name || drive_get(IF_PFLASH, 0, 0); - uint8_t clustersz; if (!cpu_model) { cpu_model = "cortex-a15"; @@ -1263,10 +1285,8 @@ static void machvirt_init(MachineState *machine) */ if (vms->gic_version == 3) { virt_max_cpus = vms->memmap[VIRT_GIC_REDIST].size / 0x20000; - clustersz = GICV3_TARGETLIST_BITS; } else { virt_max_cpus = GIC_NCPU; - clustersz = GIC_TARGETLIST_BITS; } if (max_cpus > virt_max_cpus) { @@ -1326,20 +1346,9 @@ static void machvirt_init(MachineState *machine) for (n = 0; n < smp_cpus; n++) { Object *cpuobj = object_new(typename); - if (!vmc->disallow_affinity_adjustment) { - /* Adjust MPIDR like 64-bit KVM hosts, which incorporate the - * GIC's target-list limitations. 32-bit KVM hosts currently - * always create clusters of 4 CPUs, but that is expected to - * change when they gain support for gicv3. When KVM is enabled - * it will override the changes we make here, therefore our - * purposes are to make TCG consistent (with 64-bit KVM hosts) - * and to improve SGI efficiency. 
- */ - uint8_t aff1 = n / clustersz; - uint8_t aff0 = n % clustersz; - object_property_set_int(cpuobj, (aff1 << ARM_AFF1_SHIFT) | aff0, - "mp-affinity", NULL); - } + + object_property_set_int(cpuobj, virt_cpu_mp_affinity(vms, n), + "mp-affinity", NULL); if (!vms->secure) { object_property_set_bool(cpuobj, false, "has_el3", NULL); diff --git a/target/arm/cpu.c b/target/arm/cpu.c index b357aee778..ee1406da12 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -458,6 +458,13 @@ static void arm_disas_set_info(CPUState *cpu, disassemble_info *info) } } +uint64_t arm_cpu_mp_affinity(int idx, uint8_t clustersz) +{ + uint32_t Aff1 = idx / clustersz; + uint32_t Aff0 = idx % clustersz; + return (Aff1 << ARM_AFF1_SHIFT) | Aff0; +} + static void arm_cpu_initfn(Object *obj) { CPUState *cs = CPU(obj); @@ -709,9 +716,8 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) * so these bits always RAZ. */ if (cpu->mp_affinity == ARM64_AFFINITY_INVALID) { - uint32_t Aff1 = cs->cpu_index / ARM_DEFAULT_CPUS_PER_CLUSTER; - uint32_t Aff0 = cs->cpu_index % ARM_DEFAULT_CPUS_PER_CLUSTER; - cpu->mp_affinity = (Aff1 << ARM_AFF1_SHIFT) | Aff0; + cpu->mp_affinity = arm_cpu_mp_affinity(cs->cpu_index, + ARM_DEFAULT_CPUS_PER_CLUSTER); } if (cpu->reset_hivecs) { diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 1055bfef3d..048faed9b9 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -710,6 +710,8 @@ static inline ARMCPU *arm_env_get_cpu(CPUARMState *env) return container_of(env, ARMCPU, env); } +uint64_t arm_cpu_mp_affinity(int idx, uint8_t clustersz); + #define ENV_GET_CPU(e) CPU(arm_env_get_cpu(e)) #define ENV_OFFSET offsetof(ARMCPU, env) -- cgit v1.2.3 From 17d3d0e2d9fc70631de3116eba33e3b2a63887eb Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 3 May 2017 14:56:57 +0200 Subject: hw/arm/virt: use machine->possible_cpus for storing possible topology info for now precalculate and store mp_afinity in possible_cpus as ARM cpus don't have socket/core/thread-id 
properties yet. In follow-up patches possible_cpus will be used for storing and setting NUMA node mapping and replace legacy bitmap based numa_info[node_id].node_cpu/numa_get_node_for_cpu(). For lack of a better idea, this patch cannibalizes possible_cpus.cpus[x].props.thread_id so that *_cpu_index_to_props() callback could return a CPU addressable by props, which will be used by machine_set_cpu_numa_node() in follow up patches to assign a CPU to node. But cannibalizing is fine for now as that thread_id isn't exposed to users (no hotpluggable_cpus callback support for ARM yet) and it will be used only internally until 'device_add cpu' is supported where we can decide on which properties to use. Signed-off-by: Igor Mammedov Message-Id: <1493816238-33120-4-git-send-email-imammedo@redhat.com> Reviewed-by: Andrew Jones Signed-off-by: Eduardo Habkost --- hw/arm/virt.c | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 61ae43762a..e2c5626c52 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1221,6 +1221,8 @@ static void machvirt_init(MachineState *machine) { VirtMachineState *vms = VIRT_MACHINE(machine); VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(machine); + MachineClass *mc = MACHINE_GET_CLASS(machine); + const CPUArchIdList *possible_cpus; qemu_irq pic[NUM_IRQS]; MemoryRegion *sysmem = get_system_memory(); MemoryRegion *secure_sysmem = NULL; @@ -1344,10 +1346,16 @@ static void machvirt_init(MachineState *machine) exit(1); } - for (n = 0; n < smp_cpus; n++) { - Object *cpuobj = object_new(typename); + possible_cpus = mc->possible_cpu_arch_ids(machine); + for (n = 0; n < possible_cpus->len; n++) { + Object *cpuobj; - object_property_set_int(cpuobj, virt_cpu_mp_affinity(vms, n), + if (n >= smp_cpus) { + break; + } + + cpuobj = object_new(typename); + object_property_set_int(cpuobj, possible_cpus->cpus[n].arch_id, "mp-affinity", NULL); if (!vms->secure) { @@ -1527,6 +1535,31 @@
static void virt_set_gic_version(Object *obj, const char *value, Error **errp) } } +static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) +{ + int n; + VirtMachineState *vms = VIRT_MACHINE(ms); + + if (ms->possible_cpus) { + assert(ms->possible_cpus->len == max_cpus); + return ms->possible_cpus; + } + + ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) + + sizeof(CPUArchId) * max_cpus); + ms->possible_cpus->len = max_cpus; + for (n = 0; n < ms->possible_cpus->len; n++) { + ms->possible_cpus->cpus[n].arch_id = + virt_cpu_mp_affinity(vms, n); + ms->possible_cpus->cpus[n].props.has_thread_id = true; + ms->possible_cpus->cpus[n].props.thread_id = n; + + /* TODO: add 'has_node/node' here to describe + to which node core belongs */ + } + return ms->possible_cpus; +} + static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -1543,6 +1576,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) mc->pci_allow_0_address = true; /* We know we will never create a pre-ARMv7 CPU which needs 1K pages */ mc->minimum_page_bits = 12; + mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids; } static const TypeInfo virt_machine_info = { -- cgit v1.2.3 From d9c34f9c6c745f6a8123a872b33990ab564c614a Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 3 May 2017 14:56:58 +0200 Subject: hw/arm/virt: explicitly allocate cpu_index for cpus Currently cpu_index is implicitly auto assigned during cpu.realize() time cpu_exec_realizefn()->cpu_list_add(). It happens to match index in possible_cpus so take control over it and make board initialize cpu_index to possible_cpus index explicitly. It will at least document that board is in control of it and when '-device cpu' support comes it will keep cpu_index stable regardless of order cpus are created so it won't break migration. 
Within this series it will be used for internal conversion from storing cpu_index based NUMA node bitmaps to property based mapping with possible_cpus, and will allow mapping cpu_index to a CPU entry in possible_cpus array. Signed-off-by: Igor Mammedov Reviewed-by: Andrew Jones Message-Id: <1493816238-33120-5-git-send-email-imammedo@redhat.com> Signed-off-by: Eduardo Habkost --- hw/arm/virt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index e2c5626c52..acc748ec40 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1349,6 +1349,7 @@ static void machvirt_init(MachineState *machine) possible_cpus = mc->possible_cpu_arch_ids(machine); for (n = 0; n < possible_cpus->len; n++) { Object *cpuobj; + CPUState *cs; if (n >= smp_cpus) { break; @@ -1358,6 +1359,9 @@ static void machvirt_init(MachineState *machine) object_property_set_int(cpuobj, possible_cpus->cpus[n].arch_id, "mp-affinity", NULL); + cs = CPU(cpuobj); + cs->cpu_index = n; + if (!vms->secure) { object_property_set_bool(cpuobj, false, "has_el3", NULL); } -- cgit v1.2.3 From ea089eebbd80e61d3c3cd03741dd5d9535c551fc Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 10 May 2017 13:29:45 +0200 Subject: numa: move source of default CPUs to NUMA node mapping into boards Originally CPU threads were by default assigned in round-robin fashion. However it was causing issues in guest since CPU threads from the same socket/core could be placed on different NUMA nodes. Commit fb43b73b (pc: fix default VCPU to NUMA node mapping) fixed it by grouping threads within a socket on the same node introducing cpu_index_to_socket_id() callback and commit 20bb648d (spapr: Fix default NUMA node allocation for threads) reused callback to fix similar issues for SPAPR machine even though socket doesn't make much sense there. As a result QEMU ended up having 3 default distribution rules used by 3 targets /virt-arm, spapr, pc/.
In effort of moving NUMA mapping for CPUs into possible_cpus, generalize default mapping in numa.c by making boards decide on default mapping and let them explicitly tell generic numa code to which node a CPU thread belongs to by replacing cpu_index_to_socket_id() with @cpu_index_to_instance_props() which provides default node_id assigned by board to specified cpu_index. Signed-off-by: Igor Mammedov Reviewed-by: Eduardo Habkost Message-Id: <1494415802-227633-2-git-send-email-imammedo@redhat.com> Reviewed-by: David Gibson Signed-off-by: Eduardo Habkost --- hw/arm/virt.c | 20 ++++++++++++++++++-- hw/i386/pc.c | 23 +++++++++++++++++------ hw/ppc/spapr.c | 28 +++++++++++++++++++++------- include/hw/boards.h | 8 ++++++-- include/sysemu/numa.h | 2 +- numa.c | 24 +++++++++++------------- vl.c | 2 +- 7 files changed, 75 insertions(+), 32 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index acc748ec40..dfd6fd446c 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1539,6 +1539,16 @@ static void virt_set_gic_version(Object *obj, const char *value, Error **errp) } } +static CpuInstanceProperties +virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index) +{ + MachineClass *mc = MACHINE_GET_CLASS(ms); + const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); + + assert(cpu_index < possible_cpus->len); + return possible_cpus->cpus[cpu_index].props; +} + static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) { int n; @@ -1558,8 +1568,13 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) ms->possible_cpus->cpus[n].props.has_thread_id = true; ms->possible_cpus->cpus[n].props.thread_id = n; - /* TODO: add 'has_node/node' here to describe - to which node core belongs */ + /* default distribution of CPUs over NUMA nodes */ + if (nb_numa_nodes) { + /* preset values but do not enable them i.e. 
'has_node_id = false', + * numa init code will enable them later if manual mapping wasn't + * present on CLI */ + ms->possible_cpus->cpus[n].props.node_id = n % nb_numa_nodes; + } } return ms->possible_cpus; } @@ -1581,6 +1596,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) /* We know we will never create a pre-ARMv7 CPU which needs 1K pages */ mc->minimum_page_bits = 12; mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids; + mc->cpu_index_to_instance_props = virt_cpu_index_to_props; } static const TypeInfo virt_machine_info = { diff --git a/hw/i386/pc.c b/hw/i386/pc.c index f3b372a18f..01693d54ca 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -2243,12 +2243,14 @@ static void pc_machine_reset(void) } } -static unsigned pc_cpu_index_to_socket_id(unsigned cpu_index) +static CpuInstanceProperties +pc_cpu_index_to_props(MachineState *ms, unsigned cpu_index) { - X86CPUTopoInfo topo; - x86_topo_ids_from_idx(smp_cores, smp_threads, cpu_index, - &topo); - return topo.pkg_id; + MachineClass *mc = MACHINE_GET_CLASS(ms); + const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); + + assert(cpu_index < possible_cpus->len); + return possible_cpus->cpus[cpu_index].props; } static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms) @@ -2280,6 +2282,15 @@ static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms) ms->possible_cpus->cpus[i].props.core_id = topo.core_id; ms->possible_cpus->cpus[i].props.has_thread_id = true; ms->possible_cpus->cpus[i].props.thread_id = topo.smt_id; + + /* default distribution of CPUs over NUMA nodes */ + if (nb_numa_nodes) { + /* preset values but do not enable them i.e. 
'has_node_id = false', + * numa init code will enable them later if manual mapping wasn't + * present on CLI */ + ms->possible_cpus->cpus[i].props.node_id = + topo.pkg_id % nb_numa_nodes; + } } return ms->possible_cpus; } @@ -2322,7 +2333,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->acpi_data_size = 0x20000 + 0x8000; pcmc->save_tsc_khz = true; mc->get_hotplug_handler = pc_get_hotpug_handler; - mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id; + mc->cpu_index_to_instance_props = pc_cpu_index_to_props; mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids; mc->has_hotpluggable_cpus = true; mc->default_boot_order = "cad"; diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index bdc31ce56c..2077e4b3c6 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2981,11 +2981,18 @@ static HotplugHandler *spapr_get_hotplug_handler(MachineState *machine, return NULL; } -static unsigned spapr_cpu_index_to_socket_id(unsigned cpu_index) +static CpuInstanceProperties +spapr_cpu_index_to_props(MachineState *machine, unsigned cpu_index) { - /* Allocate to NUMA nodes on a "socket" basis (not that concept of - * socket means much for the paravirtualized PAPR platform) */ - return cpu_index / smp_threads / smp_cores; + CPUArchId *core_slot; + MachineClass *mc = MACHINE_GET_CLASS(machine); + + /* make sure possible_cpu are intialized */ + mc->possible_cpu_arch_ids(machine); + /* get CPU core slot containing thread that matches cpu_index */ + core_slot = spapr_find_cpu_slot(machine, cpu_index, NULL); + assert(core_slot); + return core_slot->props; } static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine) @@ -3012,8 +3019,15 @@ static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine) machine->possible_cpus->cpus[i].arch_id = core_id; machine->possible_cpus->cpus[i].props.has_core_id = true; machine->possible_cpus->cpus[i].props.core_id = core_id; - /* TODO: add 'has_node/node' here to describe - to which node core 
belongs */ + + /* default distribution of CPUs over NUMA nodes */ + if (nb_numa_nodes) { + /* preset values but do not enable them i.e. 'has_node_id = false', + * numa init code will enable them later if manual mapping wasn't + * present on CLI */ + machine->possible_cpus->cpus[i].props.node_id = + core_id / smp_threads / smp_cores % nb_numa_nodes; + } } return machine->possible_cpus; } @@ -3138,7 +3152,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) hc->pre_plug = spapr_machine_device_pre_plug; hc->plug = spapr_machine_device_plug; hc->unplug = spapr_machine_device_unplug; - mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id; + mc->cpu_index_to_instance_props = spapr_cpu_index_to_props; mc->possible_cpu_arch_ids = spapr_possible_cpu_arch_ids; hc->unplug_request = spapr_machine_device_unplug_request; diff --git a/include/hw/boards.h b/include/hw/boards.h index 99458eb859..3ffa255fb8 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -74,7 +74,10 @@ typedef struct { * of HotplugHandler object, which handles hotplug operation * for a given @dev. It may return NULL if @dev doesn't require * any actions to be performed by hotplug handler. - * @cpu_index_to_socket_id: + * @cpu_index_to_instance_props: + * used to provide @cpu_index to socket/core/thread number mapping, allowing + * legacy code to perform maping from cpu_index to topology properties + * Returns: tuple of socket/core/thread ids given cpu_index belongs to. * used to provide @cpu_index to socket number mapping, allowing * a machine to group CPU threads belonging to the same socket/package * Returns: socket number given cpu_index belongs to. 
@@ -141,7 +144,8 @@ struct MachineClass { HotplugHandler *(*get_hotplug_handler)(MachineState *machine, DeviceState *dev); - unsigned (*cpu_index_to_socket_id)(unsigned cpu_index); + CpuInstanceProperties (*cpu_index_to_instance_props)(MachineState *machine, + unsigned cpu_index); const CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine); }; diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h index 70e56214e5..027830cf7e 100644 --- a/include/sysemu/numa.h +++ b/include/sysemu/numa.h @@ -26,7 +26,7 @@ struct node_info { }; extern NodeInfo numa_info[MAX_NODES]; -void parse_numa_opts(MachineClass *mc); +void parse_numa_opts(MachineState *ms); void numa_post_machine_init(void); void query_numa_node_mem(uint64_t node_mem[]); extern QemuOptsList qemu_numa_opts; diff --git a/numa.c b/numa.c index d753687dec..bcdfca2309 100644 --- a/numa.c +++ b/numa.c @@ -443,9 +443,10 @@ void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes, nodes[i].node_mem = size - usedmem; } -void parse_numa_opts(MachineClass *mc) +void parse_numa_opts(MachineState *ms) { int i; + MachineClass *mc = MACHINE_GET_CLASS(ms); for (i = 0; i < MAX_NODES; i++) { numa_info[i].node_cpu = bitmap_new(max_cpus); @@ -511,21 +512,18 @@ void parse_numa_opts(MachineClass *mc) break; } } - /* Historically VCPUs were assigned in round-robin order to NUMA - * nodes. However it causes issues with guest not handling it nice - * in case where cores/threads from a multicore CPU appear on - * different nodes. So allow boards to override default distribution - * rule grouping VCPUs by socket so that VCPUs from the same socket - * would be on the same node. 
- */ + + /* assign CPUs to nodes using board provided default mapping */ + if (!mc->cpu_index_to_instance_props) { + error_report("default CPUs to NUMA node mapping isn't supported"); + exit(1); + } if (i == nb_numa_nodes) { for (i = 0; i < max_cpus; i++) { - unsigned node_id = i % nb_numa_nodes; - if (mc->cpu_index_to_socket_id) { - node_id = mc->cpu_index_to_socket_id(i) % nb_numa_nodes; - } + CpuInstanceProperties props; + props = mc->cpu_index_to_instance_props(ms, i); - set_bit(i, numa_info[node_id].node_cpu); + set_bit(i, numa_info[props.node_id].node_cpu); } } diff --git a/vl.c b/vl.c index 58023fca02..c4705b3335 100644 --- a/vl.c +++ b/vl.c @@ -4503,7 +4503,7 @@ int main(int argc, char **argv, char **envp) default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS); default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS); - parse_numa_opts(machine_class); + parse_numa_opts(current_machine); if (qemu_opts_foreach(qemu_find_opts("mon"), mon_init_func, NULL, NULL)) { -- cgit v1.2.3 From 0b8497f08cb6a933b31c1639ead2da2b536062ec Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 10 May 2017 13:29:46 +0200 Subject: spapr: add node-id property to sPAPR core it will allow switching from cpu_index to core based numa mapping in follow up patches. 
Signed-off-by: Igor Mammedov Reviewed-by: David Gibson Message-Id: <1494415802-227633-3-git-send-email-imammedo@redhat.com> Signed-off-by: Eduardo Habkost --- hw/ppc/spapr.c | 17 +++++++++++++++++ hw/ppc/spapr_cpu_core.c | 11 ++++++++--- include/hw/ppc/spapr_cpu_core.h | 1 + include/qom/cpu.h | 2 ++ 4 files changed, 28 insertions(+), 3 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 2077e4b3c6..a952a39836 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2824,9 +2824,11 @@ static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev); Error *local_err = NULL; CPUCore *cc = CPU_CORE(dev); + sPAPRCPUCore *sc = SPAPR_CPU_CORE(dev); char *base_core_type = spapr_get_cpu_core_type(machine->cpu_model); const char *type = object_get_typename(OBJECT(dev)); CPUArchId *core_slot; + int node_id; int index; if (dev->hotplugged && !mc->has_hotpluggable_cpus) { @@ -2861,6 +2863,21 @@ static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, goto out; } + node_id = numa_get_node_for_cpu(cc->core_id); + if (node_id == nb_numa_nodes) { + /* by default CPUState::numa_node was 0 if it's not set via CLI + * keep it this way for now but in future we probably should + * refuse to start up with incomplete numa mapping */ + node_id = 0; + } + if (sc->node_id == CPU_UNSET_NUMA_NODE_ID) { + sc->node_id = node_id; + } else if (sc->node_id != node_id) { + error_setg(&local_err, "node-id %d must match numa node specified" + "with -numa option for cpu-index %d", sc->node_id, cc->core_id); + goto out; + } + out: g_free(base_core_type); error_propagate(errp, local_err); diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index 4389ef4c2a..9de7a5610c 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -176,7 +176,6 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) const char *typename = object_class_get_name(scc->cpu_class); size_t size = 
object_type_get_instance_size(typename); Error *local_err = NULL; - int core_node_id = numa_get_node_for_cpu(cc->core_id);; void *obj; int i, j; @@ -194,10 +193,10 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) /* Set NUMA node for the added CPUs */ node_id = numa_get_node_for_cpu(cs->cpu_index); - if (node_id != core_node_id) { + if (node_id != sc->node_id) { error_setg(&local_err, "Invalid node-id=%d of thread[cpu-index: %d]" " on CPU[core-id: %d, node-id: %d], node-id must be the same", - node_id, cs->cpu_index, cc->core_id, core_node_id); + node_id, cs->cpu_index, cc->core_id, sc->node_id); goto err; } if (node_id < nb_numa_nodes) { @@ -263,6 +262,11 @@ static const char *spapr_core_models[] = { "POWER9_v1.0", }; +static Property spapr_cpu_core_properties[] = { + DEFINE_PROP_INT32("node-id", sPAPRCPUCore, node_id, CPU_UNSET_NUMA_NODE_ID), + DEFINE_PROP_END_OF_LIST() +}; + void spapr_cpu_core_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); @@ -270,6 +274,7 @@ void spapr_cpu_core_class_init(ObjectClass *oc, void *data) dc->realize = spapr_cpu_core_realize; dc->unrealize = spapr_cpu_core_unrealizefn; + dc->props = spapr_cpu_core_properties; scc->cpu_class = cpu_class_by_name(TYPE_POWERPC_CPU, data); g_assert(scc->cpu_class); } diff --git a/include/hw/ppc/spapr_cpu_core.h b/include/hw/ppc/spapr_cpu_core.h index 3c35665221..93051e9ecf 100644 --- a/include/hw/ppc/spapr_cpu_core.h +++ b/include/hw/ppc/spapr_cpu_core.h @@ -27,6 +27,7 @@ typedef struct sPAPRCPUCore { /*< public >*/ void *threads; + int node_id; } sPAPRCPUCore; typedef struct sPAPRCPUCoreClass { diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 5d10359c8f..55214ce131 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -258,6 +258,8 @@ typedef void (*run_on_cpu_func)(CPUState *cpu, run_on_cpu_data data); struct qemu_work_item; +#define CPU_UNSET_NUMA_NODE_ID -1 + /** * CPUState: * @cpu_index: CPU index (informative). 
-- cgit v1.2.3 From 93b2a8cb0bb6195ab6001fe05d4896a7aaaed639 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 10 May 2017 13:29:47 +0200 Subject: pc: add node-id property to CPU it will allow switching from cpu_index to property based numa mapping in follow up patches. PS: patch changes default value of CPUState::numa_node from 0 to CPU_UNSET_NUMA_NODE_ID. The only place for x86 that would be affected is monitor's 'info numa' command which uses that field. However legacy 0 value is still preserved by pc_cpu_pre_plug() in this patch if user/numa.c hasn't set it explicitly, so there is no change in behavior. Signed-off-by: Igor Mammedov Message-Id: <1494415802-227633-4-git-send-email-imammedo@redhat.com> Reviewed-by: Eduardo Habkost Signed-off-by: Eduardo Habkost --- hw/i386/pc.c | 17 +++++++++++++++++ target/i386/cpu.c | 1 + 2 files changed, 18 insertions(+) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 01693d54ca..455300f776 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1893,6 +1893,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { int idx; + int node_id; CPUState *cs; CPUArchId *cpu_slot; X86CPUTopoInfo topo; @@ -1982,6 +1983,22 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, cs = CPU(cpu); cs->cpu_index = idx; + + node_id = numa_get_node_for_cpu(cs->cpu_index); + if (node_id == nb_numa_nodes) { + /* by default CPUState::numa_node was 0 if it's not set via CLI + * keep it this way for now but in future we probably should + * refuse to start up with incomplete numa mapping */ + node_id = 0; + } + if (cs->numa_node == CPU_UNSET_NUMA_NODE_ID) { + cs->numa_node = node_id; + } else if (cs->numa_node != node_id) { + error_setg(errp, "node-id %d must match numa node specified" + "with -numa option for cpu-index %d", + cs->numa_node, cs->cpu_index); + return; + } } static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, diff --git a/target/i386/cpu.c b/target/i386/cpu.c index
e3182b296f..5e768404a1 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -3986,6 +3986,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_INT32("core-id", X86CPU, core_id, -1), DEFINE_PROP_INT32("socket-id", X86CPU, socket_id, -1), #endif + DEFINE_PROP_INT32("node-id", CPUState, numa_node, CPU_UNSET_NUMA_NODE_ID), DEFINE_PROP_BOOL("pmu", X86CPU, enable_pmu, false), { .name = "hv-spinlocks", .info = &qdev_prop_spinlocks }, DEFINE_PROP_BOOL("hv-relaxed", X86CPU, hyperv_relaxed_timing, false), -- cgit v1.2.3 From bd4c1bfe3e2736e5bb3e5238ede718843b175cc6 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 10 May 2017 13:29:48 +0200 Subject: virt-arm: add node-id property to CPU it will allow switching from cpu_index to property based numa mapping in follow up patches. Signed-off-by: Igor Mammedov Reviewed-by: Andrew Jones Message-Id: <1494415802-227633-5-git-send-email-imammedo@redhat.com> Signed-off-by: Eduardo Habkost --- hw/arm/virt.c | 15 +++++++++++++++ target/arm/cpu.c | 1 + 2 files changed, 16 insertions(+) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index dfd6fd446c..653b4d76fd 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1350,6 +1350,7 @@ static void machvirt_init(MachineState *machine) for (n = 0; n < possible_cpus->len; n++) { Object *cpuobj; CPUState *cs; + int node_id; if (n >= smp_cpus) { break; @@ -1362,6 +1363,20 @@ static void machvirt_init(MachineState *machine) cs = CPU(cpuobj); cs->cpu_index = n; + node_id = numa_get_node_for_cpu(cs->cpu_index); + if (node_id == nb_numa_nodes) { + /* by default CPUState::numa_node was 0 if it's not set via CLI + * keep it this way for now but in future we probably should + * refuse to start up with incomplete numa mapping */ + node_id = 0; + } + if (cs->numa_node == CPU_UNSET_NUMA_NODE_ID) { + cs->numa_node = node_id; + } else { + /* CPU isn't device_add compatible yet, this shouldn't happen */ + error_setg(&error_abort, "user set node-id not implemented"); + } + if (!vms->secure) { 
object_property_set_bool(cpuobj, false, "has_el3", NULL); } diff --git a/target/arm/cpu.c b/target/arm/cpu.c index ee1406da12..c185eb19ac 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -1573,6 +1573,7 @@ static Property arm_cpu_properties[] = { DEFINE_PROP_UINT32("midr", ARMCPU, midr, 0), DEFINE_PROP_UINT64("mp-affinity", ARMCPU, mp_affinity, ARM64_AFFINITY_INVALID), + DEFINE_PROP_INT32("node-id", CPUState, numa_node, CPU_UNSET_NUMA_NODE_ID), DEFINE_PROP_END_OF_LIST() }; -- cgit v1.2.3 From 64c2a8f6d3facc2f758907c3b95686fe9e999590 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 10 May 2017 13:29:49 +0200 Subject: numa: add check that board supports cpu_index to node mapping Default node mapping initialization already checks that board supports cpu_index to node mapping and refuses to start if it's not supported. Do the same for explicitly provided mapping "-numa node,cpus=..." Signed-off-by: Igor Mammedov Reviewed-by: Andrew Jones Reviewed-by: David Gibson Message-Id: <1494415802-227633-6-git-send-email-imammedo@redhat.com> Signed-off-by: Eduardo Habkost --- numa.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/numa.c b/numa.c index bcdfca2309..718248161c 100644 --- a/numa.c +++ b/numa.c @@ -141,10 +141,12 @@ uint32_t numa_get_node(ram_addr_t addr, Error **errp) return -1; } -static void parse_numa_node(NumaNodeOptions *node, QemuOpts *opts, Error **errp) +static void parse_numa_node(MachineState *ms, NumaNodeOptions *node, + QemuOpts *opts, Error **errp) { uint16_t nodenr; uint16List *cpus = NULL; + MachineClass *mc = MACHINE_GET_CLASS(ms); if (node->has_nodeid) { nodenr = node->nodeid; @@ -163,6 +165,10 @@ static void parse_numa_node(NumaNodeOptions *node, QemuOpts *opts, Error **errp) return; } + if (!mc->cpu_index_to_instance_props) { + error_report("NUMA is not supported by this machine-type"); + exit(1); + } for (cpus = node->cpus; cpus; cpus = cpus->next) { if (cpus->value >= max_cpus) { error_setg(errp, @@ 
-253,6 +259,7 @@ static void parse_numa_distance(NumaDistOptions *dist, Error **errp) static int parse_numa(void *opaque, QemuOpts *opts, Error **errp) { NumaOptions *object = NULL; + MachineState *ms = opaque; Error *err = NULL; { @@ -267,7 +274,7 @@ static int parse_numa(void *opaque, QemuOpts *opts, Error **errp) switch (object->type) { case NUMA_OPTIONS_TYPE_NODE: - parse_numa_node(&object->u.node, opts, &err); + parse_numa_node(ms, &object->u.node, opts, &err); if (err) { goto end; } @@ -452,7 +459,7 @@ void parse_numa_opts(MachineState *ms) numa_info[i].node_cpu = bitmap_new(max_cpus); } - if (qemu_opts_foreach(qemu_find_opts("numa"), parse_numa, NULL, NULL)) { + if (qemu_opts_foreach(qemu_find_opts("numa"), parse_numa, ms, NULL)) { exit(1); } -- cgit v1.2.3 From 7c88e65d9e9ff7df7fa9cff1869d64a0eaac63a1 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 10 May 2017 13:29:50 +0200 Subject: numa: mirror cpu to node mapping in MachineState::possible_cpus Introduce machine_set_cpu_numa_node() helper that stores node mapping for CPU in MachineState::possible_cpus. CPU and node it belongs to is specified by 'props' argument. Patch doesn't remove old way of storing mapping in numa_info[X].node_cpu as removing it at the same time makes patch rather big. Instead it just mirrors mapping in possible_cpus and follow up per target patches will switch to possible_cpus and numa_info[X].node_cpu will be removed once there isn't any users left. 
Signed-off-by: Igor Mammedov Reviewed-by: David Gibson Reviewed-by: Andrew Jones Message-Id: <1494415802-227633-7-git-send-email-imammedo@redhat.com> Signed-off-by: Eduardo Habkost --- hw/core/machine.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++++ include/hw/boards.h | 3 ++ numa.c | 8 +++++ 3 files changed, 107 insertions(+) diff --git a/hw/core/machine.c b/hw/core/machine.c index 2482c630c1..420c8c4d16 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -389,6 +389,102 @@ HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine) return head; } +/** + * machine_set_cpu_numa_node: + * @machine: machine object to modify + * @props: specifies which cpu objects to assign to + * numa node specified by @props.node_id + * @errp: if an error occurs, a pointer to an area to store the error + * + * Associate NUMA node specified by @props.node_id with cpu slots that + * match socket/core/thread-ids specified by @props. It's recommended to use + * query-hotpluggable-cpus.props values to specify affected cpu slots, + * which would lead to exact 1:1 mapping of cpu slots to NUMA node. + * + * However for CLI convenience it's possible to pass in subset of properties, + * which would affect all cpu slots that match it. + * Ex for pc machine: + * -smp 4,cores=2,sockets=2 -numa node,nodeid=0 -numa node,nodeid=1 \ + * -numa cpu,node-id=0,socket_id=0 \ + * -numa cpu,node-id=1,socket_id=1 + * will assign all child cores of socket 0 to node 0 and + * of socket 1 to node 1. + * + * On attempt of reassigning (already assigned) cpu slot to another NUMA node, + * return error. + * Empty subset is disallowed and function will return with error in this case. 
+ */ +void machine_set_cpu_numa_node(MachineState *machine, + const CpuInstanceProperties *props, Error **errp) +{ + MachineClass *mc = MACHINE_GET_CLASS(machine); + bool match = false; + int i; + + if (!mc->possible_cpu_arch_ids) { + error_setg(errp, "mapping of CPUs to NUMA node is not supported"); + return; + } + + /* disabling node mapping is not supported, forbid it */ + assert(props->has_node_id); + + /* force board to initialize possible_cpus if it hasn't been done yet */ + mc->possible_cpu_arch_ids(machine); + + for (i = 0; i < machine->possible_cpus->len; i++) { + CPUArchId *slot = &machine->possible_cpus->cpus[i]; + + /* reject unsupported by board properties */ + if (props->has_thread_id && !slot->props.has_thread_id) { + error_setg(errp, "thread-id is not supported"); + return; + } + + if (props->has_core_id && !slot->props.has_core_id) { + error_setg(errp, "core-id is not supported"); + return; + } + + if (props->has_socket_id && !slot->props.has_socket_id) { + error_setg(errp, "socket-id is not supported"); + return; + } + + /* skip slots with explicit mismatch */ + if (props->has_thread_id && props->thread_id != slot->props.thread_id) { + continue; + } + + if (props->has_core_id && props->core_id != slot->props.core_id) { + continue; + } + + if (props->has_socket_id && props->socket_id != slot->props.socket_id) { + continue; + } + + /* reject assignment if slot is already assigned, for compatibility + * of legacy cpu_index mapping with SPAPR core based mapping do not + * error out if cpu thread and matched core have the same node-id */ + if (slot->props.has_node_id && + slot->props.node_id != props->node_id) { + error_setg(errp, "CPU is already assigned to node-id: %" PRId64, + slot->props.node_id); + return; + } + + /* assign slot to node as it's matched '-numa cpu' key */ + match = true; + slot->props.node_id = props->node_id; + slot->props.has_node_id = props->has_node_id; + } + + if (!match) { + error_setg(errp, "no match found"); + } +} + static 
void machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); diff --git a/include/hw/boards.h b/include/hw/boards.h index 3ffa255fb8..4e14ff060e 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -42,6 +42,9 @@ bool machine_dump_guest_core(MachineState *machine); bool machine_mem_merge(MachineState *machine); void machine_register_compat_props(MachineState *machine); HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine); +void machine_set_cpu_numa_node(MachineState *machine, + const CpuInstanceProperties *props, + Error **errp); /** * CPUArchId: diff --git a/numa.c b/numa.c index 718248161c..7db5dde873 100644 --- a/numa.c +++ b/numa.c @@ -170,6 +170,7 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node, exit(1); } for (cpus = node->cpus; cpus; cpus = cpus->next) { + CpuInstanceProperties props; if (cpus->value >= max_cpus) { error_setg(errp, "CPU index (%" PRIu16 ")" @@ -178,6 +179,10 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node, return; } bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1); + props = mc->cpu_index_to_instance_props(ms, cpus->value); + props.node_id = nodenr; + props.has_node_id = true; + machine_set_cpu_numa_node(ms, &props, &error_fatal); } if (node->has_mem && node->has_memdev) { @@ -528,9 +533,12 @@ void parse_numa_opts(MachineState *ms) if (i == nb_numa_nodes) { for (i = 0; i < max_cpus; i++) { CpuInstanceProperties props; + /* fetch default mapping from board and enable it */ props = mc->cpu_index_to_instance_props(ms, i); + props.has_node_id = true; set_bit(i, numa_info[props.node_id].node_cpu); + machine_set_cpu_numa_node(ms, &props, &error_fatal); } } -- cgit v1.2.3 From af9b20e8d21cb692e9411963a532b2486f2a1e65 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 10 May 2017 13:29:51 +0200 Subject: numa: do default mapping based on possible_cpus instead of node_cpu bitmaps Signed-off-by: Igor Mammedov Reviewed-by: David 
Gibson Reviewed-by: Andrew Jones Message-Id: <1494415802-227633-8-git-send-email-imammedo@redhat.com> Signed-off-by: Eduardo Habkost --- numa.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/numa.c b/numa.c index 7db5dde873..c89fc2d4a5 100644 --- a/numa.c +++ b/numa.c @@ -458,6 +458,7 @@ void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes, void parse_numa_opts(MachineState *ms) { int i; + const CPUArchIdList *possible_cpus; MachineClass *mc = MACHINE_GET_CLASS(ms); for (i = 0; i < MAX_NODES; i++) { @@ -519,18 +520,21 @@ void parse_numa_opts(MachineState *ms) numa_set_mem_ranges(); - for (i = 0; i < nb_numa_nodes; i++) { - if (!bitmap_empty(numa_info[i].node_cpu, max_cpus)) { - break; - } - } - /* assign CPUs to nodes using board provided default mapping */ - if (!mc->cpu_index_to_instance_props) { + if (!mc->cpu_index_to_instance_props || !mc->possible_cpu_arch_ids) { error_report("default CPUs to NUMA node mapping isn't supported"); exit(1); } - if (i == nb_numa_nodes) { + + possible_cpus = mc->possible_cpu_arch_ids(ms); + for (i = 0; i < possible_cpus->len; i++) { + if (possible_cpus->cpus[i].props.has_node_id) { + break; + } + } + + /* no CPUs are assigned to NUMA nodes */ + if (i == possible_cpus->len) { for (i = 0; i < max_cpus; i++) { CpuInstanceProperties props; /* fetch default mapping from board and enable it */ -- cgit v1.2.3 From ea2650724ceb15903eb25473252047784fae5672 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 10 May 2017 13:29:52 +0200 Subject: pc: get numa node mapping from possible_cpus instead of numa_get_node_for_cpu() Signed-off-by: Igor Mammedov Reviewed-by: Andrew Jones Message-Id: <1494415802-227633-9-git-send-email-imammedo@redhat.com> Signed-off-by: Eduardo Habkost --- hw/acpi/cpu.c | 7 +++---- hw/i386/acpi-build.c | 11 ++++------- hw/i386/pc.c | 18 ++++++++++-------- 3 files changed, 17 insertions(+), 19 deletions(-) diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c index 
8c719d3f9d..a233fe17cf 100644 --- a/hw/acpi/cpu.c +++ b/hw/acpi/cpu.c @@ -503,7 +503,6 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, /* build Processor object for each processor */ for (i = 0; i < arch_ids->len; i++) { - int j; Aml *dev; Aml *uid = aml_int(i); GArray *madt_buf = g_array_new(0, 1, 1); @@ -557,9 +556,9 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, * as a result _PXM is required for all CPUs which might * be hot-plugged. For simplicity, add it for all CPUs. */ - j = numa_get_node_for_cpu(i); - if (j < nb_numa_nodes) { - aml_append(dev, aml_name_decl("_PXM", aml_int(j))); + if (arch_ids->cpus[i].props.has_node_id) { + aml_append(dev, aml_name_decl("_PXM", + aml_int(arch_ids->cpus[i].props.node_id))); } aml_append(cpus_dev, dev); diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index c7cc45cc4b..cc0418f327 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2335,7 +2335,8 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) srat->reserved1 = cpu_to_le32(1); for (i = 0; i < apic_ids->len; i++) { - int j = numa_get_node_for_cpu(i); + int node_id = apic_ids->cpus[i].props.has_node_id ? 
+ apic_ids->cpus[i].props.node_id : 0; uint32_t apic_id = apic_ids->cpus[i].arch_id; if (apic_id < 255) { @@ -2345,9 +2346,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) core->type = ACPI_SRAT_PROCESSOR_APIC; core->length = sizeof(*core); core->local_apic_id = apic_id; - if (j < nb_numa_nodes) { - core->proximity_lo = j; - } + core->proximity_lo = node_id; memset(core->proximity_hi, 0, 3); core->local_sapic_eid = 0; core->flags = cpu_to_le32(1); @@ -2358,9 +2357,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) core->type = ACPI_SRAT_PROCESSOR_x2APIC; core->length = sizeof(*core); core->x2apic_id = cpu_to_le32(apic_id); - if (j < nb_numa_nodes) { - core->proximity_domain = cpu_to_le32(j); - } + core->proximity_domain = cpu_to_le32(node_id); core->flags = cpu_to_le32(1); } } diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 455300f776..e36a375683 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -747,7 +747,9 @@ static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms) { FWCfgState *fw_cfg; uint64_t *numa_fw_cfg; - int i, j; + int i; + const CPUArchIdList *cpus; + MachineClass *mc = MACHINE_GET_CLASS(pcms); fw_cfg = fw_cfg_init_io_dma(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4, as); fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); @@ -782,12 +784,12 @@ static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms) */ numa_fw_cfg = g_new0(uint64_t, 1 + pcms->apic_id_limit + nb_numa_nodes); numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes); - for (i = 0; i < max_cpus; i++) { - unsigned int apic_id = x86_cpu_apic_id_from_index(i); + cpus = mc->possible_cpu_arch_ids(MACHINE(pcms)); + for (i = 0; i < cpus->len; i++) { + unsigned int apic_id = cpus->cpus[i].arch_id; assert(apic_id < pcms->apic_id_limit); - j = numa_get_node_for_cpu(i); - if (j < nb_numa_nodes) { - numa_fw_cfg[apic_id + 1] = cpu_to_le64(j); + if (cpus->cpus[i].props.has_node_id) { + numa_fw_cfg[apic_id + 1] = 
cpu_to_le64(cpus->cpus[i].props.node_id); } } for (i = 0; i < nb_numa_nodes; i++) { @@ -1984,8 +1986,8 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, cs = CPU(cpu); cs->cpu_index = idx; - node_id = numa_get_node_for_cpu(cs->cpu_index); - if (node_id == nb_numa_nodes) { + node_id = cpu_slot->props.node_id; + if (!cpu_slot->props.has_node_id) { /* by default CPUState::numa_node was 0 if it's not set via CLI * keep it this way for now but in future we probably should * refuse to start up with incomplete numa mapping */ -- cgit v1.2.3 From 722387e78daf6a330220082934cfaaf68fa4d492 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 10 May 2017 13:29:53 +0200 Subject: spapr: get numa node mapping from possible_cpus instead of numa_get_node_for_cpu() it's safe to remove thread node_id != core node_id error branch as machine_set_cpu_numa_node() also does mismatch check and is called even before any CPU is created. Signed-off-by: Igor Mammedov Acked-by: David Gibson Message-Id: <1494415802-227633-10-git-send-email-imammedo@redhat.com> Signed-off-by: Eduardo Habkost --- hw/ppc/spapr.c | 4 ++-- hw/ppc/spapr_cpu_core.c | 14 ++------------ 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index a952a39836..504161fa45 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2863,8 +2863,8 @@ static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, goto out; } - node_id = numa_get_node_for_cpu(cc->core_id); - if (node_id == nb_numa_nodes) { + node_id = core_slot->props.node_id; + if (!core_slot->props.has_node_id) { /* by default CPUState::numa_node was 0 if it's not set via CLI * keep it this way for now but in future we probably should * refuse to start up with incomplete numa mapping */ diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index 9de7a5610c..a17ea07ef1 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -181,7 +181,6 @@ static void 
spapr_cpu_core_realize(DeviceState *dev, Error **errp) sc->threads = g_malloc0(size * cc->nr_threads); for (i = 0; i < cc->nr_threads; i++) { - int node_id; char id[32]; CPUState *cs; @@ -191,17 +190,8 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) cs = CPU(obj); cs->cpu_index = cc->core_id + i; - /* Set NUMA node for the added CPUs */ - node_id = numa_get_node_for_cpu(cs->cpu_index); - if (node_id != sc->node_id) { - error_setg(&local_err, "Invalid node-id=%d of thread[cpu-index: %d]" - " on CPU[core-id: %d, node-id: %d], node-id must be the same", - node_id, cs->cpu_index, cc->core_id, sc->node_id); - goto err; - } - if (node_id < nb_numa_nodes) { - cs->numa_node = node_id; - } + /* Set NUMA node for the threads belonged to core */ + cs->numa_node = sc->node_id; snprintf(id, sizeof(id), "thread[%d]", i); object_property_add_child(OBJECT(sc), id, obj, &local_err); -- cgit v1.2.3 From 4ccf5826f96e766e32385e9f6ee0350403054ce1 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 10 May 2017 13:29:54 +0200 Subject: virt-arm: get numa node mapping from possible_cpus instead of numa_get_node_for_cpu() Signed-off-by: Igor Mammedov Reviewed-by: Andrew Jones Message-Id: <1494415802-227633-11-git-send-email-imammedo@redhat.com> Signed-off-by: Eduardo Habkost --- hw/arm/virt-acpi-build.c | 19 +++++++------------ hw/arm/virt.c | 13 +++++++------ 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index 0835e59bb2..ce7499c9ca 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -486,30 +486,25 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) AcpiSystemResourceAffinityTable *srat; AcpiSratProcessorGiccAffinity *core; AcpiSratMemoryAffinity *numamem; - int i, j, srat_start; + int i, srat_start; uint64_t mem_base; - uint32_t *cpu_node = g_malloc0(vms->smp_cpus * sizeof(uint32_t)); - - for (i = 0; i < vms->smp_cpus; i++) { - j = 
numa_get_node_for_cpu(i); - if (j < nb_numa_nodes) { - cpu_node[i] = j; - } - } + MachineClass *mc = MACHINE_GET_CLASS(vms); + const CPUArchIdList *cpu_list = mc->possible_cpu_arch_ids(MACHINE(vms)); srat_start = table_data->len; srat = acpi_data_push(table_data, sizeof(*srat)); srat->reserved1 = cpu_to_le32(1); - for (i = 0; i < vms->smp_cpus; ++i) { + for (i = 0; i < cpu_list->len; ++i) { + int node_id = cpu_list->cpus[i].props.has_node_id ? + cpu_list->cpus[i].props.node_id : 0; core = acpi_data_push(table_data, sizeof(*core)); core->type = ACPI_SRAT_PROCESSOR_GICC; core->length = sizeof(*core); - core->proximity = cpu_to_le32(cpu_node[i]); + core->proximity = cpu_to_le32(node_id); core->acpi_processor_uid = cpu_to_le32(i); core->flags = cpu_to_le32(1); } - g_free(cpu_node); mem_base = vms->memmap[VIRT_MEM].base; for (i = 0; i < nb_numa_nodes; ++i) { diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 653b4d76fd..c7c8159dfd 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -338,7 +338,7 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) { int cpu; int addr_cells = 1; - unsigned int i; + const MachineState *ms = MACHINE(vms); /* * From Documentation/devicetree/bindings/arm/cpus.txt @@ -369,6 +369,7 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) for (cpu = vms->smp_cpus - 1; cpu >= 0; cpu--) { char *nodename = g_strdup_printf("/cpus/cpu@%d", cpu); ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu)); + CPUState *cs = CPU(armcpu); qemu_fdt_add_subnode(vms->fdt, nodename); qemu_fdt_setprop_string(vms->fdt, nodename, "device_type", "cpu"); @@ -389,9 +390,9 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) armcpu->mp_affinity); } - i = numa_get_node_for_cpu(cpu); - if (i < nb_numa_nodes) { - qemu_fdt_setprop_cell(vms->fdt, nodename, "numa-node-id", i); + if (ms->possible_cpus->cpus[cs->cpu_index].props.has_node_id) { + qemu_fdt_setprop_cell(vms->fdt, nodename, "numa-node-id", + ms->possible_cpus->cpus[cs->cpu_index].props.node_id); } 
g_free(nodename); @@ -1363,8 +1364,8 @@ static void machvirt_init(MachineState *machine) cs = CPU(cpuobj); cs->cpu_index = n; - node_id = numa_get_node_for_cpu(cs->cpu_index); - if (node_id == nb_numa_nodes) { + node_id = possible_cpus->cpus[cs->cpu_index].props.node_id; + if (!possible_cpus->cpus[cs->cpu_index].props.has_node_id) { /* by default CPUState::numa_node was 0 if it's not set via CLI * keep it this way for now but in future we probably should * refuse to start up with incomplete numa mapping */ -- cgit v1.2.3 From afed5a5a7030a074a181d2a0ce8202de71a6ada4 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 10 May 2017 13:29:55 +0200 Subject: QMP: include CpuInstanceProperties into query_cpus output if board supports CpuInstanceProperties, report them for each CPU thread listed. Main motivation for this is to provide these properties introspection via QMP interface for using in test cases to verify numa node to cpu mapping, which includes not only boards that support cpu hotplug and have this info in query-hotpluggable-cpus (pc/spapr) but also for boards that do not support hotpluggable-cpus but support numa mapping (virt-arm). 
Signed-off-by: Igor Mammedov Reviewed-by: Eric Blake Message-Id: <1494415802-227633-12-git-send-email-imammedo@redhat.com> Reviewed-by: David Gibson Signed-off-by: Eduardo Habkost --- cpus.c | 10 ++++++++++ qapi-schema.json | 6 +++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/cpus.c b/cpus.c index 740b8dc3f8..4f91d25513 100644 --- a/cpus.c +++ b/cpus.c @@ -50,6 +50,7 @@ #include "qapi-event.h" #include "hw/nmi.h" #include "sysemu/replay.h" +#include "hw/boards.h" #ifdef CONFIG_LINUX @@ -1859,6 +1860,8 @@ void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg) CpuInfoList *qmp_query_cpus(Error **errp) { + MachineState *ms = MACHINE(qdev_get_machine()); + MachineClass *mc = MACHINE_GET_CLASS(ms); CpuInfoList *head = NULL, *cur_item = NULL; CPUState *cpu; @@ -1909,6 +1912,13 @@ CpuInfoList *qmp_query_cpus(Error **errp) #else info->value->arch = CPU_INFO_ARCH_OTHER; #endif + info->value->has_props = !!mc->cpu_index_to_instance_props; + if (info->value->has_props) { + CpuInstanceProperties *props; + props = g_malloc0(sizeof(*props)); + *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index); + info->value->props = props; + } /* XXX: waiting for the qapi to support GSList */ if (!cur_item) { diff --git a/qapi-schema.json b/qapi-schema.json index f4eef33a44..c3051fb901 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1325,6 +1325,9 @@ # # @thread_id: ID of the underlying host thread # +# @props: properties describing to which node/socket/core/thread +# virtual CPU belongs to, provided if supported by board (since 2.10) +# # @arch: architecture of the cpu, which determines which additional fields # will be listed (since 2.6) # @@ -1335,7 +1338,8 @@ ## { 'union': 'CpuInfo', 'base': {'CPU': 'int', 'current': 'bool', 'halted': 'bool', - 'qom_path': 'str', 'thread_id': 'int', 'arch': 'CpuInfoArch' }, + 'qom_path': 'str', 'thread_id': 'int', + '*props': 'CpuInstanceProperties', 'arch': 'CpuInfoArch' }, 'discriminator': 'arch', 
'data': { 'x86': 'CpuInfoX86', 'sparc': 'CpuInfoSPARC', -- cgit v1.2.3 From 6accfb782321de5a15e9c9ff5482a2d830055a8f Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 10 May 2017 13:29:56 +0200 Subject: tests: numa: add case for QMP command query-cpus Signed-off-by: Igor Mammedov Reviewed-by: David Gibson Message-Id: <1494415802-227633-13-git-send-email-imammedo@redhat.com> Signed-off-by: Eduardo Habkost --- numa.c | 14 -------------- tests/numa-test.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/numa.c b/numa.c index c89fc2d4a5..f16a6a8ade 100644 --- a/numa.c +++ b/numa.c @@ -737,20 +737,6 @@ MemdevList *qmp_query_memdev(Error **errp) return list; } -int numa_get_node_for_cpu(int idx) -{ - int i; - - assert(idx < max_cpus); - - for (i = 0; i < nb_numa_nodes; i++) { - if (test_bit(idx, numa_info[i].node_cpu)) { - break; - } - } - return i; -} - void ram_block_notifier_add(RAMBlockNotifier *n) { QLIST_INSERT_HEAD(&ram_list.ramblock_notifiers, n, next); diff --git a/tests/numa-test.c b/tests/numa-test.c index f5da0c845b..27226877af 100644 --- a/tests/numa-test.c +++ b/tests/numa-test.c @@ -87,6 +87,50 @@ static void test_mon_partial(const void *data) g_free(cli); } +static QList *get_cpus(QDict **resp) +{ + *resp = qmp("{ 'execute': 'query-cpus' }"); + g_assert(*resp); + g_assert(qdict_haskey(*resp, "return")); + return qdict_get_qlist(*resp, "return"); +} + +static void test_query_cpus(const void *data) +{ + char *cli; + QDict *resp; + QList *cpus; + const QObject *e; + + cli = make_cli(data, "-smp 8 -numa node,cpus=0-3 -numa node,cpus=4-7"); + qtest_start(cli); + cpus = get_cpus(&resp); + g_assert(cpus); + + while ((e = qlist_pop(cpus))) { + QDict *cpu, *props; + int64_t cpu_idx, node; + + cpu = qobject_to_qdict(e); + g_assert(qdict_haskey(cpu, "CPU")); + g_assert(qdict_haskey(cpu, "props")); + + cpu_idx = qdict_get_int(cpu, "CPU"); + props = qdict_get_qdict(cpu, "props"); + 
g_assert(qdict_haskey(props, "node-id")); + node = qdict_get_int(props, "node-id"); + if (cpu_idx >= 0 && cpu_idx < 4) { + g_assert_cmpint(node, ==, 0); + } else { + g_assert_cmpint(node, ==, 1); + } + } + + QDECREF(resp); + qtest_end(); + g_free(cli); +} + int main(int argc, char **argv) { const char *args = NULL; @@ -101,6 +145,7 @@ int main(int argc, char **argv) qtest_add_data_func("/numa/mon/default", args, test_mon_default); qtest_add_data_func("/numa/mon/cpus/explicit", args, test_mon_explicit); qtest_add_data_func("/numa/mon/cpus/partial", args, test_mon_partial); + qtest_add_data_func("/numa/qmp/cpus/query-cpus", args, test_query_cpus); return g_test_run(); } -- cgit v1.2.3 From 3b8a8557f781c4216744d9ab69bee43b526b0c64 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 10 May 2017 13:29:57 +0200 Subject: numa: remove no longer needed numa_post_machine_init() CPUState::numa_node is still in use but now it's set by board when it creates CPU objects. So there isn't any need to set it again after all CPUs are created, since it has already been set. 
Signed-off-by: Igor Mammedov Reviewed-by: David Gibson Reviewed-by: Andrew Jones Message-Id: <1494415802-227633-14-git-send-email-imammedo@redhat.com> Signed-off-by: Eduardo Habkost --- include/sysemu/numa.h | 6 ------ numa.c | 15 --------------- vl.c | 2 -- 3 files changed, 23 deletions(-) diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h index 027830cf7e..8cb3ebc233 100644 --- a/include/sysemu/numa.h +++ b/include/sysemu/numa.h @@ -27,7 +27,6 @@ struct node_info { extern NodeInfo numa_info[MAX_NODES]; void parse_numa_opts(MachineState *ms); -void numa_post_machine_init(void); void query_numa_node_mem(uint64_t node_mem[]); extern QemuOptsList qemu_numa_opts; void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node); @@ -37,9 +36,4 @@ void numa_legacy_auto_assign_ram(MachineClass *mc, NodeInfo *nodes, int nb_nodes, ram_addr_t size); void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes, int nb_nodes, ram_addr_t size); - - -/* on success returns node index in numa_info, - * on failure returns nb_numa_nodes */ -int numa_get_node_for_cpu(int idx); #endif diff --git a/numa.c b/numa.c index f16a6a8ade..dc739eadfa 100644 --- a/numa.c +++ b/numa.c @@ -572,21 +572,6 @@ void parse_numa_opts(MachineState *ms) } } -void numa_post_machine_init(void) -{ - CPUState *cpu; - int i; - - CPU_FOREACH(cpu) { - for (i = 0; i < nb_numa_nodes; i++) { - assert(cpu->cpu_index < max_cpus); - if (test_bit(cpu->cpu_index, numa_info[i].node_cpu)) { - cpu->numa_node = i; - } - } - } -} - static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner, const char *name, uint64_t ram_size) diff --git a/vl.c b/vl.c index c4705b3335..5cd0c17ba0 100644 --- a/vl.c +++ b/vl.c @@ -4592,8 +4592,6 @@ int main(int argc, char **argv, char **envp) cpu_synchronize_all_post_init(); - numa_post_machine_init(); - rom_reset_order_override(); /* -- cgit v1.2.3 From 482dfe9a9e8fe72d6a96c927e23078808f9cacd2 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 10 
May 2017 13:29:58 +0200 Subject: machine: call machine init from wrapper add machine_run_board_init() wrapper that calls machine init for now but in follow up patches it will be used to run generic machine code that should run before machine init. Signed-off-by: Igor Mammedov Reviewed-by: Andrew Jones Reviewed-by: David Gibson Reviewed-by: Eduardo Habkost Message-Id: <1494415802-227633-15-git-send-email-imammedo@redhat.com> Signed-off-by: Eduardo Habkost --- hw/core/machine.c | 6 ++++++ include/hw/boards.h | 1 + vl.c | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/hw/core/machine.c b/hw/core/machine.c index 420c8c4d16..64e2a4ff2c 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -678,6 +678,12 @@ bool machine_mem_merge(MachineState *machine) return machine->mem_merge; } +void machine_run_board_init(MachineState *machine) +{ + MachineClass *machine_class = MACHINE_GET_CLASS(machine); + machine_class->init(machine); +} + static void machine_class_finalize(ObjectClass *klass, void *data) { MachineClass *mc = MACHINE_CLASS(klass); diff --git a/include/hw/boards.h b/include/hw/boards.h index 4e14ff060e..76ce0219ff 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -32,6 +32,7 @@ void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner, MachineClass *find_default_machine(void); extern MachineState *current_machine; +void machine_run_board_init(MachineState *machine); bool machine_usb(MachineState *machine); bool machine_kernel_irqchip_allowed(MachineState *machine); bool machine_kernel_irqchip_required(MachineState *machine); diff --git a/vl.c b/vl.c index 5cd0c17ba0..3d8c140011 100644 --- a/vl.c +++ b/vl.c @@ -4559,7 +4559,7 @@ int main(int argc, char **argv, char **envp) current_machine->boot_order = boot_order; current_machine->cpu_model = cpu_model; - machine_class->init(current_machine); + machine_run_board_init(current_machine); realtime_init(); -- cgit v1.2.3 From ec78f8114bc4c133fc56fefa7f2af99725e42857 Mon 
Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 10 May 2017 13:29:59 +0200 Subject: numa: use possible_cpus for not mapped CPUs check and remove corresponding part in numa.c that uses node_cpu bitmaps. Signed-off-by: Igor Mammedov Reviewed-by: David Gibson Reviewed-by: Andrew Jones Message-Id: <1494415802-227633-16-git-send-email-imammedo@redhat.com> Signed-off-by: Eduardo Habkost --- hw/core/machine.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ numa.c | 10 ---------- 2 files changed, 58 insertions(+), 10 deletions(-) diff --git a/hw/core/machine.c b/hw/core/machine.c index 64e2a4ff2c..fd6a436064 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -20,6 +20,7 @@ #include "sysemu/numa.h" #include "qemu/error-report.h" #include "qemu/cutils.h" +#include "sysemu/numa.h" static char *machine_get_accel(Object *obj, Error **errp) { @@ -678,9 +679,66 @@ bool machine_mem_merge(MachineState *machine) return machine->mem_merge; } +static char *cpu_slot_to_string(const CPUArchId *cpu) +{ + GString *s = g_string_new(NULL); + if (cpu->props.has_socket_id) { + g_string_append_printf(s, "socket-id: %"PRId64, cpu->props.socket_id); + } + if (cpu->props.has_core_id) { + if (s->len) { + g_string_append_printf(s, ", "); + } + g_string_append_printf(s, "core-id: %"PRId64, cpu->props.core_id); + } + if (cpu->props.has_thread_id) { + if (s->len) { + g_string_append_printf(s, ", "); + } + g_string_append_printf(s, "thread-id: %"PRId64, cpu->props.thread_id); + } + return g_string_free(s, false); +} + +static void machine_numa_validate(MachineState *machine) +{ + int i; + GString *s = g_string_new(NULL); + MachineClass *mc = MACHINE_GET_CLASS(machine); + const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(machine); + + assert(nb_numa_nodes); + for (i = 0; i < possible_cpus->len; i++) { + const CPUArchId *cpu_slot = &possible_cpus->cpus[i]; + + /* at this point numa mappings are initilized by CLI options + * or with default mappings so it's sufficient 
to list + * all not yet mapped CPUs here */ + /* TODO: make it hard error in future */ + if (!cpu_slot->props.has_node_id) { + char *cpu_str = cpu_slot_to_string(cpu_slot); + g_string_append_printf(s, "%sCPU %d [%s]", s->len ? ", " : "", i, + cpu_str); + g_free(cpu_str); + } + } + if (s->len) { + error_report("warning: CPU(s) not present in any NUMA nodes: %s", + s->str); + error_report("warning: All CPU(s) up to maxcpus should be described " + "in NUMA config, ability to start up with partial NUMA " + "mappings is obsoleted and will be removed in future"); + } + g_string_free(s, true); +} + void machine_run_board_init(MachineState *machine) { MachineClass *machine_class = MACHINE_GET_CLASS(machine); + + if (nb_numa_nodes) { + machine_numa_validate(machine); + } machine_class->init(machine); } diff --git a/numa.c b/numa.c index dc739eadfa..63bff5a54d 100644 --- a/numa.c +++ b/numa.c @@ -337,16 +337,6 @@ static void validate_numa_cpus(void) bitmap_or(seen_cpus, seen_cpus, numa_info[i].node_cpu, max_cpus); } - - if (!bitmap_full(seen_cpus, max_cpus)) { - char *msg; - bitmap_complement(seen_cpus, seen_cpus, max_cpus); - msg = enumerate_cpus(seen_cpus, max_cpus); - error_report("warning: CPU(s) not present in any NUMA nodes: %s", msg); - error_report("warning: All CPU(s) up to maxcpus should be described " - "in NUMA config"); - g_free(msg); - } g_free(seen_cpus); } -- cgit v1.2.3 From 1171ae9a5b132dc631728ff17688d05ed4534181 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 10 May 2017 13:30:00 +0200 Subject: numa: remove node_cpu bitmaps as they are no longer used Postfactum "CPU(s) present in multiple NUMA nodes" check was the last user of node_cpu bitmaps, but it's not needed as machine_set_cpu_numa_node() does a similar check at the time mapping is set for cpus (i.e. when -numa cpus= is parsed) and ensures that cpu can be mapped only to one node. 
Remove duplicate check based on node_cpu bitmaps and since the last user is gone remove node_cpu as well, which completes internal transition from legacy bitmap based mapping storage to possible_cpus storage. Signed-off-by: Igor Mammedov Reviewed-by: David Gibson Reviewed-by: Andrew Jones Message-Id: <1494415802-227633-17-git-send-email-imammedo@redhat.com> Signed-off-by: Eduardo Habkost --- include/sysemu/numa.h | 1 - numa.c | 43 ------------------------------------------- 2 files changed, 44 deletions(-) diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h index 8cb3ebc233..7ffde5b119 100644 --- a/include/sysemu/numa.h +++ b/include/sysemu/numa.h @@ -18,7 +18,6 @@ struct numa_addr_range { struct node_info { uint64_t node_mem; - unsigned long *node_cpu; struct HostMemoryBackend *node_memdev; bool present; QLIST_HEAD(, numa_addr_range) addr; /* List to store address ranges */ diff --git a/numa.c b/numa.c index 63bff5a54d..ca122ccb13 100644 --- a/numa.c +++ b/numa.c @@ -178,7 +178,6 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node, cpus->value, max_cpus); return; } - bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1); props = mc->cpu_index_to_instance_props(ms, cpus->value); props.node_id = nodenr; props.has_node_id = true; @@ -305,41 +304,6 @@ end: return 0; } -static char *enumerate_cpus(unsigned long *cpus, int max_cpus) -{ - int cpu; - bool first = true; - GString *s = g_string_new(NULL); - - for (cpu = find_first_bit(cpus, max_cpus); - cpu < max_cpus; - cpu = find_next_bit(cpus, max_cpus, cpu + 1)) { - g_string_append_printf(s, "%s%d", first ? 
"" : " ", cpu); - first = false; - } - return g_string_free(s, FALSE); -} - -static void validate_numa_cpus(void) -{ - int i; - unsigned long *seen_cpus = bitmap_new(max_cpus); - - for (i = 0; i < nb_numa_nodes; i++) { - if (bitmap_intersects(seen_cpus, numa_info[i].node_cpu, max_cpus)) { - bitmap_and(seen_cpus, seen_cpus, - numa_info[i].node_cpu, max_cpus); - error_report("CPU(s) present in multiple NUMA nodes: %s", - enumerate_cpus(seen_cpus, max_cpus)); - g_free(seen_cpus); - exit(EXIT_FAILURE); - } - bitmap_or(seen_cpus, seen_cpus, - numa_info[i].node_cpu, max_cpus); - } - g_free(seen_cpus); -} - /* If all node pair distances are symmetric, then only distances * in one direction are enough. If there is even one asymmetric * pair, though, then all distances must be provided. The @@ -451,10 +415,6 @@ void parse_numa_opts(MachineState *ms) const CPUArchIdList *possible_cpus; MachineClass *mc = MACHINE_GET_CLASS(ms); - for (i = 0; i < MAX_NODES; i++) { - numa_info[i].node_cpu = bitmap_new(max_cpus); - } - if (qemu_opts_foreach(qemu_find_opts("numa"), parse_numa, ms, NULL)) { exit(1); } @@ -531,13 +491,10 @@ void parse_numa_opts(MachineState *ms) props = mc->cpu_index_to_instance_props(ms, i); props.has_node_id = true; - set_bit(i, numa_info[props.node_id].node_cpu); machine_set_cpu_numa_node(ms, &props, &error_fatal); } } - validate_numa_cpus(); - /* QEMU needs at least all unique node pair distances to build * the whole NUMA distance table. QEMU treats the distance table * as symmetric by default, i.e. distance A->B == distance B->A. -- cgit v1.2.3 From 419fcdec3c1ff545cd33d90ade99236c9bcc37cc Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 10 May 2017 13:30:01 +0200 Subject: numa: add '-numa cpu,...' option for property based node mapping legacy cpu to node mapping is using cpu index values to map VCPU to node with help of '-numa node,nodeid=node,cpus=x[-y]' option. 
However cpu index is internal concept and QEMU users have to guess /reimplement qemu's logic/ to map it to a concrete cpu socket/core/thread to make sane CPUs placement across numa nodes. This patch allows to map cpu objects to numa nodes using the same properties as used for cpus with -device/device_add (socket-id/core-id/thread-id/node-id). At present valid properties/values to address CPUs could be fetched using hotpluggable-cpus monitor/qmp command, it will require user to start qemu twice when creating domain to fetch possible CPUs for a machine type/-smp layout first and then the second time with numa explicit mapping for actual usage. The first step results could be saved and reused to set/change mapping later as far as machine type/-smp stays the same. Proposed impl. supports exact and wildcard matching to simplify CLI and allow to set mapping for a specific cpu or group of cpu objects specified by matched properties. For example: # exact mapping x86 -numa cpu,node-id=x,socket-id=y,core-id=z,thread-id=n # exact mapping SPAPR -numa cpu,node-id=x,core-id=y # wildcard mapping, all cpu objects that match socket-id=y # are mapped to node-id=x -numa cpu,node-id=x,socket-id=y Signed-off-by: Igor Mammedov Message-Id: <1494415802-227633-18-git-send-email-imammedo@redhat.com> Reviewed-by: David Gibson Signed-off-by: Eduardo Habkost --- numa.c | 15 +++++++++++++++ qapi-schema.json | 21 +++++++++++++++++++-- qemu-options.hx | 20 ++++++++++++++++++++ 3 files changed, 54 insertions(+), 2 deletions(-) diff --git a/numa.c b/numa.c index ca122ccb13..84ce2af9b4 100644 --- a/numa.c +++ b/numa.c @@ -290,6 +290,21 @@ static int parse_numa(void *opaque, QemuOpts *opts, Error **errp) goto end; } break; + case NUMA_OPTIONS_TYPE_CPU: + if (!object->u.cpu.has_node_id) { + error_setg(&err, "Missing mandatory node-id property"); + goto end; + } + if (!numa_info[object->u.cpu.node_id].present) { + error_setg(&err, "Invalid node-id=%" PRId64 ", NUMA node must be " + "defined with -numa 
node,nodeid=ID before it's used with " + "-numa cpu,node-id=ID", object->u.cpu.node_id); + goto end; + } + + machine_set_cpu_numa_node(ms, qapi_NumaCpuOptions_base(&object->u.cpu), + &err); + break; default: abort(); } diff --git a/qapi-schema.json b/qapi-schema.json index c3051fb901..80603cfc51 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -5690,10 +5690,12 @@ # # @dist: NUMA distance configuration (since 2.10) # +# @cpu: property based CPU(s) to node mapping (Since: 2.10) +# # Since: 2.1 ## { 'enum': 'NumaOptionsType', - 'data': [ 'node', 'dist' ] } + 'data': [ 'node', 'dist', 'cpu' ] } ## # @NumaOptions: @@ -5707,7 +5709,8 @@ 'discriminator': 'type', 'data': { 'node': 'NumaNodeOptions', - 'dist': 'NumaDistOptions' }} + 'dist': 'NumaDistOptions', + 'cpu': 'NumaCpuOptions' }} ## # @NumaNodeOptions: @@ -5756,6 +5759,20 @@ 'dst': 'uint16', 'val': 'uint8' }} +## +# @NumaCpuOptions: +# +# Option "-numa cpu" overrides default cpu to node mapping. +# It accepts the same set of cpu properties as returned by +# query-hotpluggable-cpus[].props, where node-id could be used to +# override default node mapping. +# +# Since: 2.10 +## +{ 'struct': 'NumaCpuOptions', + 'base': 'CpuInstanceProperties', + 'data' : {} } + ## # @HostMemPolicy: # diff --git a/qemu-options.hx b/qemu-options.hx index e10c1454d1..6ffa98097d 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -145,10 +145,12 @@ STEXI @item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}] @itemx -numa node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}] @itemx -numa dist,src=@var{source},dst=@var{destination},val=@var{distance} +@itemx -numa cpu,node-id=@var{node}[,socket-id=@var{x}][,core-id=@var{y}][,thread-id=@var{z}] @findex -numa Define a NUMA node and assign RAM and VCPUs to it. Set the NUMA distance from a source node to a destination node. 
+Legacy VCPU assignment uses the @samp{cpus} option, where @var{firstcpu} and @var{lastcpu} are CPU indexes. Each @samp{cpus} option represent a contiguous range of CPU indexes (or a single VCPU if @var{lastcpu} is omitted). A non-contiguous @@ -162,6 +164,24 @@ a NUMA node: -numa node,cpus=0-2,cpus=5 @end example +The @samp{cpu} option is a new alternative to the @samp{cpus} option +which uses @samp{socket-id|core-id|thread-id} properties to assign +CPU objects to a @var{node} using the topology layout properties of the CPU. +The set of properties is machine specific and depends on the +machine type/@samp{smp} options used. It can be queried with the +@samp{hotpluggable-cpus} monitor command. +The @samp{node-id} property specifies the @var{node} to which the CPU object +will be assigned; the @var{node} must be declared +with the @samp{node} option before it is used with the @samp{cpu} option. + +For example: +@example +-M pc \ +-smp 1,sockets=2,maxcpus=2 \ +-numa node,nodeid=0 -numa node,nodeid=1 \ +-numa cpu,node-id=0,socket-id=0 -numa cpu,node-id=1,socket-id=1 +@end example + @samp{mem} assigns a given RAM amount to a node. @samp{memdev} assigns RAM from a given memory backend device to a node. 
If @samp{mem} and @samp{memdev} are omitted in all nodes, RAM is -- cgit v1.2.3 From 2941020a476f4875c9112500278e2ba2773cb124 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 10 May 2017 13:30:02 +0200 Subject: tests: check -numa node,cpu=props_list usecase Signed-off-by: Igor Mammedov Reviewed-by: David Gibson Message-Id: <1494415802-227633-19-git-send-email-imammedo@redhat.com> Signed-off-by: Eduardo Habkost --- tests/numa-test.c | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) diff --git a/tests/numa-test.c b/tests/numa-test.c index 27226877af..c3475d6d5e 100644 --- a/tests/numa-test.c +++ b/tests/numa-test.c @@ -131,6 +131,144 @@ static void test_query_cpus(const void *data) g_free(cli); } +static void pc_numa_cpu(const void *data) +{ + char *cli; + QDict *resp; + QList *cpus; + const QObject *e; + + cli = make_cli(data, "-cpu pentium -smp 8,sockets=2,cores=2,threads=2 " + "-numa node,nodeid=0 -numa node,nodeid=1 " + "-numa cpu,node-id=1,socket-id=0 " + "-numa cpu,node-id=0,socket-id=1,core-id=0 " + "-numa cpu,node-id=0,socket-id=1,core-id=1,thread-id=0 " + "-numa cpu,node-id=1,socket-id=1,core-id=1,thread-id=1"); + qtest_start(cli); + cpus = get_cpus(&resp); + g_assert(cpus); + + while ((e = qlist_pop(cpus))) { + QDict *cpu, *props; + int64_t socket, core, thread, node; + + cpu = qobject_to_qdict(e); + g_assert(qdict_haskey(cpu, "props")); + props = qdict_get_qdict(cpu, "props"); + + g_assert(qdict_haskey(props, "node-id")); + node = qdict_get_int(props, "node-id"); + g_assert(qdict_haskey(props, "socket-id")); + socket = qdict_get_int(props, "socket-id"); + g_assert(qdict_haskey(props, "core-id")); + core = qdict_get_int(props, "core-id"); + g_assert(qdict_haskey(props, "thread-id")); + thread = qdict_get_int(props, "thread-id"); + + if (socket == 0) { + g_assert_cmpint(node, ==, 1); + } else if (socket == 1 && core == 0) { + g_assert_cmpint(node, ==, 0); + } else if (socket == 1 && core == 1 && thread == 
0) { + g_assert_cmpint(node, ==, 0); + } else if (socket == 1 && core == 1 && thread == 1) { + g_assert_cmpint(node, ==, 1); + } else { + g_assert(false); + } + } + + QDECREF(resp); + qtest_end(); + g_free(cli); +} + +static void spapr_numa_cpu(const void *data) +{ + char *cli; + QDict *resp; + QList *cpus; + const QObject *e; + + cli = make_cli(data, "-smp 4,cores=4 " + "-numa node,nodeid=0 -numa node,nodeid=1 " + "-numa cpu,node-id=0,core-id=0 " + "-numa cpu,node-id=0,core-id=1 " + "-numa cpu,node-id=0,core-id=2 " + "-numa cpu,node-id=1,core-id=3"); + qtest_start(cli); + cpus = get_cpus(&resp); + g_assert(cpus); + + while ((e = qlist_pop(cpus))) { + QDict *cpu, *props; + int64_t core, node; + + cpu = qobject_to_qdict(e); + g_assert(qdict_haskey(cpu, "props")); + props = qdict_get_qdict(cpu, "props"); + + g_assert(qdict_haskey(props, "node-id")); + node = qdict_get_int(props, "node-id"); + g_assert(qdict_haskey(props, "core-id")); + core = qdict_get_int(props, "core-id"); + + if (core >= 0 && core < 3) { + g_assert_cmpint(node, ==, 0); + } else if (core == 3) { + g_assert_cmpint(node, ==, 1); + } else { + g_assert(false); + } + } + + QDECREF(resp); + qtest_end(); + g_free(cli); +} + +static void aarch64_numa_cpu(const void *data) +{ + char *cli; + QDict *resp; + QList *cpus; + const QObject *e; + + cli = make_cli(data, "-smp 2 " + "-numa node,nodeid=0 -numa node,nodeid=1 " + "-numa cpu,node-id=1,thread-id=0 " + "-numa cpu,node-id=0,thread-id=1"); + qtest_start(cli); + cpus = get_cpus(&resp); + g_assert(cpus); + + while ((e = qlist_pop(cpus))) { + QDict *cpu, *props; + int64_t thread, node; + + cpu = qobject_to_qdict(e); + g_assert(qdict_haskey(cpu, "props")); + props = qdict_get_qdict(cpu, "props"); + + g_assert(qdict_haskey(props, "node-id")); + node = qdict_get_int(props, "node-id"); + g_assert(qdict_haskey(props, "thread-id")); + thread = qdict_get_int(props, "thread-id"); + + if (thread == 0) { + g_assert_cmpint(node, ==, 1); + } else if (thread == 1) { + 
g_assert_cmpint(node, ==, 0); + } else { + g_assert(false); + } + } + + QDECREF(resp); + qtest_end(); + g_free(cli); +} + int main(int argc, char **argv) { const char *args = NULL; @@ -147,5 +285,18 @@ int main(int argc, char **argv) qtest_add_data_func("/numa/mon/cpus/partial", args, test_mon_partial); qtest_add_data_func("/numa/qmp/cpus/query-cpus", args, test_query_cpus); + if (!strcmp(arch, "i386") || !strcmp(arch, "x86_64")) { + qtest_add_data_func("/numa/pc/cpu/explicit", args, pc_numa_cpu); + } + + if (!strcmp(arch, "ppc64")) { + qtest_add_data_func("/numa/spapr/cpu/explicit", args, spapr_numa_cpu); + } + + if (!strcmp(arch, "aarch64")) { + qtest_add_data_func("/numa/aarch64/cpu/explicit", args, + aarch64_numa_cpu); + } + return g_test_run(); } -- cgit v1.2.3 From 46baa9007fb807f88feb2b1a975ae680dab1d5fd Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 5 Apr 2017 20:00:22 +0100 Subject: migration/i386: Remove old non-softfloat 64bit FP support Long long ago, we used to support storing the x86 FP registers in a 64bit format. Then c31da136a0bf8caad70c348f5ffc283206e9c7fc in v0.14-rc0 removed the last support for writing that in the migration format. Even before that, it was only used if you had softfloat disabled (i.e. !USE_X86LDOUBLE) so in practice use of it in even earlier qemu is unlikely for most users. Kill it off, it's complicated, and possibly broken. Signed-off-by: Dr. 
David Alan Gilbert Message-Id: <20170405190024.27581-2-dgilbert@redhat.com> Reviewed-by: Juan Quintela Signed-off-by: Eduardo Habkost --- target/i386/machine.c | 107 +++----------------------------------------------- 1 file changed, 6 insertions(+), 101 deletions(-) diff --git a/target/i386/machine.c b/target/i386/machine.c index 78ae2f986b..bf9567cbd4 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -136,36 +136,6 @@ static const VMStateDescription vmstate_mtrr_var = { #define VMSTATE_MTRR_VARS(_field, _state, _n, _v) \ VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_mtrr_var, MTRRVar) -static int put_fpreg_error(QEMUFile *f, void *opaque, size_t size, - VMStateField *field, QJSON *vmdesc) -{ - fprintf(stderr, "call put_fpreg() with invalid arguments\n"); - exit(0); - return 0; -} - -/* XXX: add that in a FPU generic layer */ -union x86_longdouble { - uint64_t mant; - uint16_t exp; -}; - -#define MANTD1(fp) (fp & ((1LL << 52) - 1)) -#define EXPBIAS1 1023 -#define EXPD1(fp) ((fp >> 52) & 0x7FF) -#define SIGND1(fp) ((fp >> 32) & 0x80000000) - -static void fp64_to_fp80(union x86_longdouble *p, uint64_t temp) -{ - int e; - /* mantissa */ - p->mant = (MANTD1(temp) << 11) | (1LL << 63); - /* exponent + sign */ - e = EXPD1(temp) - EXPBIAS1 + 16383; - e |= SIGND1(temp) >> 16; - p->exp = e; -} - static int get_fpreg(QEMUFile *f, void *opaque, size_t size, VMStateField *field) { @@ -200,76 +170,6 @@ static const VMStateInfo vmstate_fpreg = { .put = put_fpreg, }; -static int get_fpreg_1_mmx(QEMUFile *f, void *opaque, size_t size, - VMStateField *field) -{ - union x86_longdouble *p = opaque; - uint64_t mant; - - qemu_get_be64s(f, &mant); - p->mant = mant; - p->exp = 0xffff; - return 0; -} - -static const VMStateInfo vmstate_fpreg_1_mmx = { - .name = "fpreg_1_mmx", - .get = get_fpreg_1_mmx, - .put = put_fpreg_error, -}; - -static int get_fpreg_1_no_mmx(QEMUFile *f, void *opaque, size_t size, - VMStateField *field) -{ - union x86_longdouble *p = opaque; - 
uint64_t mant; - - qemu_get_be64s(f, &mant); - fp64_to_fp80(p, mant); - return 0; -} - -static const VMStateInfo vmstate_fpreg_1_no_mmx = { - .name = "fpreg_1_no_mmx", - .get = get_fpreg_1_no_mmx, - .put = put_fpreg_error, -}; - -static bool fpregs_is_0(void *opaque, int version_id) -{ - X86CPU *cpu = opaque; - CPUX86State *env = &cpu->env; - - return (env->fpregs_format_vmstate == 0); -} - -static bool fpregs_is_1_mmx(void *opaque, int version_id) -{ - X86CPU *cpu = opaque; - CPUX86State *env = &cpu->env; - int guess_mmx; - - guess_mmx = ((env->fptag_vmstate == 0xff) && - (env->fpus_vmstate & 0x3800) == 0); - return (guess_mmx && (env->fpregs_format_vmstate == 1)); -} - -static bool fpregs_is_1_no_mmx(void *opaque, int version_id) -{ - X86CPU *cpu = opaque; - CPUX86State *env = &cpu->env; - int guess_mmx; - - guess_mmx = ((env->fptag_vmstate == 0xff) && - (env->fpus_vmstate & 0x3800) == 0); - return (!guess_mmx && (env->fpregs_format_vmstate == 1)); -} - -#define VMSTATE_FP_REGS(_field, _state, _n) \ - VMSTATE_ARRAY_TEST(_field, _state, _n, fpregs_is_0, vmstate_fpreg, FPReg), \ - VMSTATE_ARRAY_TEST(_field, _state, _n, fpregs_is_1_mmx, vmstate_fpreg_1_mmx, FPReg), \ - VMSTATE_ARRAY_TEST(_field, _state, _n, fpregs_is_1_no_mmx, vmstate_fpreg_1_no_mmx, FPReg) - static bool version_is_5(void *opaque, int version_id) { return version_id == 5; @@ -356,6 +256,10 @@ static int cpu_post_load(void *opaque, int version_id) return -EINVAL; } + if (env->fpregs_format_vmstate) { + error_report("Unsupported old non-softfloat CPU state"); + return -EINVAL; + } /* * Real mode guest segments register DPL should be zero. * Older KVM version were setting it wrongly. 
@@ -943,7 +847,8 @@ VMStateDescription vmstate_x86_cpu = { VMSTATE_UINT16(env.fpus_vmstate, X86CPU), VMSTATE_UINT16(env.fptag_vmstate, X86CPU), VMSTATE_UINT16(env.fpregs_format_vmstate, X86CPU), - VMSTATE_FP_REGS(env.fpregs, X86CPU, 8), + + VMSTATE_STRUCT_ARRAY(env.fpregs, X86CPU, 8, 0, vmstate_fpreg, FPReg), VMSTATE_SEGMENT_ARRAY(env.segs, X86CPU, 6), VMSTATE_SEGMENT(env.ldt, X86CPU), -- cgit v1.2.3 From ab808276f8b1db1757e1421e99c98b11714bb9a8 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 5 Apr 2017 20:00:23 +0100 Subject: vmstatification: i386 FPReg Convert the fpreg save/restore to use VMSTATE_ macros rather than .get/.put. Signed-off-by: Dr. David Alan Gilbert Message-Id: <20170405190024.27581-3-dgilbert@redhat.com> Reviewed-by: Juan Quintela Signed-off-by: Eduardo Habkost --- target/i386/machine.c | 52 +++++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/target/i386/machine.c b/target/i386/machine.c index bf9567cbd4..0b4756b35f 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -136,38 +136,46 @@ static const VMStateDescription vmstate_mtrr_var = { #define VMSTATE_MTRR_VARS(_field, _state, _n, _v) \ VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_mtrr_var, MTRRVar) -static int get_fpreg(QEMUFile *f, void *opaque, size_t size, - VMStateField *field) +typedef struct x86_FPReg_tmp { + FPReg *parent; + uint64_t tmp_mant; + uint16_t tmp_exp; +} x86_FPReg_tmp; + +static void fpreg_pre_save(void *opaque) { - FPReg *fp_reg = opaque; - uint64_t mant; - uint16_t exp; + x86_FPReg_tmp *tmp = opaque; - qemu_get_be64s(f, &mant); - qemu_get_be16s(f, &exp); - fp_reg->d = cpu_set_fp80(mant, exp); - return 0; + /* we save the real CPU data (in case of MMX usage only 'mant' + contains the MMX register */ + cpu_get_fp80(&tmp->tmp_mant, &tmp->tmp_exp, tmp->parent->d); } -static int put_fpreg(QEMUFile *f, void *opaque, size_t size, - VMStateField *field, QJSON *vmdesc) +static 
int fpreg_post_load(void *opaque, int version) { - FPReg *fp_reg = opaque; - uint64_t mant; - uint16_t exp; - /* we save the real CPU data (in case of MMX usage only 'mant' - contains the MMX register */ - cpu_get_fp80(&mant, &exp, fp_reg->d); - qemu_put_be64s(f, &mant); - qemu_put_be16s(f, &exp); + x86_FPReg_tmp *tmp = opaque; + tmp->parent->d = cpu_set_fp80(tmp->tmp_mant, tmp->tmp_exp); return 0; } -static const VMStateInfo vmstate_fpreg = { +static const VMStateDescription vmstate_fpreg_tmp = { + .name = "fpreg_tmp", + .post_load = fpreg_post_load, + .pre_save = fpreg_pre_save, + .fields = (VMStateField[]) { + VMSTATE_UINT64(tmp_mant, x86_FPReg_tmp), + VMSTATE_UINT16(tmp_exp, x86_FPReg_tmp), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_fpreg = { .name = "fpreg", - .get = get_fpreg, - .put = put_fpreg, + .fields = (VMStateField[]) { + VMSTATE_WITH_TMP(FPReg, x86_FPReg_tmp, vmstate_fpreg_tmp), + VMSTATE_END_OF_LIST() + } }; static bool version_is_5(void *opaque, int version_id) -- cgit v1.2.3 From 08b277ac46da8b02e50cec455eca7cb2d12ffcf0 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 5 Apr 2017 20:00:24 +0100 Subject: migration/i386: Remove support for pre-0.12 formats Remove support for versions of the CPU state prior to 11 which is the version used in qemu 0.12 - you'd be pretty lucky if you got a migration stream to work from anything that old anyway. This doesn't affect the machine type definition in any way. My main reason for doing this is the hack for sysenter_esp/eip that uses .get/.put's in state versions less than 7 (that's prior to somewhere before 0.10). Signed-off-by: Dr. 
David Alan Gilbert Message-Id: <20170405190024.27581-4-dgilbert@redhat.com> Reviewed-by: Juan Quintela Signed-off-by: Eduardo Habkost --- target/i386/machine.c | 115 +++++++++++++++----------------------------------- 1 file changed, 34 insertions(+), 81 deletions(-) diff --git a/target/i386/machine.c b/target/i386/machine.c index 0b4756b35f..3cb272948e 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -178,44 +178,6 @@ static const VMStateDescription vmstate_fpreg = { } }; -static bool version_is_5(void *opaque, int version_id) -{ - return version_id == 5; -} - -#ifdef TARGET_X86_64 -static bool less_than_7(void *opaque, int version_id) -{ - return version_id < 7; -} - -static int get_uint64_as_uint32(QEMUFile *f, void *pv, size_t size, - VMStateField *field) -{ - uint64_t *v = pv; - *v = qemu_get_be32(f); - return 0; -} - -static int put_uint64_as_uint32(QEMUFile *f, void *pv, size_t size, - VMStateField *field, QJSON *vmdesc) -{ - uint64_t *v = pv; - qemu_put_be32(f, *v); - - return 0; -} - -static const VMStateInfo vmstate_hack_uint64_as_uint32 = { - .name = "uint64_as_uint32", - .get = get_uint64_as_uint32, - .put = put_uint64_as_uint32, -}; - -#define VMSTATE_HACK_UINT32(_f, _s, _t) \ - VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_hack_uint64_as_uint32, uint64_t) -#endif - static void cpu_pre_save(void *opaque) { X86CPU *cpu = opaque; @@ -842,7 +804,7 @@ static const VMStateDescription vmstate_mcg_ext_ctl = { VMStateDescription vmstate_x86_cpu = { .name = "cpu", .version_id = 12, - .minimum_version_id = 3, + .minimum_version_id = 11, .pre_save = cpu_pre_save, .post_load = cpu_post_load, .fields = (VMStateField[]) { @@ -865,16 +827,8 @@ VMStateDescription vmstate_x86_cpu = { VMSTATE_SEGMENT(env.idt, X86CPU), VMSTATE_UINT32(env.sysenter_cs, X86CPU), -#ifdef TARGET_X86_64 - /* Hack: In v7 size changed from 32 to 64 bits on x86_64 */ - VMSTATE_HACK_UINT32(env.sysenter_esp, X86CPU, less_than_7), - VMSTATE_HACK_UINT32(env.sysenter_eip, X86CPU, 
less_than_7), - VMSTATE_UINTTL_V(env.sysenter_esp, X86CPU, 7), - VMSTATE_UINTTL_V(env.sysenter_eip, X86CPU, 7), -#else VMSTATE_UINTTL(env.sysenter_esp, X86CPU), VMSTATE_UINTTL(env.sysenter_eip, X86CPU), -#endif VMSTATE_UINTTL(env.cr[0], X86CPU), VMSTATE_UINTTL(env.cr[2], X86CPU), @@ -895,46 +849,45 @@ VMStateDescription vmstate_x86_cpu = { VMSTATE_UINT64(env.fmask, X86CPU), VMSTATE_UINT64(env.kernelgsbase, X86CPU), #endif - VMSTATE_UINT32_V(env.smbase, X86CPU, 4), - - VMSTATE_UINT64_V(env.pat, X86CPU, 5), - VMSTATE_UINT32_V(env.hflags2, X86CPU, 5), - - VMSTATE_UINT32_TEST(parent_obj.halted, X86CPU, version_is_5), - VMSTATE_UINT64_V(env.vm_hsave, X86CPU, 5), - VMSTATE_UINT64_V(env.vm_vmcb, X86CPU, 5), - VMSTATE_UINT64_V(env.tsc_offset, X86CPU, 5), - VMSTATE_UINT64_V(env.intercept, X86CPU, 5), - VMSTATE_UINT16_V(env.intercept_cr_read, X86CPU, 5), - VMSTATE_UINT16_V(env.intercept_cr_write, X86CPU, 5), - VMSTATE_UINT16_V(env.intercept_dr_read, X86CPU, 5), - VMSTATE_UINT16_V(env.intercept_dr_write, X86CPU, 5), - VMSTATE_UINT32_V(env.intercept_exceptions, X86CPU, 5), - VMSTATE_UINT8_V(env.v_tpr, X86CPU, 5), + VMSTATE_UINT32(env.smbase, X86CPU), + + VMSTATE_UINT64(env.pat, X86CPU), + VMSTATE_UINT32(env.hflags2, X86CPU), + + VMSTATE_UINT64(env.vm_hsave, X86CPU), + VMSTATE_UINT64(env.vm_vmcb, X86CPU), + VMSTATE_UINT64(env.tsc_offset, X86CPU), + VMSTATE_UINT64(env.intercept, X86CPU), + VMSTATE_UINT16(env.intercept_cr_read, X86CPU), + VMSTATE_UINT16(env.intercept_cr_write, X86CPU), + VMSTATE_UINT16(env.intercept_dr_read, X86CPU), + VMSTATE_UINT16(env.intercept_dr_write, X86CPU), + VMSTATE_UINT32(env.intercept_exceptions, X86CPU), + VMSTATE_UINT8(env.v_tpr, X86CPU), /* MTRRs */ - VMSTATE_UINT64_ARRAY_V(env.mtrr_fixed, X86CPU, 11, 8), - VMSTATE_UINT64_V(env.mtrr_deftype, X86CPU, 8), + VMSTATE_UINT64_ARRAY(env.mtrr_fixed, X86CPU, 11), + VMSTATE_UINT64(env.mtrr_deftype, X86CPU), VMSTATE_MTRR_VARS(env.mtrr_var, X86CPU, MSR_MTRRcap_VCNT, 8), /* KVM-related states */ - 
VMSTATE_INT32_V(env.interrupt_injected, X86CPU, 9), - VMSTATE_UINT32_V(env.mp_state, X86CPU, 9), - VMSTATE_UINT64_V(env.tsc, X86CPU, 9), - VMSTATE_INT32_V(env.exception_injected, X86CPU, 11), - VMSTATE_UINT8_V(env.soft_interrupt, X86CPU, 11), - VMSTATE_UINT8_V(env.nmi_injected, X86CPU, 11), - VMSTATE_UINT8_V(env.nmi_pending, X86CPU, 11), - VMSTATE_UINT8_V(env.has_error_code, X86CPU, 11), - VMSTATE_UINT32_V(env.sipi_vector, X86CPU, 11), + VMSTATE_INT32(env.interrupt_injected, X86CPU), + VMSTATE_UINT32(env.mp_state, X86CPU), + VMSTATE_UINT64(env.tsc, X86CPU), + VMSTATE_INT32(env.exception_injected, X86CPU), + VMSTATE_UINT8(env.soft_interrupt, X86CPU), + VMSTATE_UINT8(env.nmi_injected, X86CPU), + VMSTATE_UINT8(env.nmi_pending, X86CPU), + VMSTATE_UINT8(env.has_error_code, X86CPU), + VMSTATE_UINT32(env.sipi_vector, X86CPU), /* MCE */ - VMSTATE_UINT64_V(env.mcg_cap, X86CPU, 10), - VMSTATE_UINT64_V(env.mcg_status, X86CPU, 10), - VMSTATE_UINT64_V(env.mcg_ctl, X86CPU, 10), - VMSTATE_UINT64_ARRAY_V(env.mce_banks, X86CPU, MCE_BANKS_DEF * 4, 10), + VMSTATE_UINT64(env.mcg_cap, X86CPU), + VMSTATE_UINT64(env.mcg_status, X86CPU), + VMSTATE_UINT64(env.mcg_ctl, X86CPU), + VMSTATE_UINT64_ARRAY(env.mce_banks, X86CPU, MCE_BANKS_DEF * 4), /* rdtscp */ - VMSTATE_UINT64_V(env.tsc_aux, X86CPU, 11), + VMSTATE_UINT64(env.tsc_aux, X86CPU), /* KVM pvclock msr */ - VMSTATE_UINT64_V(env.system_time_msr, X86CPU, 11), - VMSTATE_UINT64_V(env.wall_clock_msr, X86CPU, 11), + VMSTATE_UINT64(env.system_time_msr, X86CPU), + VMSTATE_UINT64(env.wall_clock_msr, X86CPU), /* XSAVE related fields */ VMSTATE_UINT64_V(env.xcr0, X86CPU, 12), VMSTATE_UINT64_V(env.xstate_bv, X86CPU, 12), -- cgit v1.2.3