diff options
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r-- | arch/x86/kernel/cpu/Makefile | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/amd.c | 55 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/bugs.c | 26 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/bugs_64.c | 33 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 93 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-severity.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 200 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_amd.c | 357 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_intel.c | 41 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/therm_throt.c | 57 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/threshold.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/microcode/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/microcode/amd.c | 421 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/microcode/core.c | 83 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/microcode/intel.c | 834 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/microcode/intel_lib.c | 184 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mshyperv.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/scattered.c | 57 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/vmware.c | 86 |
19 files changed, 1285 insertions, 1252 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 4a8697f7d4ef..33b63670bf09 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -20,13 +20,11 @@ obj-y := intel_cacheinfo.o scattered.o topology.o obj-y += common.o obj-y += rdrand.o obj-y += match.o +obj-y += bugs.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o -obj-$(CONFIG_X86_32) += bugs.o -obj-$(CONFIG_X86_64) += bugs_64.o - obj-$(CONFIG_CPU_SUP_INTEL) += intel.o obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index b81fe2d63e15..71cae73a5076 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -20,6 +20,10 @@ #include "cpu.h" +static const int amd_erratum_383[]; +static const int amd_erratum_400[]; +static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum); + /* * nodes_per_socket: Stores the number of nodes per socket. * Refer to Fam15h Models 00-0fh BKDG - CPUID Fn8000_001E_ECX @@ -314,11 +318,30 @@ static void amd_get_topology(struct cpuinfo_x86 *c) smp_num_siblings = ((ebx >> 8) & 3) + 1; c->x86_max_cores /= smp_num_siblings; c->cpu_core_id = ebx & 0xff; + + /* + * We may have multiple LLCs if L3 caches exist, so check if we + * have an L3 cache by looking at the L3 cache CPUID leaf. + */ + if (cpuid_edx(0x80000006)) { + if (c->x86 == 0x17) { + /* + * LLC is at the core complex level. + * Core complex id is ApicId[3]. + */ + per_cpu(cpu_llc_id, cpu) = c->apicid >> 3; + } else { + /* LLC is at the node level. 
*/ + per_cpu(cpu_llc_id, cpu) = node_id; + } + } } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { u64 value; rdmsrl(MSR_FAM10H_NODE_ID, value); node_id = value & 7; + + per_cpu(cpu_llc_id, cpu) = node_id; } else return; @@ -329,9 +352,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_AMD_DCM); cus_per_node = c->x86_max_cores / nodes_per_socket; - /* store NodeID, use llc_shared_map to store sibling info */ - per_cpu(cpu_llc_id, cpu) = node_id; - /* core id has to be in the [0 .. cores_per_node - 1] range */ c->cpu_core_id %= cus_per_node; } @@ -347,7 +367,6 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) #ifdef CONFIG_SMP unsigned bits; int cpu = smp_processor_id(); - unsigned int socket_id, core_complex_id; bits = c->x86_coreid_bits; /* Low order bits define the core id (index of core in socket) */ @@ -357,18 +376,6 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) /* use socket ID also for last level cache */ per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; amd_get_topology(c); - - /* - * Fix percpu cpu_llc_id here as LLC topology is different - * for Fam17h systems. - */ - if (c->x86 != 0x17 || !cpuid_edx(0x80000006)) - return; - - socket_id = (c->apicid >> bits) - 1; - core_complex_id = (c->apicid & ((1 << bits) - 1)) >> 3; - - per_cpu(cpu_llc_id, cpu) = (socket_id << 3) | core_complex_id; #endif } @@ -589,11 +596,16 @@ static void early_init_amd(struct cpuinfo_x86 *c) /* F16h erratum 793, CVE-2013-6885 */ if (c->x86 == 0x16 && c->x86_model <= 0xf) msr_set_bit(MSR_AMD64_LS_CFG, 15); -} -static const int amd_erratum_383[]; -static const int amd_erratum_400[]; -static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum); + /* + * Check whether the machine is affected by erratum 400. This is + * used to select the proper idle routine and to enable the check + * whether the machine is affected in arch_post_acpi_init(), which + * sets the X86_BUG_AMD_APIC_C1E bug depending on the MSR check. 
+ */ + if (cpu_has_amd_erratum(c, amd_erratum_400)) + set_cpu_bug(c, X86_BUG_AMD_E400); +} static void init_amd_k8(struct cpuinfo_x86 *c) { @@ -774,9 +786,6 @@ static void init_amd(struct cpuinfo_x86 *c) if (c->x86 > 0x11) set_cpu_cap(c, X86_FEATURE_ARAT); - if (cpu_has_amd_erratum(c, amd_erratum_400)) - set_cpu_bug(c, X86_BUG_AMD_APIC_C1E); - rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); /* 3DNow or LM implies PREFETCHW */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bd17db15a2c1..a44ef52184df 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -16,15 +16,19 @@ #include <asm/msr.h> #include <asm/paravirt.h> #include <asm/alternative.h> +#include <asm/pgtable.h> +#include <asm/cacheflush.h> void __init check_bugs(void) { identify_boot_cpu(); -#ifndef CONFIG_SMP - pr_info("CPU: "); - print_cpu_info(&boot_cpu_data); -#endif + if (!IS_ENABLED(CONFIG_SMP)) { + pr_info("CPU: "); + print_cpu_info(&boot_cpu_data); + } + +#ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. * @@ -40,4 +44,18 @@ void __init check_bugs(void) alternative_instructions(); fpu__init_check_bugs(); +#else /* CONFIG_X86_64 */ + alternative_instructions(); + + /* + * Make sure the first 2MB area is not mapped by huge pages + * There are typically fixed size MTRRs in there and overlapping + * MTRRs into large pages causes slow downs. + * + * Right now we don't do that with gbpages because there seems + * very little benefit for that case. 
+ */ + if (!direct_gbpages) + set_memory_4k((unsigned long)__va(0), 1); +#endif } diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c deleted file mode 100644 index a972ac4c7e7d..000000000000 --- a/arch/x86/kernel/cpu/bugs_64.c +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (C) 1994 Linus Torvalds - * Copyright (C) 2000 SuSE - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <asm/alternative.h> -#include <asm/bugs.h> -#include <asm/processor.h> -#include <asm/mtrr.h> -#include <asm/cacheflush.h> - -void __init check_bugs(void) -{ - identify_boot_cpu(); -#if !defined(CONFIG_SMP) - pr_info("CPU: "); - print_cpu_info(&boot_cpu_data); -#endif - alternative_instructions(); - - /* - * Make sure the first 2MB area is not mapped by huge pages - * There are typically fixed size MTRRs in there and overlapping - * MTRRs into large pages causes slow downs. - * - * Right now we don't do that with gbpages because there seems - * very little benefit for that case. - */ - if (!direct_gbpages) - set_memory_4k((unsigned long)__va(0), 1); -} diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9bd910a7dd0a..729f92ba8224 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -979,6 +979,35 @@ static void x86_init_cache_qos(struct cpuinfo_x86 *c) } /* + * The physical to logical package id mapping is initialized from the + * acpi/mptables information. Make sure that CPUID actually agrees with + * that. + */ +static void sanitize_package_id(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + unsigned int pkg, apicid, cpu = smp_processor_id(); + + apicid = apic->cpu_present_to_apicid(cpu); + pkg = apicid >> boot_cpu_data.x86_coreid_bits; + + if (apicid != c->initial_apicid) { + pr_err(FW_BUG "CPU%u: APIC id mismatch. 
Firmware: %x CPUID: %x\n", + cpu, apicid, c->initial_apicid); + c->initial_apicid = apicid; + } + if (pkg != c->phys_proc_id) { + pr_err(FW_BUG "CPU%u: Using firmware package id %u instead of %u\n", + cpu, pkg, c->phys_proc_id); + c->phys_proc_id = pkg; + } + c->logical_proc_id = topology_phys_to_logical_pkg(pkg); +#else + c->logical_proc_id = 0; +#endif +} + +/* * This does the hard work of actually picking apart the CPU stuff... */ static void identify_cpu(struct cpuinfo_x86 *c) @@ -1103,8 +1132,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_NUMA numa_add_cpu(smp_processor_id()); #endif - /* The boot/hotplug time assigment got cleared, restore it */ - c->logical_proc_id = topology_phys_to_logical_pkg(c->phys_proc_id); + sanitize_package_id(c); } /* @@ -1144,7 +1172,6 @@ void enable_sep_cpu(void) void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); - init_amd_e400_c1e_mask(); #ifdef CONFIG_X86_32 sysenter_setup(); enable_sep_cpu(); @@ -1162,51 +1189,6 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) mtrr_ap_init(); } -struct msr_range { - unsigned min; - unsigned max; -}; - -static const struct msr_range msr_range_array[] = { - { 0x00000000, 0x00000418}, - { 0xc0000000, 0xc000040b}, - { 0xc0010000, 0xc0010142}, - { 0xc0011000, 0xc001103b}, -}; - -static void __print_cpu_msr(void) -{ - unsigned index_min, index_max; - unsigned index; - u64 val; - int i; - - for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { - index_min = msr_range_array[i].min; - index_max = msr_range_array[i].max; - - for (index = index_min; index < index_max; index++) { - if (rdmsrl_safe(index, &val)) - continue; - pr_info(" MSR%08x: %016llx\n", index, val); - } - } -} - -static int show_msr; - -static __init int setup_show_msr(char *arg) -{ - int num; - - get_option(&arg, &num); - - if (num > 0) - show_msr = num; - return 1; -} -__setup("show_msr=", setup_show_msr); - static __init int setup_noclflush(char *arg) { setup_clear_cpu_cap(X86_FEATURE_CLFLUSH); 
@@ -1240,14 +1222,6 @@ void print_cpu_info(struct cpuinfo_x86 *c) pr_cont(", stepping: 0x%x)\n", c->x86_mask); else pr_cont(")\n"); - - print_cpu_msr(c); -} - -void print_cpu_msr(struct cpuinfo_x86 *c) -{ - if (c->cpu_index < show_msr) - __print_cpu_msr(); } static __init int setup_disablecpuid(char *arg) @@ -1462,11 +1436,8 @@ void cpu_init(void) */ cr4_init_shadow(); - /* - * Load microcode on this cpu if a valid microcode is available. - * This is early microcode loading procedure. - */ - load_ucode_ap(); + if (cpu) + load_ucode_ap(); t = &per_cpu(cpu_tss, cpu); oist = &per_cpu(orig_ist, cpu); diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 631356c8cca4..c7efbcfbeda6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -311,7 +311,7 @@ static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_e *msg = s->msg; s->covered = 1; if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) { - if (panic_on_oops || tolerant < 1) + if (tolerant < 1) return MCE_PANIC_SEVERITY; } return s->sev; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a7fdf453d895..132e1ec67da0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -43,6 +43,7 @@ #include <linux/export.h> #include <linux/jump_label.h> +#include <asm/intel-family.h> #include <asm/processor.h> #include <asm/traps.h> #include <asm/tlbflush.h> @@ -135,6 +136,9 @@ void mce_setup(struct mce *m) m->socketid = cpu_data(m->extcpu).phys_proc_id; m->apicid = cpu_data(m->extcpu).initial_apicid; rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); + + if (this_cpu_has(X86_FEATURE_INTEL_PPIN)) + rdmsrl(MSR_PPIN, m->ppin); } DEFINE_PER_CPU(struct mce, injectm); @@ -207,8 +211,12 @@ EXPORT_SYMBOL_GPL(mce_inject_log); static struct notifier_block mce_srao_nb; +static atomic_t num_notifiers; + void mce_register_decode_chain(struct notifier_block *nb) { + 
atomic_inc(&num_notifiers); + /* Ensure SRAO notifier has the highest priority in the decode chain. */ if (nb != &mce_srao_nb && nb->priority == INT_MAX) nb->priority -= 1; @@ -219,6 +227,8 @@ EXPORT_SYMBOL_GPL(mce_register_decode_chain); void mce_unregister_decode_chain(struct notifier_block *nb) { + atomic_dec(&num_notifiers); + atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb); } EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); @@ -270,17 +280,17 @@ struct mca_msr_regs msr_ops = { .misc = misc_reg }; -static void print_mce(struct mce *m) +static void __print_mce(struct mce *m) { - int ret = 0; - - pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", - m->extcpu, m->mcgstatus, m->bank, m->status); + pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n", + m->extcpu, + (m->mcgstatus & MCG_STATUS_MCIP ? " Exception" : ""), + m->mcgstatus, m->bank, m->status); if (m->ip) { pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ", !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", - m->cs, m->ip); + m->cs, m->ip); if (m->cs == __KERNEL_CS) print_symbol("{%s}", m->ip); @@ -308,6 +318,13 @@ static void print_mce(struct mce *m) pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n", m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid, cpu_data(m->extcpu).microcode); +} + +static void print_mce(struct mce *m) +{ + int ret = 0; + + __print_mce(m); /* * Print out human-readable details about the MCE error, @@ -569,6 +586,32 @@ static struct notifier_block mce_srao_nb = { .priority = INT_MAX, }; +static int mce_default_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct mce *m = (struct mce *)data; + + if (!m) + return NOTIFY_DONE; + + /* + * Run the default notifier if we have only the SRAO + * notifier and us registered. 
+ */ + if (atomic_read(&num_notifiers) > 2) + return NOTIFY_DONE; + + __print_mce(m); + + return NOTIFY_DONE; +} + +static struct notifier_block mce_default_nb = { + .notifier_call = mce_default_notifier, + /* lowest prio, we want it to run last. */ + .priority = 0, +}; + /* * Read ADDR and MISC registers. */ @@ -667,6 +710,15 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) mce_gather_info(&m, NULL); + /* + * m.tsc was set in mce_setup(). Clear it if not requested. + * + * FIXME: Propagate @flags to mce_gather_info/mce_setup() to avoid + * that dance. + */ + if (!(flags & MCP_TIMESTAMP)) + m.tsc = 0; + for (i = 0; i < mca_cfg.banks; i++) { if (!mce_banks[i].ctl || !test_bit(i, *b)) continue; @@ -674,14 +726,12 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) m.misc = 0; m.addr = 0; m.bank = i; - m.tsc = 0; barrier(); m.status = mce_rdmsrl(msr_ops.status(i)); if (!(m.status & MCI_STATUS_VAL)) continue; - /* * Uncorrected or signalled events are handled by the exception * handler when it is enabled, so don't process those here. 
@@ -696,9 +746,6 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) mce_read_aux(&m, i); - if (!(flags & MCP_TIMESTAMP)) - m.tsc = 0; - severity = mce_severity(&m, mca_cfg.tolerant, NULL, false); if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) @@ -1355,7 +1402,7 @@ static void mce_timer_fn(unsigned long data) iv = __this_cpu_read(mce_next_interval); if (mce_available(this_cpu_ptr(&cpu_info))) { - machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_poll_banks)); + machine_check_poll(0, this_cpu_ptr(&mce_poll_banks)); if (mce_intel_cmci_poll()) { iv = mce_adjust_timer(iv); @@ -1745,6 +1792,14 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t) add_timer_on(t, cpu); } +static void __mcheck_cpu_setup_timer(void) +{ + struct timer_list *t = this_cpu_ptr(&mce_timer); + unsigned int cpu = smp_processor_id(); + + setup_pinned_timer(t, mce_timer_fn, cpu); +} + static void __mcheck_cpu_init_timer(void) { struct timer_list *t = this_cpu_ptr(&mce_timer); @@ -1796,7 +1851,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) __mcheck_cpu_init_generic(); __mcheck_cpu_init_vendor(c); __mcheck_cpu_init_clear_banks(); - __mcheck_cpu_init_timer(); + __mcheck_cpu_setup_timer(); } /* @@ -2138,6 +2193,7 @@ int __init mcheck_init(void) { mcheck_intel_therm_init(); mce_register_decode_chain(&mce_srao_nb); + mce_register_decode_chain(&mce_default_nb); mcheck_vendor_init_severity(); INIT_WORK(&mce_work, mce_process_work); @@ -2255,8 +2311,6 @@ static struct bus_type mce_subsys = { DEFINE_PER_CPU(struct device *, mce_device); -void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); - static inline struct mce_bank *attr_to_bank(struct device_attribute *attr) { return container_of(attr, struct mce_bank, attr); @@ -2409,6 +2463,10 @@ static int mce_device_create(unsigned int cpu) if (!mce_available(&boot_cpu_data)) return -EIO; + dev = per_cpu(mce_device, cpu); + if (dev) + return 0; + dev = kzalloc(sizeof *dev, GFP_KERNEL); if (!dev) 
return -ENOMEM; @@ -2468,28 +2526,25 @@ static void mce_device_remove(unsigned int cpu) } /* Make sure there are no machine checks on offlined CPUs. */ -static void mce_disable_cpu(void *h) +static void mce_disable_cpu(void) { - unsigned long action = *(unsigned long *)h; - if (!mce_available(raw_cpu_ptr(&cpu_info))) return; - if (!(action & CPU_TASKS_FROZEN)) + if (!cpuhp_tasks_frozen) cmci_clear(); vendor_disable_error_reporting(); } -static void mce_reenable_cpu(void *h) +static void mce_reenable_cpu(void) { - unsigned long action = *(unsigned long *)h; int i; if (!mce_available(raw_cpu_ptr(&cpu_info))) return; - if (!(action & CPU_TASKS_FROZEN)) + if (!cpuhp_tasks_frozen) cmci_reenable(); for (i = 0; i < mca_cfg.banks; i++) { struct mce_bank *b = &mce_banks[i]; @@ -2499,45 +2554,43 @@ static void mce_reenable_cpu(void *h) } } -/* Get notified when a cpu comes on/off. Be hotplug friendly. */ -static int -mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) +static int mce_cpu_dead(unsigned int cpu) +{ + mce_intel_hcpu_update(cpu); + + /* intentionally ignoring frozen here */ + if (!cpuhp_tasks_frozen) + cmci_rediscover(); + return 0; +} + +static int mce_cpu_online(unsigned int cpu) { - unsigned int cpu = (unsigned long)hcpu; struct timer_list *t = &per_cpu(mce_timer, cpu); + int ret; - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_ONLINE: - mce_device_create(cpu); - if (threshold_cpu_callback) - threshold_cpu_callback(action, cpu); - break; - case CPU_DEAD: - if (threshold_cpu_callback) - threshold_cpu_callback(action, cpu); - mce_device_remove(cpu); - mce_intel_hcpu_update(cpu); + mce_device_create(cpu); - /* intentionally ignoring frozen here */ - if (!(action & CPU_TASKS_FROZEN)) - cmci_rediscover(); - break; - case CPU_DOWN_PREPARE: - smp_call_function_single(cpu, mce_disable_cpu, &action, 1); - del_timer_sync(t); - break; - case CPU_DOWN_FAILED: - smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); - 
mce_start_timer(cpu, t); - break; + ret = mce_threshold_create_device(cpu); + if (ret) { + mce_device_remove(cpu); + return ret; } - - return NOTIFY_OK; + mce_reenable_cpu(); + mce_start_timer(cpu, t); + return 0; } -static struct notifier_block mce_cpu_notifier = { - .notifier_call = mce_cpu_callback, -}; +static int mce_cpu_pre_down(unsigned int cpu) +{ + struct timer_list *t = &per_cpu(mce_timer, cpu); + + mce_disable_cpu(); + del_timer_sync(t); + mce_threshold_remove_device(cpu); + mce_device_remove(cpu); + return 0; +} static __init void mce_init_banks(void) { @@ -2559,8 +2612,8 @@ static __init void mce_init_banks(void) static __init int mcheck_init_device(void) { + enum cpuhp_state hp_online; int err; - int i = 0; if (!mce_available(&boot_cpu_data)) { err = -EIO; @@ -2578,23 +2631,16 @@ static __init int mcheck_init_device(void) if (err) goto err_out_mem; - cpu_notifier_register_begin(); - for_each_online_cpu(i) { - err = mce_device_create(i); - if (err) { - /* - * Register notifier anyway (and do not unreg it) so - * that we don't leave undeleted timers, see notifier - * callback above. - */ - __register_hotcpu_notifier(&mce_cpu_notifier); - cpu_notifier_register_done(); - goto err_device_create; - } - } + err = cpuhp_setup_state(CPUHP_X86_MCE_DEAD, "x86/mce:dead", NULL, + mce_cpu_dead); + if (err) + goto err_out_mem; - __register_hotcpu_notifier(&mce_cpu_notifier); - cpu_notifier_register_done(); + err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/mce:online", + mce_cpu_online, mce_cpu_pre_down); + if (err < 0) + goto err_out_online; + hp_online = err; register_syscore_ops(&mce_syscore_ops); @@ -2607,16 +2653,10 @@ static __init int mcheck_init_device(void) err_register: unregister_syscore_ops(&mce_syscore_ops); + cpuhp_remove_state(hp_online); -err_device_create: - /* - * We didn't keep track of which devices were created above, but - * even if we had, the set of online cpus might have changed. 
- * Play safe and remove for every possible cpu, since - * mce_device_remove() will do the right thing. - */ - for_each_possible_cpu(i) - mce_device_remove(i); +err_out_online: + cpuhp_remove_state(CPUHP_X86_MCE_DEAD); err_out_mem: free_cpumask_var(mce_device_initialized); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 9b5403462936..ffacfdcacb85 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -24,7 +24,6 @@ #include <asm/amd_nb.h> #include <asm/apic.h> -#include <asm/idle.h> #include <asm/mce.h> #include <asm/msr.h> #include <asm/trace/irq_vectors.h> @@ -55,6 +54,8 @@ /* Threshold LVT offset is at MSR0xC0000410[15:12] */ #define SMCA_THR_LVT_OFF 0xF000 +static bool thresholding_en; + static const char * const th_names[] = { "load_store", "insn_fetch", @@ -69,7 +70,12 @@ static const char * const smca_umc_block_names[] = { "misc_umc" }; -struct smca_bank_name smca_bank_names[] = { +struct smca_bank_name { + const char *name; /* Short name for sysfs */ + const char *long_name; /* Long name for pretty-printing */ +}; + +static struct smca_bank_name smca_names[] = { [SMCA_LS] = { "load_store", "Load Store Unit" }, [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" }, [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" }, @@ -84,9 +90,25 @@ struct smca_bank_name smca_bank_names[] = { [SMCA_PSP] = { "psp", "Platform Security Processor" }, [SMCA_SMU] = { "smu", "System Management Unit" }, }; -EXPORT_SYMBOL_GPL(smca_bank_names); -static struct smca_hwid_mcatype smca_hwid_mcatypes[] = { +const char *smca_get_name(enum smca_bank_types t) +{ + if (t >= N_SMCA_BANK_TYPES) + return NULL; + + return smca_names[t].name; +} + +const char *smca_get_long_name(enum smca_bank_types t) +{ + if (t >= N_SMCA_BANK_TYPES) + return NULL; + + return smca_names[t].long_name; +} +EXPORT_SYMBOL_GPL(smca_get_long_name); + +static struct smca_hwid smca_hwid_mcatypes[] = { /* { bank_type, hwid_mcatype, 
xec_bitmap } */ /* ZN Core (HWID=0xB0) MCA types */ @@ -116,7 +138,7 @@ static struct smca_hwid_mcatype smca_hwid_mcatypes[] = { { SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 }, }; -struct smca_bank_info smca_banks[MAX_NR_BANKS]; +struct smca_bank smca_banks[MAX_NR_BANKS]; EXPORT_SYMBOL_GPL(smca_banks); /* @@ -142,35 +164,34 @@ static void default_deferred_error_interrupt(void) } void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt; -/* - * CPU Initialization - */ - static void get_smca_bank_info(unsigned int bank) { unsigned int i, hwid_mcatype, cpu = smp_processor_id(); - struct smca_hwid_mcatype *type; - u32 high, instanceId; - u16 hwid, mcatype; + struct smca_hwid *s_hwid; + u32 high, instance_id; /* Collect bank_info using CPU 0 for now. */ if (cpu) return; - if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &instanceId, &high)) { + if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &instance_id, &high)) { pr_warn("Failed to read MCA_IPID for bank %d\n", bank); return; } - hwid = high & MCI_IPID_HWID; - mcatype = (high & MCI_IPID_MCATYPE) >> 16; - hwid_mcatype = HWID_MCATYPE(hwid, mcatype); + hwid_mcatype = HWID_MCATYPE(high & MCI_IPID_HWID, + (high & MCI_IPID_MCATYPE) >> 16); for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) { - type = &smca_hwid_mcatypes[i]; - if (hwid_mcatype == type->hwid_mcatype) { - smca_banks[bank].type = type; - smca_banks[bank].type_instance = instanceId; + s_hwid = &smca_hwid_mcatypes[i]; + if (hwid_mcatype == s_hwid->hwid_mcatype) { + + WARN(smca_banks[bank].hwid, + "Bank %s already initialized!\n", + smca_get_name(s_hwid->bank_type)); + + smca_banks[bank].hwid = s_hwid; + smca_banks[bank].id = instance_id; break; } } @@ -533,6 +554,206 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) deferred_error_interrupt_enable(c); } +int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) +{ + u64 dram_base_addr, dram_limit_addr, dram_hole_base; + /* We start from the normalized address 
*/ + u64 ret_addr = norm_addr; + + u32 tmp; + + u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask; + u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets; + u8 intlv_addr_sel, intlv_addr_bit; + u8 num_intlv_bits, hashed_bit; + u8 lgcy_mmio_hole_en, base = 0; + u8 cs_mask, cs_id = 0; + bool hash_enabled = false; + + /* Read D18F0x1B4 (DramOffset), check if base 1 is used. */ + if (amd_df_indirect_read(nid, 0, 0x1B4, umc, &tmp)) + goto out_err; + + /* Remove HiAddrOffset from normalized address, if enabled: */ + if (tmp & BIT(0)) { + u64 hi_addr_offset = (tmp & GENMASK_ULL(31, 20)) << 8; + + if (norm_addr >= hi_addr_offset) { + ret_addr -= hi_addr_offset; + base = 1; + } + } + + /* Read D18F0x110 (DramBaseAddress). */ + if (amd_df_indirect_read(nid, 0, 0x110 + (8 * base), umc, &tmp)) + goto out_err; + + /* Check if address range is valid. */ + if (!(tmp & BIT(0))) { + pr_err("%s: Invalid DramBaseAddress range: 0x%x.\n", + __func__, tmp); + goto out_err; + } + + lgcy_mmio_hole_en = tmp & BIT(1); + intlv_num_chan = (tmp >> 4) & 0xF; + intlv_addr_sel = (tmp >> 8) & 0x7; + dram_base_addr = (tmp & GENMASK_ULL(31, 12)) << 16; + + /* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */ + if (intlv_addr_sel > 3) { + pr_err("%s: Invalid interleave address select %d.\n", + __func__, intlv_addr_sel); + goto out_err; + } + + /* Read D18F0x114 (DramLimitAddress). 
*/ + if (amd_df_indirect_read(nid, 0, 0x114 + (8 * base), umc, &tmp)) + goto out_err; + + intlv_num_sockets = (tmp >> 8) & 0x1; + intlv_num_dies = (tmp >> 10) & 0x3; + dram_limit_addr = ((tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0); + + intlv_addr_bit = intlv_addr_sel + 8; + + /* Re-use intlv_num_chan by setting it equal to log2(#channels) */ + switch (intlv_num_chan) { + case 0: intlv_num_chan = 0; break; + case 1: intlv_num_chan = 1; break; + case 3: intlv_num_chan = 2; break; + case 5: intlv_num_chan = 3; break; + case 7: intlv_num_chan = 4; break; + + case 8: intlv_num_chan = 1; + hash_enabled = true; + break; + default: + pr_err("%s: Invalid number of interleaved channels %d.\n", + __func__, intlv_num_chan); + goto out_err; + } + + num_intlv_bits = intlv_num_chan; + + if (intlv_num_dies > 2) { + pr_err("%s: Invalid number of interleaved nodes/dies %d.\n", + __func__, intlv_num_dies); + goto out_err; + } + + num_intlv_bits += intlv_num_dies; + + /* Add a bit if sockets are interleaved. */ + num_intlv_bits += intlv_num_sockets; + + /* Assert num_intlv_bits <= 4 */ + if (num_intlv_bits > 4) { + pr_err("%s: Invalid interleave bits %d.\n", + __func__, num_intlv_bits); + goto out_err; + } + + if (num_intlv_bits > 0) { + u64 temp_addr_x, temp_addr_i, temp_addr_y; + u8 die_id_bit, sock_id_bit, cs_fabric_id; + + /* + * Read FabricBlockInstanceInformation3_CS[BlockFabricID]. + * This is the fabric id for this coherent slave. Use + * umc/channel# as instance id of the coherent slave + * for FICAA. + */ + if (amd_df_indirect_read(nid, 0, 0x50, umc, &tmp)) + goto out_err; + + cs_fabric_id = (tmp >> 8) & 0xFF; + die_id_bit = 0; + + /* If interleaved over more than 1 channel: */ + if (intlv_num_chan) { + die_id_bit = intlv_num_chan; + cs_mask = (1 << die_id_bit) - 1; + cs_id = cs_fabric_id & cs_mask; + } + + sock_id_bit = die_id_bit; + + /* Read D18F1x208 (SystemFabricIdMask). 
*/ + if (intlv_num_dies || intlv_num_sockets) + if (amd_df_indirect_read(nid, 1, 0x208, umc, &tmp)) + goto out_err; + + /* If interleaved over more than 1 die. */ + if (intlv_num_dies) { + sock_id_bit = die_id_bit + intlv_num_dies; + die_id_shift = (tmp >> 24) & 0xF; + die_id_mask = (tmp >> 8) & 0xFF; + + cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit; + } + + /* If interleaved over more than 1 socket. */ + if (intlv_num_sockets) { + socket_id_shift = (tmp >> 28) & 0xF; + socket_id_mask = (tmp >> 16) & 0xFF; + + cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit; + } + + /* + * The pre-interleaved address consists of XXXXXXIIIYYYYY + * where III is the ID for this CS, and XXXXXXYYYYY are the + * address bits from the post-interleaved address. + * "num_intlv_bits" has been calculated to tell us how many "I" + * bits there are. "intlv_addr_bit" tells us how many "Y" bits + * there are (where "I" starts). + */ + temp_addr_y = ret_addr & GENMASK_ULL(intlv_addr_bit-1, 0); + temp_addr_i = (cs_id << intlv_addr_bit); + temp_addr_x = (ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits; + ret_addr = temp_addr_x | temp_addr_i | temp_addr_y; + } + + /* Add dram base address */ + ret_addr += dram_base_addr; + + /* If legacy MMIO hole enabled */ + if (lgcy_mmio_hole_en) { + if (amd_df_indirect_read(nid, 0, 0x104, umc, &tmp)) + goto out_err; + + dram_hole_base = tmp & GENMASK(31, 24); + if (ret_addr >= dram_hole_base) + ret_addr += (BIT_ULL(32) - dram_hole_base); + } + + if (hash_enabled) { + /* Save some parentheses and grab ls-bit at the end. */ + hashed_bit = (ret_addr >> 12) ^ + (ret_addr >> 18) ^ + (ret_addr >> 21) ^ + (ret_addr >> 30) ^ + cs_id; + + hashed_bit &= BIT(0); + + if (hashed_bit != ((ret_addr >> intlv_addr_bit) & BIT(0))) + ret_addr ^= BIT(intlv_addr_bit); + } + + /* Is calculated system address is above DRAM limit address? 
*/ + if (ret_addr > dram_limit_addr) + goto out_err; + + *sys_addr = ret_addr; + return 0; + +out_err: + return -EINVAL; +} +EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr); + static void __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc) { @@ -645,6 +866,7 @@ static void amd_threshold_interrupt(void) { u32 low = 0, high = 0, address = 0; unsigned int bank, block, cpu = smp_processor_id(); + struct thresh_restart tr; /* assume first bank caused it */ for (bank = 0; bank < mca_cfg.banks; ++bank) { @@ -681,6 +903,11 @@ static void amd_threshold_interrupt(void) log: __log_error(bank, false, true, ((u64)high << 32) | low); + + /* Reset threshold block after logging error. */ + memset(&tr, 0, sizeof(tr)); + tr.b = &per_cpu(threshold_banks, cpu)[bank]->blocks[block]; + threshold_restart_bank(&tr); } /* @@ -826,10 +1053,10 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) return th_names[bank]; } - if (!smca_banks[bank].type) + if (!smca_banks[bank].hwid) return NULL; - bank_type = smca_banks[bank].type->bank_type; + bank_type = smca_banks[bank].hwid->bank_type; if (b && bank_type == SMCA_UMC) { if (b->block < ARRAY_SIZE(smca_umc_block_names)) @@ -838,8 +1065,8 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) } snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, - "%s_%x", smca_bank_names[bank_type].name, - smca_banks[bank].type_instance); + "%s_%x", smca_get_name(bank_type), + smca_banks[bank].id); return buf_mcatype; } @@ -1010,31 +1237,6 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) return err; } -/* create dir/files for all valid threshold banks */ -static int threshold_create_device(unsigned int cpu) -{ - unsigned int bank; - struct threshold_bank **bp; - int err = 0; - - bp = kzalloc(sizeof(struct threshold_bank *) * mca_cfg.banks, - GFP_KERNEL); - if (!bp) - return -ENOMEM; - - per_cpu(threshold_banks, cpu) = bp; - - for (bank = 0; bank < mca_cfg.banks; ++bank) { - if 
(!(per_cpu(bank_map, cpu) & (1 << bank))) - continue; - err = threshold_create_bank(cpu, bank); - if (err) - return err; - } - - return err; -} - static void deallocate_threshold_block(unsigned int cpu, unsigned int bank) { @@ -1102,48 +1304,71 @@ free_out: per_cpu(threshold_banks, cpu)[bank] = NULL; } -static void threshold_remove_device(unsigned int cpu) +int mce_threshold_remove_device(unsigned int cpu) { unsigned int bank; + if (!thresholding_en) + return 0; + for (bank = 0; bank < mca_cfg.banks; ++bank) { if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; threshold_remove_bank(cpu, bank); } kfree(per_cpu(threshold_banks, cpu)); + per_cpu(threshold_banks, cpu) = NULL; + return 0; } -/* get notified when a cpu comes on/off */ -static void -amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu) +/* create dir/files for all valid threshold banks */ +int mce_threshold_create_device(unsigned int cpu) { - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - threshold_create_device(cpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - threshold_remove_device(cpu); - break; - default: - break; + unsigned int bank; + struct threshold_bank **bp; + int err = 0; + + if (!thresholding_en) + return 0; + + bp = per_cpu(threshold_banks, cpu); + if (bp) + return 0; + + bp = kzalloc(sizeof(struct threshold_bank *) * mca_cfg.banks, + GFP_KERNEL); + if (!bp) + return -ENOMEM; + + per_cpu(threshold_banks, cpu) = bp; + + for (bank = 0; bank < mca_cfg.banks; ++bank) { + if (!(per_cpu(bank_map, cpu) & (1 << bank))) + continue; + err = threshold_create_bank(cpu, bank); + if (err) + goto err; } + return err; +err: + mce_threshold_remove_device(cpu); + return err; } static __init int threshold_init_device(void) { unsigned lcpu = 0; + if (mce_threshold_vector == amd_threshold_interrupt) + thresholding_en = true; + /* to hit CPUs online before the notifier is up */ for_each_online_cpu(lcpu) { - int err = threshold_create_device(lcpu); + int err = 
mce_threshold_create_device(lcpu); if (err) return err; } - threshold_cpu_callback = amd_64_threshold_cpu_callback; return 0; } diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 1defb8ea882c..190b3e6cef4d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -11,6 +11,8 @@ #include <linux/sched.h> #include <linux/cpumask.h> #include <asm/apic.h> +#include <asm/cpufeature.h> +#include <asm/intel-family.h> #include <asm/processor.h> #include <asm/msr.h> #include <asm/mce.h> @@ -130,7 +132,7 @@ bool mce_intel_cmci_poll(void) * Reset the counter if we've logged an error in the last poll * during the storm. */ - if (machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned))) + if (machine_check_poll(0, this_cpu_ptr(&mce_banks_owned))) this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL); else this_cpu_dec(cmci_backoff_cnt); @@ -342,7 +344,7 @@ void cmci_recheck(void) return; local_irq_save(flags); - machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); + machine_check_poll(0, this_cpu_ptr(&mce_banks_owned)); local_irq_restore(flags); } @@ -464,11 +466,46 @@ static void intel_clear_lmce(void) wrmsrl(MSR_IA32_MCG_EXT_CTL, val); } +static void intel_ppin_init(struct cpuinfo_x86 *c) +{ + unsigned long long val; + + /* + * Even if testing the presence of the MSR would be enough, we don't + * want to risk the situation where other models reuse this MSR for + * other purposes. 
+ */ + switch (c->x86_model) { + case INTEL_FAM6_IVYBRIDGE_X: + case INTEL_FAM6_HASWELL_X: + case INTEL_FAM6_BROADWELL_XEON_D: + case INTEL_FAM6_BROADWELL_X: + case INTEL_FAM6_SKYLAKE_X: + if (rdmsrl_safe(MSR_PPIN_CTL, &val)) + return; + + if ((val & 3UL) == 1UL) { + /* PPIN available but disabled: */ + return; + } + + /* If PPIN is disabled, but not locked, try to enable: */ + if (!(val & 3UL)) { + wrmsrl_safe(MSR_PPIN_CTL, val | 2UL); + rdmsrl_safe(MSR_PPIN_CTL, &val); + } + + if ((val & 3UL) == 2UL) + set_cpu_cap(c, X86_FEATURE_INTEL_PPIN); + } +} + void mce_intel_feature_init(struct cpuinfo_x86 *c) { intel_init_thermal(c); intel_init_cmci(); intel_init_lmce(); + intel_ppin_init(c); } void mce_intel_feature_clear(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 6b9dc4d18ccc..465aca8be009 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -26,7 +26,6 @@ #include <asm/processor.h> #include <asm/apic.h> -#include <asm/idle.h> #include <asm/mce.h> #include <asm/msr.h> #include <asm/trace/irq_vectors.h> @@ -271,58 +270,32 @@ static void thermal_throttle_remove_dev(struct device *dev) } /* Get notified when a cpu comes on/off. Be hotplug friendly. 
*/ -static int -thermal_throttle_cpu_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) +static int thermal_throttle_online(unsigned int cpu) { - unsigned int cpu = (unsigned long)hcpu; - struct device *dev; - int err = 0; - - dev = get_cpu_device(cpu); - - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - err = thermal_throttle_add_dev(dev, cpu); - WARN_ON(err); - break; - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - case CPU_DEAD: - case CPU_DEAD_FROZEN: - thermal_throttle_remove_dev(dev); - break; - } - return notifier_from_errno(err); + struct device *dev = get_cpu_device(cpu); + + return thermal_throttle_add_dev(dev, cpu); } -static struct notifier_block thermal_throttle_cpu_notifier = +static int thermal_throttle_offline(unsigned int cpu) { - .notifier_call = thermal_throttle_cpu_callback, -}; + struct device *dev = get_cpu_device(cpu); + + thermal_throttle_remove_dev(dev); + return 0; +} static __init int thermal_throttle_init_device(void) { - unsigned int cpu = 0; - int err; + int ret; if (!atomic_read(&therm_throt_en)) return 0; - cpu_notifier_register_begin(); - - /* connect live CPUs to sysfs */ - for_each_online_cpu(cpu) { - err = thermal_throttle_add_dev(get_cpu_device(cpu), cpu); - WARN_ON(err); - } - - __register_hotcpu_notifier(&thermal_throttle_cpu_notifier); - cpu_notifier_register_done(); - - return 0; + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/therm:online", + thermal_throttle_online, + thermal_throttle_offline); + return ret < 0 ? 
ret : 0; } device_initcall(thermal_throttle_init_device); diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c index fcf9ae9384f4..9beb092d68a5 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c @@ -6,7 +6,6 @@ #include <asm/irq_vectors.h> #include <asm/apic.h> -#include <asm/idle.h> #include <asm/mce.h> #include <asm/trace/irq_vectors.h> diff --git a/arch/x86/kernel/cpu/microcode/Makefile b/arch/x86/kernel/cpu/microcode/Makefile index 220b1a508513..ba12e8aa4a45 100644 --- a/arch/x86/kernel/cpu/microcode/Makefile +++ b/arch/x86/kernel/cpu/microcode/Makefile @@ -1,4 +1,4 @@ microcode-y := core.o obj-$(CONFIG_MICROCODE) += microcode.o -microcode-$(CONFIG_MICROCODE_INTEL) += intel.o intel_lib.o +microcode-$(CONFIG_MICROCODE_INTEL) += intel.o microcode-$(CONFIG_MICROCODE_AMD) += amd.o diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 017bda12caae..6f353bdb3a25 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -5,6 +5,7 @@ * CPUs and later. * * Copyright (C) 2008-2011 Advanced Micro Devices Inc. + * 2013-2016 Borislav Petkov <bp@alien8.de> * * Author: Peter Oruba <peter.oruba@amd.com> * @@ -39,64 +40,25 @@ static struct equiv_cpu_entry *equiv_cpu_table; -struct ucode_patch { - struct list_head plist; - void *data; - u32 patch_id; - u16 equiv_cpu; -}; - -static LIST_HEAD(pcache); - /* * This points to the current valid container of microcode patches which we will - * save from the initrd before jettisoning its contents. + * save from the initrd/builtin before jettisoning its contents. 
*/ -static u8 *container; -static size_t container_size; -static bool ucode_builtin; +struct container { + u8 *data; + size_t size; +} cont; static u32 ucode_new_rev; static u8 amd_ucode_patch[PATCH_MAX_SIZE]; static u16 this_equiv_id; -static struct cpio_data ucode_cpio; - -static struct cpio_data __init find_ucode_in_initrd(void) -{ -#ifdef CONFIG_BLK_DEV_INITRD - char *path; - void *start; - size_t size; - - /* - * Microcode patch container file is prepended to the initrd in cpio - * format. See Documentation/x86/early-microcode.txt - */ - static __initdata char ucode_path[] = "kernel/x86/microcode/AuthenticAMD.bin"; - -#ifdef CONFIG_X86_32 - struct boot_params *p; - - /* - * On 32-bit, early load occurs before paging is turned on so we need - * to use physical addresses. - */ - p = (struct boot_params *)__pa_nodebug(&boot_params); - path = (char *)__pa_nodebug(ucode_path); - start = (void *)p->hdr.ramdisk_image; - size = p->hdr.ramdisk_size; -#else - path = ucode_path; - start = (void *)(boot_params.hdr.ramdisk_image + PAGE_OFFSET); - size = boot_params.hdr.ramdisk_size; -#endif /* !CONFIG_X86_32 */ - - return find_cpio_data(path, start, size, NULL); -#else - return (struct cpio_data){ NULL, 0, "" }; -#endif -} +/* + * Microcode patch container file is prepended to the initrd in cpio + * format. See Documentation/x86/early-microcode.txt + */ +static const char +ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin"; static size_t compute_container_size(u8 *data, u32 total_size) { @@ -135,48 +97,48 @@ static size_t compute_container_size(u8 *data, u32 total_size) return size; } +static inline u16 find_equiv_id(struct equiv_cpu_entry *equiv_cpu_table, + unsigned int sig) +{ + int i = 0; + + if (!equiv_cpu_table) + return 0; + + while (equiv_cpu_table[i].installed_cpu != 0) { + if (sig == equiv_cpu_table[i].installed_cpu) + return equiv_cpu_table[i].equiv_cpu; + + i++; + } + return 0; +} + /* - * Early load occurs before we can vmalloc(). 
So we look for the microcode - * patch container file in initrd, traverse equivalent cpu table, look for a - * matching microcode patch, and update, all in initrd memory in place. - * When vmalloc() is available for use later -- on 64-bit during first AP load, - * and on 32-bit during save_microcode_in_initrd_amd() -- we can call - * load_microcode_amd() to save equivalent cpu table and microcode patches in - * kernel heap memory. + * This scans the ucode blob for the proper container as we can have multiple + * containers glued together. */ -static void apply_ucode_in_initrd(void *ucode, size_t size, bool save_patch) +static struct container +find_proper_container(u8 *ucode, size_t size, u16 *ret_id) { + struct container ret = { NULL, 0 }; + u32 eax, ebx, ecx, edx; struct equiv_cpu_entry *eq; - size_t *cont_sz; - u32 *header; - u8 *data, **cont; - u8 (*patch)[PATCH_MAX_SIZE]; - u16 eq_id = 0; int offset, left; - u32 rev, eax, ebx, ecx, edx; - u32 *new_rev; - -#ifdef CONFIG_X86_32 - new_rev = (u32 *)__pa_nodebug(&ucode_new_rev); - cont_sz = (size_t *)__pa_nodebug(&container_size); - cont = (u8 **)__pa_nodebug(&container); - patch = (u8 (*)[PATCH_MAX_SIZE])__pa_nodebug(&amd_ucode_patch); -#else - new_rev = &ucode_new_rev; - cont_sz = &container_size; - cont = &container; - patch = &amd_ucode_patch; -#endif + u16 eq_id = 0; + u32 *header; + u8 *data; data = ucode; left = size; header = (u32 *)data; + /* find equiv cpu table */ if (header[0] != UCODE_MAGIC || header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */ header[2] == 0) /* size */ - return; + return ret; eax = 0x00000001; ecx = 0; @@ -185,7 +147,7 @@ static void apply_ucode_in_initrd(void *ucode, size_t size, bool save_patch) while (left > 0) { eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ); - *cont = data; + ret.data = data; /* Advance past the container header */ offset = header[2] + CONTAINER_HDR_SZ; @@ -194,15 +156,15 @@ static void apply_ucode_in_initrd(void *ucode, size_t size, bool save_patch) 
eq_id = find_equiv_id(eq, eax); if (eq_id) { - this_equiv_id = eq_id; - *cont_sz = compute_container_size(*cont, left + offset); + ret.size = compute_container_size(ret.data, left + offset); /* * truncate how much we need to iterate over in the * ucode update loop below */ - left = *cont_sz - offset; - break; + left = ret.size - offset; + *ret_id = eq_id; + return ret; } /* @@ -212,6 +174,7 @@ static void apply_ucode_in_initrd(void *ucode, size_t size, bool save_patch) */ while (left > 0) { header = (u32 *)data; + if (header[0] == UCODE_MAGIC && header[1] == UCODE_EQUIV_CPU_TABLE_TYPE) break; @@ -226,14 +189,64 @@ static void apply_ucode_in_initrd(void *ucode, size_t size, bool save_patch) ucode = data; } - if (!eq_id) { - *cont = NULL; - *cont_sz = 0; - return; - } + return ret; +} + +static int __apply_microcode_amd(struct microcode_amd *mc_amd) +{ + u32 rev, dummy; + + native_wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); + + /* verify patch application was successful */ + native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); + if (rev != mc_amd->hdr.patch_id) + return -1; + + return 0; +} + +/* + * Early load occurs before we can vmalloc(). So we look for the microcode + * patch container file in initrd, traverse equivalent cpu table, look for a + * matching microcode patch, and update, all in initrd memory in place. + * When vmalloc() is available for use later -- on 64-bit during first AP load, + * and on 32-bit during save_microcode_in_initrd_amd() -- we can call + * load_microcode_amd() to save equivalent cpu table and microcode patches in + * kernel heap memory. 
+ */ +static struct container +apply_microcode_early_amd(void *ucode, size_t size, bool save_patch) +{ + struct container ret = { NULL, 0 }; + u8 (*patch)[PATCH_MAX_SIZE]; + int offset, left; + u32 rev, *header; + u8 *data; + u16 eq_id = 0; + u32 *new_rev; + +#ifdef CONFIG_X86_32 + new_rev = (u32 *)__pa_nodebug(&ucode_new_rev); + patch = (u8 (*)[PATCH_MAX_SIZE])__pa_nodebug(&amd_ucode_patch); +#else + new_rev = &ucode_new_rev; + patch = &amd_ucode_patch; +#endif if (check_current_patch_level(&rev, true)) - return; + return (struct container){ NULL, 0 }; + + ret = find_proper_container(ucode, size, &eq_id); + if (!eq_id) + return (struct container){ NULL, 0 }; + + this_equiv_id = eq_id; + header = (u32 *)ret.data; + + /* We're pointing to an equiv table, skip over it. */ + data = ret.data + header[2] + CONTAINER_HDR_SZ; + left = ret.size - (header[2] + CONTAINER_HDR_SZ); while (left > 0) { struct microcode_amd *mc; @@ -252,8 +265,7 @@ static void apply_ucode_in_initrd(void *ucode, size_t size, bool save_patch) *new_rev = rev; if (save_patch) - memcpy(patch, mc, - min_t(u32, header[1], PATCH_MAX_SIZE)); + memcpy(patch, mc, min_t(u32, header[1], PATCH_MAX_SIZE)); } } @@ -261,10 +273,10 @@ static void apply_ucode_in_initrd(void *ucode, size_t size, bool save_patch) data += offset; left -= offset; } + return ret; } -static bool __init load_builtin_amd_microcode(struct cpio_data *cp, - unsigned int family) +static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family) { #ifdef CONFIG_X86_64 char fw_name[36] = "amd-ucode/microcode_amd.bin"; @@ -281,47 +293,45 @@ static bool __init load_builtin_amd_microcode(struct cpio_data *cp, void __init load_ucode_amd_bsp(unsigned int family) { + struct ucode_cpu_info *uci; struct cpio_data cp; - bool *builtin; - void **data; - size_t *size; + const char *path; + bool use_pa; -#ifdef CONFIG_X86_32 - data = (void **)__pa_nodebug(&ucode_cpio.data); - size = (size_t *)__pa_nodebug(&ucode_cpio.size); - builtin = (bool 
*)__pa_nodebug(&ucode_builtin); -#else - data = &ucode_cpio.data; - size = &ucode_cpio.size; - builtin = &ucode_builtin; -#endif + if (IS_ENABLED(CONFIG_X86_32)) { + uci = (struct ucode_cpu_info *)__pa_nodebug(ucode_cpu_info); + path = (const char *)__pa_nodebug(ucode_path); + use_pa = true; + } else { + uci = ucode_cpu_info; + path = ucode_path; + use_pa = false; + } - *builtin = load_builtin_amd_microcode(&cp, family); - if (!*builtin) - cp = find_ucode_in_initrd(); + if (!get_builtin_microcode(&cp, family)) + cp = find_microcode_in_initrd(path, use_pa); if (!(cp.data && cp.size)) return; - *data = cp.data; - *size = cp.size; + /* Get BSP's CPUID.EAX(1), needed in load_microcode_amd() */ + uci->cpu_sig.sig = cpuid_eax(1); - apply_ucode_in_initrd(cp.data, cp.size, true); + apply_microcode_early_amd(cp.data, cp.size, true); } #ifdef CONFIG_X86_32 /* * On 32-bit, since AP's early load occurs before paging is turned on, we - * cannot traverse cpu_equiv_table and pcache in kernel heap memory. So during - * cold boot, AP will apply_ucode_in_initrd() just like the BSP. During - * save_microcode_in_initrd_amd() BSP's patch is copied to amd_ucode_patch, + * cannot traverse cpu_equiv_table and microcode_cache in kernel heap memory. + * So during cold boot, AP will apply_ucode_in_initrd() just like the BSP. + * In save_microcode_in_initrd_amd() BSP's patch is copied to amd_ucode_patch, * which is used upon resume from suspend. 
*/ -void load_ucode_amd_ap(void) +void load_ucode_amd_ap(unsigned int family) { struct microcode_amd *mc; - size_t *usize; - void **ucode; + struct cpio_data cp; mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch); if (mc->hdr.patch_id && mc->hdr.processor_rev_id) { @@ -329,60 +339,63 @@ void load_ucode_amd_ap(void) return; } - ucode = (void *)__pa_nodebug(&container); - usize = (size_t *)__pa_nodebug(&container_size); + if (!get_builtin_microcode(&cp, family)) + cp = find_microcode_in_initrd((const char *)__pa_nodebug(ucode_path), true); - if (!*ucode || !*usize) + if (!(cp.data && cp.size)) return; - apply_ucode_in_initrd(*ucode, *usize, false); -} - -static void __init collect_cpu_sig_on_bsp(void *arg) -{ - unsigned int cpu = smp_processor_id(); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - uci->cpu_sig.sig = cpuid_eax(0x00000001); -} - -static void __init get_bsp_sig(void) -{ - unsigned int bsp = boot_cpu_data.cpu_index; - struct ucode_cpu_info *uci = ucode_cpu_info + bsp; - - if (!uci->cpu_sig.sig) - smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1); + /* + * This would set amd_ucode_patch above so that the following APs can + * use it directly instead of going down this path again. + */ + apply_microcode_early_amd(cp.data, cp.size, true); } #else -void load_ucode_amd_ap(void) +void load_ucode_amd_ap(unsigned int family) { - unsigned int cpu = smp_processor_id(); struct equiv_cpu_entry *eq; struct microcode_amd *mc; - u8 *cont = container; u32 rev, eax; u16 eq_id; - /* Exit if called on the BSP. */ - if (!cpu) + /* 64-bit runs with paging enabled, thus early==false. */ + if (check_current_patch_level(&rev, false)) return; - if (!container) - return; + /* First AP hasn't cached it yet, go through the blob. */ + if (!cont.data) { + struct cpio_data cp = { NULL, 0, "" }; - /* - * 64-bit runs with paging enabled, thus early==false. 
- */ - if (check_current_patch_level(&rev, false)) - return; + if (cont.size == -1) + return; - /* Add CONFIG_RANDOMIZE_MEMORY offset. */ - if (!ucode_builtin) - cont += PAGE_OFFSET - __PAGE_OFFSET_BASE; +reget: + if (!get_builtin_microcode(&cp, family)) { +#ifdef CONFIG_BLK_DEV_INITRD + cp = find_cpio_data(ucode_path, (void *)initrd_start, + initrd_end - initrd_start, NULL); +#endif + if (!(cp.data && cp.size)) { + /* + * Mark it so that other APs do not scan again + * for no real reason and slow down boot + * needlessly. + */ + cont.size = -1; + return; + } + } + + cont = apply_microcode_early_amd(cp.data, cp.size, false); + if (!(cont.data && cont.size)) { + cont.size = -1; + return; + } + } eax = cpuid_eax(0x00000001); - eq = (struct equiv_cpu_entry *)(cont + CONTAINER_HDR_SZ); + eq = (struct equiv_cpu_entry *)(cont.data + CONTAINER_HDR_SZ); eq_id = find_equiv_id(eq, eax); if (!eq_id) @@ -397,61 +410,50 @@ void load_ucode_amd_ap(void) } } else { - if (!ucode_cpio.data) - return; /* * AP has a different equivalence ID than BSP, looks like * mixed-steppings silicon so go through the ucode blob anew. */ - apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size, false); + goto reget; } } -#endif +#endif /* CONFIG_X86_32 */ + +static enum ucode_state +load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size); -int __init save_microcode_in_initrd_amd(void) +int __init save_microcode_in_initrd_amd(unsigned int fam) { - unsigned long cont; - int retval = 0; enum ucode_state ret; - u8 *cont_va; - u32 eax; + int retval = 0; + u16 eq_id; - if (!container) - return -EINVAL; + if (!cont.data) { + if (IS_ENABLED(CONFIG_X86_32) && (cont.size != -1)) { + struct cpio_data cp = { NULL, 0, "" }; -#ifdef CONFIG_X86_32 - get_bsp_sig(); - cont = (unsigned long)container; - cont_va = __va(container); -#else - /* - * We need the physical address of the container for both bitness since - * boot_params.hdr.ramdisk_image is a physical address. 
- */ - cont = __pa_nodebug(container); - cont_va = container; +#ifdef CONFIG_BLK_DEV_INITRD + cp = find_cpio_data(ucode_path, (void *)initrd_start, + initrd_end - initrd_start, NULL); #endif - /* - * Take into account the fact that the ramdisk might get relocated and - * therefore we need to recompute the container's position in virtual - * memory space. - */ - if (relocated_ramdisk) - container = (u8 *)(__va(relocated_ramdisk) + - (cont - boot_params.hdr.ramdisk_image)); - else - container = cont_va; + if (!(cp.data && cp.size)) { + cont.size = -1; + return -EINVAL; + } - /* Add CONFIG_RANDOMIZE_MEMORY offset. */ - if (!ucode_builtin) - container += PAGE_OFFSET - __PAGE_OFFSET_BASE; + cont = find_proper_container(cp.data, cp.size, &eq_id); + if (!eq_id) { + cont.size = -1; + return -EINVAL; + } - eax = cpuid_eax(0x00000001); - eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); + } else + return -EINVAL; + } - ret = load_microcode_amd(smp_processor_id(), eax, container, container_size); + ret = load_microcode_amd(smp_processor_id(), fam, cont.data, cont.size); if (ret != UCODE_OK) retval = -EINVAL; @@ -459,8 +461,8 @@ int __init save_microcode_in_initrd_amd(void) * This will be freed any msec now, stash patches for the current * family and switch to patch cache for cpu hotplug, etc later. 
*/ - container = NULL; - container_size = 0; + cont.data = NULL; + cont.size = 0; return retval; } @@ -478,8 +480,10 @@ void reload_ucode_amd(void) return; mc = (struct microcode_amd *)amd_ucode_patch; + if (!mc) + return; - if (mc && rev < mc->hdr.patch_id) { + if (rev < mc->hdr.patch_id) { if (!__apply_microcode_amd(mc)) { ucode_new_rev = mc->hdr.patch_id; pr_info("reload patch_level=0x%08x\n", ucode_new_rev); @@ -513,7 +517,7 @@ static struct ucode_patch *cache_find_patch(u16 equiv_cpu) { struct ucode_patch *p; - list_for_each_entry(p, &pcache, plist) + list_for_each_entry(p, &microcode_cache, plist) if (p->equiv_cpu == equiv_cpu) return p; return NULL; @@ -523,7 +527,7 @@ static void update_cache(struct ucode_patch *new_patch) { struct ucode_patch *p; - list_for_each_entry(p, &pcache, plist) { + list_for_each_entry(p, &microcode_cache, plist) { if (p->equiv_cpu == new_patch->equiv_cpu) { if (p->patch_id >= new_patch->patch_id) /* we already have the latest patch */ @@ -536,14 +540,14 @@ static void update_cache(struct ucode_patch *new_patch) } } /* no patch found, add it */ - list_add_tail(&new_patch->plist, &pcache); + list_add_tail(&new_patch->plist, &microcode_cache); } static void free_cache(void) { struct ucode_patch *p, *tmp; - list_for_each_entry_safe(p, tmp, &pcache, plist) { + list_for_each_entry_safe(p, tmp, &microcode_cache, plist) { __list_del(p->plist.prev, p->plist.next); kfree(p->data); kfree(p); @@ -663,21 +667,7 @@ bool check_current_patch_level(u32 *rev, bool early) return ret; } -int __apply_microcode_amd(struct microcode_amd *mc_amd) -{ - u32 rev, dummy; - - native_wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); - - /* verify patch application was successful */ - native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - if (rev != mc_amd->hdr.patch_id) - return -1; - - return 0; -} - -int apply_microcode_amd(int cpu) +static int apply_microcode_amd(int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); struct microcode_amd *mc_amd; @@ -860,7 +850,8 @@
static enum ucode_state __load_microcode_amd(u8 family, const u8 *data, return UCODE_OK; } -enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size) +static enum ucode_state +load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size) { enum ucode_state ret; diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 5ce5155f0695..6996413c78c3 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -3,7 +3,7 @@ * * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk> * 2006 Shaohua Li <shaohua.li@intel.com> - * 2013-2015 Borislav Petkov <bp@alien8.de> + * 2013-2016 Borislav Petkov <bp@alien8.de> * * X86 CPU microcode early update for Linux: * @@ -39,12 +39,15 @@ #include <asm/microcode.h> #include <asm/processor.h> #include <asm/cmdline.h> +#include <asm/setup.h> -#define MICROCODE_VERSION "2.01" +#define DRIVER_VERSION "2.2" static struct microcode_ops *microcode_ops; static bool dis_ucode_ldr; +LIST_HEAD(microcode_cache); + /* * Synchronization. * @@ -167,7 +170,7 @@ void load_ucode_ap(void) break; case X86_VENDOR_AMD: if (family >= 0x10) - load_ucode_amd_ap(); + load_ucode_amd_ap(family); break; default: break; @@ -185,7 +188,7 @@ static int __init save_microcode_in_initrd(void) break; case X86_VENDOR_AMD: if (c->x86 >= 0x10) - return save_microcode_in_initrd_amd(); + return save_microcode_in_initrd_amd(c->x86); break; default: break; @@ -194,6 +197,58 @@ static int __init save_microcode_in_initrd(void) return -EINVAL; } +struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa) +{ +#ifdef CONFIG_BLK_DEV_INITRD + unsigned long start = 0; + size_t size; + +#ifdef CONFIG_X86_32 + struct boot_params *params; + + if (use_pa) + params = (struct boot_params *)__pa_nodebug(&boot_params); + else + params = &boot_params; + + size = params->hdr.ramdisk_size; + + /* + * Set start only if we have an initrd image. 
We cannot use initrd_start + * because it is not set that early yet. + */ + if (size) + start = params->hdr.ramdisk_image; + +# else /* CONFIG_X86_64 */ + size = (unsigned long)boot_params.ext_ramdisk_size << 32; + size |= boot_params.hdr.ramdisk_size; + + if (size) { + start = (unsigned long)boot_params.ext_ramdisk_image << 32; + start |= boot_params.hdr.ramdisk_image; + + start += PAGE_OFFSET; + } +# endif + + /* + * Did we relocate the ramdisk? + * + * So we possibly relocate the ramdisk *after* applying microcode on the + * BSP so we rely on use_pa (use physical addresses) - even if it is not + * absolutely correct - to determine whether we've done the ramdisk + * relocation already. + */ + if (!use_pa && relocated_ramdisk) + start = initrd_start; + + return find_cpio_data(path, (void *)start, size, NULL); +#else /* !CONFIG_BLK_DEV_INITRD */ + return (struct cpio_data){ NULL, 0, "" }; +#endif +} + void reload_early_microcode(void) { int vendor, family; @@ -453,16 +508,17 @@ static struct attribute_group mc_attr_group = { static void microcode_fini_cpu(int cpu) { - microcode_ops->microcode_fini_cpu(cpu); + if (microcode_ops->microcode_fini_cpu) + microcode_ops->microcode_fini_cpu(cpu); } static enum ucode_state microcode_resume_cpu(int cpu) { - pr_debug("CPU%d updated upon resume\n", cpu); - if (apply_microcode_on_target(cpu)) return UCODE_ERROR; + pr_debug("CPU%d updated upon resume\n", cpu); + return UCODE_OK; } @@ -496,6 +552,9 @@ static enum ucode_state microcode_update_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + /* Refresh CPU microcode revision after resume. 
*/ + collect_cpu_info(cpu); + if (uci->valid) return microcode_resume_cpu(cpu); @@ -579,12 +638,7 @@ static int mc_cpu_down_prep(unsigned int cpu) /* Suspend is in progress, only remove the interface */ sysfs_remove_group(&dev->kobj, &mc_attr_group); pr_debug("CPU%d removed\n", cpu); - /* - * When a CPU goes offline, don't free up or invalidate the copy of - * the microcode in kernel memory, so that we can reuse it when the - * CPU comes back online without unnecessarily requesting the userspace - * for it again. - */ + return 0; } @@ -649,8 +703,7 @@ int __init microcode_init(void) cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online", mc_cpu_online, mc_cpu_down_prep); - pr_info("Microcode Update Driver: v" MICROCODE_VERSION - " <tigran@aivazian.fsnet.co.uk>, Peter Oruba\n"); + pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION); return 0; diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index cdc0deab00c9..54d50c3694d8 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -39,125 +39,83 @@ #include <asm/setup.h> #include <asm/msr.h> -/* - * Temporary microcode blobs pointers storage. We note here during early load - * the pointers to microcode blobs we've got from whatever storage (detached - * initrd, builtin). Later on, we put those into final storage - * mc_saved_data.mc_saved. - * - * Important: those are offsets from the beginning of initrd or absolute - * addresses within the kernel image when built-in. - */ -static unsigned long mc_tmp_ptrs[MAX_UCODE_COUNT]; - -static struct mc_saved_data { - unsigned int num_saved; - struct microcode_intel **mc_saved; -} mc_saved_data; +static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin"; -/* Microcode blobs within the initrd. 0 if builtin. 
*/ -static struct ucode_blobs { - unsigned long start; - bool valid; -} blobs; +/* Current microcode patch used in early patching */ +struct microcode_intel *intel_ucode_patch; -/* Go through saved patches and find the one suitable for the current CPU. */ -static enum ucode_state -find_microcode_patch(struct microcode_intel **saved, - unsigned int num_saved, struct ucode_cpu_info *uci) +static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1, + unsigned int s2, unsigned int p2) { - struct microcode_intel *ucode_ptr, *new_mc = NULL; - struct microcode_header_intel *mc_hdr; - int new_rev, ret, i; - - new_rev = uci->cpu_sig.rev; - - for (i = 0; i < num_saved; i++) { - ucode_ptr = saved[i]; - mc_hdr = (struct microcode_header_intel *)ucode_ptr; - - ret = has_newer_microcode(ucode_ptr, - uci->cpu_sig.sig, - uci->cpu_sig.pf, - new_rev); - if (!ret) - continue; - - new_rev = mc_hdr->rev; - new_mc = ucode_ptr; - } + if (s1 != s2) + return false; - if (!new_mc) - return UCODE_NFOUND; + /* Processor flags are either both 0 ... */ + if (!p1 && !p2) + return true; - uci->mc = (struct microcode_intel *)new_mc; - return UCODE_OK; + /* ... or they intersect. */ + return p1 & p2; } -static inline void -copy_ptrs(struct microcode_intel **mc_saved, unsigned long *mc_ptrs, - unsigned long off, int num_saved) +/* + * Returns 1 if update has been found, 0 otherwise. 
+ */ +static int find_matching_signature(void *mc, unsigned int csig, int cpf) { + struct microcode_header_intel *mc_hdr = mc; + struct extended_sigtable *ext_hdr; + struct extended_signature *ext_sig; int i; - for (i = 0; i < num_saved; i++) - mc_saved[i] = (struct microcode_intel *)(mc_ptrs[i] + off); -} - -#ifdef CONFIG_X86_32 -static void -microcode_phys(struct microcode_intel **mc_saved_tmp, struct mc_saved_data *mcs) -{ - int i; - struct microcode_intel ***mc_saved; + if (cpu_signatures_match(csig, cpf, mc_hdr->sig, mc_hdr->pf)) + return 1; - mc_saved = (struct microcode_intel ***)__pa_nodebug(&mcs->mc_saved); + /* Look for ext. headers: */ + if (get_totalsize(mc_hdr) <= get_datasize(mc_hdr) + MC_HEADER_SIZE) + return 0; - for (i = 0; i < mcs->num_saved; i++) { - struct microcode_intel *p; + ext_hdr = mc + get_datasize(mc_hdr) + MC_HEADER_SIZE; + ext_sig = (void *)ext_hdr + EXT_HEADER_SIZE; - p = *(struct microcode_intel **)__pa_nodebug(mcs->mc_saved + i); - mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p); + for (i = 0; i < ext_hdr->count; i++) { + if (cpu_signatures_match(csig, cpf, ext_sig->sig, ext_sig->pf)) + return 1; + ext_sig++; } + return 0; } -#endif -static enum ucode_state -load_microcode(struct mc_saved_data *mcs, unsigned long *mc_ptrs, - unsigned long offset, struct ucode_cpu_info *uci) +/* + * Returns 1 if update has been found, 0 otherwise. 
+ */ +static int has_newer_microcode(void *mc, unsigned int csig, int cpf, int new_rev) { - struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; - unsigned int count = mcs->num_saved; + struct microcode_header_intel *mc_hdr = mc; - if (!mcs->mc_saved) { - copy_ptrs(mc_saved_tmp, mc_ptrs, offset, count); + if (mc_hdr->rev <= new_rev) + return 0; - return find_microcode_patch(mc_saved_tmp, count, uci); - } else { -#ifdef CONFIG_X86_32 - microcode_phys(mc_saved_tmp, mcs); - return find_microcode_patch(mc_saved_tmp, count, uci); -#else - return find_microcode_patch(mcs->mc_saved, count, uci); -#endif - } + return find_matching_signature(mc, csig, cpf); } /* * Given CPU signature and a microcode patch, this function finds if the * microcode patch has matching family and model with the CPU. + * + * %true - if there's a match + * %false - otherwise */ -static enum ucode_state -matching_model_microcode(struct microcode_header_intel *mc_header, - unsigned long sig) +static bool microcode_matches(struct microcode_header_intel *mc_header, + unsigned long sig) { - unsigned int fam, model; - unsigned int fam_ucode, model_ucode; - struct extended_sigtable *ext_header; unsigned long total_size = get_totalsize(mc_header); unsigned long data_size = get_datasize(mc_header); - int ext_sigcount, i; + struct extended_sigtable *ext_header; + unsigned int fam_ucode, model_ucode; struct extended_signature *ext_sig; + unsigned int fam, model; + int ext_sigcount, i; fam = x86_family(sig); model = x86_model(sig); @@ -166,11 +124,11 @@ matching_model_microcode(struct microcode_header_intel *mc_header, model_ucode = x86_model(mc_header->sig); if (fam == fam_ucode && model == model_ucode) - return UCODE_OK; + return true; /* Look for ext. 
headers: */ if (total_size <= data_size + MC_HEADER_SIZE) - return UCODE_NFOUND; + return false; ext_header = (void *) mc_header + data_size + MC_HEADER_SIZE; ext_sig = (void *)ext_header + EXT_HEADER_SIZE; @@ -181,192 +139,242 @@ matching_model_microcode(struct microcode_header_intel *mc_header, model_ucode = x86_model(ext_sig->sig); if (fam == fam_ucode && model == model_ucode) - return UCODE_OK; + return true; ext_sig++; } - return UCODE_NFOUND; + return false; } -static int -save_microcode(struct mc_saved_data *mcs, - struct microcode_intel **mc_saved_src, - unsigned int num_saved) +static struct ucode_patch *__alloc_microcode_buf(void *data, unsigned int size) { - int i, j; - struct microcode_intel **saved_ptr; - int ret; + struct ucode_patch *p; - if (!num_saved) - return -EINVAL; + p = kzalloc(size, GFP_KERNEL); + if (!p) + return ERR_PTR(-ENOMEM); - /* - * Copy new microcode data. - */ - saved_ptr = kcalloc(num_saved, sizeof(struct microcode_intel *), GFP_KERNEL); - if (!saved_ptr) - return -ENOMEM; - - for (i = 0; i < num_saved; i++) { - struct microcode_header_intel *mc_hdr; - struct microcode_intel *mc; - unsigned long size; - - if (!mc_saved_src[i]) { - ret = -EINVAL; - goto err; - } + p->data = kmemdup(data, size, GFP_KERNEL); + if (!p->data) { + kfree(p); + return ERR_PTR(-ENOMEM); + } - mc = mc_saved_src[i]; - mc_hdr = &mc->hdr; - size = get_totalsize(mc_hdr); + return p; +} - saved_ptr[i] = kmemdup(mc, size, GFP_KERNEL); - if (!saved_ptr[i]) { - ret = -ENOMEM; - goto err; +static void save_microcode_patch(void *data, unsigned int size) +{ + struct microcode_header_intel *mc_hdr, *mc_saved_hdr; + struct ucode_patch *iter, *tmp, *p; + bool prev_found = false; + unsigned int sig, pf; + + mc_hdr = (struct microcode_header_intel *)data; + + list_for_each_entry_safe(iter, tmp, µcode_cache, plist) { + mc_saved_hdr = (struct microcode_header_intel *)iter->data; + sig = mc_saved_hdr->sig; + pf = mc_saved_hdr->pf; + + if (find_matching_signature(data, sig, 
pf)) { + prev_found = true; + + if (mc_hdr->rev <= mc_saved_hdr->rev) + continue; + + p = __alloc_microcode_buf(data, size); + if (IS_ERR(p)) + pr_err("Error allocating buffer %p\n", data); + else + list_replace(&iter->plist, &p->plist); } } /* - * Point to newly saved microcode. + * There weren't any previous patches found in the list cache; save the + * newly found. */ - mcs->mc_saved = saved_ptr; - mcs->num_saved = num_saved; - - return 0; - -err: - for (j = 0; j <= i; j++) - kfree(saved_ptr[j]); - kfree(saved_ptr); - - return ret; + if (!prev_found) { + p = __alloc_microcode_buf(data, size); + if (IS_ERR(p)) + pr_err("Error allocating buffer for %p\n", data); + else + list_add_tail(&p->plist, µcode_cache); + } } -/* - * A microcode patch in ucode_ptr is saved into mc_saved - * - if it has matching signature and newer revision compared to an existing - * patch mc_saved. - * - or if it is a newly discovered microcode patch. - * - * The microcode patch should have matching model with CPU. - * - * Returns: The updated number @num_saved of saved microcode patches. 
- */ -static unsigned int _save_mc(struct microcode_intel **mc_saved, - u8 *ucode_ptr, unsigned int num_saved) +static int microcode_sanity_check(void *mc, int print_err) { - struct microcode_header_intel *mc_hdr, *mc_saved_hdr; - unsigned int sig, pf; - int found = 0, i; + unsigned long total_size, data_size, ext_table_size; + struct microcode_header_intel *mc_header = mc; + struct extended_sigtable *ext_header = NULL; + u32 sum, orig_sum, ext_sigcount = 0, i; + struct extended_signature *ext_sig; - mc_hdr = (struct microcode_header_intel *)ucode_ptr; + total_size = get_totalsize(mc_header); + data_size = get_datasize(mc_header); - for (i = 0; i < num_saved; i++) { - mc_saved_hdr = (struct microcode_header_intel *)mc_saved[i]; - sig = mc_saved_hdr->sig; - pf = mc_saved_hdr->pf; + if (data_size + MC_HEADER_SIZE > total_size) { + if (print_err) + pr_err("Error: bad microcode data file size.\n"); + return -EINVAL; + } - if (!find_matching_signature(ucode_ptr, sig, pf)) - continue; + if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { + if (print_err) + pr_err("Error: invalid/unknown microcode update format.\n"); + return -EINVAL; + } - found = 1; + ext_table_size = total_size - (MC_HEADER_SIZE + data_size); + if (ext_table_size) { + u32 ext_table_sum = 0; + u32 *ext_tablep; - if (mc_hdr->rev <= mc_saved_hdr->rev) - continue; + if ((ext_table_size < EXT_HEADER_SIZE) + || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { + if (print_err) + pr_err("Error: truncated extended signature table.\n"); + return -EINVAL; + } + + ext_header = mc + MC_HEADER_SIZE + data_size; + if (ext_table_size != exttable_size(ext_header)) { + if (print_err) + pr_err("Error: extended signature table size mismatch.\n"); + return -EFAULT; + } + + ext_sigcount = ext_header->count; /* - * Found an older ucode saved earlier. Replace it with - * this newer one. + * Check extended table checksum: the sum of all dwords that + * comprise a valid table must be 0. 
*/ - mc_saved[i] = (struct microcode_intel *)ucode_ptr; - break; + ext_tablep = (u32 *)ext_header; + + i = ext_table_size / sizeof(u32); + while (i--) + ext_table_sum += ext_tablep[i]; + + if (ext_table_sum) { + if (print_err) + pr_warn("Bad extended signature table checksum, aborting.\n"); + return -EINVAL; + } + } + + /* + * Calculate the checksum of update data and header. The checksum of + * valid update data and header including the extended signature table + * must be 0. + */ + orig_sum = 0; + i = (MC_HEADER_SIZE + data_size) / sizeof(u32); + while (i--) + orig_sum += ((u32 *)mc)[i]; + + if (orig_sum) { + if (print_err) + pr_err("Bad microcode data checksum, aborting.\n"); + return -EINVAL; } - /* Newly detected microcode, save it to memory. */ - if (i >= num_saved && !found) - mc_saved[num_saved++] = (struct microcode_intel *)ucode_ptr; + if (!ext_table_size) + return 0; - return num_saved; + /* + * Check extended signature checksum: 0 => valid. + */ + for (i = 0; i < ext_sigcount; i++) { + ext_sig = (void *)ext_header + EXT_HEADER_SIZE + + EXT_SIGNATURE_SIZE * i; + + sum = (mc_header->sig + mc_header->pf + mc_header->cksum) - + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); + if (sum) { + if (print_err) + pr_err("Bad extended signature checksum, aborting.\n"); + return -EINVAL; + } + } + return 0; } /* * Get microcode matching with BSP's model. Only CPUs with the same model as * BSP can stay in the platform. 
*/ -static enum ucode_state __init -get_matching_model_microcode(unsigned long start, void *data, size_t size, - struct mc_saved_data *mcs, unsigned long *mc_ptrs, - struct ucode_cpu_info *uci) +static struct microcode_intel * +scan_microcode(void *data, size_t size, struct ucode_cpu_info *uci, bool save) { - struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; struct microcode_header_intel *mc_header; - unsigned int num_saved = mcs->num_saved; - enum ucode_state state = UCODE_OK; - unsigned int leftover = size; - u8 *ucode_ptr = data; + struct microcode_intel *patch = NULL; unsigned int mc_size; - int i; - - while (leftover && num_saved < ARRAY_SIZE(mc_saved_tmp)) { - if (leftover < sizeof(mc_header)) + while (size) { + if (size < sizeof(struct microcode_header_intel)) break; - mc_header = (struct microcode_header_intel *)ucode_ptr; + mc_header = (struct microcode_header_intel *)data; mc_size = get_totalsize(mc_header); - if (!mc_size || mc_size > leftover || - microcode_sanity_check(ucode_ptr, 0) < 0) + if (!mc_size || + mc_size > size || + microcode_sanity_check(data, 0) < 0) break; - leftover -= mc_size; + size -= mc_size; - /* - * Since APs with same family and model as the BSP may boot in - * the platform, we need to find and save microcode patches - * with the same family and model as the BSP. 
- */ - if (matching_model_microcode(mc_header, uci->cpu_sig.sig) != UCODE_OK) { - ucode_ptr += mc_size; + if (!microcode_matches(mc_header, uci->cpu_sig.sig)) { + data += mc_size; continue; } - num_saved = _save_mc(mc_saved_tmp, ucode_ptr, num_saved); + if (save) { + save_microcode_patch(data, mc_size); + goto next; + } - ucode_ptr += mc_size; - } - if (leftover) { - state = UCODE_ERROR; - return state; - } + if (!patch) { + if (!has_newer_microcode(data, + uci->cpu_sig.sig, + uci->cpu_sig.pf, + uci->cpu_sig.rev)) + goto next; - if (!num_saved) { - state = UCODE_NFOUND; - return state; - } + } else { + struct microcode_header_intel *phdr = &patch->hdr; - for (i = 0; i < num_saved; i++) - mc_ptrs[i] = (unsigned long)mc_saved_tmp[i] - start; + if (!has_newer_microcode(data, + phdr->sig, + phdr->pf, + phdr->rev)) + goto next; + } + + /* We have a newer patch, save it. */ + patch = data; - mcs->num_saved = num_saved; +next: + data += mc_size; + } - return state; + if (size) + return NULL; + + return patch; } static int collect_cpu_info_early(struct ucode_cpu_info *uci) { unsigned int val[2]; unsigned int family, model; - struct cpu_signature csig; + struct cpu_signature csig = { 0 }; unsigned int eax, ebx, ecx, edx; - csig.sig = 0; - csig.pf = 0; - csig.rev = 0; - memset(uci, 0, sizeof(*uci)); eax = 0x00000001; @@ -374,8 +382,8 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci) native_cpuid(&eax, &ebx, &ecx, &edx); csig.sig = eax; - family = x86_family(csig.sig); - model = x86_model(csig.sig); + family = x86_family(eax); + model = x86_model(eax); if ((model >= 5) || (family > 6)) { /* get processor flags from MSR 0x17 */ @@ -401,40 +409,41 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci) static void show_saved_mc(void) { #ifdef DEBUG - int i, j; + int i = 0, j; unsigned int sig, pf, rev, total_size, data_size, date; struct ucode_cpu_info uci; + struct ucode_patch *p; - if (!mc_saved_data.num_saved) { + if (list_empty(µcode_cache)) { 
pr_debug("no microcode data saved.\n"); return; } - pr_debug("Total microcode saved: %d\n", mc_saved_data.num_saved); collect_cpu_info_early(&uci); - sig = uci.cpu_sig.sig; - pf = uci.cpu_sig.pf; - rev = uci.cpu_sig.rev; + sig = uci.cpu_sig.sig; + pf = uci.cpu_sig.pf; + rev = uci.cpu_sig.rev; pr_debug("CPU: sig=0x%x, pf=0x%x, rev=0x%x\n", sig, pf, rev); - for (i = 0; i < mc_saved_data.num_saved; i++) { + list_for_each_entry(p, µcode_cache, plist) { struct microcode_header_intel *mc_saved_header; struct extended_sigtable *ext_header; - int ext_sigcount; struct extended_signature *ext_sig; + int ext_sigcount; + + mc_saved_header = (struct microcode_header_intel *)p->data; - mc_saved_header = (struct microcode_header_intel *) - mc_saved_data.mc_saved[i]; - sig = mc_saved_header->sig; - pf = mc_saved_header->pf; - rev = mc_saved_header->rev; - total_size = get_totalsize(mc_saved_header); - data_size = get_datasize(mc_saved_header); - date = mc_saved_header->date; + sig = mc_saved_header->sig; + pf = mc_saved_header->pf; + rev = mc_saved_header->rev; + date = mc_saved_header->date; + + total_size = get_totalsize(mc_saved_header); + data_size = get_datasize(mc_saved_header); pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, total size=0x%x, date = %04x-%02x-%02x\n", - i, sig, pf, rev, total_size, + i++, sig, pf, rev, total_size, date & 0xffff, date >> 24, (date >> 16) & 0xff); @@ -443,7 +452,7 @@ static void show_saved_mc(void) if (total_size <= data_size + MC_HEADER_SIZE) continue; - ext_header = (void *) mc_saved_header + data_size + MC_HEADER_SIZE; + ext_header = (void *)mc_saved_header + data_size + MC_HEADER_SIZE; ext_sigcount = ext_header->count; ext_sig = (void *)ext_header + EXT_HEADER_SIZE; @@ -456,85 +465,43 @@ static void show_saved_mc(void) ext_sig++; } - } #endif } /* - * Save this mc into mc_saved_data. So it will be loaded early when a CPU is - * hot added or resumes. 
- * - * Please make sure this mc should be a valid microcode patch before calling - * this function. + * Save this microcode patch. It will be loaded early when a CPU is + * hot-added or resumes. */ -static void save_mc_for_early(u8 *mc) +static void save_mc_for_early(u8 *mc, unsigned int size) { #ifdef CONFIG_HOTPLUG_CPU /* Synchronization during CPU hotplug. */ static DEFINE_MUTEX(x86_cpu_microcode_mutex); - struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; - unsigned int mc_saved_count_init; - unsigned int num_saved; - struct microcode_intel **mc_saved; - int ret, i; - mutex_lock(&x86_cpu_microcode_mutex); - mc_saved_count_init = mc_saved_data.num_saved; - num_saved = mc_saved_data.num_saved; - mc_saved = mc_saved_data.mc_saved; - - if (mc_saved && num_saved) - memcpy(mc_saved_tmp, mc_saved, - num_saved * sizeof(struct microcode_intel *)); - /* - * Save the microcode patch mc in mc_save_tmp structure if it's a newer - * version. - */ - num_saved = _save_mc(mc_saved_tmp, mc, num_saved); - - /* - * Save the mc_save_tmp in global mc_saved_data. - */ - ret = save_microcode(&mc_saved_data, mc_saved_tmp, num_saved); - if (ret) { - pr_err("Cannot save microcode patch.\n"); - goto out; - } - + save_microcode_patch(mc, size); show_saved_mc(); - /* - * Free old saved microcode data. 
- */ - if (mc_saved) { - for (i = 0; i < mc_saved_count_init; i++) - kfree(mc_saved[i]); - kfree(mc_saved); - } - -out: mutex_unlock(&x86_cpu_microcode_mutex); #endif } -static bool __init load_builtin_intel_microcode(struct cpio_data *cp) +static bool load_builtin_intel_microcode(struct cpio_data *cp) { -#ifdef CONFIG_X86_64 - unsigned int eax = 0x00000001, ebx, ecx = 0, edx; + unsigned int eax = 1, ebx, ecx = 0, edx; char name[30]; + if (IS_ENABLED(CONFIG_X86_32)) + return false; + native_cpuid(&eax, &ebx, &ecx, &edx); sprintf(name, "intel-ucode/%02x-%02x-%02x", x86_family(eax), x86_model(eax), x86_stepping(eax)); return get_builtin_firmware(cp, name); -#else - return false; -#endif } /* @@ -570,8 +537,7 @@ void show_ucode_info_early(void) } /* - * At this point, we can not call printk() yet. Keep microcode patch number in - * mc_saved_data.mc_saved and delay printing microcode info in + * At this point, we can not call printk() yet. Delay printing microcode info in * show_ucode_info_early() until printk() works. */ static void print_ucode(struct ucode_cpu_info *uci) @@ -648,206 +614,140 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) return 0; } -/* - * This function converts microcode patch offsets previously stored in - * mc_tmp_ptrs to pointers and stores the pointers in mc_saved_data. - */ int __init save_microcode_in_initrd_intel(void) { - struct microcode_intel *mc_saved[MAX_UCODE_COUNT]; - unsigned int count = mc_saved_data.num_saved; - unsigned long offset = 0; - int ret; - - if (!count) - return 0; + struct ucode_cpu_info uci; + struct cpio_data cp; /* - * We have found a valid initrd but it might've been relocated in the - * meantime so get its updated address. + * AP loading didn't find any microcode patch, no need to save anything. 
*/ - if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && blobs.valid) - offset = initrd_start; - - copy_ptrs(mc_saved, mc_tmp_ptrs, offset, count); + if (!intel_ucode_patch || IS_ERR(intel_ucode_patch)) + return 0; - ret = save_microcode(&mc_saved_data, mc_saved, count); - if (ret) - pr_err("Cannot save microcode patches from initrd.\n"); - else - show_saved_mc(); + if (!load_builtin_intel_microcode(&cp)) + cp = find_microcode_in_initrd(ucode_path, false); - return ret; -} + if (!(cp.data && cp.size)) + return 0; -static __init enum ucode_state -__scan_microcode_initrd(struct cpio_data *cd, struct ucode_blobs *blbp) -{ -#ifdef CONFIG_BLK_DEV_INITRD - static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin"; - char *p = IS_ENABLED(CONFIG_X86_32) ? (char *)__pa_nodebug(ucode_name) - : ucode_name; -# ifdef CONFIG_X86_32 - unsigned long start = 0, size; - struct boot_params *params; + collect_cpu_info_early(&uci); - params = (struct boot_params *)__pa_nodebug(&boot_params); - size = params->hdr.ramdisk_size; + scan_microcode(cp.data, cp.size, &uci, true); - /* - * Set start only if we have an initrd image. We cannot use initrd_start - * because it is not set that early yet. - */ - start = (size ? params->hdr.ramdisk_image : 0); + show_saved_mc(); -# else /* CONFIG_X86_64 */ - unsigned long start = 0, size; + return 0; +} - size = (u64)boot_params.ext_ramdisk_size << 32; - size |= boot_params.hdr.ramdisk_size; - if (size) { - start = (u64)boot_params.ext_ramdisk_image << 32; - start |= boot_params.hdr.ramdisk_image; +/* + * @res_patch, output: a pointer to the patch we found. 
+ */ +static struct microcode_intel *__load_ucode_intel(struct ucode_cpu_info *uci) +{ + static const char *path; + struct cpio_data cp; + bool use_pa; - start += PAGE_OFFSET; + if (IS_ENABLED(CONFIG_X86_32)) { + path = (const char *)__pa_nodebug(ucode_path); + use_pa = true; + } else { + path = ucode_path; + use_pa = false; } -# endif - - *cd = find_cpio_data(p, (void *)start, size, NULL); - if (cd->data) { - blbp->start = start; - blbp->valid = true; - return UCODE_OK; - } else -#endif /* CONFIG_BLK_DEV_INITRD */ - return UCODE_ERROR; -} + /* try built-in microcode first */ + if (!load_builtin_intel_microcode(&cp)) + cp = find_microcode_in_initrd(path, use_pa); -static __init enum ucode_state -scan_microcode(struct mc_saved_data *mcs, unsigned long *mc_ptrs, - struct ucode_cpu_info *uci, struct ucode_blobs *blbp) -{ - struct cpio_data cd = { NULL, 0, "" }; - enum ucode_state ret; + if (!(cp.data && cp.size)) + return NULL; - /* try built-in microcode first */ - if (load_builtin_intel_microcode(&cd)) - /* - * Invalidate blobs as we might've gotten an initrd too, - * supplied by the boot loader, by mistake or simply forgotten - * there. That's fine, we ignore it since we've found builtin - * microcode already. 
- */ - blbp->valid = false; - else { - ret = __scan_microcode_initrd(&cd, blbp); - if (ret != UCODE_OK) - return ret; - } + collect_cpu_info_early(uci); - return get_matching_model_microcode(blbp->start, cd.data, cd.size, - mcs, mc_ptrs, uci); + return scan_microcode(cp.data, cp.size, uci, false); } -static void __init -_load_ucode_intel_bsp(struct mc_saved_data *mcs, unsigned long *mc_ptrs, - struct ucode_blobs *blbp) +void __init load_ucode_intel_bsp(void) { + struct microcode_intel *patch; struct ucode_cpu_info uci; - enum ucode_state ret; - - collect_cpu_info_early(&uci); - ret = scan_microcode(mcs, mc_ptrs, &uci, blbp); - if (ret != UCODE_OK) + patch = __load_ucode_intel(&uci); + if (!patch) return; - ret = load_microcode(mcs, mc_ptrs, blbp->start, &uci); - if (ret != UCODE_OK) - return; + uci.mc = patch; apply_microcode_early(&uci, true); } -void __init load_ucode_intel_bsp(void) +void load_ucode_intel_ap(void) { - struct ucode_blobs *blobs_p; - struct mc_saved_data *mcs; - unsigned long *ptrs; + struct microcode_intel *patch, **iup; + struct ucode_cpu_info uci; -#ifdef CONFIG_X86_32 - mcs = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data); - ptrs = (unsigned long *)__pa_nodebug(&mc_tmp_ptrs); - blobs_p = (struct ucode_blobs *)__pa_nodebug(&blobs); -#else - mcs = &mc_saved_data; - ptrs = mc_tmp_ptrs; - blobs_p = &blobs; -#endif + if (IS_ENABLED(CONFIG_X86_32)) + iup = (struct microcode_intel **) __pa_nodebug(&intel_ucode_patch); + else + iup = &intel_ucode_patch; + +reget: + if (!*iup) { + patch = __load_ucode_intel(&uci); + if (!patch) + return; + + *iup = patch; + } - _load_ucode_intel_bsp(mcs, ptrs, blobs_p); + uci.mc = *iup; + + if (apply_microcode_early(&uci, true)) { + /* Mixed-silicon system? 
Try to refetch the proper patch: */ + *iup = NULL; + + goto reget; + } } -void load_ucode_intel_ap(void) +static struct microcode_intel *find_patch(struct ucode_cpu_info *uci) { - struct ucode_blobs *blobs_p; - unsigned long *ptrs, start = 0; - struct mc_saved_data *mcs; - struct ucode_cpu_info uci; - enum ucode_state ret; + struct microcode_header_intel *phdr; + struct ucode_patch *iter, *tmp; -#ifdef CONFIG_X86_32 - mcs = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data); - ptrs = (unsigned long *)__pa_nodebug(mc_tmp_ptrs); - blobs_p = (struct ucode_blobs *)__pa_nodebug(&blobs); -#else - mcs = &mc_saved_data; - ptrs = mc_tmp_ptrs; - blobs_p = &blobs; -#endif - - /* - * If there is no valid ucode previously saved in memory, no need to - * update ucode on this AP. - */ - if (!mcs->num_saved) - return; + list_for_each_entry_safe(iter, tmp, µcode_cache, plist) { - if (blobs_p->valid) { - start = blobs_p->start; + phdr = (struct microcode_header_intel *)iter->data; - /* - * Pay attention to CONFIG_RANDOMIZE_MEMORY=y as it shuffles - * physmem mapping too and there we have the initrd. 
- */ - start += PAGE_OFFSET - __PAGE_OFFSET_BASE; - } + if (phdr->rev <= uci->cpu_sig.rev) + continue; - collect_cpu_info_early(&uci); - ret = load_microcode(mcs, ptrs, start, &uci); - if (ret != UCODE_OK) - return; + if (!find_matching_signature(phdr, + uci->cpu_sig.sig, + uci->cpu_sig.pf)) + continue; - apply_microcode_early(&uci, true); + return iter->data; + } + return NULL; } void reload_ucode_intel(void) { + struct microcode_intel *p; struct ucode_cpu_info uci; - enum ucode_state ret; - - if (!mc_saved_data.num_saved) - return; collect_cpu_info_early(&uci); - ret = find_microcode_patch(mc_saved_data.mc_saved, - mc_saved_data.num_saved, &uci); - if (ret != UCODE_OK) + p = find_patch(&uci); + if (!p) return; + uci.mc = p; + apply_microcode_early(&uci, false); } @@ -879,24 +779,6 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) return 0; } -/* - * return 0 - no update found - * return 1 - found update - */ -static int get_matching_mc(struct microcode_intel *mc, int cpu) -{ - struct cpu_signature cpu_sig; - unsigned int csig, cpf, crev; - - collect_cpu_info(cpu, &cpu_sig); - - csig = cpu_sig.sig; - cpf = cpu_sig.pf; - crev = cpu_sig.rev; - - return has_newer_microcode(mc, csig, cpf, crev); -} - static int apply_microcode_intel(int cpu) { struct microcode_intel *mc; @@ -911,16 +793,12 @@ static int apply_microcode_intel(int cpu) uci = ucode_cpu_info + cpu; mc = uci->mc; - if (!mc) - return 0; - - /* - * Microcode on this CPU could be updated earlier. Only apply the - * microcode patch in mc when it is newer than the one on this - * CPU. 
- */ - if (!get_matching_mc(mc, cpu)) - return 0; + if (!mc) { + /* Look for a newer patch in our cache: */ + mc = find_patch(uci); + if (!mc) + return 0; + } /* write microcode via MSR 0x79 */ wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); @@ -962,7 +840,6 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, u8 *ucode_ptr = data, *new_mc = NULL, *mc = NULL; int new_rev = uci->cpu_sig.rev; unsigned int leftover = size; - enum ucode_state state = UCODE_OK; unsigned int curr_mc_size = 0; unsigned int csig, cpf; @@ -1015,14 +892,11 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, if (leftover) { vfree(new_mc); - state = UCODE_ERROR; - goto out; + return UCODE_ERROR; } - if (!new_mc) { - state = UCODE_NFOUND; - goto out; - } + if (!new_mc) + return UCODE_NFOUND; vfree(uci->mc); uci->mc = (struct microcode_intel *)new_mc; @@ -1032,12 +906,12 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, * permanent memory. So it will be loaded early when a CPU is hot added * or resumes. 
*/ - save_mc_for_early(new_mc); + save_mc_for_early(new_mc, curr_mc_size); pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", cpu, new_rev, uci->cpu_sig.rev); -out: - return state; + + return UCODE_OK; } static int get_ucode_fw(void *to, const void *from, size_t n) @@ -1081,20 +955,11 @@ request_microcode_user(int cpu, const void __user *buf, size_t size) return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user); } -static void microcode_fini_cpu(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - vfree(uci->mc); - uci->mc = NULL; -} - static struct microcode_ops microcode_intel_ops = { .request_microcode_user = request_microcode_user, .request_microcode_fw = request_microcode_fw, .collect_cpu_info = collect_cpu_info, .apply_microcode = apply_microcode_intel, - .microcode_fini_cpu = microcode_fini_cpu, }; struct microcode_ops * __init init_intel_microcode(void) @@ -1109,4 +974,3 @@ struct microcode_ops * __init init_intel_microcode(void) return &microcode_intel_ops; } - diff --git a/arch/x86/kernel/cpu/microcode/intel_lib.c b/arch/x86/kernel/cpu/microcode/intel_lib.c deleted file mode 100644 index 406cb6c0d9dd..000000000000 --- a/arch/x86/kernel/cpu/microcode/intel_lib.c +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Intel CPU Microcode Update Driver for Linux - * - * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com> - * H Peter Anvin" <hpa@zytor.com> - * - * This driver allows to upgrade microcode on Intel processors - * belonging to IA-32 family - PentiumPro, Pentium II, - * Pentium III, Xeon, Pentium 4, etc. - * - * Reference: Section 8.11 of Volume 3a, IA-32 Intel?
Architecture - * Software Developer's Manual - * Order Number 253668 or free download from: - * - * http://developer.intel.com/Assets/PDF/manual/253668.pdf - * - * For more information, go to http://www.urbanmyth.org/microcode - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ -#include <linux/firmware.h> -#include <linux/uaccess.h> -#include <linux/kernel.h> - -#include <asm/microcode_intel.h> -#include <asm/processor.h> -#include <asm/msr.h> - -static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1, - unsigned int s2, unsigned int p2) -{ - if (s1 != s2) - return false; - - /* Processor flags are either both 0 ... */ - if (!p1 && !p2) - return true; - - /* ... or they intersect. */ - return p1 & p2; -} - -int microcode_sanity_check(void *mc, int print_err) -{ - unsigned long total_size, data_size, ext_table_size; - struct microcode_header_intel *mc_header = mc; - struct extended_sigtable *ext_header = NULL; - u32 sum, orig_sum, ext_sigcount = 0, i; - struct extended_signature *ext_sig; - - total_size = get_totalsize(mc_header); - data_size = get_datasize(mc_header); - - if (data_size + MC_HEADER_SIZE > total_size) { - if (print_err) - pr_err("Error: bad microcode data file size.\n"); - return -EINVAL; - } - - if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { - if (print_err) - pr_err("Error: invalid/unknown microcode update format.\n"); - return -EINVAL; - } - - ext_table_size = total_size - (MC_HEADER_SIZE + data_size); - if (ext_table_size) { - u32 ext_table_sum = 0; - u32 *ext_tablep; - - if ((ext_table_size < EXT_HEADER_SIZE) - || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { - if (print_err) - pr_err("Error: truncated extended signature table.\n"); - return -EINVAL; - } - - ext_header = mc + MC_HEADER_SIZE 
+ data_size; - if (ext_table_size != exttable_size(ext_header)) { - if (print_err) - pr_err("Error: extended signature table size mismatch.\n"); - return -EFAULT; - } - - ext_sigcount = ext_header->count; - - /* - * Check extended table checksum: the sum of all dwords that - * comprise a valid table must be 0. - */ - ext_tablep = (u32 *)ext_header; - - i = ext_table_size / sizeof(u32); - while (i--) - ext_table_sum += ext_tablep[i]; - - if (ext_table_sum) { - if (print_err) - pr_warn("Bad extended signature table checksum, aborting.\n"); - return -EINVAL; - } - } - - /* - * Calculate the checksum of update data and header. The checksum of - * valid update data and header including the extended signature table - * must be 0. - */ - orig_sum = 0; - i = (MC_HEADER_SIZE + data_size) / sizeof(u32); - while (i--) - orig_sum += ((u32 *)mc)[i]; - - if (orig_sum) { - if (print_err) - pr_err("Bad microcode data checksum, aborting.\n"); - return -EINVAL; - } - - if (!ext_table_size) - return 0; - - /* - * Check extended signature checksum: 0 => valid. - */ - for (i = 0; i < ext_sigcount; i++) { - ext_sig = (void *)ext_header + EXT_HEADER_SIZE + - EXT_SIGNATURE_SIZE * i; - - sum = (mc_header->sig + mc_header->pf + mc_header->cksum) - - (ext_sig->sig + ext_sig->pf + ext_sig->cksum); - if (sum) { - if (print_err) - pr_err("Bad extended signature checksum, aborting.\n"); - return -EINVAL; - } - } - return 0; -} - -/* - * Returns 1 if update has been found, 0 otherwise. - */ -int find_matching_signature(void *mc, unsigned int csig, int cpf) -{ - struct microcode_header_intel *mc_hdr = mc; - struct extended_sigtable *ext_hdr; - struct extended_signature *ext_sig; - int i; - - if (cpu_signatures_match(csig, cpf, mc_hdr->sig, mc_hdr->pf)) - return 1; - - /* Look for ext. 
headers: */ - if (get_totalsize(mc_hdr) <= get_datasize(mc_hdr) + MC_HEADER_SIZE) - return 0; - - ext_hdr = mc + get_datasize(mc_hdr) + MC_HEADER_SIZE; - ext_sig = (void *)ext_hdr + EXT_HEADER_SIZE; - - for (i = 0; i < ext_hdr->count; i++) { - if (cpu_signatures_match(csig, cpf, ext_sig->sig, ext_sig->pf)) - return 1; - ext_sig++; - } - return 0; -} - -/* - * Returns 1 if update has been found, 0 otherwise. - */ -int has_newer_microcode(void *mc, unsigned int csig, int cpf, int new_rev) -{ - struct microcode_header_intel *mc_hdr = mc; - - if (mc_hdr->rev <= new_rev) - return 0; - - return find_matching_signature(mc, csig, cpf); -} diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 8f44c5a50ab8..6c044543545e 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -25,7 +25,6 @@ #include <asm/hyperv.h> #include <asm/mshyperv.h> #include <asm/desc.h> -#include <asm/idle.h> #include <asm/irq_regs.h> #include <asm/i8259.h> #include <asm/apic.h> diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 1db8dc490b66..d1316f9c8329 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -17,11 +17,17 @@ struct cpuid_bit { u32 sub_leaf; }; -enum cpuid_regs { - CR_EAX = 0, - CR_ECX, - CR_EDX, - CR_EBX +/* Please keep the leaf sorted by cpuid_bit.level for faster search. 
*/ +static const struct cpuid_bit cpuid_bits[] = { + { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, + { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, + { X86_FEATURE_INTEL_PT, CPUID_EBX, 25, 0x00000007, 0 }, + { X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 }, + { X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 }, + { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 }, + { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, + { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, + { 0, 0, 0, 0, 0 } }; void init_scattered_cpuid_features(struct cpuinfo_x86 *c) @@ -30,18 +36,6 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) u32 regs[4]; const struct cpuid_bit *cb; - static const struct cpuid_bit cpuid_bits[] = { - { X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 }, - { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 }, - { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 }, - { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, - { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, - { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, - { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 }, - { X86_FEATURE_PROC_FEEDBACK, CR_EDX,11, 0x80000007, 0 }, - { 0, 0, 0, 0, 0 } - }; - for (cb = cpuid_bits; cb->feature; cb++) { /* Verify that the level is valid */ @@ -50,10 +44,35 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) max_level > (cb->level | 0xffff)) continue; - cpuid_count(cb->level, cb->sub_leaf, &regs[CR_EAX], - &regs[CR_EBX], &regs[CR_ECX], &regs[CR_EDX]); + cpuid_count(cb->level, cb->sub_leaf, &regs[CPUID_EAX], + &regs[CPUID_EBX], &regs[CPUID_ECX], + &regs[CPUID_EDX]); if (regs[cb->reg] & (1 << cb->bit)) set_cpu_cap(c, cb->feature); } } + +u32 get_scattered_cpuid_leaf(unsigned int level, unsigned int sub_leaf, + enum cpuid_regs_idx reg) +{ + const struct cpuid_bit *cb; + u32 cpuid_val = 0; + + for (cb = cpuid_bits; cb->feature; cb++) { + + if (level > cb->level) + continue; + + if (level < cb->level) + break; + + if (reg == cb->reg
&& sub_leaf == cb->sub_leaf) { + if (cpu_has(&boot_cpu_data, cb->feature)) + cpuid_val |= BIT(cb->bit); + } + } + + return cpuid_val; +} +EXPORT_SYMBOL_GPL(get_scattered_cpuid_leaf); diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 5130985b758b..891f4dad7b2c 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -24,11 +24,16 @@ #include <linux/dmi.h> #include <linux/init.h> #include <linux/export.h> +#include <linux/clocksource.h> #include <asm/div64.h> #include <asm/x86_init.h> #include <asm/hypervisor.h> #include <asm/timer.h> #include <asm/apic.h> +#include <asm/timer.h> + +#undef pr_fmt +#define pr_fmt(fmt) "vmware: " fmt #define CPUID_VMWARE_INFO_LEAF 0x40000000 #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 @@ -48,6 +53,8 @@ "2"(VMWARE_HYPERVISOR_PORT), "3"(UINT_MAX) : \ "memory"); +static unsigned long vmware_tsc_khz __ro_after_init; + static inline int __vmware_platform(void) { uint32_t eax, ebx, ecx, edx; @@ -57,35 +64,80 @@ static inline int __vmware_platform(void) static unsigned long vmware_get_tsc_khz(void) { - uint64_t tsc_hz, lpj; - uint32_t eax, ebx, ecx, edx; + return vmware_tsc_khz; +} - VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); +#ifdef CONFIG_PARAVIRT +static struct cyc2ns_data vmware_cyc2ns __ro_after_init; +static int vmw_sched_clock __initdata = 1; - tsc_hz = eax | (((uint64_t)ebx) << 32); - do_div(tsc_hz, 1000); - BUG_ON(tsc_hz >> 32); - pr_info("TSC freq read from hypervisor : %lu.%03lu MHz\n", - (unsigned long) tsc_hz / 1000, - (unsigned long) tsc_hz % 1000); - - if (!preset_lpj) { - lpj = ((u64)tsc_hz * 1000); - do_div(lpj, HZ); - preset_lpj = lpj; - } +static __init int setup_vmw_sched_clock(char *s) +{ + vmw_sched_clock = 0; + return 0; +} +early_param("no-vmw-sched-clock", setup_vmw_sched_clock); + +static unsigned long long vmware_sched_clock(void) +{ + unsigned long long ns; - return tsc_hz; + ns = mul_u64_u32_shr(rdtsc(), vmware_cyc2ns.cyc2ns_mul, + vmware_cyc2ns.cyc2ns_shift); + ns 
-= vmware_cyc2ns.cyc2ns_offset; + return ns; } +static void __init vmware_sched_clock_setup(void) +{ + struct cyc2ns_data *d = &vmware_cyc2ns; + unsigned long long tsc_now = rdtsc(); + + clocks_calc_mult_shift(&d->cyc2ns_mul, &d->cyc2ns_shift, + vmware_tsc_khz, NSEC_PER_MSEC, 0); + d->cyc2ns_offset = mul_u64_u32_shr(tsc_now, d->cyc2ns_mul, + d->cyc2ns_shift); + + pv_time_ops.sched_clock = vmware_sched_clock; + pr_info("using sched offset of %llu ns\n", d->cyc2ns_offset); +} + +static void __init vmware_paravirt_ops_setup(void) +{ + pv_info.name = "VMware hypervisor"; + pv_cpu_ops.io_delay = paravirt_nop; + + if (vmware_tsc_khz && vmw_sched_clock) + vmware_sched_clock_setup(); +} +#else +#define vmware_paravirt_ops_setup() do {} while (0) +#endif + static void __init vmware_platform_setup(void) { uint32_t eax, ebx, ecx, edx; + uint64_t lpj, tsc_khz; VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); if (ebx != UINT_MAX) { + lpj = tsc_khz = eax | (((uint64_t)ebx) << 32); + do_div(tsc_khz, 1000); + WARN_ON(tsc_khz >> 32); + pr_info("TSC freq read from hypervisor : %lu.%03lu MHz\n", + (unsigned long) tsc_khz / 1000, + (unsigned long) tsc_khz % 1000); + + if (!preset_lpj) { + do_div(lpj, HZ); + preset_lpj = lpj; + } + + vmware_tsc_khz = tsc_khz; x86_platform.calibrate_tsc = vmware_get_tsc_khz; + x86_platform.calibrate_cpu = vmware_get_tsc_khz; + #ifdef CONFIG_X86_LOCAL_APIC /* Skip lapic calibration since we know the bus frequency. */ lapic_timer_frequency = ecx / HZ; @@ -96,6 +148,8 @@ static void __init vmware_platform_setup(void) pr_warn("Failed to get TSC freq from the hypervisor\n"); } + vmware_paravirt_ops_setup(); + #ifdef CONFIG_X86_IO_APIC no_timer_check = 1; #endif |