From 9554bfe403bdfc084823df8695a01f28c680af61 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 14 Feb 2020 14:27:15 -0800 Subject: x86/mce: Convert the CEC to use the MCE notifier The CEC code has its claws in a couple of routines in mce/core.c. Convert it to just register itself on the normal MCE notifier chain. [ bp: Make cec_add_elem() and cec_init() static. ] Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Tested-by: Tony Luck Link: https://lkml.kernel.org/r/20200214222720.13168-3-tony.luck@intel.com --- include/linux/ras.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/ras.h b/include/linux/ras.h index 7c3debb47c87..1f4048bf2674 100644 --- a/include/linux/ras.h +++ b/include/linux/ras.h @@ -17,12 +17,7 @@ static inline int ras_add_daemon_trace(void) { return 0; } #endif #ifdef CONFIG_RAS_CEC -void __init cec_init(void); int __init parse_cec_param(char *str); -int cec_add_elem(u64 pfn); -#else -static inline void __init cec_init(void) { } -static inline int cec_add_elem(u64 pfn) { return -ENODEV; } #endif #ifdef CONFIG_RAS -- cgit v1.2.3 From 7fc0b9b995f222646ece8d5bca528060c098ee88 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 14 Feb 2020 14:27:20 -0800 Subject: EDAC: Drop the EDAC report status checks When acpi_extlog was added, we were worried that the same error would be reported more than once by different subsystems. But in the ensuing years I've seen complaints that people could not find an error log (because this mechanism suppressed the log they were looking for). Rip it all out. People are smart enough to notice the same address from different reporting mechanisms. 
Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Tested-by: Tony Luck Link: https://lkml.kernel.org/r/20200214222720.13168-8-tony.luck@intel.com --- drivers/acpi/acpi_extlog.c | 14 ----------- drivers/edac/edac_mc.c | 61 ---------------------------------------------- drivers/edac/pnd2_edac.c | 3 --- drivers/edac/sb_edac.c | 4 --- drivers/edac/skx_common.c | 3 --- include/linux/edac.h | 8 ------ 6 files changed, 93 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index 9cc3c1f92db5..f138e12b7b82 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -42,8 +42,6 @@ struct extlog_l1_head { u8 rev1[12]; }; -static int old_edac_report_status; - static u8 extlog_dsm_uuid[] __initdata = "663E35AF-CC10-41A4-88EA-5470AF055295"; /* L1 table related physical address */ @@ -229,11 +227,6 @@ static int __init extlog_init(void) if (!(cap & MCG_ELOG_P) || !extlog_get_l1addr()) return -ENODEV; - if (edac_get_report_status() == EDAC_REPORTING_FORCE) { - pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n"); - return -EPERM; - } - rc = -EINVAL; /* get L1 header to fetch necessary information */ l1_hdr_size = sizeof(struct extlog_l1_head); @@ -281,12 +274,6 @@ static int __init extlog_init(void) if (elog_buf == NULL) goto err_release_elog; - /* - * eMCA event report method has higher priority than EDAC method, - * unless EDAC event report method is mandatory. 
- */ - old_edac_report_status = edac_get_report_status(); - edac_set_report_status(EDAC_REPORTING_DISABLED); mce_register_decode_chain(&extlog_mce_dec); /* enable OS to be involved to take over management from BIOS */ ((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN; @@ -308,7 +295,6 @@ err: static void __exit extlog_exit(void) { - edac_set_report_status(old_edac_report_status); mce_unregister_decode_chain(&extlog_mce_dec); ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN; if (extlog_l1_addr) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 75ede27bdf6a..5813e931f2f0 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -43,8 +43,6 @@ int edac_op_state = EDAC_OPSTATE_INVAL; EXPORT_SYMBOL_GPL(edac_op_state); -static int edac_report = EDAC_REPORTING_ENABLED; - /* lock to memory controller's control array */ static DEFINE_MUTEX(mem_ctls_mutex); static LIST_HEAD(mc_devices); @@ -60,65 +58,6 @@ static struct mem_ctl_info *error_desc_to_mci(struct edac_raw_error_desc *e) return container_of(e, struct mem_ctl_info, error_desc); } -int edac_get_report_status(void) -{ - return edac_report; -} -EXPORT_SYMBOL_GPL(edac_get_report_status); - -void edac_set_report_status(int new) -{ - if (new == EDAC_REPORTING_ENABLED || - new == EDAC_REPORTING_DISABLED || - new == EDAC_REPORTING_FORCE) - edac_report = new; -} -EXPORT_SYMBOL_GPL(edac_set_report_status); - -static int edac_report_set(const char *str, const struct kernel_param *kp) -{ - if (!str) - return -EINVAL; - - if (!strncmp(str, "on", 2)) - edac_report = EDAC_REPORTING_ENABLED; - else if (!strncmp(str, "off", 3)) - edac_report = EDAC_REPORTING_DISABLED; - else if (!strncmp(str, "force", 5)) - edac_report = EDAC_REPORTING_FORCE; - - return 0; -} - -static int edac_report_get(char *buffer, const struct kernel_param *kp) -{ - int ret = 0; - - switch (edac_report) { - case EDAC_REPORTING_ENABLED: - ret = sprintf(buffer, "on"); - break; - case 
EDAC_REPORTING_DISABLED: - ret = sprintf(buffer, "off"); - break; - case EDAC_REPORTING_FORCE: - ret = sprintf(buffer, "force"); - break; - default: - ret = -EINVAL; - break; - } - - return ret; -} - -static const struct kernel_param_ops edac_report_ops = { - .set = edac_report_set, - .get = edac_report_get, -}; - -module_param_cb(edac_report, &edac_report_ops, &edac_report, 0644); - unsigned int edac_dimm_info_location(struct dimm_info *dimm, char *buf, unsigned int len) { diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c index 1929a5dc8f94..c1f2e6deb021 100644 --- a/drivers/edac/pnd2_edac.c +++ b/drivers/edac/pnd2_edac.c @@ -1396,9 +1396,6 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo struct dram_addr daddr; char *type; - if (edac_get_report_status() == EDAC_REPORTING_DISABLED) - return NOTIFY_DONE; - mci = pnd2_mci; if (!mci || (mce->kflags & MCE_HANDLED_CEC)) return NOTIFY_DONE; diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index f790f7d08688..d414698ca324 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -3134,8 +3134,6 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, struct mem_ctl_info *mci; char *type; - if (edac_get_report_status() == EDAC_REPORTING_DISABLED) - return NOTIFY_DONE; if (mce->kflags & MCE_HANDLED_CEC) return NOTIFY_DONE; @@ -3526,8 +3524,6 @@ static int __init sbridge_init(void) if (rc >= 0) { mce_register_decode_chain(&sbridge_mce_dec); - if (edac_get_report_status() == EDAC_REPORTING_DISABLED) - sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n"); return 0; } diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 6f08a12f6b11..423d33aef54f 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -574,9 +574,6 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, struct mem_ctl_info *mci; char *type; - if (edac_get_report_status() == 
EDAC_REPORTING_DISABLED) - return NOTIFY_DONE; - if (mce->kflags & MCE_HANDLED_CEC) return NOTIFY_DONE; diff --git a/include/linux/edac.h b/include/linux/edac.h index 0f20b986b0ab..6eb7d55d7c3d 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -31,14 +31,6 @@ struct device; extern int edac_op_state; struct bus_type *edac_get_sysfs_subsys(void); -int edac_get_report_status(void); -void edac_set_report_status(int new); - -enum { - EDAC_REPORTING_ENABLED, - EDAC_REPORTING_DISABLED, - EDAC_REPORTING_FORCE -}; static inline void opstate_init(void) { -- cgit v1.2.3 From a4e91825d7e1252f7cba005f1451e5464b23c15d Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Sun, 10 May 2020 20:48:40 +0000 Subject: x86/amd_nb: Add AMD family 17h model 60h PCI IDs Add PCI IDs for AMD Renoir (4000-series Ryzen CPUs). This is necessary to enable support for temperature sensors via the k10temp module. Signed-off-by: Alexander Monakov Signed-off-by: Borislav Petkov Acked-by: Yazen Ghannam Acked-by: Guenter Roeck Link: https://lkml.kernel.org/r/20200510204842.2603-2-amonakov@ispras.ru --- arch/x86/kernel/amd_nb.c | 5 +++++ include/linux/pci_ids.h | 1 + 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index b6b3297851f3..18f6b7c4bd79 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -18,9 +18,11 @@ #define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450 #define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0 #define PCI_DEVICE_ID_AMD_17H_M30H_ROOT 0x1480 +#define PCI_DEVICE_ID_AMD_17H_M60H_ROOT 0x1630 #define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494 +#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F4 0x144c #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444 #define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654 @@ -33,6 +35,7 @@ static const struct pci_device_id amd_root_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_ROOT) 
}, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_ROOT) }, {} }; @@ -50,6 +53,7 @@ static const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) }, @@ -65,6 +69,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 1dfc4e1dcb94..3155f5ada02e 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -550,6 +550,7 @@ #define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 0x15eb #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F3 0x1493 +#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F3 0x144b #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F3 0x1443 #define PCI_DEVICE_ID_AMD_19H_DF_F3 0x1653 #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703 -- cgit v1.2.3 From 17fae1294ad9d711b2c3dd0edef479d40c76a5e8 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 20 May 2020 09:35:46 -0700 Subject: x86/{mce,mm}: Unmap the entire page if the 
whole page is affected and poisoned An interesting thing happened when a guest Linux instance took a machine check. The VMM unmapped the bad page from guest physical space and passed the machine check to the guest. Linux took all the normal actions to offline the page from the process that was using it. But then guest Linux crashed because it said there was a second machine check inside the kernel with this stack trace: do_memory_failure set_mce_nospec set_memory_uc _set_memory_uc change_page_attr_set_clr cpa_flush clflush_cache_range_opt This was odd, because a CLFLUSH instruction shouldn't raise a machine check (it isn't consuming the data). Further investigation showed that the VMM had passed in another machine check because it appeared that the guest was accessing the bad page. Fix is to check the scope of the poison by checking the MCi_MISC register. If the entire page is affected, then unmap the page. If only part of the page is affected, then mark the page as uncacheable. This assumes that VMMs will do the logical thing and pass in the "whole page scope" via the MCi_MISC register (since they unmapped the entire page). [ bp: Adjust to x86/entry changes. 
] Fixes: 284ce4011ba6 ("x86/memory_failure: Introduce {set, clear}_mce_nospec()") Reported-by: Jue Wang Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Tested-by: Jue Wang Cc: Link: https://lkml.kernel.org/r/20200520163546.GA7977@agluck-desk2.amr.corp.intel.com --- arch/x86/include/asm/set_memory.h | 19 +++++++++++++------ arch/x86/kernel/cpu/mce/core.c | 18 ++++++++++++++---- include/linux/sched.h | 4 +++- include/linux/set_memory.h | 2 +- 4 files changed, 31 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index ec2c0a094b5d..5948218f35c5 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -86,28 +86,35 @@ int set_direct_map_default_noflush(struct page *page); extern int kernel_set_to_readonly; #ifdef CONFIG_X86_64 -static inline int set_mce_nospec(unsigned long pfn) +/* + * Prevent speculative access to the page by either unmapping + * it (if we do not require access to any part of the page) or + * marking it uncacheable (if we want to try to retrieve data + * from non-poisoned lines in the page). + */ +static inline int set_mce_nospec(unsigned long pfn, bool unmap) { unsigned long decoy_addr; int rc; /* - * Mark the linear address as UC to make sure we don't log more - * errors because of speculative access to the page. * We would like to just call: - * set_memory_uc((unsigned long)pfn_to_kaddr(pfn), 1); + * set_memory_XX((unsigned long)pfn_to_kaddr(pfn), 1); * but doing that would radically increase the odds of a * speculative access to the poison page because we'd have * the virtual address of the kernel 1:1 mapping sitting * around in registers. * Instead we get tricky. We create a non-canonical address * that looks just like the one we want, but has bit 63 flipped. 
- * This relies on set_memory_uc() properly sanitizing any __pa() + * This relies on set_memory_XX() properly sanitizing any __pa() * results with __PHYSICAL_MASK or PTE_PFN_MASK. */ decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63)); - rc = set_memory_uc(decoy_addr, 1); + if (unmap) + rc = set_memory_np(decoy_addr, 1); + else + rc = set_memory_uc(decoy_addr, 1); if (rc) pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); return rc; diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 30413325de22..ce9120c4f740 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -520,6 +520,14 @@ bool mce_is_memory_error(struct mce *m) } EXPORT_SYMBOL_GPL(mce_is_memory_error); +static bool whole_page(struct mce *m) +{ + if (!mca_cfg.ser || !(m->status & MCI_STATUS_MISCV)) + return true; + + return MCI_MISC_ADDR_LSB(m->misc) >= PAGE_SHIFT; +} + bool mce_is_correctable(struct mce *m) { if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED) @@ -573,7 +581,7 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val, pfn = mce->addr >> PAGE_SHIFT; if (!memory_failure(pfn, 0)) { - set_mce_nospec(pfn); + set_mce_nospec(pfn, whole_page(mce)); mce->kflags |= MCE_HANDLED_UC; } @@ -1173,11 +1181,12 @@ static void kill_me_maybe(struct callback_head *cb) int flags = MF_ACTION_REQUIRED; pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr); - if (!(p->mce_status & MCG_STATUS_RIPV)) + + if (!p->mce_ripv) flags |= MF_MUST_KILL; if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags)) { - set_mce_nospec(p->mce_addr >> PAGE_SHIFT); + set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page); return; } @@ -1331,7 +1340,8 @@ void noinstr do_machine_check(struct pt_regs *regs) BUG_ON(!on_thread_stack() || !user_mode(regs)); current->mce_addr = m.addr; - current->mce_status = m.mcgstatus; + current->mce_ripv = !!(m.mcgstatus & MCG_STATUS_RIPV); + 
current->mce_whole_page = whole_page(&m); current->mce_kill_me.func = kill_me_maybe; if (kill_it) current->mce_kill_me.func = kill_me_now; diff --git a/include/linux/sched.h b/include/linux/sched.h index c5d96e3e7fff..62c1de522fc5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1304,7 +1304,9 @@ struct task_struct { #ifdef CONFIG_X86_MCE u64 mce_addr; - u64 mce_status; + __u64 mce_ripv : 1, + mce_whole_page : 1, + __mce_reserved : 62; struct callback_head mce_kill_me; #endif diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h index 86281ac7c305..860e0f843c12 100644 --- a/include/linux/set_memory.h +++ b/include/linux/set_memory.h @@ -26,7 +26,7 @@ static inline int set_direct_map_default_noflush(struct page *page) #endif #ifndef set_mce_nospec -static inline int set_mce_nospec(unsigned long pfn) +static inline int set_mce_nospec(unsigned long pfn, bool unmap) { return 0; } -- cgit v1.2.3