From f65aad41772f6a0022e9763fe06f47604449964c Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 17 Oct 2012 00:39:09 +0200 Subject: MIPS: Cavium: Add EDAC support. Drivers for EDAC on Cavium. Supported subsystems are: o CPU primary caches. These are parity protected only, so only error reporting. o Second level cache - ECC protected, provides SECDED. o Memory: ECC / SECDED if used with suitable DRAM modules. The driver will only initialize if ECC is enabled on a system, so it is safe to run on non-ECC memory. o PCI: Parity error reporting Since it is very hard to test this sort of code the implementation is very conservative and uses polling where possible for now. Signed-off-by: Ralf Baechle Reviewed-by: Borislav Petkov --- drivers/edac/Kconfig | 33 ++++++++- drivers/edac/Makefile | 5 ++ drivers/edac/octeon_edac-l2c.c | 118 ++++++++++++++++++++++++++++++++ drivers/edac/octeon_edac-lmc.c | 150 +++++++++++++++++++++++++++++++++++++++++ drivers/edac/octeon_edac-lmc.h | 78 +++++++++++++++++++++ drivers/edac/octeon_edac-pc.c | 140 ++++++++++++++++++++++++++++++++++++++ drivers/edac/octeon_edac-pci.c | 135 +++++++++++++++++++++++++++++++++++++ 7 files changed, 658 insertions(+), 1 deletion(-) create mode 100644 drivers/edac/octeon_edac-l2c.c create mode 100644 drivers/edac/octeon_edac-lmc.c create mode 100644 drivers/edac/octeon_edac-lmc.h create mode 100644 drivers/edac/octeon_edac-pc.c create mode 100644 drivers/edac/octeon_edac-pci.c (limited to 'drivers') diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 409b92b8d346..a9db20815a39 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -7,7 +7,7 @@ menuconfig EDAC bool "EDAC (Error Detection And Correction) reporting" depends on HAS_IOMEM - depends on X86 || PPC || TILE || ARM + depends on X86 || PPC || TILE || ARM || EDAC_SUPPORT help EDAC is designed to report errors in the core system. 
These are low-level errors that are reported in the CPU or @@ -27,6 +27,9 @@ menuconfig EDAC There is also a mailing list for the EDAC project, which can be found via the sourceforge page. +config EDAC_SUPPORT + bool + if EDAC comment "Reporting subsystems" @@ -316,4 +319,32 @@ config EDAC_HIGHBANK_L2 Support for error detection and correction on the Calxeda Highbank memory controller. +config EDAC_OCTEON_PC + tristate "Cavium Octeon Primary Caches" + depends on EDAC_MM_EDAC && CPU_CAVIUM_OCTEON + help + Support for error detection and correction on the primary caches of + the cnMIPS cores of Cavium Octeon family SOCs. + +config EDAC_OCTEON_L2C + tristate "Cavium Octeon Secondary Caches (L2C)" + depends on EDAC_MM_EDAC && CPU_CAVIUM_OCTEON + help + Support for error detection and correction on the + Cavium Octeon family of SOCs. + +config EDAC_OCTEON_LMC + tristate "Cavium Octeon DRAM Memory Controller (LMC)" + depends on EDAC_MM_EDAC && CPU_CAVIUM_OCTEON + help + Support for error detection and correction on the + Cavium Octeon family of SOCs. + +config EDAC_OCTEON_PCI + tristate "Cavium Octeon PCI Controller" + depends on EDAC_MM_EDAC && PCI && CPU_CAVIUM_OCTEON + help + Support for error detection and correction on the + Cavium Octeon family of SOCs. 
+ endif # EDAC diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 7e5129a733f8..5608a9ba61b7 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -58,3 +58,8 @@ obj-$(CONFIG_EDAC_TILE) += tile_edac.o obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o + +obj-$(CONFIG_EDAC_OCTEON_PC) += octeon_edac-pc.o +obj-$(CONFIG_EDAC_OCTEON_L2C) += octeon_edac-l2c.o +obj-$(CONFIG_EDAC_OCTEON_LMC) += octeon_edac-lmc.o +obj-$(CONFIG_EDAC_OCTEON_PCI) += octeon_edac-pci.o diff --git a/drivers/edac/octeon_edac-l2c.c b/drivers/edac/octeon_edac-l2c.c new file mode 100644 index 000000000000..5f459aa451bf --- /dev/null +++ b/drivers/edac/octeon_edac-l2c.c @@ -0,0 +1,118 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2009 Wind River Systems, + * written by Ralf Baechle + */ +#include +#include +#include +#include +#include + +#include + +#include "edac_core.h" +#include "edac_module.h" + +#define EDAC_MOD_STR "octeon-l2c" + +static void co_l2c_poll(struct edac_device_ctl_info *l2c) +{ + union cvmx_l2t_err l2t_err; + + l2t_err.u64 = cvmx_read_csr(CVMX_L2T_ERR); + if (l2t_err.s.sec_err) { + edac_device_handle_ce(l2c, 0, 0, + "Single bit error (corrected)"); + l2t_err.s.sec_err = 1; /* Reset */ + cvmx_write_csr(CVMX_L2T_ERR, l2t_err.u64); + } + if (l2t_err.s.ded_err) { + edac_device_handle_ue(l2c, 0, 0, + "Double bit error (corrected)"); + l2t_err.s.ded_err = 1; /* Reset */ + cvmx_write_csr(CVMX_L2T_ERR, l2t_err.u64); + } +} + +static int __devinit co_l2c_probe(struct platform_device *pdev) +{ + struct edac_device_ctl_info *l2c; + union cvmx_l2t_err l2t_err; + int res = 0; + + l2c = edac_device_alloc_ctl_info(0, "l2c", 1, NULL, 0, 0, + NULL, 0, edac_device_alloc_index()); + if (!l2c) + return -ENOMEM; + + l2c->dev = &pdev->dev; + platform_set_drvdata(pdev, 
l2c); + l2c->dev_name = dev_name(&pdev->dev); + + l2c->mod_name = "octeon-l2c"; + l2c->ctl_name = "octeon_l2c_err"; + l2c->edac_check = co_l2c_poll; + + if (edac_device_add_device(l2c) > 0) { + pr_err("%s: edac_device_add_device() failed\n", __func__); + goto err; + } + + l2t_err.u64 = cvmx_read_csr(CVMX_L2T_ERR); + l2t_err.s.sec_intena = 0; /* We poll */ + l2t_err.s.ded_intena = 0; + l2t_err.s.sec_err = 1; /* Clear, just in case */ + l2t_err.s.ded_err = 1; + cvmx_write_csr(CVMX_L2T_ERR, l2t_err.u64); + + return 0; + +err: + edac_device_free_ctl_info(l2c); + + return res; +} + +static int co_l2c_remove(struct platform_device *pdev) +{ + struct edac_device_ctl_info *l2c = platform_get_drvdata(pdev); + + edac_device_del_device(&pdev->dev); + edac_device_free_ctl_info(l2c); + + return 0; +} + +static struct platform_driver co_l2c_driver = { + .probe = co_l2c_probe, + .remove = co_l2c_remove, + .driver = { + .name = "co_l2c_edac", + } +}; + +static int __init co_edac_init(void) +{ + int ret; + + ret = platform_driver_register(&co_l2c_driver); + if (ret) + pr_warning(EDAC_MOD_STR " EDAC failed to register\n"); + + return ret; +} + +static void __exit co_edac_exit(void) +{ + platform_driver_unregister(&co_l2c_driver); +} + +module_init(co_edac_init); +module_exit(co_edac_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ralf Baechle "); diff --git a/drivers/edac/octeon_edac-lmc.c b/drivers/edac/octeon_edac-lmc.c new file mode 100644 index 000000000000..e0c1e44187bc --- /dev/null +++ b/drivers/edac/octeon_edac-lmc.c @@ -0,0 +1,150 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
+ *
+ * Copyright (C) 2009 Wind River Systems,
+ * written by Ralf Baechle
+ */
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include "edac_core.h"
+#include "edac_module.h"
+#include "octeon_edac-lmc.h"
+
+#define EDAC_MOD_STR "octeon"
+
+/* Memory controller registered by co_lmc_probe(), NULL when unbound. */
+static struct mem_ctl_info *mc_cavium;
+/* Mapping of the LMC register block; set up and torn down by module init/exit. */
+static void __iomem *lmc_base;
+
+/*
+ * co_lmc_poll - polling callback installed as mci->edac_check.
+ * @mci: memory controller the events are reported against.
+ *
+ * Reads LMC_FADR and LMC_MEM_CFG0.  If neither sec_err nor ded_err is set
+ * the register is left alone.  Otherwise the failing location from LMC_FADR
+ * is formatted into the message, sec_err is reported as a corrected event,
+ * ded_err as an uncorrected one, and the reported error fields are written
+ * back to re-arm them.
+ *
+ * The interrupt enables are deliberately not touched here: co_lmc_probe()
+ * turns them off because this driver polls, and setting them from the
+ * poller would silently undo that.
+ *
+ * NOTE(review): re-arming assumes sec_err/ded_err are write-1-to-clear,
+ * like the status flags reset by the other Octeon pollers in this patch -
+ * confirm against the hardware manual.
+ */
+static void co_lmc_poll(struct mem_ctl_info *mci)
+{
+	union lmc_mem_cfg0 cfg0;
+	union lmc_fadr fadr;
+	char msg[64];
+
+	fadr.u64 = readq(lmc_base + LMC_FADR);
+	cfg0.u64 = readq(lmc_base + LMC_MEM_CFG0);
+	if (!cfg0.sec_err && !cfg0.ded_err)
+		return;		/* Nothing latched, no write-back needed */
+
+	snprintf(msg, sizeof(msg), "DIMM %d rank %d bank %d row %d col %d",
+		 fadr.fdimm, fadr.fbunk, fadr.fbank, fadr.frow, fadr.fcol);
+
+	if (cfg0.sec_err) {
+		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0, -1, -1, -1,
+				     msg, "");
+		cfg0.sec_err = -1;		/* Done, re-arm */
+	}
+
+	if (cfg0.ded_err) {
+		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, -1, -1, -1,
+				     msg, "");
+		cfg0.ded_err = -1;		/* Done, re-arm */
+	}
+
+	writeq(cfg0.u64, lmc_base + LMC_MEM_CFG0);
+}
+
+/*
+ * co_lmc_probe - allocate and register the LMC memory controller.
+ * @pdev: platform device being bound.
+ *
+ * After registration the ded/sec interrupt enables in LMC_MEM_CFG0 are
+ * cleared because errors are polled.
+ *
+ * Returns 0 on success, -ENOMEM if edac_mc_alloc() fails, or -ENXIO if
+ * edac_mc_add_mc() fails.
+ *
+ * NOTE(review): the controller is allocated with zero layers and no private
+ * data - confirm edac_mc_alloc() accepts that.
+ */
+static int __devinit co_lmc_probe(struct platform_device *pdev)
+{
+	struct mem_ctl_info *mci;
+	union lmc_mem_cfg0 cfg0;
+	int res = 0;
+
+	mci = edac_mc_alloc(0, 0, 0, 0);
+	if (!mci)
+		return -ENOMEM;
+
+	mci->pdev = &pdev->dev;
+	platform_set_drvdata(pdev, mci);
+	mci->dev_name = dev_name(&pdev->dev);
+
+	mci->mod_name = "octeon-lmc";
+	mci->ctl_name = "co_lmc_err";
+	mci->edac_check = co_lmc_poll;
+
+	if (edac_mc_add_mc(mci) > 0) {
+		pr_err("%s: edac_mc_add_mc() failed\n", __func__);
+		/*
+		 * Report the failure: returning 0 would leave the device
+		 * bound to an mci that is freed below.
+		 */
+		res = -ENXIO;
+		goto err;
+	}
+
+	cfg0.u64 = readq(lmc_base + LMC_MEM_CFG0);	/* We poll */
+	cfg0.intr_ded_ena = 0;
+	cfg0.intr_sec_ena = 0;
+	writeq(cfg0.u64, lmc_base + LMC_MEM_CFG0);
+
+	mc_cavium = mci;
+
+	return 0;
+
+err:
+	edac_mc_free(mci);
+
+	return res;
+}
+
+/*
+ * co_lmc_remove - unregister and free the memory controller stored in the
+ * platform device's driver data.  Always returns 0.
+ */
+static int co_lmc_remove(struct platform_device *pdev)
+{
+	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
+
+	mc_cavium = NULL;
+	edac_mc_del_mc(&pdev->dev);
+	edac_mc_free(mci);
+
+	return 0;
+}
+ +static struct platform_driver co_lmc_driver = { + .probe = co_lmc_probe, + .remove = co_lmc_remove, + .driver = { + .name = "co_lmc_edac", + } +}; + +static int __init co_edac_init(void) +{ + union lmc_mem_cfg0 cfg0; + int ret; + + lmc_base = ioremap_nocache(LMC_BASE, LMC_SIZE); + if (!lmc_base) + return -ENOMEM; + + cfg0.u64 = readq(lmc_base + LMC_MEM_CFG0); + if (!cfg0.ecc_ena) { + pr_info(EDAC_MOD_STR " LMC EDAC: ECC disabled, good bye\n"); + ret = -ENODEV; + goto out; + } + + ret = platform_driver_register(&co_lmc_driver); + if (ret) { + pr_warning(EDAC_MOD_STR " LMC EDAC failed to register\n"); + goto out; + } + + return ret; + +out: + iounmap(lmc_base); + + return ret; +} + +static void __exit co_edac_exit(void) +{ + platform_driver_unregister(&co_lmc_driver); + iounmap(lmc_base); +} + +module_init(co_edac_init); +module_exit(co_edac_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ralf Baechle "); diff --git a/drivers/edac/octeon_edac-lmc.h b/drivers/edac/octeon_edac-lmc.h new file mode 100644 index 000000000000..246dc525bc10 --- /dev/null +++ b/drivers/edac/octeon_edac-lmc.h @@ -0,0 +1,78 @@ +/* + * LMC Registers, see chapter 2.5 + * + * These are RSL Type registers and are accessed indirectly across the + * I/O bus, so accesses are slowish. Not that it matters. Any size load is + * ok but stores must be 64-bit. 
+ */ +#define LMC_BASE 0x0001180088000000 +#define LMC_SIZE 0xb8 + +#define LMC_MEM_CFG0 0x0000000000000000 +#define LMC_MEM_CFG1 0x0000000000000008 +#define LMC_CTL 0x0000000000000010 +#define LMC_DDR2_CTL 0x0000000000000018 +#define LMC_FADR 0x0000000000000020 +#define LMC_FADR_FDIMM +#define LMC_FADR_FBUNK +#define LMC_FADR_FBANK +#define LMC_FADR_FROW +#define LMC_FADR_FCOL +#define LMC_COMP_CTL 0x0000000000000028 +#define LMC_WODT_CTL 0x0000000000000030 +#define LMC_ECC_SYND 0x0000000000000038 +#define LMC_IFB_CNT_LO 0x0000000000000048 +#define LMC_IFB_CNT_HI 0x0000000000000050 +#define LMC_OPS_CNT_LO 0x0000000000000058 +#define LMC_OPS_CNT_HI 0x0000000000000060 +#define LMC_DCLK_CNT_LO 0x0000000000000068 +#define LMC_DCLK_CNT_HI 0x0000000000000070 +#define LMC_DELAY_CFG 0x0000000000000088 +#define LMC_CTL1 0x0000000000000090 +#define LMC_DUAL_MEM_CONFIG 0x0000000000000098 +#define LMC_RODT_COMP_CTL 0x00000000000000A0 +#define LMC_PLL_CTL 0x00000000000000A8 +#define LMC_PLL_STATUS 0x00000000000000B0 + +union lmc_mem_cfg0 { + uint64_t u64; + struct { + uint64_t reserved_32_63:32; + uint64_t reset:1; + uint64_t silo_qc:1; + uint64_t bunk_ena:1; + uint64_t ded_err:4; + uint64_t sec_err:4; + uint64_t intr_ded_ena:1; + uint64_t intr_sec_ena:1; + uint64_t reserved_15_18:4; + uint64_t ref_int:5; + uint64_t pbank_lsb:4; + uint64_t row_lsb:3; + uint64_t ecc_ena:1; + uint64_t init_start:1; + }; +}; + +union lmc_fadr { + uint64_t u64; + struct { + uint64_t reserved_32_63:32; + uint64_t fdimm:2; + uint64_t fbunk:1; + uint64_t fbank:3; + uint64_t frow:14; + uint64_t fcol:12; + }; +}; + +union lmc_ecc_synd { + uint64_t u64; + struct { + uint64_t reserved_32_63:32; + uint64_t mrdsyn3:8; + uint64_t mrdsyn2:8; + uint64_t mrdsyn1:8; + uint64_t mrdsyn0:8; + }; +}; diff --git a/drivers/edac/octeon_edac-pc.c b/drivers/edac/octeon_edac-pc.c new file mode 100644 index 000000000000..9d13061744e4 --- /dev/null +++ b/drivers/edac/octeon_edac-pc.c @@ -0,0 +1,140 @@ +/* + * This file is 
subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2009 Wind River Systems, + * written by Ralf Baechle + */ +#include +#include +#include +#include +#include +#include + +#include "edac_core.h" +#include "edac_module.h" + +#include +#include + +#define EDAC_MOD_STR "octeon" + +extern int register_co_cache_error_notifier(struct notifier_block *nb); +extern int unregister_co_cache_error_notifier(struct notifier_block *nb); + +extern unsigned long long cache_err_dcache[NR_CPUS]; + +static struct edac_device_ctl_info *ed_cavium; + +/* + * EDAC CPU cache error callback + * + */ + +static int co_cache_error_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + unsigned int core = cvmx_get_core_num(); + unsigned int cpu = smp_processor_id(); + uint64_t icache_err = read_octeon_c0_icacheerr(); + struct edac_device_ctl_info *ed = ed_cavium; + + edac_device_printk(ed, KERN_ERR, + "Cache error exception on core %d / processor %d:\n", + core, cpu); + edac_device_printk(ed, KERN_ERR, + "cp0_errorepc == %lx\n", read_c0_errorepc()); + if (icache_err & 1) { + edac_device_printk(ed, KERN_ERR, "CacheErr (Icache) == %llx\n", + (unsigned long long)icache_err); + write_octeon_c0_icacheerr(0); + edac_device_handle_ce(ed, 0, 0, ed->ctl_name); + } + if (cache_err_dcache[core] & 1) { + edac_device_printk(ed, KERN_ERR, "CacheErr (Dcache) == %llx\n", + (unsigned long long)cache_err_dcache[core]); + cache_err_dcache[core] = 0; + edac_device_handle_ue(ed, 0, 0, ed->ctl_name); + } + + return NOTIFY_DONE; +} + +static struct notifier_block co_cache_error_notifier = { + .notifier_call = co_cache_error_event, +}; + +static int __devinit co_cache_error_probe(struct platform_device *pdev) +{ + struct edac_device_ctl_info *ed; + int res = 0; + + ed = edac_device_alloc_ctl_info(0, "cpu", 1, NULL, 0, 0, NULL, 0, + edac_device_alloc_index()); + + ed->dev = 
&pdev->dev; + platform_set_drvdata(pdev, ed); + ed->dev_name = dev_name(&pdev->dev); + + ed->mod_name = "octeon-cpu"; + ed->ctl_name = "co_cpu_err"; + + if (edac_device_add_device(ed) > 0) { + pr_err("%s: edac_device_add_device() failed\n", __func__); + goto err; + } + + register_co_cache_error_notifier(&co_cache_error_notifier); + ed_cavium = ed; + + return 0; + +err: + edac_device_free_ctl_info(ed); + + return res; +} + +static int co_cache_error_remove(struct platform_device *pdev) +{ + struct edac_device_ctl_info *ed = platform_get_drvdata(pdev); + + unregister_co_cache_error_notifier(&co_cache_error_notifier); + ed_cavium = NULL; + edac_device_del_device(&pdev->dev); + edac_device_free_ctl_info(ed); + + return 0; +} + +static struct platform_driver co_cache_error_driver = { + .probe = co_cache_error_probe, + .remove = co_cache_error_remove, + .driver = { + .name = "co_pc_edac", + } +}; + +static int __init co_edac_init(void) +{ + int ret; + + ret = platform_driver_register(&co_cache_error_driver); + if (ret) + pr_warning(EDAC_MOD_STR "CPU err failed to register\n"); + + return ret; +} + +static void __exit co_edac_exit(void) +{ + platform_driver_unregister(&co_cache_error_driver); +} + +module_init(co_edac_init); +module_exit(co_edac_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ralf Baechle "); diff --git a/drivers/edac/octeon_edac-pci.c b/drivers/edac/octeon_edac-pci.c new file mode 100644 index 000000000000..e72b96e3e4e0 --- /dev/null +++ b/drivers/edac/octeon_edac-pci.c @@ -0,0 +1,135 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
+ * + * Copyright (C) 2009 Wind River Systems, + * written by Ralf Baechle + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "edac_core.h" +#include "edac_module.h" + +#define EDAC_MOD_STR "octeon" + +static void co_pci_poll(struct edac_pci_ctl_info *pci) +{ + union cvmx_pci_cfg01 cfg01; + + cfg01.u32 = octeon_npi_read32(CVMX_NPI_PCI_CFG01); + if (cfg01.s.dpe) { /* Detected parity error */ + edac_pci_handle_pe(pci, pci->ctl_name); + cfg01.s.dpe = 1; /* Reset */ + octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32); + } + if (cfg01.s.sse) { + edac_pci_handle_npe(pci, "Signaled System Error"); + cfg01.s.sse = 1; /* Reset */ + octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32); + } + if (cfg01.s.rma) { + edac_pci_handle_npe(pci, "Received Master Abort"); + cfg01.s.rma = 1; /* Reset */ + octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32); + } + if (cfg01.s.rta) { + edac_pci_handle_npe(pci, "Received Target Abort"); + cfg01.s.rta = 1; /* Reset */ + octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32); + } + if (cfg01.s.sta) { + edac_pci_handle_npe(pci, "Signaled Target Abort"); + cfg01.s.sta = 1; /* Reset */ + octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32); + } + if (cfg01.s.mdpe) { + edac_pci_handle_npe(pci, "Master Data Parity Error"); + cfg01.s.mdpe = 1; /* Reset */ + octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32); + } + if (cfg01.s.mdpe) { + edac_pci_handle_npe(pci, "Master Data Parity Error"); + cfg01.s.mdpe = 1; /* Reset */ + octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32); + } +} + +static int __devinit co_pci_probe(struct platform_device *pdev) +{ + struct edac_pci_ctl_info *pci; + int res = 0; + + pci = edac_pci_alloc_ctl_info(0, "octeon_pci_err"); + if (!pci) + return -ENOMEM; + + pci->dev = &pdev->dev; + platform_set_drvdata(pdev, pci); + pci->dev_name = dev_name(&pdev->dev); + + pci->mod_name = "octeon-pci"; + pci->ctl_name = "octeon_pci_err"; + pci->edac_check = co_pci_poll; + + if 
(edac_pci_add_device(pci, 0) > 0) { + pr_err("%s: edac_pci_add_device() failed\n", __func__); + goto err; + } + + return 0; + +err: + edac_pci_free_ctl_info(pci); + + return res; +} + +static int co_pci_remove(struct platform_device *pdev) +{ + struct edac_pci_ctl_info *pci = platform_get_drvdata(pdev); + + edac_pci_del_device(&pdev->dev); + edac_pci_free_ctl_info(pci); + + return 0; +} + +static struct platform_driver co_pci_driver = { + .probe = co_pci_probe, + .remove = co_pci_remove, + .driver = { + .name = "co_pci_edac", + } +}; + +static int __init co_edac_init(void) +{ + int ret; + + ret = platform_driver_register(&co_pci_driver); + if (ret) + pr_warning(EDAC_MOD_STR " PCI EDAC failed to register\n"); + + return ret; +} + +static void __exit co_edac_exit(void) +{ + platform_driver_unregister(&co_pci_driver); +} + +module_init(co_edac_init); +module_exit(co_edac_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ralf Baechle "); -- cgit v1.2.3