From e1ced09797776dfd4a2a7b04b9ee7e97ab1e64be Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 15 Nov 2012 13:58:59 -0800 Subject: [PATCH] MIPS/EDAC: Improve OCTEON EDAC support. Some initialization errors are reported with the existing OCTEON EDAC support patch. Also some parts have more than one memory controller. Fix the errors and add multiple controllers if present. Signed-off-by: David Daney --- arch/mips/cavium-octeon/setup.c | 30 ++++- arch/mips/mm/c-octeon.c | 39 ++++-- arch/mips/pci/pci-octeon.c | 3 +- drivers/edac/octeon_edac-l2c.c | 178 ++++++++++++++++++------ drivers/edac/octeon_edac-lmc.c | 230 ++++++++++++++++++-------------- drivers/edac/octeon_edac-lmc.h | 78 ----------- drivers/edac/octeon_edac-pc.c | 143 ++++++++++---------- drivers/edac/octeon_edac-pci.c | 44 ++---- 8 files changed, 408 insertions(+), 337 deletions(-) delete mode 100644 drivers/edac/octeon_edac-lmc.h diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index 7c2b7aab77ba..d7e0a09f77c2 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -1112,18 +1112,30 @@ void __init device_tree_init(void) unflatten_device_tree(); } +static int __initdata disable_octeon_edac_p; + +static int __init disable_octeon_edac(char *str) +{ + disable_octeon_edac_p = 1; + return 0; +} +early_param("disable_octeon_edac", disable_octeon_edac); + static char *edac_device_names[] = { - "co_l2c_edac", - "co_lmc_edac", - "co_pc_edac", + "octeon_l2c_edac", + "octeon_pc_edac", }; static int __init edac_devinit(void) { struct platform_device *dev; int i, err = 0; + int num_lmc; char *name; + if (disable_octeon_edac_p) + return 0; + for (i = 0; i < ARRAY_SIZE(edac_device_names); i++) { name = edac_device_names[i]; dev = platform_device_register_simple(name, -1, NULL, 0); @@ -1133,7 +1145,17 @@ static int __init edac_devinit(void) } } + num_lmc = OCTEON_IS_MODEL(OCTEON_CN68XX) ? 4 : + (OCTEON_IS_MODEL(OCTEON_CN56XX) ? 2 : 1); + for (i = 0; i < num_lmc; i++) { + dev = platform_device_register_simple("octeon_lmc_edac", + i, NULL, 0); + if (IS_ERR(dev)) { + pr_err("Registation of octeon_lmc_edac %d failed!\n", i); + err = PTR_ERR(dev); + } + } + return err; } - device_initcall(edac_devinit); diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c index 9f67553762d5..6ec04daf4231 100644 --- a/arch/mips/mm/c-octeon.c +++ b/arch/mips/mm/c-octeon.c @@ -286,10 +286,9 @@ void __cpuinit octeon_cache_init(void) board_cache_error_setup = octeon_cache_error_setup; } -/** +/* * Handle a cache error exception */ - static RAW_NOTIFIER_HEAD(co_cache_error_chain); int register_co_cache_error_notifier(struct notifier_block *nb) @@ -304,14 +303,39 @@ int unregister_co_cache_error_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(unregister_co_cache_error_notifier); -static inline int co_cache_error_call_notifiers(unsigned long val) +static void co_cache_error_call_notifiers(unsigned long val) { - return raw_notifier_call_chain(&co_cache_error_chain, val, NULL); + int rv = raw_notifier_call_chain(&co_cache_error_chain, val, NULL); + if ((rv & ~NOTIFY_STOP_MASK) != NOTIFY_OK) { + u64 dcache_err; + unsigned long coreid = cvmx_get_core_num(); + u64 icache_err = read_octeon_c0_icacheerr(); + + if (val) { + dcache_err = cache_err_dcache[coreid]; + cache_err_dcache[coreid] = 0; + } else { + dcache_err = read_octeon_c0_dcacheerr(); + } + + pr_err("Core%lu: Cache error exception:\n", coreid); + pr_err("cp0_errorepc == %lx\n", read_c0_errorepc()); + if (icache_err & 1) { + pr_err("CacheErr (Icache) == %llx\n", + (unsigned long long)icache_err); + write_octeon_c0_icacheerr(0); + } + if (dcache_err & 1) { + pr_err("CacheErr (Dcache) == %llx\n", + (unsigned long long)dcache_err); + } + } } -/** +/* * Called when the the exception is recoverable */ + asmlinkage void cache_parity_error_octeon_recoverable(void) { co_cache_error_call_notifiers(0); @@ -319,11 +343,8 @@ asmlinkage void cache_parity_error_octeon_recoverable(void) /** * Called when the the exception is not recoverable - * - * The issue not that the cache error exception itself was non-recoverable - * but that due to nesting of exception may have lost some state so can't - * continue. */ + asmlinkage void cache_parity_error_octeon_non_recoverable(void) { co_cache_error_call_notifiers(1); diff --git a/arch/mips/pci/pci-octeon.c b/arch/mips/pci/pci-octeon.c index 8eb2ee345d03..5b5ed76c6f47 100644 --- a/arch/mips/pci/pci-octeon.c +++ b/arch/mips/pci/pci-octeon.c @@ -705,7 +705,8 @@ static int __init octeon_pci_setup(void) */ cvmx_write_csr(CVMX_NPI_PCI_INT_SUM2, -1); - if (IS_ERR(platform_device_register_simple("co_pci_edac", 0, NULL, 0))) + if (IS_ERR(platform_device_register_simple("octeon_pci_edac", + -1, NULL, 0))) pr_err("Registation of co_pci_edac failed!\n"); octeon_pci_dma_init(); diff --git a/drivers/edac/octeon_edac-l2c.c b/drivers/edac/octeon_edac-l2c.c index 5f459aa451bf..40fde6a51ed6 100644 --- a/drivers/edac/octeon_edac-l2c.c +++ b/drivers/edac/octeon_edac-l2c.c @@ -3,6 +3,8 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * + * Copyright (C) 2012 Cavium, Inc. + * * Copyright (C) 2009 Wind River Systems, * written by Ralf Baechle */ @@ -19,32 +21,124 @@ #define EDAC_MOD_STR "octeon-l2c" -static void co_l2c_poll(struct edac_device_ctl_info *l2c) +static void octeon_l2c_poll_oct1(struct edac_device_ctl_info *l2c) { - union cvmx_l2t_err l2t_err; + union cvmx_l2t_err l2t_err, l2t_err_reset; + union cvmx_l2d_err l2d_err, l2d_err_reset; + l2t_err_reset.u64 = 0; l2t_err.u64 = cvmx_read_csr(CVMX_L2T_ERR); if (l2t_err.s.sec_err) { edac_device_handle_ce(l2c, 0, 0, - "Single bit error (corrected)"); - l2t_err.s.sec_err = 1; /* Reset */ - cvmx_write_csr(CVMX_L2T_ERR, l2t_err.u64); + "Tag Single bit error (corrected)"); + l2t_err_reset.s.sec_err = 1; } if (l2t_err.s.ded_err) { edac_device_handle_ue(l2c, 0, 0, - "Double bit error (corrected)"); - l2t_err.s.ded_err = 1; /* Reset */ - cvmx_write_csr(CVMX_L2T_ERR, l2t_err.u64); + "Tag Double bit error (detected)"); + l2t_err_reset.s.ded_err = 1; } + if (l2t_err_reset.u64) + cvmx_write_csr(CVMX_L2T_ERR, l2t_err_reset.u64); + + l2d_err_reset.u64 = 0; + l2d_err.u64 = cvmx_read_csr(CVMX_L2D_ERR); + if (l2d_err.s.sec_err) { + edac_device_handle_ce(l2c, 0, 1, + "Data Single bit error (corrected)"); + l2d_err_reset.s.sec_err = 1; + } + if (l2d_err.s.ded_err) { + edac_device_handle_ue(l2c, 0, 1, + "Data Double bit error (detected)"); + l2d_err_reset.s.ded_err = 1; + } + if (l2d_err_reset.u64) + cvmx_write_csr(CVMX_L2D_ERR, l2d_err_reset.u64); + } -static int __devinit co_l2c_probe(struct platform_device *pdev) +static void _octeon_l2c_poll_oct2(struct edac_device_ctl_info *l2c, int tad) +{ + union cvmx_l2c_err_tdtx err_tdtx, err_tdtx_reset; + union cvmx_l2c_err_ttgx err_ttgx, err_ttgx_reset; + char buf1[64]; + char buf2[80]; + + err_tdtx_reset.u64 = 0; + err_tdtx.u64 = cvmx_read_csr(CVMX_L2C_ERR_TDTX(tad)); + if (err_tdtx.s.dbe || err_tdtx.s.sbe || + err_tdtx.s.vdbe || err_tdtx.s.vsbe) + snprintf(buf1, sizeof(buf1), + "type:%d, syn:0x%x, way:%d", + err_tdtx.s.type, err_tdtx.s.syn, err_tdtx.s.wayidx); + + if (err_tdtx.s.dbe) { + snprintf(buf2, sizeof(buf2), + "L2D Double bit error (detected):%s", buf1); + err_tdtx_reset.s.dbe = 1; + edac_device_handle_ue(l2c, tad, 1, buf2); + } + if (err_tdtx.s.sbe) { + snprintf(buf2, sizeof(buf2), + "L2D Single bit error (corrected):%s", buf1); + err_tdtx_reset.s.sbe = 1; + edac_device_handle_ce(l2c, tad, 1, buf2); + } + if (err_tdtx.s.vdbe) { + snprintf(buf2, sizeof(buf2), + "VBF Double bit error (detected):%s", buf1); + err_tdtx_reset.s.vdbe = 1; + edac_device_handle_ue(l2c, tad, 1, buf2); + } + if (err_tdtx.s.vsbe) { + snprintf(buf2, sizeof(buf2), + "VBF Single bit error (corrected):%s", buf1); + err_tdtx_reset.s.vsbe = 1; + edac_device_handle_ce(l2c, tad, 1, buf2); + } + if (err_tdtx_reset.u64) + cvmx_write_csr(CVMX_L2C_ERR_TDTX(tad), err_tdtx_reset.u64); + + err_ttgx_reset.u64 = 0; + err_ttgx.u64 = cvmx_read_csr(CVMX_L2C_ERR_TTGX(tad)); + + if (err_ttgx.s.dbe || err_ttgx.s.sbe) + snprintf(buf1, sizeof(buf1), + "type:%d, syn:0x%x, way:%d", + err_ttgx.s.type, err_ttgx.s.syn, err_ttgx.s.wayidx); + + if (err_ttgx.s.dbe) { + snprintf(buf2, sizeof(buf2), + "Tag Double bit error (detected):%s", buf1); + err_ttgx_reset.s.dbe = 1; + edac_device_handle_ue(l2c, tad, 0, buf2); + } + if (err_ttgx.s.sbe) { + snprintf(buf2, sizeof(buf2), + "Tag Single bit error (corrected):%s", buf1); + err_ttgx_reset.s.sbe = 1; + edac_device_handle_ce(l2c, tad, 0, buf2); + } + if (err_ttgx_reset.u64) + cvmx_write_csr(CVMX_L2C_ERR_TTGX(tad), err_ttgx_reset.u64); +} + +static void octeon_l2c_poll_oct2(struct edac_device_ctl_info *l2c) +{ + int i; + for (i = 0; i < l2c->nr_instances; i++) + _octeon_l2c_poll_oct2(l2c, i); +} + +static int __devinit octeon_l2c_probe(struct platform_device *pdev) { struct edac_device_ctl_info *l2c; - union cvmx_l2t_err l2t_err; - int res = 0; - l2c = edac_device_alloc_ctl_info(0, "l2c", 1, NULL, 0, 0, + int num_tads = OCTEON_IS_MODEL(OCTEON_CN68XX) ? 4 : 1; + + /* 'Tags' are block 0, 'Data' is block 1*/ + l2c = edac_device_alloc_ctl_info(0, "l2c", num_tads, "l2c", 2, 0, NULL, 0, edac_device_alloc_index()); if (!l2c) return -ENOMEM; @@ -55,29 +149,43 @@ static int __devinit co_l2c_probe(struct platform_device *pdev) l2c->mod_name = "octeon-l2c"; l2c->ctl_name = "octeon_l2c_err"; - l2c->edac_check = co_l2c_poll; + + + if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) { + union cvmx_l2t_err l2t_err; + union cvmx_l2d_err l2d_err; + + l2t_err.u64 = cvmx_read_csr(CVMX_L2T_ERR); + l2t_err.s.sec_intena = 0; /* We poll */ + l2t_err.s.ded_intena = 0; + cvmx_write_csr(CVMX_L2T_ERR, l2t_err.u64); + + l2d_err.u64 = cvmx_read_csr(CVMX_L2D_ERR); + l2d_err.s.sec_intena = 0; /* We poll */ + l2d_err.s.ded_intena = 0; + cvmx_write_csr(CVMX_L2T_ERR, l2d_err.u64); + + l2c->edac_check = octeon_l2c_poll_oct1; + } else { + /* OCTEON II */ + l2c->edac_check = octeon_l2c_poll_oct2; + } if (edac_device_add_device(l2c) > 0) { pr_err("%s: edac_device_add_device() failed\n", __func__); goto err; } - l2t_err.u64 = cvmx_read_csr(CVMX_L2T_ERR); - l2t_err.s.sec_intena = 0; /* We poll */ - l2t_err.s.ded_intena = 0; - l2t_err.s.sec_err = 1; /* Clear, just in case */ - l2t_err.s.ded_err = 1; - cvmx_write_csr(CVMX_L2T_ERR, l2t_err.u64); return 0; err: edac_device_free_ctl_info(l2c); - return res; + return -ENXIO; } -static int co_l2c_remove(struct platform_device *pdev) +static int octeon_l2c_remove(struct platform_device *pdev) { struct edac_device_ctl_info *l2c = platform_get_drvdata(pdev); @@ -87,32 +195,14 @@ static int co_l2c_remove(struct platform_device *pdev) return 0; } -static struct platform_driver co_l2c_driver = { - .probe = co_l2c_probe, - .remove = co_l2c_remove, +static struct platform_driver octeon_l2c_driver = { + .probe = octeon_l2c_probe, + .remove = octeon_l2c_remove, .driver = { - .name = "co_l2c_edac", + .name = "octeon_l2c_edac", } }; - -static int __init co_edac_init(void) -{ - int ret; - - ret = platform_driver_register(&co_l2c_driver); - if (ret) - pr_warning(EDAC_MOD_STR " EDAC failed to register\n"); - - return ret; -} - -static void __exit co_edac_exit(void) -{ - platform_driver_unregister(&co_l2c_driver); -} - -module_init(co_edac_init); -module_exit(co_edac_exit); +module_platform_driver(octeon_l2c_driver); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ralf Baechle "); diff --git a/drivers/edac/octeon_edac-lmc.c b/drivers/edac/octeon_edac-lmc.c index e0c1e44187bc..33bca766e37d 100644 --- a/drivers/edac/octeon_edac-lmc.c +++ b/drivers/edac/octeon_edac-lmc.c @@ -12,139 +12,175 @@ #include #include -#include +#include +#include #include "edac_core.h" #include "edac_module.h" -#include "octeon_edac-lmc.h" -#define EDAC_MOD_STR "octeon" +#define OCTEON_MAX_MC 4 -static struct mem_ctl_info *mc_cavium; -static void *lmc_base; - -static void co_lmc_poll(struct mem_ctl_info *mci) +static void octeon_lmc_edac_poll(struct mem_ctl_info *mci) { - union lmc_mem_cfg0 cfg0; - union lmc_fadr fadr; + union cvmx_lmcx_mem_cfg0 cfg0; + bool do_clear = false; char msg[64]; - fadr.u64 = readq(lmc_base + LMC_FADR); - cfg0.u64 = readq(lmc_base + LMC_MEM_CFG0); - snprintf(msg, sizeof(msg), "DIMM %d rank %d bank %d row %d col %d", - fadr.fdimm, fadr.fbunk, fadr.fbank, fadr.frow, fadr.fcol); - - if (cfg0.sec_err) { - edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0, -1, -1, -1, - msg, ""); - - cfg0.intr_sec_ena = -1; /* Done, re-arm */ + cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(mci->mc_idx)); + if (cfg0.s.sec_err || cfg0.s.ded_err) { + union cvmx_lmcx_fadr fadr; + fadr.u64 = cvmx_read_csr(CVMX_LMCX_FADR(mci->mc_idx)); + snprintf(msg, sizeof(msg), + "DIMM %d rank %d bank %d row %d col %d", + fadr.cn30xx.fdimm, fadr.cn30xx.fbunk, + fadr.cn30xx.fbank, fadr.cn30xx.frow, fadr.cn30xx.fcol); } - if (cfg0.ded_err) { - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, -1, -1, -1, - msg, ""); - cfg0.intr_ded_ena = -1; /* Done, re-arm */ + if (cfg0.s.sec_err) { + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0, + -1, -1, -1, msg, ""); + cfg0.s.sec_err = -1; /* Done, re-arm */ + do_clear = true; } - writeq(cfg0.u64, lmc_base + LMC_MEM_CFG0); + if (cfg0.s.ded_err) { + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, + -1, -1, -1, msg, ""); + cfg0.s.ded_err = -1; /* Done, re-arm */ + do_clear = true; + } + if (do_clear) + cvmx_write_csr(CVMX_LMCX_MEM_CFG0(mci->mc_idx), cfg0.u64); } -static int __devinit co_lmc_probe(struct platform_device *pdev) +static void octeon_lmc_edac_poll_o2(struct mem_ctl_info *mci) +{ + union cvmx_lmcx_int int_reg; + bool do_clear = false; + char msg[64]; + + int_reg.u64 = cvmx_read_csr(CVMX_LMCX_INT(mci->mc_idx)); + if (int_reg.s.sec_err || int_reg.s.ded_err) { + union cvmx_lmcx_fadr fadr; + fadr.u64 = cvmx_read_csr(CVMX_LMCX_FADR(mci->mc_idx)); + snprintf(msg, sizeof(msg), + "DIMM %d rank %d bank %d row %d col %d", + fadr.cn61xx.fdimm, fadr.cn61xx.fbunk, + fadr.cn61xx.fbank, fadr.cn61xx.frow, fadr.cn61xx.fcol); + } + + if (int_reg.s.sec_err) { + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0, + -1, -1, -1, msg, ""); + int_reg.s.sec_err = -1; /* Done, re-arm */ + do_clear = true; + } + + if (int_reg.s.ded_err) { + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, + -1, -1, -1, msg, ""); + int_reg.s.ded_err = -1; /* Done, re-arm */ + do_clear = true; + } + if (do_clear) + cvmx_write_csr(CVMX_LMCX_INT(mci->mc_idx), int_reg.u64); +} + +static int __devinit octeon_lmc_edac_probe(struct platform_device *pdev) { struct mem_ctl_info *mci; - union lmc_mem_cfg0 cfg0; - int res = 0; + struct edac_mc_layer layers[1]; + int mc = pdev->id; - mci = edac_mc_alloc(0, 0, 0, 0); - if (!mci) - return -ENOMEM; + layers[0].type = EDAC_MC_LAYER_CHANNEL; + layers[0].size = 1; + layers[0].is_virt_csrow = false; - mci->pdev = &pdev->dev; - platform_set_drvdata(pdev, mci); - mci->dev_name = dev_name(&pdev->dev); + if (OCTEON_IS_MODEL(OCTEON_FAM_1_PLUS)) { + union cvmx_lmcx_mem_cfg0 cfg0; - mci->mod_name = "octeon-lmc"; - mci->ctl_name = "co_lmc_err"; - mci->edac_check = co_lmc_poll; + cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(0)); + if (!cfg0.s.ecc_ena) { + dev_info(&pdev->dev, "Disabled (ECC not enabled)\n"); + return 0; + } - if (edac_mc_add_mc(mci) > 0) { - pr_err("%s: edac_mc_add_mc() failed\n", __func__); - goto err; + mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, 0); + if (!mci) + return -ENXIO; + + mci->pdev = &pdev->dev; + mci->dev_name = dev_name(&pdev->dev); + + mci->mod_name = "octeon-lmc"; + mci->ctl_name = "octeon-lmc-err"; + mci->edac_check = octeon_lmc_edac_poll; + + if (edac_mc_add_mc(mci)) { + dev_err(&pdev->dev, "edac_mc_add_mc() failed\n"); + edac_mc_free(mci); + return -ENXIO; + } + + cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(mc)); + cfg0.s.intr_ded_ena = 0; /* We poll */ + cfg0.s.intr_sec_ena = 0; + cvmx_write_csr(CVMX_LMCX_MEM_CFG0(mc), cfg0.u64); + } else { + /* OCTEON II */ + union cvmx_lmcx_int_en en; + union cvmx_lmcx_config config; + + config.u64 = cvmx_read_csr(CVMX_LMCX_CONFIG(0)); + if (!config.s.ecc_ena) { + dev_info(&pdev->dev, "Disabled (ECC not enabled)\n"); + return 0; + } + + mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, 0); + if (!mci) + return -ENXIO; + + mci->pdev = &pdev->dev; + mci->dev_name = dev_name(&pdev->dev); + + mci->mod_name = "octeon-lmc"; + mci->ctl_name = "co_lmc_err"; + mci->edac_check = octeon_lmc_edac_poll_o2; + + if (edac_mc_add_mc(mci)) { + dev_err(&pdev->dev, "edac_mc_add_mc() failed\n"); + edac_mc_free(mci); + return -ENXIO; + } + + en.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(mc)); + en.s.intr_ded_ena = 0; /* We poll */ + en.s.intr_sec_ena = 0; + cvmx_write_csr(CVMX_LMCX_MEM_CFG0(mc), en.u64); } - - cfg0.u64 = readq(lmc_base + LMC_MEM_CFG0); /* We poll */ - cfg0.intr_ded_ena = 0; - cfg0.intr_sec_ena = 0; - writeq(cfg0.u64, lmc_base + LMC_MEM_CFG0); - - mc_cavium = mci; + platform_set_drvdata(pdev, mci); return 0; - -err: - edac_mc_free(mci); - - return res; } -static int co_lmc_remove(struct platform_device *pdev) +static int octeon_lmc_edac_remove(struct platform_device *pdev) { struct mem_ctl_info *mci = platform_get_drvdata(pdev); - mc_cavium = NULL; edac_mc_del_mc(&pdev->dev); edac_mc_free(mci); - return 0; } -static struct platform_driver co_lmc_driver = { - .probe = co_lmc_probe, - .remove = co_lmc_remove, +static struct platform_driver octeon_lmc_edac_driver = { + .probe = octeon_lmc_edac_probe, + .remove = octeon_lmc_edac_remove, .driver = { - .name = "co_lmc_edac", + .name = "octeon_lmc_edac", } }; - -static int __init co_edac_init(void) -{ - union lmc_mem_cfg0 cfg0; - int ret; - - lmc_base = ioremap_nocache(LMC_BASE, LMC_SIZE); - if (!lmc_base) - return -ENOMEM; - - cfg0.u64 = readq(lmc_base + LMC_MEM_CFG0); - if (!cfg0.ecc_ena) { - pr_info(EDAC_MOD_STR " LMC EDAC: ECC disabled, good bye\n"); - ret = -ENODEV; - goto out; - } - - ret = platform_driver_register(&co_lmc_driver); - if (ret) { - pr_warning(EDAC_MOD_STR " LMC EDAC failed to register\n"); - goto out; - } - - return ret; - -out: - iounmap(lmc_base); - - return ret; -} - -static void __exit co_edac_exit(void) -{ - platform_driver_unregister(&co_lmc_driver); - iounmap(lmc_base); -} - -module_init(co_edac_init); -module_exit(co_edac_exit); +module_platform_driver(octeon_lmc_edac_driver); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ralf Baechle "); diff --git a/drivers/edac/octeon_edac-lmc.h b/drivers/edac/octeon_edac-lmc.h deleted file mode 100644 index 246dc525bc10..000000000000 --- a/drivers/edac/octeon_edac-lmc.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * LMC Registers, see chapter 2.5 - * - * These are RSL Type registers and are accessed indirectly across the - * I/O bus, so accesses are slowish. Not that it matters. Any size load is - * ok but stores must be 64-bit. - */ -#define LMC_BASE 0x0001180088000000 -#define LMC_SIZE 0xb8 - -#define LMC_MEM_CFG0 0x0000000000000000 -#define LMC_MEM_CFG1 0x0000000000000008 -#define LMC_CTL 0x0000000000000010 -#define LMC_DDR2_CTL 0x0000000000000018 -#define LMC_FADR 0x0000000000000020 -#define LMC_FADR_FDIMM -#define LMC_FADR_FBUNK -#define LMC_FADR_FBANK -#define LMC_FADR_FROW -#define LMC_FADR_FCOL -#define LMC_COMP_CTL 0x0000000000000028 -#define LMC_WODT_CTL 0x0000000000000030 -#define LMC_ECC_SYND 0x0000000000000038 -#define LMC_IFB_CNT_LO 0x0000000000000048 -#define LMC_IFB_CNT_HI 0x0000000000000050 -#define LMC_OPS_CNT_LO 0x0000000000000058 -#define LMC_OPS_CNT_HI 0x0000000000000060 -#define LMC_DCLK_CNT_LO 0x0000000000000068 -#define LMC_DCLK_CNT_HI 0x0000000000000070 -#define LMC_DELAY_CFG 0x0000000000000088 -#define LMC_CTL1 0x0000000000000090 -#define LMC_DUAL_MEM_CONFIG 0x0000000000000098 -#define LMC_RODT_COMP_CTL 0x00000000000000A0 -#define LMC_PLL_CTL 0x00000000000000A8 -#define LMC_PLL_STATUS 0x00000000000000B0 - -union lmc_mem_cfg0 { - uint64_t u64; - struct { - uint64_t reserved_32_63:32; - uint64_t reset:1; - uint64_t silo_qc:1; - uint64_t bunk_ena:1; - uint64_t ded_err:4; - uint64_t sec_err:4; - uint64_t intr_ded_ena:1; - uint64_t intr_sec_ena:1; - uint64_t reserved_15_18:4; - uint64_t ref_int:5; - uint64_t pbank_lsb:4; - uint64_t row_lsb:3; - uint64_t ecc_ena:1; - uint64_t init_start:1; - }; -}; - -union lmc_fadr { - uint64_t u64; - struct { - uint64_t reserved_32_63:32; - uint64_t fdimm:2; - uint64_t fbunk:1; - uint64_t fbank:3; - uint64_t frow:14; - uint64_t fcol:12; - }; -}; - -union lmc_ecc_synd { - uint64_t u64; - struct { - uint64_t reserved_32_63:32; - uint64_t mrdsyn3:8; - uint64_t mrdsyn2:8; - uint64_t mrdsyn1:8; - uint64_t mrdsyn0:8; - }; -}; diff --git a/drivers/edac/octeon_edac-pc.c b/drivers/edac/octeon_edac-pc.c index 9d13061744e4..14a5e57f2b32 100644 --- a/drivers/edac/octeon_edac-pc.c +++ b/drivers/edac/octeon_edac-pc.c @@ -3,6 +3,8 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * + * Copyright (C) 2012 Cavium, Inc. + * * Copyright (C) 2009 Wind River Systems, * written by Ralf Baechle */ @@ -19,93 +21,112 @@ #include #include -#define EDAC_MOD_STR "octeon" - extern int register_co_cache_error_notifier(struct notifier_block *nb); extern int unregister_co_cache_error_notifier(struct notifier_block *nb); extern unsigned long long cache_err_dcache[NR_CPUS]; -static struct edac_device_ctl_info *ed_cavium; +struct co_cache_error { + struct notifier_block notifier; + struct edac_device_ctl_info *ed; +}; -/* +/** * EDAC CPU cache error callback * + * @event: non-zero if unrecoverable. */ - static int co_cache_error_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct co_cache_error *p = container_of(this, struct co_cache_error, + notifier); + unsigned int core = cvmx_get_core_num(); unsigned int cpu = smp_processor_id(); - uint64_t icache_err = read_octeon_c0_icacheerr(); - struct edac_device_ctl_info *ed = ed_cavium; + u64 icache_err = read_octeon_c0_icacheerr(); + u64 dcache_err; - edac_device_printk(ed, KERN_ERR, - "Cache error exception on core %d / processor %d:\n", - core, cpu); - edac_device_printk(ed, KERN_ERR, - "cp0_errorepc == %lx\n", read_c0_errorepc()); - if (icache_err & 1) { - edac_device_printk(ed, KERN_ERR, "CacheErr (Icache) == %llx\n", - (unsigned long long)icache_err); - write_octeon_c0_icacheerr(0); - edac_device_handle_ce(ed, 0, 0, ed->ctl_name); - } - if (cache_err_dcache[core] & 1) { - edac_device_printk(ed, KERN_ERR, "CacheErr (Dcache) == %llx\n", - (unsigned long long)cache_err_dcache[core]); + if (event) { + dcache_err = cache_err_dcache[core]; cache_err_dcache[core] = 0; - edac_device_handle_ue(ed, 0, 0, ed->ctl_name); + } else { + dcache_err = read_octeon_c0_dcacheerr(); } - return NOTIFY_DONE; -} + if (icache_err & 1) { + edac_device_printk(p->ed, KERN_ERR, + "CacheErr (Icache):%llx, core %d/cpu %d, cp0_errorepc == %lx\n", + (unsigned long long)icache_err, core, cpu, + read_c0_errorepc()); + write_octeon_c0_icacheerr(0); + edac_device_handle_ce(p->ed, cpu, 1, "icache"); + } + if (dcache_err & 1) { + edac_device_printk(p->ed, KERN_ERR, + "CacheErr (Dcache):%llx, core %d/cpu %d, cp0_errorepc == %lx\n", + (unsigned long long)dcache_err, core, cpu, + read_c0_errorepc()); + if (event) + edac_device_handle_ue(p->ed, cpu, 0, "dcache"); + else + edac_device_handle_ce(p->ed, cpu, 0, "dcache"); -static struct notifier_block co_cache_error_notifier = { - .notifier_call = co_cache_error_event, -}; + /* Clear the error indication */ + if (OCTEON_IS_MODEL(OCTEON_FAM_2)) + write_octeon_c0_dcacheerr(1); + else + write_octeon_c0_dcacheerr(0); + } + + return NOTIFY_STOP; +} static int __devinit co_cache_error_probe(struct platform_device *pdev) { - struct edac_device_ctl_info *ed; - int res = 0; + struct co_cache_error *p = devm_kzalloc(&pdev->dev, sizeof(*p), + GFP_KERNEL); + if (!p) + return -ENOMEM; - ed = edac_device_alloc_ctl_info(0, "cpu", 1, NULL, 0, 0, NULL, 0, - edac_device_alloc_index()); + p->notifier.notifier_call = co_cache_error_event; + platform_set_drvdata(pdev, p); - ed->dev = &pdev->dev; - platform_set_drvdata(pdev, ed); - ed->dev_name = dev_name(&pdev->dev); - - ed->mod_name = "octeon-cpu"; - ed->ctl_name = "co_cpu_err"; - - if (edac_device_add_device(ed) > 0) { - pr_err("%s: edac_device_add_device() failed\n", __func__); + p->ed = edac_device_alloc_ctl_info(0, "cpu", num_possible_cpus(), + "cache", 2, 0, NULL, 0, + edac_device_alloc_index()); + if (!p->ed) goto err; + + p->ed->dev = &pdev->dev; + + p->ed->dev_name = dev_name(&pdev->dev); + + p->ed->mod_name = "octeon-cpu"; + p->ed->ctl_name = "cache"; + + if (edac_device_add_device(p->ed)) { + pr_err("%s: edac_device_add_device() failed\n", __func__); + goto err1; } - register_co_cache_error_notifier(&co_cache_error_notifier); - ed_cavium = ed; + register_co_cache_error_notifier(&p->notifier); return 0; +err1: + edac_device_free_ctl_info(p->ed); err: - edac_device_free_ctl_info(ed); - - return res; + return -ENXIO; } static int co_cache_error_remove(struct platform_device *pdev) { - struct edac_device_ctl_info *ed = platform_get_drvdata(pdev); + struct co_cache_error *p = platform_get_drvdata(pdev); - unregister_co_cache_error_notifier(&co_cache_error_notifier); - ed_cavium = NULL; + unregister_co_cache_error_notifier(&p->notifier); edac_device_del_device(&pdev->dev); - edac_device_free_ctl_info(ed); - + edac_device_free_ctl_info(p->ed); return 0; } @@ -113,28 +134,10 @@ static struct platform_driver co_cache_error_driver = { .probe = co_cache_error_probe, .remove = co_cache_error_remove, .driver = { - .name = "co_pc_edac", + .name = "octeon_pc_edac", } }; - -static int __init co_edac_init(void) -{ - int ret; - - ret = platform_driver_register(&co_cache_error_driver); - if (ret) - pr_warning(EDAC_MOD_STR "CPU err failed to register\n"); - - return ret; -} - -static void __exit co_edac_exit(void) -{ - platform_driver_unregister(&co_cache_error_driver); -} - -module_init(co_edac_init); -module_exit(co_edac_exit); +module_platform_driver(co_cache_error_driver); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ralf Baechle "); diff --git a/drivers/edac/octeon_edac-pci.c b/drivers/edac/octeon_edac-pci.c index e72b96e3e4e0..758c1ef5fc9e 100644 --- a/drivers/edac/octeon_edac-pci.c +++ b/drivers/edac/octeon_edac-pci.c @@ -3,6 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * + * Copyright (C) 2012 Cavium, Inc. * Copyright (C) 2009 Wind River Systems, * written by Ralf Baechle */ @@ -20,9 +21,7 @@ #include "edac_core.h" #include "edac_module.h" -#define EDAC_MOD_STR "octeon" - -static void co_pci_poll(struct edac_pci_ctl_info *pci) +static void octeon_pci_poll(struct edac_pci_ctl_info *pci) { union cvmx_pci_cfg01 cfg01; @@ -57,14 +56,9 @@ static void co_pci_poll(struct edac_pci_ctl_info *pci) cfg01.s.mdpe = 1; /* Reset */ octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32); } - if (cfg01.s.mdpe) { - edac_pci_handle_npe(pci, "Master Data Parity Error"); - cfg01.s.mdpe = 1; /* Reset */ - octeon_npi_write32(CVMX_NPI_PCI_CFG01, cfg01.u32); - } } -static int __devinit co_pci_probe(struct platform_device *pdev) +static int __devinit octeon_pci_probe(struct platform_device *pdev) { struct edac_pci_ctl_info *pci; int res = 0; @@ -79,7 +73,7 @@ static int __devinit co_pci_probe(struct platform_device *pdev) pci->mod_name = "octeon-pci"; pci->ctl_name = "octeon_pci_err"; - pci->edac_check = co_pci_poll; + pci->edac_check = octeon_pci_poll; if (edac_pci_add_device(pci, 0) > 0) { pr_err("%s: edac_pci_add_device() failed\n", __func__); @@ -94,7 +88,7 @@ err: return res; } -static int co_pci_remove(struct platform_device *pdev) +static int octeon_pci_remove(struct platform_device *pdev) { struct edac_pci_ctl_info *pci = platform_get_drvdata(pdev); @@ -104,32 +98,14 @@ static int co_pci_remove(struct platform_device *pdev) return 0; } -static struct platform_driver co_pci_driver = { - .probe = co_pci_probe, - .remove = co_pci_remove, +static struct platform_driver octeon_pci_driver = { + .probe = octeon_pci_probe, + .remove = octeon_pci_remove, .driver = { - .name = "co_pci_edac", + .name = "octeon_pci_edac", } }; - -static int __init co_edac_init(void) -{ - int ret; - - ret = platform_driver_register(&co_pci_driver); - if (ret) - pr_warning(EDAC_MOD_STR " PCI EDAC failed to register\n"); - - return ret; -} - -static void __exit co_edac_exit(void) -{ - platform_driver_unregister(&co_pci_driver); -} - -module_init(co_edac_init); -module_exit(co_edac_exit); +module_platform_driver(octeon_pci_driver); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ralf Baechle ");