- Drop a now obsolete ppc4xx_edac driver

- Fix conversion to physical memory addresses on Intel's Elkhart Lake and Ice
   Lake hardware when the system address is above the (Top-Of-Memory) TOM
   address
 
 - Pay attention to the memory hole on Zynq UltraScale+ MPSoC DDR controllers
   when injecting errors for testing purposes
 
 - Add support for translating normalized error addresses reported by an AMD
   memory controller into system physical addresses using an UEFI mechanism
   called platform runtime mechanism (PRM).
 
 - The usual cleanups and fixes
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmbeuNcACgkQEsHwGGHe
 VUoELw//fZaWbfYg7yYw8iTMojc01LCmS5m6nQeJc6PewcIfLp6FXr4V4Rq99NUn
 FBVIMunm0unRAqep9WTY+xphxlP9u9VovyaLR0cxRf1aEi3xRFit7PIG7P3RyTUn
 ipDKBnx0plTlwB9US5XllhGCM6xAvrNBoKPe1LV+bd7z9wOJvIy3GeV/65ajLsLV
 +7wNBJ8CMXIJ+319FK35ZUM1butp2XFLVtLqKL53nPsumowZcegfaD1u6sfsX4SO
 je8BpNMXKHl0ftZ3DPAMAGrr4M54lsXX/62k3PqcUr4LMbVGLzQmDGyoHUWwdruT
 OGb5tVWqBXoR6DA03/P25q1SGKwGsbuzK33E8T9vkwIqBrj73vA+tVBv03U3QFMO
 RSb4/BS09q/GtA70OFCnigumLoKMmuZu0tcLGQaUMP6sWVVVMp1vVctTapl22h57
 sonEUf0+GMsVu4ueS/vSfU3R3Dqadg/4LxZPG7njc06hCNDAu7u4/0gGdGuiQwqF
 ZyLUZO3SlJX/SkWfNyW4Lc4GNWRWgtFfh5sgODxATCE5NyUrazsQZg5Jsxr/5Jwv
 aBDsbHEUHO0zKRGfDBfHyaWK8318z+my8zvVhIGLuQCKEY8GSTK35rfthkp6vbEe
 UNrCgea+HaDZt6jN4ahaZjK/0DjiMSO12gA3GPt7tdO6v+U46/0=
 =+/Fq
 -----END PGP SIGNATURE-----

Merge tag 'edac_updates_for_v6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras

Pull EDAC updates from Borislav Petkov:

 - Drop a now obsolete ppc4xx_edac driver

 - Fix conversion to physical memory addresses on Intel's Elkhart Lake
   and Ice Lake hardware when the system address is above the
   Top-Of-Memory (TOM) address

 - Pay attention to the memory hole on Zynq UltraScale+ MPSoC DDR
   controllers when injecting errors for testing purposes

 - Add support for translating normalized error addresses reported by an
   AMD memory controller into system physical addresses using a UEFI
   mechanism called platform runtime mechanism (PRM).

 - The usual cleanups and fixes

* tag 'edac_updates_for_v6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
  EDAC: Drop obsolete PPC4xx driver
  EDAC/sb_edac: Fix the compile warning of large frame size
  EDAC/{skx_common,i10nm}: Remove the AMAP register for determing DDR5
  EDAC/{skx_common,skx,i10nm}: Move the common debug code to skx_common
  EDAC/igen6: Fix conversion of system address to physical memory address
  EDAC/synopsys: Fix error injection on Zynq UltraScale+
  RAS/AMD/ATL: Translate normalized to system physical addresses using PRM
  ACPI: PRM: Add PRM handler direct call support
This commit is contained in:
Linus Torvalds 2024-09-16 06:36:37 +02:00
commit 7dfc15c473
18 changed files with 222 additions and 1729 deletions

View File

@ -214,6 +214,30 @@ static struct prm_handler_info *find_prm_handler(const guid_t *guid)
#define UPDATE_LOCK_ALREADY_HELD 4
#define UPDATE_UNLOCK_WITHOUT_LOCK 5
/*
 * acpi_call_prm_handler() - directly invoke the PRM handler for a GUID.
 * @handler_guid: GUID used to look up both the handler and its module.
 * @param_buffer: parameter buffer; its address is cast to u64 and handed
 *                to the handler.
 *
 * Builds a zeroed context buffer tagged "PRMC", fills in the handler's GUID
 * and static data buffer address plus the module's MMIO info, then calls the
 * handler through efi_call_acpi_prm_handler().
 *
 * Return: -ENODEV when either the handler or the module lookup fails,
 * otherwise the EFI status translated by efi_status_to_err().
 */
int acpi_call_prm_handler(guid_t handler_guid, void *param_buffer)
{
	struct prm_context_buffer ctx;
	struct prm_handler_info *hnd;
	struct prm_module_info *mod;
	efi_status_t rc;

	hnd = find_prm_handler(&handler_guid);
	mod = find_prm_module(&handler_guid);
	if (!(mod && hnd))
		return -ENODEV;

	memset(&ctx, 0, sizeof(ctx));
	ACPI_COPY_NAMESEG(ctx.signature, "PRMC");
	ctx.identifier = hnd->guid;
	ctx.static_data_buffer = hnd->static_data_buffer_addr;
	ctx.mmio_ranges = mod->mmio_info;

	rc = efi_call_acpi_prm_handler(hnd->handler_addr,
				       (u64)param_buffer,
				       &ctx);

	return efi_status_to_err(rc);
}
EXPORT_SYMBOL_GPL(acpi_call_prm_handler);
/*
* This is the PlatformRtMechanism opregion space handler.
* @function: indicates the read/write. In fact as the PlatformRtMechanism

View File

@ -311,15 +311,6 @@ config EDAC_CELL
Cell Broadband Engine internal memory controller
on platform without a hypervisor
config EDAC_PPC4XX
tristate "PPC4xx IBM DDR2 Memory Controller"
depends on 4xx
help
This enables support for EDAC on the ECC memory used
with the IBM DDR2 memory controller found in various
PowerPC 4xx embedded processors such as the 405EX[r],
440SP, 440SPe, 460EX, 460GT and 460SX.
config EDAC_AMD8131
tristate "AMD8131 HyperTransport PCI-X Tunnel"
depends on PCI && PPC_MAPLE

View File

@ -63,7 +63,6 @@ i10nm_edac-y := i10nm_base.o
obj-$(CONFIG_EDAC_I10NM) += i10nm_edac.o skx_edac_common.o
obj-$(CONFIG_EDAC_CELL) += cell_edac.o
obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o
obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o
obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o

View File

@ -47,10 +47,6 @@
readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : \
(res_cfg->type == GNR ? 0xaf8 : 0x20ef8)) + \
(i) * (m)->chan_mmio_sz)
/*
 * Read (via readl) the AMAP register of channel @i on memory controller @m.
 * The base offset is 0x814 when (m)->hbm_mc is set, 0xc14 when
 * res_cfg->type is GNR and 0x20814 otherwise; per-channel register banks
 * are (m)->chan_mmio_sz bytes apart.
 */
#define I10NM_GET_AMAP(m, i) \
readl((m)->mbase + ((m)->hbm_mc ? 0x814 : \
(res_cfg->type == GNR ? 0xc14 : 0x20814)) + \
(i) * (m)->chan_mmio_sz)
/*
 * Read (via readl) the register at byte @offset inside the MMIO window of
 * channel @i on memory controller @m; windows are (m)->chan_mmio_sz apart.
 */
#define I10NM_GET_REG32(m, i, offset) \
readl((m)->mbase + (i) * (m)->chan_mmio_sz + (offset))
#define I10NM_GET_REG64(m, i, offset) \
@ -971,7 +967,7 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci,
{
struct skx_pvt *pvt = mci->pvt_info;
struct skx_imc *imc = pvt->imc;
u32 mtr, amap, mcddrtcfg = 0;
u32 mtr, mcddrtcfg = 0;
struct dimm_info *dimm;
int i, j, ndimms;
@ -980,7 +976,6 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci,
continue;
ndimms = 0;
amap = I10NM_GET_AMAP(imc, i);
if (res_cfg->type != GNR)
mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i);
@ -992,7 +987,7 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci,
mtr, mcddrtcfg, imc->mc, i, j);
if (IS_DIMM_PRESENT(mtr))
ndimms += skx_get_dimm_info(mtr, 0, amap, dimm,
ndimms += skx_get_dimm_info(mtr, 0, 0, dimm,
imc, i, j, cfg);
else if (IS_NVDIMM_PRESENT(mcddrtcfg, j))
ndimms += skx_get_nvdimm_info(dimm, imc, i, j,
@ -1013,54 +1008,6 @@ static struct notifier_block i10nm_mce_dec = {
.priority = MCE_PRIO_EDAC,
};
#ifdef CONFIG_EDAC_DEBUG
/*
* Debug feature.
* Exercise the address decode logic by writing an address to
* /sys/kernel/debug/edac/i10nm_test/addr.
*/
static struct dentry *i10nm_test;
/*
 * Setter behind the debugfs "addr" attribute: fabricate an MCE record for
 * address @val and feed it to skx_mce_check_error(). @data is not used.
 * Always returns 0.
 */
static int debugfs_u64_set(void *data, u64 val)
{
	struct mce fake;

	pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);

	memset(&fake, 0, sizeof(fake));
	fake.addr = val;
	/* ADDRV + MemRd + Unknown channel, plus one corrected error */
	fake.status = (MCI_STATUS_ADDRV + 0x90) | BIT_ULL(MCI_STATUS_CEC_SHIFT);

	skx_mce_check_error(NULL, 0, &fake);

	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
/*
 * Create the "i10nm_test" EDAC debugfs directory holding a write-only
 * (mode 0200) "addr" file. If the file cannot be created, the directory is
 * removed again and i10nm_test is reset to NULL.
 */
static void setup_i10nm_debug(void)
{
	i10nm_test = edac_debugfs_create_dir("i10nm_test");
	if (!i10nm_test)
		return;

	if (edac_debugfs_create_file("addr", 0200, i10nm_test,
				     NULL, &fops_u64_wo))
		return;

	/* No "addr" file: drop the now-pointless directory. */
	debugfs_remove(i10nm_test);
	i10nm_test = NULL;
}
/* Remove the i10nm_test debugfs directory and everything beneath it. */
static void teardown_i10nm_debug(void)
{
debugfs_remove_recursive(i10nm_test);
}
#else
/* Empty stand-ins used when CONFIG_EDAC_DEBUG is not set. */
static inline void setup_i10nm_debug(void) {}
static inline void teardown_i10nm_debug(void) {}
#endif /*CONFIG_EDAC_DEBUG*/
static int __init i10nm_init(void)
{
u8 mc = 0, src_id = 0, node_id = 0;
@ -1159,7 +1106,7 @@ static int __init i10nm_init(void)
opstate_init();
mce_register_decode_chain(&i10nm_mce_dec);
setup_i10nm_debug();
skx_setup_debug("i10nm_test");
if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
skx_set_decode(i10nm_mc_decode, show_retry_rd_err_log);
@ -1187,7 +1134,7 @@ static void __exit i10nm_exit(void)
enable_retry_rd_err_log(false);
}
teardown_i10nm_debug();
skx_teardown_debug();
mce_unregister_decode_chain(&i10nm_mce_dec);
skx_adxl_put();
skx_remove();

View File

@ -316,7 +316,7 @@ static u64 ehl_err_addr_to_imc_addr(u64 eaddr, int mc)
if (igen6_tom <= _4GB)
return eaddr + igen6_tolud - _4GB;
if (eaddr < _4GB)
if (eaddr >= igen6_tom)
return eaddr + igen6_tolud - igen6_tom;
return eaddr;

File diff suppressed because it is too large Load Diff

View File

@ -1,167 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2008 Nuovation System Designs, LLC
* Grant Erickson <gerickson@nuovations.com>
*
* This file defines processor mnemonics for accessing and managing
* the IBM DDR1/DDR2 ECC controller found in the 405EX[r], 440SP,
* 440SPe, 460EX, 460GT and 460SX.
*/
#ifndef __PPC4XX_EDAC_H
#define __PPC4XX_EDAC_H
#include <linux/types.h>
/*
* Macro for generating register field mnemonics
*
* Bit numbers count from the most-significant end of the 32-bit register:
* PPC_REG_VAL() shifts @val left by (PPC_REG_BITS - 1 - @bit) and
* PPC_REG_DECODE() shifts right by the same amount, so bit 0 is the top
* bit and bit 31 the bottom bit.
*/
#define PPC_REG_BITS 32
#define PPC_REG_VAL(bit, val) ((val) << ((PPC_REG_BITS - 1) - (bit)))
#define PPC_REG_DECODE(bit, val) ((val) >> ((PPC_REG_BITS - 1) - (bit)))
/*
* IBM 4xx DDR1/DDR2 SDRAM memory controller registers (at least those
* relevant to ECC)
*/
#define SDRAM_BESR 0x00 /* Error status (read/clear) */
#define SDRAM_BESRT 0x01 /* Error status (test/set) */
#define SDRAM_BEARL 0x02 /* Error address low */
#define SDRAM_BEARH 0x03 /* Error address high */
#define SDRAM_WMIRQ 0x06 /* Write master (read/clear) */
#define SDRAM_WMIRQT 0x07 /* Write master (test/set) */
#define SDRAM_MCOPT1 0x20 /* Controller options 1 */
#define SDRAM_MBXCF_BASE 0x40 /* Bank n configuration base */
#define SDRAM_MBXCF(n) (SDRAM_MBXCF_BASE + (4 * (n)))
#define SDRAM_MB0CF SDRAM_MBXCF(0)
#define SDRAM_MB1CF SDRAM_MBXCF(1)
#define SDRAM_MB2CF SDRAM_MBXCF(2)
#define SDRAM_MB3CF SDRAM_MBXCF(3)
#define SDRAM_ECCCR 0x98 /* ECC error status */
#define SDRAM_ECCES SDRAM_ECCCR
/*
* PLB Master IDs
*/
#define SDRAM_PLB_M0ID_FIRST 0
#define SDRAM_PLB_M0ID_ICU SDRAM_PLB_M0ID_FIRST
#define SDRAM_PLB_M0ID_PCIE0 1
#define SDRAM_PLB_M0ID_PCIE1 2
#define SDRAM_PLB_M0ID_DMA 3
#define SDRAM_PLB_M0ID_DCU 4
#define SDRAM_PLB_M0ID_OPB 5
#define SDRAM_PLB_M0ID_MAL 6
#define SDRAM_PLB_M0ID_SEC 7
#define SDRAM_PLB_M0ID_AHB 8
#define SDRAM_PLB_M0ID_LAST SDRAM_PLB_M0ID_AHB
#define SDRAM_PLB_M0ID_COUNT (SDRAM_PLB_M0ID_LAST - \
SDRAM_PLB_M0ID_FIRST + 1)
/*
* Memory Controller Bus Error Status Register
*/
#define SDRAM_BESR_MASK PPC_REG_VAL(7, 0xFF)
#define SDRAM_BESR_M0ID_MASK PPC_REG_VAL(3, 0xF)
#define SDRAM_BESR_M0ID_DECODE(n) PPC_REG_DECODE(3, n)
#define SDRAM_BESR_M0ID_ICU PPC_REG_VAL(3, SDRAM_PLB_M0ID_ICU)
#define SDRAM_BESR_M0ID_PCIE0 PPC_REG_VAL(3, SDRAM_PLB_M0ID_PCIE0)
#define SDRAM_BESR_M0ID_PCIE1 PPC_REG_VAL(3, SDRAM_PLB_M0ID_PCIE1)
#define SDRAM_BESR_M0ID_DMA PPC_REG_VAL(3, SDRAM_PLB_M0ID_DMA)
#define SDRAM_BESR_M0ID_DCU PPC_REG_VAL(3, SDRAM_PLB_M0ID_DCU)
#define SDRAM_BESR_M0ID_OPB PPC_REG_VAL(3, SDRAM_PLB_M0ID_OPB)
#define SDRAM_BESR_M0ID_MAL PPC_REG_VAL(3, SDRAM_PLB_M0ID_MAL)
#define SDRAM_BESR_M0ID_SEC PPC_REG_VAL(3, SDRAM_PLB_M0ID_SEC)
#define SDRAM_BESR_M0ID_AHB PPC_REG_VAL(3, SDRAM_PLB_M0ID_AHB)
#define SDRAM_BESR_M0ET_MASK PPC_REG_VAL(6, 0x7)
#define SDRAM_BESR_M0ET_NONE PPC_REG_VAL(6, 0)
#define SDRAM_BESR_M0ET_ECC PPC_REG_VAL(6, 1)
#define SDRAM_BESR_M0RW_MASK PPC_REG_VAL(7, 1)
#define SDRAM_BESR_M0RW_WRITE PPC_REG_VAL(7, 0)
#define SDRAM_BESR_M0RW_READ PPC_REG_VAL(7, 1)
/*
* Memory Controller PLB Write Master Interrupt Register
*/
#define SDRAM_WMIRQ_MASK PPC_REG_VAL(8, 0x1FF)
#define SDRAM_WMIRQ_ENCODE(id) PPC_REG_VAL((id % \
SDRAM_PLB_M0ID_COUNT), 1)
#define SDRAM_WMIRQ_ICU PPC_REG_VAL(SDRAM_PLB_M0ID_ICU, 1)
#define SDRAM_WMIRQ_PCIE0 PPC_REG_VAL(SDRAM_PLB_M0ID_PCIE0, 1)
#define SDRAM_WMIRQ_PCIE1 PPC_REG_VAL(SDRAM_PLB_M0ID_PCIE1, 1)
#define SDRAM_WMIRQ_DMA PPC_REG_VAL(SDRAM_PLB_M0ID_DMA, 1)
#define SDRAM_WMIRQ_DCU PPC_REG_VAL(SDRAM_PLB_M0ID_DCU, 1)
#define SDRAM_WMIRQ_OPB PPC_REG_VAL(SDRAM_PLB_M0ID_OPB, 1)
#define SDRAM_WMIRQ_MAL PPC_REG_VAL(SDRAM_PLB_M0ID_MAL, 1)
#define SDRAM_WMIRQ_SEC PPC_REG_VAL(SDRAM_PLB_M0ID_SEC, 1)
#define SDRAM_WMIRQ_AHB PPC_REG_VAL(SDRAM_PLB_M0ID_AHB, 1)
/*
* Memory Controller Options 1 Register
*/
#define SDRAM_MCOPT1_MCHK_MASK PPC_REG_VAL(3, 0x3) /* ECC mask */
#define SDRAM_MCOPT1_MCHK_NON PPC_REG_VAL(3, 0x0) /* No ECC gen */
#define SDRAM_MCOPT1_MCHK_GEN PPC_REG_VAL(3, 0x2) /* ECC gen */
#define SDRAM_MCOPT1_MCHK_CHK PPC_REG_VAL(3, 0x1) /* ECC gen and chk */
#define SDRAM_MCOPT1_MCHK_CHK_REP PPC_REG_VAL(3, 0x3) /* ECC gen/chk/rpt */
#define SDRAM_MCOPT1_MCHK_DECODE(n) ((((u32)(n)) >> 28) & 0x3)
#define SDRAM_MCOPT1_RDEN_MASK PPC_REG_VAL(4, 0x1) /* Rgstrd DIMM mask */
#define SDRAM_MCOPT1_RDEN PPC_REG_VAL(4, 0x1) /* Rgstrd DIMM enbl */
#define SDRAM_MCOPT1_WDTH_MASK PPC_REG_VAL(7, 0x1) /* Width mask */
#define SDRAM_MCOPT1_WDTH_32 PPC_REG_VAL(7, 0x0) /* 32 bits */
#define SDRAM_MCOPT1_WDTH_16 PPC_REG_VAL(7, 0x1) /* 16 bits */
#define SDRAM_MCOPT1_DDR_TYPE_MASK PPC_REG_VAL(11, 0x1) /* DDR type mask */
#define SDRAM_MCOPT1_DDR1_TYPE PPC_REG_VAL(11, 0x0) /* DDR1 type */
#define SDRAM_MCOPT1_DDR2_TYPE PPC_REG_VAL(11, 0x1) /* DDR2 type */
/*
* Memory Bank 0 - n Configuration Register
*/
#define SDRAM_MBCF_BA_MASK PPC_REG_VAL(12, 0x1FFF)
#define SDRAM_MBCF_SZ_MASK PPC_REG_VAL(19, 0xF)
#define SDRAM_MBCF_SZ_DECODE(mbxcf) PPC_REG_DECODE(19, mbxcf)
#define SDRAM_MBCF_SZ_4MB PPC_REG_VAL(19, 0x0)
#define SDRAM_MBCF_SZ_8MB PPC_REG_VAL(19, 0x1)
#define SDRAM_MBCF_SZ_16MB PPC_REG_VAL(19, 0x2)
#define SDRAM_MBCF_SZ_32MB PPC_REG_VAL(19, 0x3)
#define SDRAM_MBCF_SZ_64MB PPC_REG_VAL(19, 0x4)
#define SDRAM_MBCF_SZ_128MB PPC_REG_VAL(19, 0x5)
#define SDRAM_MBCF_SZ_256MB PPC_REG_VAL(19, 0x6)
#define SDRAM_MBCF_SZ_512MB PPC_REG_VAL(19, 0x7)
#define SDRAM_MBCF_SZ_1GB PPC_REG_VAL(19, 0x8)
#define SDRAM_MBCF_SZ_2GB PPC_REG_VAL(19, 0x9)
#define SDRAM_MBCF_SZ_4GB PPC_REG_VAL(19, 0xA)
#define SDRAM_MBCF_SZ_8GB PPC_REG_VAL(19, 0xB)
#define SDRAM_MBCF_AM_MASK PPC_REG_VAL(23, 0xF)
#define SDRAM_MBCF_AM_MODE0 PPC_REG_VAL(23, 0x0)
#define SDRAM_MBCF_AM_MODE1 PPC_REG_VAL(23, 0x1)
#define SDRAM_MBCF_AM_MODE2 PPC_REG_VAL(23, 0x2)
#define SDRAM_MBCF_AM_MODE3 PPC_REG_VAL(23, 0x3)
#define SDRAM_MBCF_AM_MODE4 PPC_REG_VAL(23, 0x4)
#define SDRAM_MBCF_AM_MODE5 PPC_REG_VAL(23, 0x5)
#define SDRAM_MBCF_AM_MODE6 PPC_REG_VAL(23, 0x6)
#define SDRAM_MBCF_AM_MODE7 PPC_REG_VAL(23, 0x7)
#define SDRAM_MBCF_AM_MODE8 PPC_REG_VAL(23, 0x8)
#define SDRAM_MBCF_AM_MODE9 PPC_REG_VAL(23, 0x9)
#define SDRAM_MBCF_BE_MASK PPC_REG_VAL(31, 0x1)
#define SDRAM_MBCF_BE_DISABLE PPC_REG_VAL(31, 0x0)
#define SDRAM_MBCF_BE_ENABLE PPC_REG_VAL(31, 0x1)
/*
* ECC Error Status
*/
#define SDRAM_ECCES_MASK PPC_REG_VAL(21, 0x3FFFFF)
#define SDRAM_ECCES_BNCE_MASK PPC_REG_VAL(15, 0xFFFF)
#define SDRAM_ECCES_BNCE_ENCODE(lane) PPC_REG_VAL(((lane) & 0xF), 1)
#define SDRAM_ECCES_CKBER_MASK PPC_REG_VAL(17, 0x3)
#define SDRAM_ECCES_CKBER_NONE PPC_REG_VAL(17, 0)
#define SDRAM_ECCES_CKBER_16_ECC_0_3 PPC_REG_VAL(17, 2)
#define SDRAM_ECCES_CKBER_32_ECC_0_3 PPC_REG_VAL(17, 1)
#define SDRAM_ECCES_CKBER_32_ECC_4_8 PPC_REG_VAL(17, 2)
#define SDRAM_ECCES_CKBER_32_ECC_0_8 PPC_REG_VAL(17, 3)
#define SDRAM_ECCES_CE PPC_REG_VAL(18, 1)
#define SDRAM_ECCES_UE PPC_REG_VAL(19, 1)
#define SDRAM_ECCES_BKNER_MASK PPC_REG_VAL(21, 0x3)
#define SDRAM_ECCES_BK0ER PPC_REG_VAL(20, 1)
#define SDRAM_ECCES_BK1ER PPC_REG_VAL(21, 1)
#endif /* __PPC4XX_EDAC_H */

View File

@ -29,6 +29,8 @@
/* Static vars */
static LIST_HEAD(sbridge_edac_list);
static char sb_msg[256];
static char sb_msg_full[512];
/*
* Alter this version for the module when modifications are made
@ -3079,7 +3081,6 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
struct mem_ctl_info *new_mci;
struct sbridge_pvt *pvt = mci->pvt_info;
enum hw_event_mc_err_type tp_event;
char *optype, msg[256], msg_full[512];
bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
bool overflow = GET_BITFIELD(m->status, 62, 62);
bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
@ -3095,10 +3096,10 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
* aligned address reported by patrol scrubber.
*/
u32 lsb = GET_BITFIELD(m->misc, 0, 5);
char *optype, *area_type = "DRAM";
long channel_mask, first_channel;
u8 rank = 0xff, socket, ha;
int rc, dimm;
char *area_type = "DRAM";
if (pvt->info.type != SANDY_BRIDGE)
recoverable = true;
@ -3168,32 +3169,32 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
channel = knl_channel_remap(m->bank == 16, channel);
channel_mask = 1 << channel;
snprintf(msg, sizeof(msg),
"%s%s err_code:%04x:%04x channel:%d (DIMM_%c)",
overflow ? " OVERFLOW" : "",
(uncorrected_error && recoverable)
? " recoverable" : " ",
mscod, errcode, channel, A + channel);
snprintf(sb_msg, sizeof(sb_msg),
"%s%s err_code:%04x:%04x channel:%d (DIMM_%c)",
overflow ? " OVERFLOW" : "",
(uncorrected_error && recoverable)
? " recoverable" : " ",
mscod, errcode, channel, A + channel);
edac_mc_handle_error(tp_event, mci, core_err_cnt,
m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
channel, 0, -1,
optype, msg);
optype, sb_msg);
}
return;
} else if (lsb < 12) {
rc = get_memory_error_data(mci, m->addr, &socket, &ha,
&channel_mask, &rank,
&area_type, msg);
&area_type, sb_msg);
} else {
rc = get_memory_error_data_from_mce(mci, m, &socket, &ha,
&channel_mask, msg);
&channel_mask, sb_msg);
}
if (rc < 0)
goto err_parsing;
new_mci = get_mci_for_node_id(socket, ha);
if (!new_mci) {
strcpy(msg, "Error: socket got corrupted!");
strscpy(sb_msg, "Error: socket got corrupted!");
goto err_parsing;
}
mci = new_mci;
@ -3218,7 +3219,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
*/
if (!pvt->is_lockstep && !pvt->is_cur_addr_mirrored && !pvt->is_close_pg)
channel = first_channel;
snprintf(msg_full, sizeof(msg_full),
snprintf(sb_msg_full, sizeof(sb_msg_full),
"%s%s area:%s err_code:%04x:%04x socket:%d ha:%d channel_mask:%ld rank:%d %s",
overflow ? " OVERFLOW" : "",
(uncorrected_error && recoverable) ? " recoverable" : "",
@ -3226,9 +3227,9 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
mscod, errcode,
socket, ha,
channel_mask,
rank, msg);
rank, sb_msg);
edac_dbg(0, "%s\n", msg_full);
edac_dbg(0, "%s\n", sb_msg_full);
/* FIXME: need support for channel mask */
@ -3239,12 +3240,12 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
edac_mc_handle_error(tp_event, mci, core_err_cnt,
m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
channel, dimm, -1,
optype, msg_full);
optype, sb_msg_full);
return;
err_parsing:
edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0,
-1, -1, -1,
msg, "");
sb_msg, "");
}

View File

@ -587,54 +587,6 @@ static struct notifier_block skx_mce_dec = {
.priority = MCE_PRIO_EDAC,
};
#ifdef CONFIG_EDAC_DEBUG
/*
* Debug feature.
* Exercise the address decode logic by writing an address to
* /sys/kernel/debug/edac/skx_test/addr.
*/
static struct dentry *skx_test;
/*
 * Setter behind the debugfs "addr" attribute: fabricate an MCE record for
 * address @val and feed it to skx_mce_check_error(). @data is not used.
 * Always returns 0.
 */
static int debugfs_u64_set(void *data, u64 val)
{
	struct mce fake;

	pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);

	memset(&fake, 0, sizeof(fake));
	fake.addr = val;
	/* ADDRV + MemRd + Unknown channel, plus one corrected error */
	fake.status = (MCI_STATUS_ADDRV + 0x90) | BIT_ULL(MCI_STATUS_CEC_SHIFT);

	skx_mce_check_error(NULL, 0, &fake);

	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
/*
 * Create the "skx_test" EDAC debugfs directory holding a write-only
 * (mode 0200) "addr" file. If the file cannot be created, the directory is
 * removed again and skx_test is reset to NULL.
 */
static void setup_skx_debug(void)
{
	skx_test = edac_debugfs_create_dir("skx_test");
	if (!skx_test)
		return;

	if (edac_debugfs_create_file("addr", 0200, skx_test,
				     NULL, &fops_u64_wo))
		return;

	/* No "addr" file: drop the now-pointless directory. */
	debugfs_remove(skx_test);
	skx_test = NULL;
}
/* Remove the skx_test debugfs directory and everything beneath it. */
static void teardown_skx_debug(void)
{
debugfs_remove_recursive(skx_test);
}
#else
/* Empty stand-ins used when CONFIG_EDAC_DEBUG is not set. */
static inline void setup_skx_debug(void) {}
static inline void teardown_skx_debug(void) {}
#endif /*CONFIG_EDAC_DEBUG*/
/*
* skx_init:
* make sure we are running on the correct cpu model
@ -728,7 +680,7 @@ static int __init skx_init(void)
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
setup_skx_debug();
skx_setup_debug("skx_test");
mce_register_decode_chain(&skx_mce_dec);
@ -742,7 +694,7 @@ static void __exit skx_exit(void)
{
edac_dbg(2, "\n");
mce_unregister_decode_chain(&skx_mce_dec);
teardown_skx_debug();
skx_teardown_debug();
if (nvdimm_count)
skx_adxl_put();
skx_remove();

View File

@ -363,7 +363,7 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
if (imc->hbm_mc) {
banks = 32;
mtype = MEM_HBM2;
} else if (cfg->support_ddr5 && (amap & 0x8)) {
} else if (cfg->support_ddr5) {
banks = 32;
mtype = MEM_DDR5;
} else {
@ -739,6 +739,53 @@ void skx_remove(void)
}
EXPORT_SYMBOL_GPL(skx_remove);
#ifdef CONFIG_EDAC_DEBUG
/*
* Debug feature.
* Exercise the address decode logic by writing an address to
* /sys/kernel/debug/edac/{skx,i10nm}_test/addr.
*/
static struct dentry *skx_test;
/*
 * Setter behind the debugfs "addr" attribute: fabricate an MCE record for
 * address @val and feed it to skx_mce_check_error(). @data is not used.
 * Always returns 0.
 */
static int debugfs_u64_set(void *data, u64 val)
{
	struct mce fake;

	pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);

	memset(&fake, 0, sizeof(fake));
	fake.addr = val;
	/* ADDRV + MemRd + Unknown channel, plus one corrected error */
	fake.status = (MCI_STATUS_ADDRV + 0x90) | BIT_ULL(MCI_STATUS_CEC_SHIFT);

	skx_mce_check_error(NULL, 0, &fake);

	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
/*
 * skx_setup_debug() - create the address-decode test hook in debugfs.
 * @name: name of the directory to create under the EDAC debugfs hierarchy.
 *
 * Creates directory @name with a write-only (mode 0200) "addr" file. If the
 * file cannot be created, the directory is removed again and skx_test is
 * reset to NULL.
 */
void skx_setup_debug(const char *name)
{
	skx_test = edac_debugfs_create_dir(name);
	if (!skx_test)
		return;

	if (edac_debugfs_create_file("addr", 0200, skx_test,
				     NULL, &fops_u64_wo))
		return;

	/* No "addr" file: drop the now-pointless directory. */
	debugfs_remove(skx_test);
	skx_test = NULL;
}
EXPORT_SYMBOL_GPL(skx_setup_debug);
/* Remove the directory recorded in skx_test and everything beneath it. */
void skx_teardown_debug(void)
{
debugfs_remove_recursive(skx_test);
}
EXPORT_SYMBOL_GPL(skx_teardown_debug);
#endif /*CONFIG_EDAC_DEBUG*/
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Tony Luck");
MODULE_DESCRIPTION("MC Driver for Intel server processors");

View File

@ -259,4 +259,12 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
void skx_remove(void);
/*
 * Debugfs test-hook setup/teardown. Real implementations are declared when
 * CONFIG_EDAC_DEBUG is set; otherwise both collapse to empty inline stubs.
 */
#ifdef CONFIG_EDAC_DEBUG
void skx_setup_debug(const char *name);
void skx_teardown_debug(void);
#else
static inline void skx_setup_debug(const char *name) {}
static inline void skx_teardown_debug(void) {}
#endif
#endif /* _SKX_COMM_EDAC_H */

View File

@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/spinlock.h>
#include <linux/sizes.h>
#include <linux/interrupt.h>
#include <linux/of.h>
@ -337,6 +338,7 @@ struct synps_edac_priv {
* @get_mtype: Get mtype.
* @get_dtype: Get dtype.
* @get_ecc_state: Get ECC state.
* @get_mem_info: Get EDAC memory info
* @quirks: To differentiate IPs.
*/
struct synps_platform_data {
@ -344,6 +346,9 @@ struct synps_platform_data {
enum mem_type (*get_mtype)(const void __iomem *base);
enum dev_type (*get_dtype)(const void __iomem *base);
bool (*get_ecc_state)(void __iomem *base);
#ifdef CONFIG_EDAC_DEBUG
u64 (*get_mem_info)(struct synps_edac_priv *priv);
#endif
int quirks;
};
@ -402,6 +407,25 @@ out:
return 0;
}
#ifdef CONFIG_EDAC_DEBUG
/**
 * zynqmp_get_mem_info - Derive the host interface address to poison.
 * @priv:	DDR memory controller private instance data.
 *
 * Starts from @priv->poison_addr. An address at or above SZ_32G is rebased
 * as (addr - SZ_32G + SZ_2G) before the result is shifted right by 3.
 *
 * Return: host interface address.
 */
static u64 zynqmp_get_mem_info(struct synps_edac_priv *priv)
{
	u64 addr = priv->poison_addr;

	if (addr >= SZ_32G)
		addr = addr - SZ_32G + SZ_2G;

	return addr >> 3;
}
#endif
/**
* zynqmp_get_error_info - Get the current ECC error info.
* @priv: DDR memory controller private instance data.
@ -922,6 +946,9 @@ static const struct synps_platform_data zynqmp_edac_def = {
.get_mtype = zynqmp_get_mtype,
.get_dtype = zynqmp_get_dtype,
.get_ecc_state = zynqmp_get_ecc_state,
#ifdef CONFIG_EDAC_DEBUG
.get_mem_info = zynqmp_get_mem_info,
#endif
.quirks = (DDR_ECC_INTR_SUPPORT
#ifdef CONFIG_EDAC_DEBUG
| DDR_ECC_DATA_POISON_SUPPORT
@ -975,10 +1002,16 @@ MODULE_DEVICE_TABLE(of, synps_edac_match);
static void ddr_poison_setup(struct synps_edac_priv *priv)
{
int col = 0, row = 0, bank = 0, bankgrp = 0, rank = 0, regval;
const struct synps_platform_data *p_data;
int index;
ulong hif_addr = 0;
hif_addr = priv->poison_addr >> 3;
p_data = priv->p_data;
if (p_data->get_mem_info)
hif_addr = p_data->get_mem_info(priv);
else
hif_addr = priv->poison_addr >> 3;
for (index = 0; index < DDR_MAX_ROW_SHIFT; index++) {
if (priv->row_shift[index])

View File

@ -19,3 +19,7 @@ config AMD_ATL
Enable this option if using DRAM ECC on Zen-based systems
and OS-based error handling.
config AMD_ATL_PRM
depends on AMD_ATL && ACPI_PRMT
def_bool y

View File

@ -15,4 +15,6 @@ amd_atl-y += map.o
amd_atl-y += system.o
amd_atl-y += umc.o
amd_atl-$(CONFIG_AMD_ATL_PRM) += prm.o
obj-$(CONFIG_AMD_ATL) += amd_atl.o

View File

@ -282,6 +282,16 @@ unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err);
u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr);
u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr);
/*
 * PRM-based normalized-to-system address translation. Without
 * CONFIG_AMD_ATL_PRM the stub returns -ENODEV (as an unsigned long), which
 * callers can detect with IS_ERR_VALUE().
 */
#ifdef CONFIG_AMD_ATL_PRM
unsigned long prm_umc_norm_to_sys_addr(u8 socket_id, u64 umc_bank_inst_id, unsigned long addr);
#else
static inline unsigned long prm_umc_norm_to_sys_addr(u8 socket_id, u64 umc_bank_inst_id,
unsigned long addr)
{
return -ENODEV;
}
#endif
/*
* Make a gap in @data that is @num_bits long starting at @bit_num.
* e.g. data = 11111111'b

57
drivers/ras/amd/atl/prm.c Normal file
View File

@ -0,0 +1,57 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* AMD Address Translation Library
*
* prm.c : Plumbing code for ACPI Platform Runtime Mechanism (PRM)
*
* Information on AMD PRM modules and handlers including the GUIDs and buffer
* structures used here are defined in the AMD ACPI Porting Guide in the
* chapter "Platform Runtime Mechanism Table (PRMT)"
*
* Copyright (c) 2024, Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Author: John Allen <john.allen@amd.com>
*/
#include "internal.h"
#include <linux/prmt.h>
/*
* PRM parameter buffer - normalized to system physical address, as described
* in the "PRM Parameter Buffer" section of the AMD ACPI Porting Guide.
*
* The struct is __packed, so fields are laid out back to back with no padding.
*/
struct norm_to_sys_param_buf {
/* Normalized address to translate */
u64 norm_addr;
/* Socket ID the address belongs to */
u8 socket;
/* Bank identifier supplied by the caller */
u64 bank_id;
/* Where the translated address is stored (points at an unsigned long) */
void *out_buf;
} __packed;
/* GUID handed to acpi_call_prm_handler() to select the translation handler. */
static const guid_t norm_to_sys_guid = GUID_INIT(0xE7180659, 0xA65D, 0x451D,
0x92, 0xCD, 0x2B, 0x56, 0xF1,
0x2B, 0xEB, 0xA6);
/*
 * prm_umc_norm_to_sys_addr() - translate a normalized address through PRM.
 * @socket_id: socket the address belongs to.
 * @bank_id:   bank identifier passed through to the PRM handler.
 * @addr:      normalized address to translate.
 *
 * Packs the arguments into a parameter buffer and calls the PRM handler
 * selected by norm_to_sys_guid; the handler's result is read back through
 * the buffer's out_buf pointer.
 *
 * Return: the translated address when the handler call succeeds, otherwise
 * the negative error code from acpi_call_prm_handler() converted to
 * unsigned long.
 */
unsigned long prm_umc_norm_to_sys_addr(u8 socket_id, u64 bank_id, unsigned long addr)
{
	unsigned long sys_addr;
	struct norm_to_sys_param_buf params = {
		.norm_addr	= addr,
		.socket		= socket_id,
		.bank_id	= bank_id,
		.out_buf	= &sys_addr,
	};
	int err;

	err = acpi_call_prm_handler(norm_to_sys_guid, &params);

	switch (err) {
	case 0:
		return sys_addr;
	case -ENODEV:
		pr_debug("PRM module/handler not available\n");
		break;
	default:
		pr_notice_once("PRM address translation failed\n");
		break;
	}

	return err;
}

View File

@ -401,9 +401,14 @@ unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err)
u8 coh_st_inst_id = get_coh_st_inst_id(err);
unsigned long addr = get_addr(err->addr);
u8 die_id = get_die_id(err);
unsigned long ret_addr;
pr_debug("socket_id=0x%x die_id=0x%x coh_st_inst_id=0x%x addr=0x%016lx",
socket_id, die_id, coh_st_inst_id, addr);
ret_addr = prm_umc_norm_to_sys_addr(socket_id, err->ipid, addr);
if (!IS_ERR_VALUE(ret_addr))
return ret_addr;
return norm_to_sys_addr(socket_id, die_id, coh_st_inst_id, addr);
}

View File

@ -2,6 +2,11 @@
/*
 * PRMT entry points. Without CONFIG_ACPI_PRMT, init_prmt() is a no-op and
 * acpi_call_prm_handler() always fails with -EOPNOTSUPP.
 */
#ifdef CONFIG_ACPI_PRMT
void init_prmt(void);
int acpi_call_prm_handler(guid_t handler_guid, void *param_buffer);
#else
static inline void init_prmt(void) { }
static inline int acpi_call_prm_handler(guid_t handler_guid, void *param_buffer)
{
return -EOPNOTSUPP;
}
#endif