Merge branch 'apei-release' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6
* 'apei-release' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6: ACPI, APEI, EINJ Param support is disabled by default APEI GHES: 32-bit buildfix ACPI: APEI build fix ACPI, APEI, GHES: Add hardware memory error recovery support HWPoison: add memory_failure_queue() ACPI, APEI, GHES, Error records content based throttle ACPI, APEI, GHES, printk support for recoverable error via NMI lib, Make gen_pool memory allocator lockless lib, Add lock-less NULL terminated single list Add Kconfig option ARCH_HAVE_NMI_SAFE_CMPXCHG ACPI, APEI, Add WHEA _OSC support ACPI, APEI, Add APEI bit support in generic _OSC call ACPI, APEI, GHES, Support disable GHES at boot time ACPI, APEI, GHES, Prevent GHES to be built as module ACPI, APEI, Use apei_exec_run_optional in APEI EINJ and ERST ACPI, APEI, Add apei_exec_run_optional ACPI, APEI, GHES, Do not ratelimit fatal error printk before panic ACPI, APEI, ERST, Fix erst-dbg long record reading issue ACPI, APEI, ERST, Prevent erst_dbg from loading if ERST is disabled
This commit is contained in:
commit
c0c770e610
@ -48,12 +48,19 @@ directory apei/einj. The following files are provided.
|
||||
- param1
|
||||
This file is used to set the first error parameter value. Effect of
|
||||
parameter depends on error_type specified. For memory error, this is
|
||||
physical memory address.
|
||||
physical memory address. Only available if param_extension module
|
||||
parameter is specified.
|
||||
|
||||
- param2
|
||||
This file is used to set the second error parameter value. Effect of
|
||||
parameter depends on error_type specified. For memory error, this is
|
||||
physical memory address mask.
|
||||
physical memory address mask. Only available if param_extension
|
||||
module parameter is specified.
|
||||
|
||||
Injecting parameter support is a BIOS version specific extension, that
|
||||
is, it only works on some BIOS versions. If you want to use it, please
|
||||
make sure your BIOS version has the proper support and specify
|
||||
"param_extension=y" as a module parameter.
|
||||
|
||||
For more information about EINJ, please refer to ACPI specification
|
||||
version 4.0, section 17.5.
|
||||
|
@ -178,4 +178,7 @@ config HAVE_ARCH_MUTEX_CPU_RELAX
|
||||
config HAVE_RCU_TABLE_FREE
|
||||
bool
|
||||
|
||||
config ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||
bool
|
||||
|
||||
source "kernel/gcov/Kconfig"
|
||||
|
@ -14,6 +14,7 @@ config ALPHA
|
||||
select AUTO_IRQ_AFFINITY if SMP
|
||||
select GENERIC_IRQ_SHOW
|
||||
select ARCH_WANT_OPTIONAL_GPIOLIB
|
||||
select ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||
help
|
||||
The Alpha is a 64-bit general-purpose processor designed and
|
||||
marketed by the Digital Equipment Corporation of blessed memory,
|
||||
|
@ -10,6 +10,7 @@ config AVR32
|
||||
select GENERIC_IRQ_PROBE
|
||||
select HARDIRQS_SW_RESEND
|
||||
select GENERIC_IRQ_SHOW
|
||||
select ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||
help
|
||||
AVR32 is a high-performance 32-bit RISC microprocessor core,
|
||||
designed for cost-sensitive embedded applications, with particular
|
||||
|
@ -7,6 +7,7 @@ config FRV
|
||||
select HAVE_PERF_EVENTS
|
||||
select HAVE_GENERIC_HARDIRQS
|
||||
select GENERIC_IRQ_SHOW
|
||||
select ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||
|
||||
config ZONE_DMA
|
||||
bool
|
||||
|
@ -28,6 +28,7 @@ config IA64
|
||||
select IRQ_PER_CPU
|
||||
select GENERIC_IRQ_SHOW
|
||||
select ARCH_WANT_OPTIONAL_GPIOLIB
|
||||
select ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||
default y
|
||||
help
|
||||
The Itanium Processor Family is Intel's 64-bit successor to
|
||||
|
@ -6,6 +6,7 @@ config M68K
|
||||
select GENERIC_ATOMIC64 if MMU
|
||||
select HAVE_GENERIC_HARDIRQS if !MMU
|
||||
select GENERIC_IRQ_SHOW if !MMU
|
||||
select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS
|
||||
|
||||
config RWSEM_GENERIC_SPINLOCK
|
||||
bool
|
||||
|
@ -15,6 +15,7 @@ config PARISC
|
||||
select HAVE_GENERIC_HARDIRQS
|
||||
select GENERIC_IRQ_PROBE
|
||||
select IRQ_PER_CPU
|
||||
select ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||
|
||||
help
|
||||
The PA-RISC microprocessor is designed by Hewlett-Packard and used
|
||||
|
@ -136,6 +136,7 @@ config PPC
|
||||
select HAVE_SYSCALL_TRACEPOINTS
|
||||
select HAVE_BPF_JIT if (PPC64 && NET)
|
||||
select HAVE_ARCH_JUMP_LABEL
|
||||
select ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||
|
||||
config EARLY_PRINTK
|
||||
bool
|
||||
|
@ -81,6 +81,7 @@ config S390
|
||||
select INIT_ALL_POSSIBLE
|
||||
select HAVE_IRQ_WORK
|
||||
select HAVE_PERF_EVENTS
|
||||
select ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||
select HAVE_KERNEL_GZIP
|
||||
select HAVE_KERNEL_BZIP2
|
||||
select HAVE_KERNEL_LZMA
|
||||
|
@ -11,6 +11,7 @@ config SUPERH
|
||||
select HAVE_DMA_ATTRS
|
||||
select HAVE_IRQ_WORK
|
||||
select HAVE_PERF_EVENTS
|
||||
select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A)
|
||||
select PERF_USE_VMALLOC
|
||||
select HAVE_KERNEL_GZIP
|
||||
select HAVE_KERNEL_BZIP2
|
||||
|
@ -54,6 +54,7 @@ config SPARC64
|
||||
select HAVE_PERF_EVENTS
|
||||
select PERF_USE_VMALLOC
|
||||
select IRQ_PREFLOW_FASTEOI
|
||||
select ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||
|
||||
config ARCH_DEFCONFIG
|
||||
string
|
||||
|
@ -12,6 +12,7 @@ config TILE
|
||||
select GENERIC_PENDING_IRQ if SMP
|
||||
select GENERIC_IRQ_SHOW
|
||||
select SYS_HYPERVISOR
|
||||
select ARCH_HAVE_NMI_SAFE_CMPXCHG if !M386
|
||||
|
||||
# FIXME: investigate whether we need/want these options.
|
||||
# select HAVE_IOREMAP_PROT
|
||||
|
@ -72,6 +72,7 @@ config X86
|
||||
select USE_GENERIC_SMP_HELPERS if SMP
|
||||
select HAVE_BPF_JIT if (X86_64 && NET)
|
||||
select CLKEVT_I8253
|
||||
select ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||
|
||||
config INSTRUCTION_DECODER
|
||||
def_bool (KPROBES || PERF_EVENTS)
|
||||
|
@ -10,9 +10,11 @@ config ACPI_APEI
|
||||
error injection.
|
||||
|
||||
config ACPI_APEI_GHES
|
||||
tristate "APEI Generic Hardware Error Source"
|
||||
bool "APEI Generic Hardware Error Source"
|
||||
depends on ACPI_APEI && X86
|
||||
select ACPI_HED
|
||||
select LLIST
|
||||
select GENERIC_ALLOCATOR
|
||||
help
|
||||
Generic Hardware Error Source provides a way to report
|
||||
platform hardware errors (such as that from chipset). It
|
||||
@ -30,6 +32,13 @@ config ACPI_APEI_PCIEAER
|
||||
PCIe AER errors may be reported via APEI firmware first mode.
|
||||
Turn on this option to enable the corresponding support.
|
||||
|
||||
config ACPI_APEI_MEMORY_FAILURE
|
||||
bool "APEI memory error recovering support"
|
||||
depends on ACPI_APEI && MEMORY_FAILURE
|
||||
help
|
||||
Memory errors may be reported via APEI firmware first mode.
|
||||
Turn on this option to enable the memory recovering support.
|
||||
|
||||
config ACPI_APEI_EINJ
|
||||
tristate "APEI Error INJection (EINJ)"
|
||||
depends on ACPI_APEI && DEBUG_FS
|
||||
|
@ -157,9 +157,10 @@ EXPORT_SYMBOL_GPL(apei_exec_noop);
|
||||
* Interpret the specified action. Go through whole action table,
|
||||
* execute all instructions belonging to the action.
|
||||
*/
|
||||
int apei_exec_run(struct apei_exec_context *ctx, u8 action)
|
||||
int __apei_exec_run(struct apei_exec_context *ctx, u8 action,
|
||||
bool optional)
|
||||
{
|
||||
int rc;
|
||||
int rc = -ENOENT;
|
||||
u32 i, ip;
|
||||
struct acpi_whea_header *entry;
|
||||
apei_exec_ins_func_t run;
|
||||
@ -198,9 +199,9 @@ rewind:
|
||||
goto rewind;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return !optional && rc < 0 ? rc : 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(apei_exec_run);
|
||||
EXPORT_SYMBOL_GPL(__apei_exec_run);
|
||||
|
||||
typedef int (*apei_exec_entry_func_t)(struct apei_exec_context *ctx,
|
||||
struct acpi_whea_header *entry,
|
||||
@ -603,3 +604,29 @@ struct dentry *apei_get_debugfs_dir(void)
|
||||
return dapei;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(apei_get_debugfs_dir);
|
||||
|
||||
int apei_osc_setup(void)
|
||||
{
|
||||
static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c";
|
||||
acpi_handle handle;
|
||||
u32 capbuf[3];
|
||||
struct acpi_osc_context context = {
|
||||
.uuid_str = whea_uuid_str,
|
||||
.rev = 1,
|
||||
.cap.length = sizeof(capbuf),
|
||||
.cap.pointer = capbuf,
|
||||
};
|
||||
|
||||
capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
|
||||
capbuf[OSC_SUPPORT_TYPE] = 0;
|
||||
capbuf[OSC_CONTROL_TYPE] = 0;
|
||||
|
||||
if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))
|
||||
|| ACPI_FAILURE(acpi_run_osc(handle, &context)))
|
||||
return -EIO;
|
||||
else {
|
||||
kfree(context.ret.pointer);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(apei_osc_setup);
|
||||
|
@ -50,7 +50,18 @@ static inline u64 apei_exec_ctx_get_output(struct apei_exec_context *ctx)
|
||||
return ctx->value;
|
||||
}
|
||||
|
||||
int apei_exec_run(struct apei_exec_context *ctx, u8 action);
|
||||
int __apei_exec_run(struct apei_exec_context *ctx, u8 action, bool optional);
|
||||
|
||||
/*
 * Run the given action through __apei_exec_run() with the "optional"
 * flag cleared.
 */
static inline int apei_exec_run(struct apei_exec_context *ctx, u8 action)
{
	return __apei_exec_run(ctx, action, false);
}
|
||||
|
||||
/* It is optional whether the firmware provides the action */
|
||||
static inline int apei_exec_run_optional(struct apei_exec_context *ctx, u8 action)
|
||||
{
|
||||
return __apei_exec_run(ctx, action, 1);
|
||||
}
|
||||
|
||||
/* Common instruction implementation */
|
||||
|
||||
@ -113,4 +124,6 @@ void apei_estatus_print(const char *pfx,
|
||||
const struct acpi_hest_generic_status *estatus);
|
||||
int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus);
|
||||
int apei_estatus_check(const struct acpi_hest_generic_status *estatus);
|
||||
|
||||
int apei_osc_setup(void);
|
||||
#endif
|
||||
|
@ -46,7 +46,8 @@
|
||||
* Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the
|
||||
* EINJ table through an unpublished extension. Use with caution as
|
||||
* most will ignore the parameter and make their own choice of address
|
||||
* for error injection.
|
||||
* for error injection. This extension is used only if
|
||||
* param_extension module parameter is specified.
|
||||
*/
|
||||
struct einj_parameter {
|
||||
u64 type;
|
||||
@ -65,6 +66,9 @@ struct einj_parameter {
|
||||
((struct acpi_whea_header *)((char *)(tab) + \
|
||||
sizeof(struct acpi_table_einj)))
|
||||
|
||||
static bool param_extension;
|
||||
module_param(param_extension, bool, 0);
|
||||
|
||||
static struct acpi_table_einj *einj_tab;
|
||||
|
||||
static struct apei_resources einj_resources;
|
||||
@ -285,7 +289,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
|
||||
|
||||
einj_exec_ctx_init(&ctx);
|
||||
|
||||
rc = apei_exec_run(&ctx, ACPI_EINJ_BEGIN_OPERATION);
|
||||
rc = apei_exec_run_optional(&ctx, ACPI_EINJ_BEGIN_OPERATION);
|
||||
if (rc)
|
||||
return rc;
|
||||
apei_exec_ctx_set_input(&ctx, type);
|
||||
@ -323,7 +327,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
|
||||
rc = __einj_error_trigger(trigger_paddr);
|
||||
if (rc)
|
||||
return rc;
|
||||
rc = apei_exec_run(&ctx, ACPI_EINJ_END_OPERATION);
|
||||
rc = apei_exec_run_optional(&ctx, ACPI_EINJ_END_OPERATION);
|
||||
|
||||
return rc;
|
||||
}
|
||||
@ -489,14 +493,6 @@ static int __init einj_init(void)
|
||||
einj_debug_dir, NULL, &error_type_fops);
|
||||
if (!fentry)
|
||||
goto err_cleanup;
|
||||
fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR,
|
||||
einj_debug_dir, &error_param1);
|
||||
if (!fentry)
|
||||
goto err_cleanup;
|
||||
fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR,
|
||||
einj_debug_dir, &error_param2);
|
||||
if (!fentry)
|
||||
goto err_cleanup;
|
||||
fentry = debugfs_create_file("error_inject", S_IWUSR,
|
||||
einj_debug_dir, NULL, &error_inject_fops);
|
||||
if (!fentry)
|
||||
@ -513,12 +509,23 @@ static int __init einj_init(void)
|
||||
rc = apei_exec_pre_map_gars(&ctx);
|
||||
if (rc)
|
||||
goto err_release;
|
||||
param_paddr = einj_get_parameter_address();
|
||||
if (param_paddr) {
|
||||
einj_param = ioremap(param_paddr, sizeof(*einj_param));
|
||||
rc = -ENOMEM;
|
||||
if (!einj_param)
|
||||
goto err_unmap;
|
||||
if (param_extension) {
|
||||
param_paddr = einj_get_parameter_address();
|
||||
if (param_paddr) {
|
||||
einj_param = ioremap(param_paddr, sizeof(*einj_param));
|
||||
rc = -ENOMEM;
|
||||
if (!einj_param)
|
||||
goto err_unmap;
|
||||
fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR,
|
||||
einj_debug_dir, &error_param1);
|
||||
if (!fentry)
|
||||
goto err_unmap;
|
||||
fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR,
|
||||
einj_debug_dir, &error_param2);
|
||||
if (!fentry)
|
||||
goto err_unmap;
|
||||
} else
|
||||
pr_warn(EINJ_PFX "Parameter extension is not supported.\n");
|
||||
}
|
||||
|
||||
pr_info(EINJ_PFX "Error INJection is initialized.\n");
|
||||
@ -526,6 +533,8 @@ static int __init einj_init(void)
|
||||
return 0;
|
||||
|
||||
err_unmap:
|
||||
if (einj_param)
|
||||
iounmap(einj_param);
|
||||
apei_exec_post_unmap_gars(&ctx);
|
||||
err_release:
|
||||
apei_resources_release(&einj_resources);
|
||||
|
@ -33,7 +33,7 @@
|
||||
|
||||
#define ERST_DBG_PFX "ERST DBG: "
|
||||
|
||||
#define ERST_DBG_RECORD_LEN_MAX 4096
|
||||
#define ERST_DBG_RECORD_LEN_MAX 0x4000
|
||||
|
||||
static void *erst_dbg_buf;
|
||||
static unsigned int erst_dbg_buf_len;
|
||||
@ -213,6 +213,10 @@ static struct miscdevice erst_dbg_dev = {
|
||||
|
||||
static __init int erst_dbg_init(void)
|
||||
{
|
||||
if (erst_disable) {
|
||||
pr_info(ERST_DBG_PFX "ERST support is disabled.\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
return misc_register(&erst_dbg_dev);
|
||||
}
|
||||
|
||||
|
@ -642,7 +642,7 @@ static int __erst_write_to_storage(u64 offset)
|
||||
int rc;
|
||||
|
||||
erst_exec_ctx_init(&ctx);
|
||||
rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_WRITE);
|
||||
rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_WRITE);
|
||||
if (rc)
|
||||
return rc;
|
||||
apei_exec_ctx_set_input(&ctx, offset);
|
||||
@ -666,7 +666,7 @@ static int __erst_write_to_storage(u64 offset)
|
||||
if (rc)
|
||||
return rc;
|
||||
val = apei_exec_ctx_get_output(&ctx);
|
||||
rc = apei_exec_run(&ctx, ACPI_ERST_END);
|
||||
rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
@ -681,7 +681,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset)
|
||||
int rc;
|
||||
|
||||
erst_exec_ctx_init(&ctx);
|
||||
rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_READ);
|
||||
rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_READ);
|
||||
if (rc)
|
||||
return rc;
|
||||
apei_exec_ctx_set_input(&ctx, offset);
|
||||
@ -709,7 +709,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset)
|
||||
if (rc)
|
||||
return rc;
|
||||
val = apei_exec_ctx_get_output(&ctx);
|
||||
rc = apei_exec_run(&ctx, ACPI_ERST_END);
|
||||
rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
@ -724,7 +724,7 @@ static int __erst_clear_from_storage(u64 record_id)
|
||||
int rc;
|
||||
|
||||
erst_exec_ctx_init(&ctx);
|
||||
rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_CLEAR);
|
||||
rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_CLEAR);
|
||||
if (rc)
|
||||
return rc;
|
||||
apei_exec_ctx_set_input(&ctx, record_id);
|
||||
@ -748,7 +748,7 @@ static int __erst_clear_from_storage(u64 record_id)
|
||||
if (rc)
|
||||
return rc;
|
||||
val = apei_exec_ctx_get_output(&ctx);
|
||||
rc = apei_exec_run(&ctx, ACPI_ERST_END);
|
||||
rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
|
@ -12,7 +12,7 @@
|
||||
* For more information about Generic Hardware Error Source, please
|
||||
* refer to ACPI Specification version 4.0, section 17.3.2.6
|
||||
*
|
||||
* Copyright 2010 Intel Corp.
|
||||
* Copyright 2010,2011 Intel Corp.
|
||||
* Author: Huang Ying <ying.huang@intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
@ -42,6 +42,9 @@
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/ratelimit.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/irq_work.h>
|
||||
#include <linux/llist.h>
|
||||
#include <linux/genalloc.h>
|
||||
#include <acpi/apei.h>
|
||||
#include <acpi/atomicio.h>
|
||||
#include <acpi/hed.h>
|
||||
@ -53,6 +56,30 @@
|
||||
#define GHES_PFX "GHES: "
|
||||
|
||||
#define GHES_ESTATUS_MAX_SIZE 65536
|
||||
#define GHES_ESOURCE_PREALLOC_MAX_SIZE 65536
|
||||
|
||||
#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3
|
||||
|
||||
/* This is just an estimation for memory pool allocation */
|
||||
#define GHES_ESTATUS_CACHE_AVG_SIZE 512
|
||||
|
||||
#define GHES_ESTATUS_CACHES_SIZE 4
|
||||
|
||||
#define GHES_ESTATUS_IN_CACHE_MAX_NSEC 10000000000ULL
|
||||
/* Prevent too many caches from being allocated because of RCU */
|
||||
#define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2)
|
||||
|
||||
#define GHES_ESTATUS_CACHE_LEN(estatus_len) \
|
||||
(sizeof(struct ghes_estatus_cache) + (estatus_len))
|
||||
#define GHES_ESTATUS_FROM_CACHE(estatus_cache) \
|
||||
((struct acpi_hest_generic_status *) \
|
||||
((struct ghes_estatus_cache *)(estatus_cache) + 1))
|
||||
|
||||
#define GHES_ESTATUS_NODE_LEN(estatus_len) \
|
||||
(sizeof(struct ghes_estatus_node) + (estatus_len))
|
||||
#define GHES_ESTATUS_FROM_NODE(estatus_node) \
|
||||
((struct acpi_hest_generic_status *) \
|
||||
((struct ghes_estatus_node *)(estatus_node) + 1))
|
||||
|
||||
/*
|
||||
* One struct ghes is created for each generic hardware error source.
|
||||
@ -77,6 +104,22 @@ struct ghes {
|
||||
};
|
||||
};
|
||||
|
||||
/*
 * Header of an error status record saved for later processing.
 *
 * llnode:  link used to queue the node on ghes_estatus_llist.
 * generic: the error source the record was read from.
 *
 * The error status data itself is stored immediately after this
 * header; see GHES_ESTATUS_FROM_NODE() and GHES_ESTATUS_NODE_LEN().
 */
struct ghes_estatus_node {
|
||||
struct llist_node llnode;
|
||||
struct acpi_hest_generic *generic;
|
||||
};
|
||||
|
||||
/*
 * Header of one entry in ghes_estatus_caches[].
 *
 * estatus_len: length of the cached error status data.
 * count:       incremented each time an identical error status is seen
 *              again (see ghes_estatus_cached()).
 * generic:     the error source the cached record came from.
 * time_in:     sched_clock() value taken when the entry was created.
 * rcu:         used to free the entry via call_rcu() after it has been
 *              replaced in the array.
 *
 * The cached error status data is stored immediately after this
 * header; see GHES_ESTATUS_FROM_CACHE() and GHES_ESTATUS_CACHE_LEN().
 */
struct ghes_estatus_cache {
|
||||
u32 estatus_len;
|
||||
atomic_t count;
|
||||
struct acpi_hest_generic *generic;
|
||||
unsigned long long time_in;
|
||||
struct rcu_head rcu;
|
||||
};
|
||||
|
||||
int ghes_disable;
|
||||
module_param_named(disable, ghes_disable, bool, 0);
|
||||
|
||||
static int ghes_panic_timeout __read_mostly = 30;
|
||||
|
||||
/*
|
||||
@ -121,6 +164,22 @@ static struct vm_struct *ghes_ioremap_area;
|
||||
static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
|
||||
static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
|
||||
|
||||
/*
|
||||
* printk is not safe in NMI context. So in NMI handler, we allocate
|
||||
* required memory from lock-less memory allocator
|
||||
* (ghes_estatus_pool), save estatus into it, put them into lock-less
|
||||
* list (ghes_estatus_llist), then delay printk into IRQ context via
|
||||
* irq_work (ghes_proc_irq_work). ghes_estatus_pool_size_request records
|
||||
* the pool size required by all NMI error sources.
|
||||
*/
|
||||
static struct gen_pool *ghes_estatus_pool;
|
||||
static unsigned long ghes_estatus_pool_size_request;
|
||||
static struct llist_head ghes_estatus_llist;
|
||||
static struct irq_work ghes_proc_irq_work;
|
||||
|
||||
struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
|
||||
static atomic_t ghes_estatus_cache_alloced;
|
||||
|
||||
static int ghes_ioremap_init(void)
|
||||
{
|
||||
ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES,
|
||||
@ -180,6 +239,55 @@ static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
|
||||
__flush_tlb_one(vaddr);
|
||||
}
|
||||
|
||||
static int ghes_estatus_pool_init(void)
|
||||
{
|
||||
ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
|
||||
if (!ghes_estatus_pool)
|
||||
return -ENOMEM;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool,
|
||||
struct gen_pool_chunk *chunk,
|
||||
void *data)
|
||||
{
|
||||
free_page(chunk->start_addr);
|
||||
}
|
||||
|
||||
static void ghes_estatus_pool_exit(void)
|
||||
{
|
||||
gen_pool_for_each_chunk(ghes_estatus_pool,
|
||||
ghes_estatus_pool_free_chunk_page, NULL);
|
||||
gen_pool_destroy(ghes_estatus_pool);
|
||||
}
|
||||
|
||||
static int ghes_estatus_pool_expand(unsigned long len)
|
||||
{
|
||||
unsigned long i, pages, size, addr;
|
||||
int ret;
|
||||
|
||||
ghes_estatus_pool_size_request += PAGE_ALIGN(len);
|
||||
size = gen_pool_size(ghes_estatus_pool);
|
||||
if (size >= ghes_estatus_pool_size_request)
|
||||
return 0;
|
||||
pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE;
|
||||
for (i = 0; i < pages; i++) {
|
||||
addr = __get_free_page(GFP_KERNEL);
|
||||
if (!addr)
|
||||
return -ENOMEM;
|
||||
ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ghes_estatus_pool_shrink(unsigned long len)
|
||||
{
|
||||
ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
|
||||
}
|
||||
|
||||
static struct ghes *ghes_new(struct acpi_hest_generic *generic)
|
||||
{
|
||||
struct ghes *ghes;
|
||||
@ -341,43 +449,196 @@ static void ghes_clear_estatus(struct ghes *ghes)
|
||||
ghes->flags &= ~GHES_TO_CLEAR;
|
||||
}
|
||||
|
||||
static void ghes_do_proc(struct ghes *ghes)
|
||||
static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
|
||||
{
|
||||
int sev, processed = 0;
|
||||
int sev, sec_sev;
|
||||
struct acpi_hest_generic_data *gdata;
|
||||
|
||||
sev = ghes_severity(ghes->estatus->error_severity);
|
||||
apei_estatus_for_each_section(ghes->estatus, gdata) {
|
||||
#ifdef CONFIG_X86_MCE
|
||||
sev = ghes_severity(estatus->error_severity);
|
||||
apei_estatus_for_each_section(estatus, gdata) {
|
||||
sec_sev = ghes_severity(gdata->error_severity);
|
||||
if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
|
||||
CPER_SEC_PLATFORM_MEM)) {
|
||||
apei_mce_report_mem_error(
|
||||
sev == GHES_SEV_CORRECTED,
|
||||
(struct cper_sec_mem_err *)(gdata+1));
|
||||
processed = 1;
|
||||
}
|
||||
struct cper_sec_mem_err *mem_err;
|
||||
mem_err = (struct cper_sec_mem_err *)(gdata+1);
|
||||
#ifdef CONFIG_X86_MCE
|
||||
apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
|
||||
mem_err);
|
||||
#endif
|
||||
#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
|
||||
if (sev == GHES_SEV_RECOVERABLE &&
|
||||
sec_sev == GHES_SEV_RECOVERABLE &&
|
||||
mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
|
||||
unsigned long pfn;
|
||||
pfn = mem_err->physical_addr >> PAGE_SHIFT;
|
||||
memory_failure_queue(pfn, 0, 0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void ghes_print_estatus(const char *pfx, struct ghes *ghes)
|
||||
static void __ghes_print_estatus(const char *pfx,
|
||||
const struct acpi_hest_generic *generic,
|
||||
const struct acpi_hest_generic_status *estatus)
|
||||
{
|
||||
/* Not more than 2 messages every 5 seconds */
|
||||
static DEFINE_RATELIMIT_STATE(ratelimit, 5*HZ, 2);
|
||||
|
||||
if (pfx == NULL) {
|
||||
if (ghes_severity(ghes->estatus->error_severity) <=
|
||||
if (ghes_severity(estatus->error_severity) <=
|
||||
GHES_SEV_CORRECTED)
|
||||
pfx = KERN_WARNING HW_ERR;
|
||||
else
|
||||
pfx = KERN_ERR HW_ERR;
|
||||
}
|
||||
if (__ratelimit(&ratelimit)) {
|
||||
printk(
|
||||
"%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
|
||||
pfx, ghes->generic->header.source_id);
|
||||
apei_estatus_print(pfx, ghes->estatus);
|
||||
printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
|
||||
pfx, generic->header.source_id);
|
||||
apei_estatus_print(pfx, estatus);
|
||||
}
|
||||
|
||||
static int ghes_print_estatus(const char *pfx,
|
||||
const struct acpi_hest_generic *generic,
|
||||
const struct acpi_hest_generic_status *estatus)
|
||||
{
|
||||
/* Not more than 2 messages every 5 seconds */
|
||||
static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
|
||||
static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
|
||||
struct ratelimit_state *ratelimit;
|
||||
|
||||
if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
|
||||
ratelimit = &ratelimit_corrected;
|
||||
else
|
||||
ratelimit = &ratelimit_uncorrected;
|
||||
if (__ratelimit(ratelimit)) {
|
||||
__ghes_print_estatus(pfx, generic, estatus);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* GHES error status reporting throttle, to report more kinds of
|
||||
* errors, instead of just most frequently occurred errors.
|
||||
*/
|
||||
static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
|
||||
{
|
||||
u32 len;
|
||||
int i, cached = 0;
|
||||
unsigned long long now;
|
||||
struct ghes_estatus_cache *cache;
|
||||
struct acpi_hest_generic_status *cache_estatus;
|
||||
|
||||
len = apei_estatus_len(estatus);
|
||||
rcu_read_lock();
|
||||
for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
|
||||
cache = rcu_dereference(ghes_estatus_caches[i]);
|
||||
if (cache == NULL)
|
||||
continue;
|
||||
if (len != cache->estatus_len)
|
||||
continue;
|
||||
cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
|
||||
if (memcmp(estatus, cache_estatus, len))
|
||||
continue;
|
||||
atomic_inc(&cache->count);
|
||||
now = sched_clock();
|
||||
if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
|
||||
cached = 1;
|
||||
break;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return cached;
|
||||
}
|
||||
|
||||
static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
|
||||
struct acpi_hest_generic *generic,
|
||||
struct acpi_hest_generic_status *estatus)
|
||||
{
|
||||
int alloced;
|
||||
u32 len, cache_len;
|
||||
struct ghes_estatus_cache *cache;
|
||||
struct acpi_hest_generic_status *cache_estatus;
|
||||
|
||||
alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
|
||||
if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
|
||||
atomic_dec(&ghes_estatus_cache_alloced);
|
||||
return NULL;
|
||||
}
|
||||
len = apei_estatus_len(estatus);
|
||||
cache_len = GHES_ESTATUS_CACHE_LEN(len);
|
||||
cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
|
||||
if (!cache) {
|
||||
atomic_dec(&ghes_estatus_cache_alloced);
|
||||
return NULL;
|
||||
}
|
||||
cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
|
||||
memcpy(cache_estatus, estatus, len);
|
||||
cache->estatus_len = len;
|
||||
atomic_set(&cache->count, 0);
|
||||
cache->generic = generic;
|
||||
cache->time_in = sched_clock();
|
||||
return cache;
|
||||
}
|
||||
|
||||
static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache)
|
||||
{
|
||||
u32 len;
|
||||
|
||||
len = apei_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
|
||||
len = GHES_ESTATUS_CACHE_LEN(len);
|
||||
gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
|
||||
atomic_dec(&ghes_estatus_cache_alloced);
|
||||
}
|
||||
|
||||
static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
|
||||
{
|
||||
struct ghes_estatus_cache *cache;
|
||||
|
||||
cache = container_of(head, struct ghes_estatus_cache, rcu);
|
||||
ghes_estatus_cache_free(cache);
|
||||
}
|
||||
|
||||
static void ghes_estatus_cache_add(
|
||||
struct acpi_hest_generic *generic,
|
||||
struct acpi_hest_generic_status *estatus)
|
||||
{
|
||||
int i, slot = -1, count;
|
||||
unsigned long long now, duration, period, max_period = 0;
|
||||
struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache;
|
||||
|
||||
new_cache = ghes_estatus_cache_alloc(generic, estatus);
|
||||
if (new_cache == NULL)
|
||||
return;
|
||||
rcu_read_lock();
|
||||
now = sched_clock();
|
||||
for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
|
||||
cache = rcu_dereference(ghes_estatus_caches[i]);
|
||||
if (cache == NULL) {
|
||||
slot = i;
|
||||
slot_cache = NULL;
|
||||
break;
|
||||
}
|
||||
duration = now - cache->time_in;
|
||||
if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
|
||||
slot = i;
|
||||
slot_cache = cache;
|
||||
break;
|
||||
}
|
||||
count = atomic_read(&cache->count);
|
||||
period = duration;
|
||||
do_div(period, (count + 1));
|
||||
if (period > max_period) {
|
||||
max_period = period;
|
||||
slot = i;
|
||||
slot_cache = cache;
|
||||
}
|
||||
}
|
||||
/* new_cache must be put into array after its contents are written */
|
||||
smp_wmb();
|
||||
if (slot != -1 && cmpxchg(ghes_estatus_caches + slot,
|
||||
slot_cache, new_cache) == slot_cache) {
|
||||
if (slot_cache)
|
||||
call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free);
|
||||
} else
|
||||
ghes_estatus_cache_free(new_cache);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static int ghes_proc(struct ghes *ghes)
|
||||
@ -387,9 +648,11 @@ static int ghes_proc(struct ghes *ghes)
|
||||
rc = ghes_read_estatus(ghes, 0);
|
||||
if (rc)
|
||||
goto out;
|
||||
ghes_print_estatus(NULL, ghes);
|
||||
ghes_do_proc(ghes);
|
||||
|
||||
if (!ghes_estatus_cached(ghes->estatus)) {
|
||||
if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
|
||||
ghes_estatus_cache_add(ghes->generic, ghes->estatus);
|
||||
}
|
||||
ghes_do_proc(ghes->estatus);
|
||||
out:
|
||||
ghes_clear_estatus(ghes);
|
||||
return 0;
|
||||
@ -447,6 +710,45 @@ static int ghes_notify_sci(struct notifier_block *this,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void ghes_proc_in_irq(struct irq_work *irq_work)
|
||||
{
|
||||
struct llist_node *llnode, *next, *tail = NULL;
|
||||
struct ghes_estatus_node *estatus_node;
|
||||
struct acpi_hest_generic *generic;
|
||||
struct acpi_hest_generic_status *estatus;
|
||||
u32 len, node_len;
|
||||
|
||||
/*
|
||||
* Because the time order of estatus in list is reversed,
|
||||
* revert it back to proper order.
|
||||
*/
|
||||
llnode = llist_del_all(&ghes_estatus_llist);
|
||||
while (llnode) {
|
||||
next = llnode->next;
|
||||
llnode->next = tail;
|
||||
tail = llnode;
|
||||
llnode = next;
|
||||
}
|
||||
llnode = tail;
|
||||
while (llnode) {
|
||||
next = llnode->next;
|
||||
estatus_node = llist_entry(llnode, struct ghes_estatus_node,
|
||||
llnode);
|
||||
estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
|
||||
len = apei_estatus_len(estatus);
|
||||
node_len = GHES_ESTATUS_NODE_LEN(len);
|
||||
ghes_do_proc(estatus);
|
||||
if (!ghes_estatus_cached(estatus)) {
|
||||
generic = estatus_node->generic;
|
||||
if (ghes_print_estatus(NULL, generic, estatus))
|
||||
ghes_estatus_cache_add(generic, estatus);
|
||||
}
|
||||
gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
|
||||
node_len);
|
||||
llnode = next;
|
||||
}
|
||||
}
|
||||
|
||||
static int ghes_notify_nmi(struct notifier_block *this,
|
||||
unsigned long cmd, void *data)
|
||||
{
|
||||
@ -476,7 +778,8 @@ static int ghes_notify_nmi(struct notifier_block *this,
|
||||
|
||||
if (sev_global >= GHES_SEV_PANIC) {
|
||||
oops_begin();
|
||||
ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global);
|
||||
__ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global->generic,
|
||||
ghes_global->estatus);
|
||||
/* reboot to log the error! */
|
||||
if (panic_timeout == 0)
|
||||
panic_timeout = ghes_panic_timeout;
|
||||
@ -484,12 +787,34 @@ static int ghes_notify_nmi(struct notifier_block *this,
|
||||
}
|
||||
|
||||
list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
|
||||
#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||
u32 len, node_len;
|
||||
struct ghes_estatus_node *estatus_node;
|
||||
struct acpi_hest_generic_status *estatus;
|
||||
#endif
|
||||
if (!(ghes->flags & GHES_TO_CLEAR))
|
||||
continue;
|
||||
/* Do not print estatus because printk is not NMI safe */
|
||||
ghes_do_proc(ghes);
|
||||
#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||
if (ghes_estatus_cached(ghes->estatus))
|
||||
goto next;
|
||||
/* Save estatus for further processing in IRQ context */
|
||||
len = apei_estatus_len(ghes->estatus);
|
||||
node_len = GHES_ESTATUS_NODE_LEN(len);
|
||||
estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool,
|
||||
node_len);
|
||||
if (estatus_node) {
|
||||
estatus_node->generic = ghes->generic;
|
||||
estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
|
||||
memcpy(estatus, ghes->estatus, len);
|
||||
llist_add(&estatus_node->llnode, &ghes_estatus_llist);
|
||||
}
|
||||
next:
|
||||
#endif
|
||||
ghes_clear_estatus(ghes);
|
||||
}
|
||||
#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||
irq_work_queue(&ghes_proc_irq_work);
|
||||
#endif
|
||||
|
||||
out:
|
||||
raw_spin_unlock(&ghes_nmi_lock);
|
||||
@ -504,10 +829,26 @@ static struct notifier_block ghes_notifier_nmi = {
|
||||
.notifier_call = ghes_notify_nmi,
|
||||
};
|
||||
|
||||
static unsigned long ghes_esource_prealloc_size(
|
||||
const struct acpi_hest_generic *generic)
|
||||
{
|
||||
unsigned long block_length, prealloc_records, prealloc_size;
|
||||
|
||||
block_length = min_t(unsigned long, generic->error_block_length,
|
||||
GHES_ESTATUS_MAX_SIZE);
|
||||
prealloc_records = max_t(unsigned long,
|
||||
generic->records_to_preallocate, 1);
|
||||
prealloc_size = min_t(unsigned long, block_length * prealloc_records,
|
||||
GHES_ESOURCE_PREALLOC_MAX_SIZE);
|
||||
|
||||
return prealloc_size;
|
||||
}
|
||||
|
||||
static int __devinit ghes_probe(struct platform_device *ghes_dev)
|
||||
{
|
||||
struct acpi_hest_generic *generic;
|
||||
struct ghes *ghes = NULL;
|
||||
unsigned long len;
|
||||
int rc = -EINVAL;
|
||||
|
||||
generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
|
||||
@ -573,6 +914,8 @@ static int __devinit ghes_probe(struct platform_device *ghes_dev)
|
||||
mutex_unlock(&ghes_list_mutex);
|
||||
break;
|
||||
case ACPI_HEST_NOTIFY_NMI:
|
||||
len = ghes_esource_prealloc_size(generic);
|
||||
ghes_estatus_pool_expand(len);
|
||||
mutex_lock(&ghes_list_mutex);
|
||||
if (list_empty(&ghes_nmi))
|
||||
register_die_notifier(&ghes_notifier_nmi);
|
||||
@ -597,6 +940,7 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev)
|
||||
{
|
||||
struct ghes *ghes;
|
||||
struct acpi_hest_generic *generic;
|
||||
unsigned long len;
|
||||
|
||||
ghes = platform_get_drvdata(ghes_dev);
|
||||
generic = ghes->generic;
|
||||
@ -627,6 +971,8 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev)
|
||||
* freed after NMI handler finishes.
|
||||
*/
|
||||
synchronize_rcu();
|
||||
len = ghes_esource_prealloc_size(generic);
|
||||
ghes_estatus_pool_shrink(len);
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
@ -662,15 +1008,43 @@ static int __init ghes_init(void)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (ghes_disable) {
|
||||
pr_info(GHES_PFX "GHES is not enabled!\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
|
||||
|
||||
rc = ghes_ioremap_init();
|
||||
if (rc)
|
||||
goto err;
|
||||
|
||||
rc = platform_driver_register(&ghes_platform_driver);
|
||||
rc = ghes_estatus_pool_init();
|
||||
if (rc)
|
||||
goto err_ioremap_exit;
|
||||
|
||||
rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE *
|
||||
GHES_ESTATUS_CACHE_ALLOCED_MAX);
|
||||
if (rc)
|
||||
goto err_pool_exit;
|
||||
|
||||
rc = platform_driver_register(&ghes_platform_driver);
|
||||
if (rc)
|
||||
goto err_pool_exit;
|
||||
|
||||
rc = apei_osc_setup();
|
||||
if (rc == 0 && osc_sb_apei_support_acked)
|
||||
pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
|
||||
else if (rc == 0 && !osc_sb_apei_support_acked)
|
||||
pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n");
|
||||
else if (rc && osc_sb_apei_support_acked)
|
||||
pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
|
||||
else
|
||||
pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
|
||||
|
||||
return 0;
|
||||
err_pool_exit:
|
||||
ghes_estatus_pool_exit();
|
||||
err_ioremap_exit:
|
||||
ghes_ioremap_exit();
|
||||
err:
|
||||
@ -680,6 +1054,7 @@ err:
|
||||
static void __exit ghes_exit(void)
|
||||
{
|
||||
platform_driver_unregister(&ghes_platform_driver);
|
||||
ghes_estatus_pool_exit();
|
||||
ghes_ioremap_exit();
|
||||
}
|
||||
|
||||
|
@ -231,16 +231,17 @@ void __init acpi_hest_init(void)
|
||||
goto err;
|
||||
}
|
||||
|
||||
rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count);
|
||||
if (rc)
|
||||
goto err;
|
||||
|
||||
rc = hest_ghes_dev_register(ghes_count);
|
||||
if (!rc) {
|
||||
pr_info(HEST_PFX "Table parsing has been initialized.\n");
|
||||
return;
|
||||
if (!ghes_disable) {
|
||||
rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count);
|
||||
if (rc)
|
||||
goto err;
|
||||
rc = hest_ghes_dev_register(ghes_count);
|
||||
if (rc)
|
||||
goto err;
|
||||
}
|
||||
|
||||
pr_info(HEST_PFX "Table parsing has been initialized.\n");
|
||||
return;
|
||||
err:
|
||||
hest_disable = 1;
|
||||
}
|
||||
|
@ -39,6 +39,7 @@
|
||||
#include <linux/pci.h>
|
||||
#include <acpi/acpi_bus.h>
|
||||
#include <acpi/acpi_drivers.h>
|
||||
#include <acpi/apei.h>
|
||||
#include <linux/dmi.h>
|
||||
#include <linux/suspend.h>
|
||||
|
||||
@ -519,6 +520,7 @@ out_kfree:
|
||||
}
|
||||
EXPORT_SYMBOL(acpi_run_osc);
|
||||
|
||||
bool osc_sb_apei_support_acked;
|
||||
static u8 sb_uuid_str[] = "0811B06E-4A27-44F9-8D60-3CBBC22E7B48";
|
||||
static void acpi_bus_osc_support(void)
|
||||
{
|
||||
@ -541,11 +543,19 @@ static void acpi_bus_osc_support(void)
|
||||
#if defined(CONFIG_ACPI_PROCESSOR) || defined(CONFIG_ACPI_PROCESSOR_MODULE)
|
||||
capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_PPC_OST_SUPPORT;
|
||||
#endif
|
||||
|
||||
if (!ghes_disable)
|
||||
capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_APEI_SUPPORT;
|
||||
if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)))
|
||||
return;
|
||||
if (ACPI_SUCCESS(acpi_run_osc(handle, &context)))
|
||||
if (ACPI_SUCCESS(acpi_run_osc(handle, &context))) {
|
||||
u32 *capbuf_ret = context.ret.pointer;
|
||||
if (context.ret.length > OSC_SUPPORT_TYPE)
|
||||
osc_sb_apei_support_acked =
|
||||
capbuf_ret[OSC_SUPPORT_TYPE] & OSC_SB_APEI_SUPPORT;
|
||||
kfree(context.ret.pointer);
|
||||
/* do we need to check the returned cap? Sounds no */
|
||||
}
|
||||
/* do we need to check other returned cap? Sounds no */
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------------
|
||||
|
@ -18,6 +18,11 @@
|
||||
|
||||
extern int hest_disable;
|
||||
extern int erst_disable;
|
||||
#ifdef CONFIG_ACPI_APEI_GHES
|
||||
extern int ghes_disable;
|
||||
#else
|
||||
#define ghes_disable 1
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_ACPI_APEI
|
||||
void __init acpi_hest_init(void);
|
||||
|
@ -279,6 +279,8 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
|
||||
#define OSC_SB_CPUHP_OST_SUPPORT 8
|
||||
#define OSC_SB_APEI_SUPPORT 16
|
||||
|
||||
extern bool osc_sb_apei_support_acked;
|
||||
|
||||
/* PCI defined _OSC bits */
|
||||
/* _OSC DW1 Definition (OS Support Fields) */
|
||||
#define OSC_EXT_PCI_CONFIG_SUPPORT 1
|
||||
|
@ -146,6 +146,7 @@ extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order);
|
||||
extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits);
|
||||
extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits);
|
||||
|
||||
#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG))
|
||||
#define BITMAP_LAST_WORD_MASK(nbits) \
|
||||
( \
|
||||
((nbits) % BITS_PER_LONG) ? \
|
||||
|
@ -1,8 +1,26 @@
|
||||
/*
|
||||
* Basic general purpose allocator for managing special purpose memory
|
||||
* not managed by the regular kmalloc/kfree interface.
|
||||
* Uses for this includes on-device special memory, uncached memory
|
||||
* etc.
|
||||
* Basic general purpose allocator for managing special purpose
|
||||
* memory, for example, memory that is not managed by the regular
|
||||
* kmalloc/kfree interface.  Uses for this include on-device special
|
||||
* memory, uncached memory etc.
|
||||
*
|
||||
* It is safe to use the allocator in NMI handlers and other special
|
||||
* unblockable contexts that could otherwise deadlock on locks. This
|
||||
* is implemented by using atomic operations and retries on any
|
||||
* conflicts. The disadvantage is that there may be livelocks in
|
||||
* extreme cases. For better scalability, one allocator can be used
|
||||
* for each CPU.
|
||||
*
|
||||
* The lockless operation only works if there is enough memory
|
||||
* available. If new memory is added to the pool a lock has to be
|
||||
* still taken. So any user relying on locklessness has to ensure
|
||||
* that sufficient memory is preallocated.
|
||||
*
|
||||
* The basic atomic operation of this allocator is cmpxchg on long.
|
||||
* On architectures that don't have NMI-safe cmpxchg implementation,
|
||||
* the allocator can NOT be used in NMI handler.  So code that uses the
|
||||
* allocator in NMI handler should depend on
|
||||
* CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
|
||||
*
|
||||
* This source code is licensed under the GNU General Public License,
|
||||
* Version 2. See the file COPYING for more details.
|
||||
@ -15,7 +33,7 @@
|
||||
* General purpose special memory pool descriptor.
|
||||
*/
|
||||
struct gen_pool {
|
||||
rwlock_t lock;
|
||||
spinlock_t lock;
|
||||
struct list_head chunks; /* list of chunks in this pool */
|
||||
int min_alloc_order; /* minimum allocation order */
|
||||
};
|
||||
@ -24,8 +42,8 @@ struct gen_pool {
|
||||
* General purpose special memory pool chunk descriptor.
|
||||
*/
|
||||
struct gen_pool_chunk {
|
||||
spinlock_t lock;
|
||||
struct list_head next_chunk; /* next chunk in pool */
|
||||
atomic_t avail;
|
||||
phys_addr_t phys_addr; /* physical starting address of memory chunk */
|
||||
unsigned long start_addr; /* starting address of memory chunk */
|
||||
unsigned long end_addr; /* ending address of memory chunk */
|
||||
@ -56,4 +74,8 @@ static inline int gen_pool_add(struct gen_pool *pool, unsigned long addr,
|
||||
extern void gen_pool_destroy(struct gen_pool *);
|
||||
extern unsigned long gen_pool_alloc(struct gen_pool *, size_t);
|
||||
extern void gen_pool_free(struct gen_pool *, unsigned long, size_t);
|
||||
extern void gen_pool_for_each_chunk(struct gen_pool *,
|
||||
void (*)(struct gen_pool *, struct gen_pool_chunk *, void *), void *);
|
||||
extern size_t gen_pool_avail(struct gen_pool *);
|
||||
extern size_t gen_pool_size(struct gen_pool *);
|
||||
#endif /* __GENALLOC_H__ */
|
||||
|
126
include/linux/llist.h
Normal file
126
include/linux/llist.h
Normal file
@ -0,0 +1,126 @@
|
||||
#ifndef LLIST_H
|
||||
#define LLIST_H
|
||||
/*
|
||||
* Lock-less NULL terminated single linked list
|
||||
*
|
||||
* If there are multiple producers and multiple consumers, llist_add
|
||||
* can be used in producers and llist_del_all can be used in
|
||||
* consumers. They can work simultaneously without lock. But
|
||||
* llist_del_first can not be used here. Because llist_del_first
|
||||
* depends on list->first->next not changing if list->first is not
|
||||
* changed during its operation, but llist_del_first, llist_add,
|
||||
* llist_add (or llist_del_all, llist_add, llist_add) sequence in
|
||||
* another consumer may violate that.
|
||||
*
|
||||
* If there are multiple producers and one consumer, llist_add can be
|
||||
* used in producers and llist_del_all or llist_del_first can be used
|
||||
* in the consumer.
|
||||
*
|
||||
* This can be summarized as follows:
|
||||
*
|
||||
* | add | del_first | del_all
|
||||
* add | - | - | -
|
||||
* del_first | | L | L
|
||||
* del_all | | | -
|
||||
*
|
||||
* Where "-" stands for no lock is needed, while "L" stands for lock
|
||||
* is needed.
|
||||
*
|
||||
* The list entries deleted via llist_del_all can be traversed with
|
||||
* traversing function such as llist_for_each etc. But the list
|
||||
* entries can not be traversed safely before deleted from the list.
|
||||
* The order of deleted entries is from the newest to the oldest added
|
||||
* one. If you want to traverse from the oldest to the newest, you
|
||||
* must reverse the order by yourself before traversing.
|
||||
*
|
||||
* The basic atomic operation of this list is cmpxchg on long. On
|
||||
* architectures that don't have NMI-safe cmpxchg implementation, the
|
||||
* list can NOT be used in NMI handler.  So code that uses the list in NMI
|
||||
* handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
|
||||
*/
|
||||
|
||||
struct llist_head {
|
||||
struct llist_node *first;
|
||||
};
|
||||
|
||||
struct llist_node {
|
||||
struct llist_node *next;
|
||||
};
|
||||
|
||||
#define LLIST_HEAD_INIT(name) { NULL }
|
||||
#define LLIST_HEAD(name) struct llist_head name = LLIST_HEAD_INIT(name)
|
||||
|
||||
/**
|
||||
* init_llist_head - initialize lock-less list head
|
||||
* @list: the head for your lock-less list
|
||||
*/
|
||||
static inline void init_llist_head(struct llist_head *list)
|
||||
{
|
||||
list->first = NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* llist_entry - get the struct of this entry
|
||||
* @ptr: the &struct llist_node pointer.
|
||||
* @type: the type of the struct this is embedded in.
|
||||
* @member: the name of the llist_node within the struct.
|
||||
*/
|
||||
#define llist_entry(ptr, type, member) \
|
||||
container_of(ptr, type, member)
|
||||
|
||||
/**
|
||||
* llist_for_each - iterate over some deleted entries of a lock-less list
|
||||
* @pos: the &struct llist_node to use as a loop cursor
|
||||
* @node: the first entry of deleted list entries
|
||||
*
|
||||
* In general, some entries of the lock-less list can be traversed
|
||||
* safely only after being deleted from list, so start with an entry
|
||||
* instead of list head.
|
||||
*
|
||||
* If being used on entries deleted from lock-less list directly, the
|
||||
* traverse order is from the newest to the oldest added entry. If
|
||||
* you want to traverse from the oldest to the newest, you must
|
||||
* reverse the order by yourself before traversing.
|
||||
*/
|
||||
#define llist_for_each(pos, node) \
|
||||
for ((pos) = (node); pos; (pos) = (pos)->next)
|
||||
|
||||
/**
|
||||
* llist_for_each_entry - iterate over some deleted entries of lock-less list of given type
|
||||
* @pos: the type * to use as a loop cursor.
|
||||
* @node: the first entry of deleted list entries.
|
||||
* @member: the name of the llist_node within the struct.
|
||||
*
|
||||
* In general, some entries of the lock-less list can be traversed
|
||||
* safely only after being removed from list, so start with an entry
|
||||
* instead of list head.
|
||||
*
|
||||
* If being used on entries deleted from lock-less list directly, the
|
||||
* traverse order is from the newest to the oldest added entry. If
|
||||
* you want to traverse from the oldest to the newest, you must
|
||||
* reverse the order by yourself before traversing.
|
||||
*/
|
||||
#define llist_for_each_entry(pos, node, member) \
|
||||
for ((pos) = llist_entry((node), typeof(*(pos)), member); \
|
||||
&(pos)->member != NULL; \
|
||||
(pos) = llist_entry((pos)->member.next, typeof(*(pos)), member))
|
||||
|
||||
/**
|
||||
* llist_empty - tests whether a lock-less list is empty
|
||||
* @head: the list to test
|
||||
*
|
||||
* Not guaranteed to be accurate or up to date. Just a quick way to
|
||||
* test whether the list is empty without deleting something from the
|
||||
* list.
|
||||
*/
|
||||
static inline int llist_empty(const struct llist_head *head)
|
||||
{
|
||||
return ACCESS_ONCE(head->first) == NULL;
|
||||
}
|
||||
|
||||
void llist_add(struct llist_node *new, struct llist_head *head);
|
||||
void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
|
||||
struct llist_head *head);
|
||||
struct llist_node *llist_del_first(struct llist_head *head);
|
||||
struct llist_node *llist_del_all(struct llist_head *head);
|
||||
#endif /* LLIST_H */
|
@ -1600,6 +1600,7 @@ enum mf_flags {
|
||||
};
|
||||
extern void memory_failure(unsigned long pfn, int trapno);
|
||||
extern int __memory_failure(unsigned long pfn, int trapno, int flags);
|
||||
extern void memory_failure_queue(unsigned long pfn, int trapno, int flags);
|
||||
extern int unpoison_memory(unsigned long pfn);
|
||||
extern int sysctl_memory_failure_early_kill;
|
||||
extern int sysctl_memory_failure_recovery;
|
||||
|
@ -276,4 +276,7 @@ config CORDIC
|
||||
so its calculations are in fixed point. Modules can select this
|
||||
when they require this function. Module will be called cordic.
|
||||
|
||||
config LLIST
|
||||
bool
|
||||
|
||||
endmenu
|
||||
|
@ -115,6 +115,8 @@ obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o
|
||||
|
||||
obj-$(CONFIG_CORDIC) += cordic.o
|
||||
|
||||
obj-$(CONFIG_LLIST) += llist.o
|
||||
|
||||
hostprogs-y := gen_crc32table
|
||||
clean-files := crc32table.h
|
||||
|
||||
|
@ -271,8 +271,6 @@ int __bitmap_weight(const unsigned long *bitmap, int bits)
|
||||
}
|
||||
EXPORT_SYMBOL(__bitmap_weight);
|
||||
|
||||
#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG))
|
||||
|
||||
void bitmap_set(unsigned long *map, int start, int nr)
|
||||
{
|
||||
unsigned long *p = map + BIT_WORD(start);
|
||||
|
300
lib/genalloc.c
300
lib/genalloc.c
@ -1,8 +1,26 @@
|
||||
/*
|
||||
* Basic general purpose allocator for managing special purpose memory
|
||||
* not managed by the regular kmalloc/kfree interface.
|
||||
* Uses for this includes on-device special memory, uncached memory
|
||||
* etc.
|
||||
* Basic general purpose allocator for managing special purpose
|
||||
* memory, for example, memory that is not managed by the regular
|
||||
* kmalloc/kfree interface.  Uses for this include on-device special
|
||||
* memory, uncached memory etc.
|
||||
*
|
||||
* It is safe to use the allocator in NMI handlers and other special
|
||||
* unblockable contexts that could otherwise deadlock on locks. This
|
||||
* is implemented by using atomic operations and retries on any
|
||||
* conflicts. The disadvantage is that there may be livelocks in
|
||||
* extreme cases. For better scalability, one allocator can be used
|
||||
* for each CPU.
|
||||
*
|
||||
* The lockless operation only works if there is enough memory
|
||||
* available. If new memory is added to the pool a lock has to be
|
||||
* still taken. So any user relying on locklessness has to ensure
|
||||
* that sufficient memory is preallocated.
|
||||
*
|
||||
* The basic atomic operation of this allocator is cmpxchg on long.
|
||||
* On architectures that don't have NMI-safe cmpxchg implementation,
|
||||
* the allocator can NOT be used in NMI handler.  So code that uses the
|
||||
* allocator in NMI handler should depend on
|
||||
* CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
|
||||
*
|
||||
* Copyright 2005 (C) Jes Sorensen <jes@trained-monkey.org>
|
||||
*
|
||||
@ -13,8 +31,109 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/bitmap.h>
|
||||
#include <linux/rculist.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/genalloc.h>
|
||||
|
||||
static int set_bits_ll(unsigned long *addr, unsigned long mask_to_set)
|
||||
{
|
||||
unsigned long val, nval;
|
||||
|
||||
nval = *addr;
|
||||
do {
|
||||
val = nval;
|
||||
if (val & mask_to_set)
|
||||
return -EBUSY;
|
||||
cpu_relax();
|
||||
} while ((nval = cmpxchg(addr, val, val | mask_to_set)) != val);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int clear_bits_ll(unsigned long *addr, unsigned long mask_to_clear)
|
||||
{
|
||||
unsigned long val, nval;
|
||||
|
||||
nval = *addr;
|
||||
do {
|
||||
val = nval;
|
||||
if ((val & mask_to_clear) != mask_to_clear)
|
||||
return -EBUSY;
|
||||
cpu_relax();
|
||||
} while ((nval = cmpxchg(addr, val, val & ~mask_to_clear)) != val);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* bitmap_set_ll - set the specified number of bits at the specified position
|
||||
* @map: pointer to a bitmap
|
||||
* @start: a bit position in @map
|
||||
* @nr: number of bits to set
|
||||
*
|
||||
* Set @nr bits start from @start in @map lock-lessly. Several users
|
||||
* can set/clear the same bitmap simultaneously without lock. If two
|
||||
* users set the same bit, one user will return the number of remaining bits, otherwise
|
||||
* return 0.
|
||||
*/
|
||||
static int bitmap_set_ll(unsigned long *map, int start, int nr)
|
||||
{
|
||||
unsigned long *p = map + BIT_WORD(start);
|
||||
const int size = start + nr;
|
||||
int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
|
||||
unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
|
||||
|
||||
while (nr - bits_to_set >= 0) {
|
||||
if (set_bits_ll(p, mask_to_set))
|
||||
return nr;
|
||||
nr -= bits_to_set;
|
||||
bits_to_set = BITS_PER_LONG;
|
||||
mask_to_set = ~0UL;
|
||||
p++;
|
||||
}
|
||||
if (nr) {
|
||||
mask_to_set &= BITMAP_LAST_WORD_MASK(size);
|
||||
if (set_bits_ll(p, mask_to_set))
|
||||
return nr;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* bitmap_clear_ll - clear the specified number of bits at the specified position
|
||||
* @map: pointer to a bitmap
|
||||
* @start: a bit position in @map
|
||||
* @nr: number of bits to clear
|
||||
*
|
||||
* Clear @nr bits start from @start in @map lock-lessly. Several users
|
||||
* can set/clear the same bitmap simultaneously without lock. If two
|
||||
* users clear the same bit, one user will return the number of remaining bits,
|
||||
* otherwise return 0.
|
||||
*/
|
||||
static int bitmap_clear_ll(unsigned long *map, int start, int nr)
|
||||
{
|
||||
unsigned long *p = map + BIT_WORD(start);
|
||||
const int size = start + nr;
|
||||
int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
|
||||
unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
|
||||
|
||||
while (nr - bits_to_clear >= 0) {
|
||||
if (clear_bits_ll(p, mask_to_clear))
|
||||
return nr;
|
||||
nr -= bits_to_clear;
|
||||
bits_to_clear = BITS_PER_LONG;
|
||||
mask_to_clear = ~0UL;
|
||||
p++;
|
||||
}
|
||||
if (nr) {
|
||||
mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
|
||||
if (clear_bits_ll(p, mask_to_clear))
|
||||
return nr;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* gen_pool_create - create a new special memory pool
|
||||
@ -30,7 +149,7 @@ struct gen_pool *gen_pool_create(int min_alloc_order, int nid)
|
||||
|
||||
pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid);
|
||||
if (pool != NULL) {
|
||||
rwlock_init(&pool->lock);
|
||||
spin_lock_init(&pool->lock);
|
||||
INIT_LIST_HEAD(&pool->chunks);
|
||||
pool->min_alloc_order = min_alloc_order;
|
||||
}
|
||||
@ -63,14 +182,14 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy
|
||||
if (unlikely(chunk == NULL))
|
||||
return -ENOMEM;
|
||||
|
||||
spin_lock_init(&chunk->lock);
|
||||
chunk->phys_addr = phys;
|
||||
chunk->start_addr = virt;
|
||||
chunk->end_addr = virt + size;
|
||||
atomic_set(&chunk->avail, size);
|
||||
|
||||
write_lock(&pool->lock);
|
||||
list_add(&chunk->next_chunk, &pool->chunks);
|
||||
write_unlock(&pool->lock);
|
||||
spin_lock(&pool->lock);
|
||||
list_add_rcu(&chunk->next_chunk, &pool->chunks);
|
||||
spin_unlock(&pool->lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -85,19 +204,19 @@ EXPORT_SYMBOL(gen_pool_add_virt);
|
||||
*/
|
||||
phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long addr)
|
||||
{
|
||||
struct list_head *_chunk;
|
||||
struct gen_pool_chunk *chunk;
|
||||
phys_addr_t paddr = -1;
|
||||
|
||||
read_lock(&pool->lock);
|
||||
list_for_each(_chunk, &pool->chunks) {
|
||||
chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
|
||||
|
||||
if (addr >= chunk->start_addr && addr < chunk->end_addr)
|
||||
return chunk->phys_addr + addr - chunk->start_addr;
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) {
|
||||
if (addr >= chunk->start_addr && addr < chunk->end_addr) {
|
||||
paddr = chunk->phys_addr + (addr - chunk->start_addr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
read_unlock(&pool->lock);
|
||||
rcu_read_unlock();
|
||||
|
||||
return -1;
|
||||
return paddr;
|
||||
}
|
||||
EXPORT_SYMBOL(gen_pool_virt_to_phys);
|
||||
|
||||
@ -115,7 +234,6 @@ void gen_pool_destroy(struct gen_pool *pool)
|
||||
int order = pool->min_alloc_order;
|
||||
int bit, end_bit;
|
||||
|
||||
|
||||
list_for_each_safe(_chunk, _next_chunk, &pool->chunks) {
|
||||
chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
|
||||
list_del(&chunk->next_chunk);
|
||||
@ -137,44 +255,50 @@ EXPORT_SYMBOL(gen_pool_destroy);
|
||||
* @size: number of bytes to allocate from the pool
|
||||
*
|
||||
* Allocate the requested number of bytes from the specified pool.
|
||||
* Uses a first-fit algorithm.
|
||||
* Uses a first-fit algorithm. Can not be used in NMI handler on
|
||||
* architectures without NMI-safe cmpxchg implementation.
|
||||
*/
|
||||
unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
|
||||
{
|
||||
struct list_head *_chunk;
|
||||
struct gen_pool_chunk *chunk;
|
||||
unsigned long addr, flags;
|
||||
unsigned long addr = 0;
|
||||
int order = pool->min_alloc_order;
|
||||
int nbits, start_bit, end_bit;
|
||||
int nbits, start_bit = 0, end_bit, remain;
|
||||
|
||||
#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||
BUG_ON(in_nmi());
|
||||
#endif
|
||||
|
||||
if (size == 0)
|
||||
return 0;
|
||||
|
||||
nbits = (size + (1UL << order) - 1) >> order;
|
||||
|
||||
read_lock(&pool->lock);
|
||||
list_for_each(_chunk, &pool->chunks) {
|
||||
chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) {
|
||||
if (size > atomic_read(&chunk->avail))
|
||||
continue;
|
||||
|
||||
end_bit = (chunk->end_addr - chunk->start_addr) >> order;
|
||||
|
||||
spin_lock_irqsave(&chunk->lock, flags);
|
||||
start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, 0,
|
||||
nbits, 0);
|
||||
if (start_bit >= end_bit) {
|
||||
spin_unlock_irqrestore(&chunk->lock, flags);
|
||||
retry:
|
||||
start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit,
|
||||
start_bit, nbits, 0);
|
||||
if (start_bit >= end_bit)
|
||||
continue;
|
||||
remain = bitmap_set_ll(chunk->bits, start_bit, nbits);
|
||||
if (remain) {
|
||||
remain = bitmap_clear_ll(chunk->bits, start_bit,
|
||||
nbits - remain);
|
||||
BUG_ON(remain);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
addr = chunk->start_addr + ((unsigned long)start_bit << order);
|
||||
|
||||
bitmap_set(chunk->bits, start_bit, nbits);
|
||||
spin_unlock_irqrestore(&chunk->lock, flags);
|
||||
read_unlock(&pool->lock);
|
||||
return addr;
|
||||
size = nbits << order;
|
||||
atomic_sub(size, &chunk->avail);
|
||||
break;
|
||||
}
|
||||
read_unlock(&pool->lock);
|
||||
return 0;
|
||||
rcu_read_unlock();
|
||||
return addr;
|
||||
}
|
||||
EXPORT_SYMBOL(gen_pool_alloc);
|
||||
|
||||
@ -184,33 +308,95 @@ EXPORT_SYMBOL(gen_pool_alloc);
|
||||
* @addr: starting address of memory to free back to pool
|
||||
* @size: size in bytes of memory to free
|
||||
*
|
||||
* Free previously allocated special memory back to the specified pool.
|
||||
* Free previously allocated special memory back to the specified
|
||||
* pool. Can not be used in NMI handler on architectures without
|
||||
* NMI-safe cmpxchg implementation.
|
||||
*/
|
||||
void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size)
|
||||
{
|
||||
struct list_head *_chunk;
|
||||
struct gen_pool_chunk *chunk;
|
||||
unsigned long flags;
|
||||
int order = pool->min_alloc_order;
|
||||
int bit, nbits;
|
||||
int start_bit, nbits, remain;
|
||||
|
||||
#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||
BUG_ON(in_nmi());
|
||||
#endif
|
||||
|
||||
nbits = (size + (1UL << order) - 1) >> order;
|
||||
|
||||
read_lock(&pool->lock);
|
||||
list_for_each(_chunk, &pool->chunks) {
|
||||
chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
|
||||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) {
|
||||
if (addr >= chunk->start_addr && addr < chunk->end_addr) {
|
||||
BUG_ON(addr + size > chunk->end_addr);
|
||||
spin_lock_irqsave(&chunk->lock, flags);
|
||||
bit = (addr - chunk->start_addr) >> order;
|
||||
while (nbits--)
|
||||
__clear_bit(bit++, chunk->bits);
|
||||
spin_unlock_irqrestore(&chunk->lock, flags);
|
||||
break;
|
||||
start_bit = (addr - chunk->start_addr) >> order;
|
||||
remain = bitmap_clear_ll(chunk->bits, start_bit, nbits);
|
||||
BUG_ON(remain);
|
||||
size = nbits << order;
|
||||
atomic_add(size, &chunk->avail);
|
||||
rcu_read_unlock();
|
||||
return;
|
||||
}
|
||||
}
|
||||
BUG_ON(nbits > 0);
|
||||
read_unlock(&pool->lock);
|
||||
rcu_read_unlock();
|
||||
BUG();
|
||||
}
|
||||
EXPORT_SYMBOL(gen_pool_free);
|
||||
|
||||
/**
|
||||
* gen_pool_for_each_chunk - call func for every chunk of generic memory pool
|
||||
* @pool: the generic memory pool
|
||||
* @func: func to call
|
||||
* @data: additional data used by @func
|
||||
*
|
||||
* Call @func for every chunk of generic memory pool. The @func is
|
||||
* called with rcu_read_lock held.
|
||||
*/
|
||||
void gen_pool_for_each_chunk(struct gen_pool *pool,
|
||||
void (*func)(struct gen_pool *pool, struct gen_pool_chunk *chunk, void *data),
|
||||
void *data)
|
||||
{
|
||||
struct gen_pool_chunk *chunk;
|
||||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(chunk, &(pool)->chunks, next_chunk)
|
||||
func(pool, chunk, data);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
EXPORT_SYMBOL(gen_pool_for_each_chunk);
|
||||
|
||||
/**
|
||||
* gen_pool_avail - get available free space of the pool
|
||||
* @pool: pool to get available free space
|
||||
*
|
||||
* Return available free space of the specified pool.
|
||||
*/
|
||||
size_t gen_pool_avail(struct gen_pool *pool)
|
||||
{
|
||||
struct gen_pool_chunk *chunk;
|
||||
size_t avail = 0;
|
||||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk)
|
||||
avail += atomic_read(&chunk->avail);
|
||||
rcu_read_unlock();
|
||||
return avail;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gen_pool_avail);
|
||||
|
||||
/**
|
||||
* gen_pool_size - get size in bytes of memory managed by the pool
|
||||
* @pool: pool to get size
|
||||
*
|
||||
* Return size in bytes of memory managed by the pool.
|
||||
*/
|
||||
size_t gen_pool_size(struct gen_pool *pool)
|
||||
{
|
||||
struct gen_pool_chunk *chunk;
|
||||
size_t size = 0;
|
||||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk)
|
||||
size += chunk->end_addr - chunk->start_addr;
|
||||
rcu_read_unlock();
|
||||
return size;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gen_pool_size);
|
||||
|
129
lib/llist.c
Normal file
129
lib/llist.c
Normal file
@ -0,0 +1,129 @@
|
||||
/*
|
||||
* Lock-less NULL terminated single linked list
|
||||
*
|
||||
* The basic atomic operation of this list is cmpxchg on long. On
|
||||
* architectures that don't have NMI-safe cmpxchg implementation, the
|
||||
* list can NOT be used in NMI handler.  So code that uses the list in NMI
|
||||
* handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
|
||||
*
|
||||
* Copyright 2010,2011 Intel Corp.
|
||||
* Author: Huang Ying <ying.huang@intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License version
|
||||
* 2 as published by the Free Software Foundation;
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/llist.h>
|
||||
|
||||
#include <asm/system.h>
|
||||
|
||||
/**
|
||||
* llist_add - add a new entry
|
||||
* @new: new entry to be added
|
||||
* @head: the head for your lock-less list
|
||||
*/
|
||||
void llist_add(struct llist_node *new, struct llist_head *head)
|
||||
{
|
||||
struct llist_node *entry, *old_entry;
|
||||
|
||||
#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||
BUG_ON(in_nmi());
|
||||
#endif
|
||||
|
||||
entry = head->first;
|
||||
do {
|
||||
old_entry = entry;
|
||||
new->next = entry;
|
||||
cpu_relax();
|
||||
} while ((entry = cmpxchg(&head->first, old_entry, new)) != old_entry);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(llist_add);
|
||||
|
||||
/**
|
||||
* llist_add_batch - add several linked entries in batch
|
||||
* @new_first: first entry in batch to be added
|
||||
* @new_last: last entry in batch to be added
|
||||
* @head: the head for your lock-less list
|
||||
*/
|
||||
void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
|
||||
struct llist_head *head)
|
||||
{
|
||||
struct llist_node *entry, *old_entry;
|
||||
|
||||
#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||
BUG_ON(in_nmi());
|
||||
#endif
|
||||
|
||||
entry = head->first;
|
||||
do {
|
||||
old_entry = entry;
|
||||
new_last->next = entry;
|
||||
cpu_relax();
|
||||
} while ((entry = cmpxchg(&head->first, old_entry, new_first)) != old_entry);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(llist_add_batch);
|
||||
|
||||
/**
|
||||
* llist_del_first - delete the first entry of lock-less list
|
||||
* @head: the head for your lock-less list
|
||||
*
|
||||
* If list is empty, return NULL, otherwise, return the first entry
|
||||
* deleted, this is the newest added one.
|
||||
*
|
||||
* Only one llist_del_first user can be used simultaneously with
|
||||
* multiple llist_add users without lock. Because otherwise
|
||||
* llist_del_first, llist_add, llist_add (or llist_del_all, llist_add,
|
||||
* llist_add) sequence in another user may change @head->first->next,
|
||||
* but keep @head->first. If multiple consumers are needed, please
|
||||
* use llist_del_all or use lock between consumers.
|
||||
*/
|
||||
struct llist_node *llist_del_first(struct llist_head *head)
|
||||
{
|
||||
struct llist_node *entry, *old_entry, *next;
|
||||
|
||||
#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||
BUG_ON(in_nmi());
|
||||
#endif
|
||||
|
||||
entry = head->first;
|
||||
do {
|
||||
if (entry == NULL)
|
||||
return NULL;
|
||||
old_entry = entry;
|
||||
next = entry->next;
|
||||
cpu_relax();
|
||||
} while ((entry = cmpxchg(&head->first, old_entry, next)) != old_entry);
|
||||
|
||||
return entry;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(llist_del_first);
|
||||
|
||||
/**
|
||||
* llist_del_all - delete all entries from lock-less list
|
||||
* @head: the head of lock-less list to delete all entries
|
||||
*
|
||||
* If list is empty, return NULL, otherwise, delete all entries and
|
||||
* return the pointer to the first entry. The order of entries
|
||||
* deleted is from the newest to the oldest added one.
|
||||
*/
|
||||
struct llist_node *llist_del_all(struct llist_head *head)
|
||||
{
|
||||
#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||
BUG_ON(in_nmi());
|
||||
#endif
|
||||
|
||||
return xchg(&head->first, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(llist_del_all);
|
@ -53,6 +53,7 @@
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/memory_hotplug.h>
|
||||
#include <linux/mm_inline.h>
|
||||
#include <linux/kfifo.h>
|
||||
#include "internal.h"
|
||||
|
||||
int sysctl_memory_failure_early_kill __read_mostly = 0;
|
||||
@ -1178,6 +1179,97 @@ void memory_failure(unsigned long pfn, int trapno)
|
||||
__memory_failure(pfn, trapno, 0);
|
||||
}
|
||||
|
||||
#define MEMORY_FAILURE_FIFO_ORDER 4
|
||||
#define MEMORY_FAILURE_FIFO_SIZE (1 << MEMORY_FAILURE_FIFO_ORDER)
|
||||
|
||||
struct memory_failure_entry {
|
||||
unsigned long pfn;
|
||||
int trapno;
|
||||
int flags;
|
||||
};
|
||||
|
||||
struct memory_failure_cpu {
|
||||
DECLARE_KFIFO(fifo, struct memory_failure_entry,
|
||||
MEMORY_FAILURE_FIFO_SIZE);
|
||||
spinlock_t lock;
|
||||
struct work_struct work;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct memory_failure_cpu, memory_failure_cpu);
|
||||
|
||||
/**
|
||||
* memory_failure_queue - Schedule handling memory failure of a page.
|
||||
* @pfn: Page Number of the corrupted page
|
||||
* @trapno: Trap number reported in the signal to user space.
|
||||
* @flags: Flags for memory failure handling
|
||||
*
|
||||
* This function is called by the low level hardware error handler
|
||||
* when it detects hardware memory corruption of a page. It schedules
|
||||
* the recovering of error page, including dropping pages, killing
|
||||
* processes etc.
|
||||
*
|
||||
* The function is primarily of use for corruptions that
|
||||
* happen outside the current execution context (e.g. when
|
||||
* detected by a background scrubber)
|
||||
*
|
||||
* Can run in IRQ context.
|
||||
*/
|
||||
void memory_failure_queue(unsigned long pfn, int trapno, int flags)
|
||||
{
|
||||
struct memory_failure_cpu *mf_cpu;
|
||||
unsigned long proc_flags;
|
||||
struct memory_failure_entry entry = {
|
||||
.pfn = pfn,
|
||||
.trapno = trapno,
|
||||
.flags = flags,
|
||||
};
|
||||
|
||||
mf_cpu = &get_cpu_var(memory_failure_cpu);
|
||||
spin_lock_irqsave(&mf_cpu->lock, proc_flags);
|
||||
if (kfifo_put(&mf_cpu->fifo, &entry))
|
||||
schedule_work_on(smp_processor_id(), &mf_cpu->work);
|
||||
else
|
||||
pr_err("Memory failure: buffer overflow when queuing memory failure at 0x%#lx\n",
|
||||
pfn);
|
||||
spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
|
||||
put_cpu_var(memory_failure_cpu);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(memory_failure_queue);
|
||||
|
||||
static void memory_failure_work_func(struct work_struct *work)
|
||||
{
|
||||
struct memory_failure_cpu *mf_cpu;
|
||||
struct memory_failure_entry entry = { 0, };
|
||||
unsigned long proc_flags;
|
||||
int gotten;
|
||||
|
||||
mf_cpu = &__get_cpu_var(memory_failure_cpu);
|
||||
for (;;) {
|
||||
spin_lock_irqsave(&mf_cpu->lock, proc_flags);
|
||||
gotten = kfifo_get(&mf_cpu->fifo, &entry);
|
||||
spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
|
||||
if (!gotten)
|
||||
break;
|
||||
__memory_failure(entry.pfn, entry.trapno, entry.flags);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Initialize the lock, fifo and work item of the deferred memory
 * failure state for every possible CPU.  Always returns 0.
 */
static int __init memory_failure_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct memory_failure_cpu *state;

		state = &per_cpu(memory_failure_cpu, cpu);
		spin_lock_init(&state->lock);
		INIT_KFIFO(state->fifo);
		INIT_WORK(&state->work, memory_failure_work_func);
	}

	return 0;
}
core_initcall(memory_failure_init);
|
||||
|
||||
/**
|
||||
* unpoison_memory - Unpoison a previously poisoned page
|
||||
* @pfn: Page number of the to be unpoisoned page
|
||||
|
Loading…
Reference in New Issue
Block a user