[PATCH] kexec: add kexec syscalls

This patch introduces the architecture independent implementation the
sys_kexec_load, the compat_sys_kexec_load system calls.

Kexec on panic support has been integrated into the core patch and is
relatively clean.

In addition the hopefully architecture independent option
crashkernel=size@location has been docuemented.  It's purpose is to reserve
space for the panic kernel to live, and where no DMA transfer will ever be
setup to access.

Signed-off-by: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Alexander Nyberg <alexn@telia.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
Eric W. Biederman 2005-06-25 14:57:52 -07:00 committed by Linus Torvalds
parent d0537508a9
commit dc009d9243
10 changed files with 1227 additions and 4 deletions

View File

@ -358,6 +358,10 @@ running once the system is up.
cpia_pp= [HW,PPT] cpia_pp= [HW,PPT]
Format: { parport<nr> | auto | none } Format: { parport<nr> | auto | none }
crashkernel=nn[KMG]@ss[KMG]
[KNL] Reserve a chunk of physical memory to
hold a kernel to switch to with kexec on panic.
cs4232= [HW,OSS] cs4232= [HW,OSS]
Format: <io>,<irq>,<dma>,<dma2>,<mpuio>,<mpuirq> Format: <io>,<irq>,<dma>,<dma2>,<mpuio>,<mpuirq>

View File

@ -1330,6 +1330,16 @@ M: rml@novell.com
L: linux-kernel@vger.kernel.org L: linux-kernel@vger.kernel.org
S: Maintained S: Maintained
KEXEC
P: Eric Biederman
P: Randy Dunlap
M: ebiederm@xmission.com
M: rddunlap@osdl.org
W: http://www.xmission.com/~ebiederm/files/kexec/
L: linux-kernel@vger.kernel.org
L: fastboot@osdl.org
S: Maintained
LANMEDIA WAN CARD DRIVER LANMEDIA WAN CARD DRIVER
P: Andrew Stanley-Jones P: Andrew Stanley-Jones
M: asj@lanmedia.com M: asj@lanmedia.com

127
include/linux/kexec.h Normal file
View File

@ -0,0 +1,127 @@
#ifndef LINUX_KEXEC_H
#define LINUX_KEXEC_H
#ifdef CONFIG_KEXEC
#include <linux/types.h>
#include <linux/list.h>
#include <linux/linkage.h>
#include <linux/compat.h>
#include <asm/kexec.h>
/* Verify architecture specific macros are defined */
#ifndef KEXEC_SOURCE_MEMORY_LIMIT
#error KEXEC_SOURCE_MEMORY_LIMIT not defined
#endif
#ifndef KEXEC_DESTINATION_MEMORY_LIMIT
#error KEXEC_DESTINATION_MEMORY_LIMIT not defined
#endif
#ifndef KEXEC_CONTROL_MEMORY_LIMIT
#error KEXEC_CONTROL_MEMORY_LIMIT not defined
#endif
#ifndef KEXEC_CONTROL_CODE_SIZE
#error KEXEC_CONTROL_CODE_SIZE not defined
#endif
#ifndef KEXEC_ARCH
#error KEXEC_ARCH not defined
#endif
/*
* This structure is used to hold the arguments that are used when loading
* kernel binaries.
*/
typedef unsigned long kimage_entry_t;
#define IND_DESTINATION 0x1
#define IND_INDIRECTION 0x2
#define IND_DONE 0x4
#define IND_SOURCE 0x8
#define KEXEC_SEGMENT_MAX 8
struct kexec_segment {
void __user *buf;
size_t bufsz;
unsigned long mem; /* User space sees this as a (void *) ... */
size_t memsz;
};
#ifdef CONFIG_COMPAT
struct compat_kexec_segment {
compat_uptr_t buf;
compat_size_t bufsz;
compat_ulong_t mem; /* User space sees this as a (void *) ... */
compat_size_t memsz;
};
#endif
struct kimage {
kimage_entry_t head;
kimage_entry_t *entry;
kimage_entry_t *last_entry;
unsigned long destination;
unsigned long start;
struct page *control_code_page;
unsigned long nr_segments;
struct kexec_segment segment[KEXEC_SEGMENT_MAX];
struct list_head control_pages;
struct list_head dest_pages;
struct list_head unuseable_pages;
/* Address of next control page to allocate for crash kernels. */
unsigned long control_page;
/* Flags to indicate special processing */
unsigned int type : 1;
#define KEXEC_TYPE_DEFAULT 0
#define KEXEC_TYPE_CRASH 1
};
/* kexec interface functions */
extern NORET_TYPE void machine_kexec(struct kimage *image) ATTRIB_NORET;
extern int machine_kexec_prepare(struct kimage *image);
extern void machine_kexec_cleanup(struct kimage *image);
extern asmlinkage long sys_kexec_load(unsigned long entry,
unsigned long nr_segments, struct kexec_segment __user *segments,
unsigned long flags);
#ifdef CONFIG_COMPAT
extern asmlinkage long compat_sys_kexec_load(unsigned long entry,
unsigned long nr_segments, struct compat_kexec_segment __user *segments,
unsigned long flags);
#endif
extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order);
extern void crash_kexec(void);
extern struct kimage *kexec_image;
#define KEXEC_ON_CRASH 0x00000001
#define KEXEC_ARCH_MASK 0xffff0000
/* These values match the ELF architecture values.
* Unless there is a good reason that should continue to be the case.
*/
#define KEXEC_ARCH_DEFAULT ( 0 << 16)
#define KEXEC_ARCH_386 ( 3 << 16)
#define KEXEC_ARCH_X86_64 (62 << 16)
#define KEXEC_ARCH_PPC (20 << 16)
#define KEXEC_ARCH_PPC64 (21 << 16)
#define KEXEC_ARCH_IA_64 (50 << 16)
#define KEXEC_FLAGS (KEXEC_ON_CRASH) /* List of defined/legal kexec flags */
/* Location of a reserved region to hold the crash kernel.
*/
extern struct resource crashk_res;
#else /* !CONFIG_KEXEC */
static inline void crash_kexec(void) { }
#endif /* CONFIG_KEXEC */
#endif /* LINUX_KEXEC_H */

View File

@ -51,6 +51,9 @@ extern void machine_restart(char *cmd);
extern void machine_halt(void); extern void machine_halt(void);
extern void machine_power_off(void); extern void machine_power_off(void);
extern void machine_shutdown(void);
extern void machine_crash_shutdown(void);
#endif #endif
#endif /* _LINUX_REBOOT_H */ #endif /* _LINUX_REBOOT_H */

View File

@ -159,8 +159,9 @@ asmlinkage long sys_shutdown(int, int);
asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd,
void __user *arg); void __user *arg);
asmlinkage long sys_restart_syscall(void); asmlinkage long sys_restart_syscall(void);
asmlinkage long sys_kexec_load(void *entry, unsigned long nr_segments, asmlinkage long sys_kexec_load(unsigned long entry,
struct kexec_segment *segments, unsigned long flags); unsigned long nr_segments, struct kexec_segment __user *segments,
unsigned long flags);
asmlinkage long sys_exit(int error_code); asmlinkage long sys_exit(int error_code);
asmlinkage void sys_exit_group(int error_code); asmlinkage void sys_exit_group(int error_code);

View File

@ -17,6 +17,7 @@ obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_KALLSYMS) += kallsyms.o obj-$(CONFIG_KALLSYMS) += kallsyms.o
obj-$(CONFIG_PM) += power/ obj-$(CONFIG_PM) += power/
obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
obj-$(CONFIG_KEXEC) += kexec.o
obj-$(CONFIG_COMPAT) += compat.o obj-$(CONFIG_COMPAT) += compat.o
obj-$(CONFIG_CPUSETS) += cpuset.o obj-$(CONFIG_CPUSETS) += cpuset.o
obj-$(CONFIG_IKCONFIG) += configs.o obj-$(CONFIG_IKCONFIG) += configs.o

1036
kernel/kexec.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -18,6 +18,7 @@
#include <linux/sysrq.h> #include <linux/sysrq.h>
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/nmi.h> #include <linux/nmi.h>
#include <linux/kexec.h>
int panic_timeout; int panic_timeout;
int panic_on_oops; int panic_on_oops;
@ -63,6 +64,13 @@ NORET_TYPE void panic(const char * fmt, ...)
unsigned long caller = (unsigned long) __builtin_return_address(0); unsigned long caller = (unsigned long) __builtin_return_address(0);
#endif #endif
/*
* It's possible to come here directly from a panic-assertion and not
* have preempt disabled. Some functions called from here want
* preempt to be disabled. No point enabling it later though...
*/
preempt_disable();
bust_spinlocks(1); bust_spinlocks(1);
va_start(args, fmt); va_start(args, fmt);
vsnprintf(buf, sizeof(buf), fmt, args); vsnprintf(buf, sizeof(buf), fmt, args);
@ -70,7 +78,19 @@ NORET_TYPE void panic(const char * fmt, ...)
printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf); printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf);
bust_spinlocks(0); bust_spinlocks(0);
/*
* If we have crashed and we have a crash kernel loaded let it handle
* everything else.
* Do we want to call this before we try to display a message?
*/
crash_kexec();
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
/*
* Note smp_send_stop is the usual smp shutdown function, which
* unfortunately means it may not be hardened to work in a panic
* situation.
*/
smp_send_stop(); smp_send_stop();
#endif #endif
@ -79,8 +99,7 @@ NORET_TYPE void panic(const char * fmt, ...)
if (!panic_blink) if (!panic_blink)
panic_blink = no_blink; panic_blink = no_blink;
if (panic_timeout > 0) if (panic_timeout > 0) {
{
/* /*
* Delay timeout seconds before rebooting the machine. * Delay timeout seconds before rebooting the machine.
* We can't use the "normal" timers since we just panicked.. * We can't use the "normal" timers since we just panicked..

View File

@ -16,6 +16,8 @@
#include <linux/init.h> #include <linux/init.h>
#include <linux/highuid.h> #include <linux/highuid.h>
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/device.h> #include <linux/device.h>
#include <linux/key.h> #include <linux/key.h>
@ -439,6 +441,24 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user
machine_restart(buffer); machine_restart(buffer);
break; break;
#ifdef CONFIG_KEXEC
case LINUX_REBOOT_CMD_KEXEC:
{
struct kimage *image;
image = xchg(&kexec_image, 0);
if (!image) {
unlock_kernel();
return -EINVAL;
}
notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
system_state = SYSTEM_RESTART;
device_shutdown();
printk(KERN_EMERG "Starting new kernel\n");
machine_shutdown();
machine_kexec(image);
break;
}
#endif
#ifdef CONFIG_SOFTWARE_SUSPEND #ifdef CONFIG_SOFTWARE_SUSPEND
case LINUX_REBOOT_CMD_SW_SUSPEND: case LINUX_REBOOT_CMD_SW_SUSPEND:
{ {

View File

@ -18,6 +18,8 @@ cond_syscall(sys_acct);
cond_syscall(sys_lookup_dcookie); cond_syscall(sys_lookup_dcookie);
cond_syscall(sys_swapon); cond_syscall(sys_swapon);
cond_syscall(sys_swapoff); cond_syscall(sys_swapoff);
cond_syscall(sys_kexec_load);
cond_syscall(compat_sys_kexec_load);
cond_syscall(sys_init_module); cond_syscall(sys_init_module);
cond_syscall(sys_delete_module); cond_syscall(sys_delete_module);
cond_syscall(sys_socketpair); cond_syscall(sys_socketpair);