mirror of https://github.com/torvalds/linux.git (synced 2024-12-28 13:51:44 +00:00)
Merge branch 'x86-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull misc x86 updates from Ingo Molnar:
 "Misc changes:

   - Enhance #GP fault printouts by distinguishing between canonical
     and non-canonical address faults, and also add KASAN fault
     decoding.

   - Fix/enhance the x86 NMI handler by putting the duration check into
     a direct function call instead of an irq_work which we know to be
     broken in some cases.

   - Clean up do_general_protection() a bit"

* 'x86-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/nmi: Remove irq_work from the long duration NMI handler
  x86/traps: Cleanup do_general_protection()
  x86/kasan: Print original address on #GP
  x86/dumpstack: Introduce die_addr() for die() with #GP fault address
  x86/traps: Print address on #GP
  x86/insn-eval: Add support for 64-bit kernel mode
commit f6170f0afb
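The first item in the log above hinges on x86-64 canonical addressing: with 4-level paging, virtual addresses split into a low user half and a high kernel half, and anything in the hole between them raises #GP rather than #PF. Below is a minimal user-space sketch of that classification, assuming 48-bit virtual addresses (so 47 usable bits per half); the VIRTUAL_MASK constant, is_canonical() helper and sample addresses are illustration only, while the kernel's own check in get_kernel_gp_address() expresses the same idea via __VIRTUAL_MASK and the decoded operand size.

/*
 * Illustration only (not part of the patch): classify a 64-bit virtual
 * address as canonical or non-canonical, assuming 4-level paging.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VIRTUAL_MASK ((1ULL << 47) - 1)	/* top of the low (user) half */

static bool is_canonical(uint64_t addr)
{
	/* Canonical: entirely in the low half or entirely in the high half. */
	return addr <= VIRTUAL_MASK || addr >= ~VIRTUAL_MASK;
}

int main(void)
{
	const uint64_t samples[] = {
		0x00007fffffffe000ULL,	/* user-space address: canonical */
		0xffff888000000000ULL,	/* kernel-half address: canonical */
		0xdead000000000000ULL,	/* wild pointer: non-canonical */
	};

	for (unsigned int i = 0; i < 3; i++)
		printf("%#018llx -> %s\n",
		       (unsigned long long)samples[i],
		       is_canonical(samples[i]) ? "canonical" : "non-canonical");
	return 0;
}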
@@ -33,6 +33,7 @@ enum show_regs_mode {
 };
 
 extern void die(const char *, struct pt_regs *,long);
+void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr);
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_stack_regs(struct pt_regs *regs);
 extern void __show_regs(struct pt_regs *regs, enum show_regs_mode);
@@ -41,7 +41,6 @@ struct nmiaction {
 	struct list_head	list;
 	nmi_handler_t		handler;
 	u64			max_duration;
-	struct irq_work		irq_work;
 	unsigned long		flags;
 	const char		*name;
 };
@@ -159,6 +159,19 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
 #endif
 }
 
+/*
+ * Determine whether the register set came from any context that is running in
+ * 64-bit mode.
+ */
+static inline bool any_64bit_mode(struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_64
+	return !user_mode(regs) || user_64bit_mode(regs);
+#else
+	return false;
+#endif
+}
+
 #ifdef CONFIG_X86_64
 #define current_user_stack_pointer()	current_pt_regs()->sp
 #define compat_user_stack_pointer()	current_pt_regs()->sp
@@ -365,7 +365,7 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 }
 NOKPROBE_SYMBOL(oops_end);
 
-int __die(const char *str, struct pt_regs *regs, long err)
+static void __die_header(const char *str, struct pt_regs *regs, long err)
 {
 	const char *pr = "";
 
@@ -384,7 +384,11 @@ int __die(const char *str, struct pt_regs *regs, long err)
 	       IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "",
 	       IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
 	       (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "");
+}
+NOKPROBE_SYMBOL(__die_header);
 
+static int __die_body(const char *str, struct pt_regs *regs, long err)
+{
 	show_regs(regs);
 	print_modules();
 
@@ -394,6 +398,13 @@ int __die(const char *str, struct pt_regs *regs, long err)
 
 	return 0;
 }
+NOKPROBE_SYMBOL(__die_body);
+
+int __die(const char *str, struct pt_regs *regs, long err)
+{
+	__die_header(str, regs, err);
+	return __die_body(str, regs, err);
+}
 NOKPROBE_SYMBOL(__die);
 
 /*
@@ -410,6 +421,19 @@ void die(const char *str, struct pt_regs *regs, long err)
 	oops_end(flags, regs, sig);
 }
 
+void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr)
+{
+	unsigned long flags = oops_begin();
+	int sig = SIGSEGV;
+
+	__die_header(str, regs, err);
+	if (gp_addr)
+		kasan_non_canonical_hook(gp_addr);
+	if (__die_body(str, regs, err))
+		sig = 0;
+	oops_end(flags, regs, sig);
+}
+
 void show_regs(struct pt_regs *regs)
 {
 	show_regs_print_info(KERN_DEFAULT);
@@ -104,18 +104,22 @@ static int __init nmi_warning_debugfs(void)
 }
 fs_initcall(nmi_warning_debugfs);
 
-static void nmi_max_handler(struct irq_work *w)
+static void nmi_check_duration(struct nmiaction *action, u64 duration)
 {
-	struct nmiaction *a = container_of(w, struct nmiaction, irq_work);
+	u64 whole_msecs = READ_ONCE(action->max_duration);
 	int remainder_ns, decimal_msecs;
-	u64 whole_msecs = READ_ONCE(a->max_duration);
+
+	if (duration < nmi_longest_ns || duration < action->max_duration)
+		return;
+
+	action->max_duration = duration;
 
 	remainder_ns = do_div(whole_msecs, (1000 * 1000));
 	decimal_msecs = remainder_ns / 1000;
 
 	printk_ratelimited(KERN_INFO
 		"INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n",
-		a->handler, whole_msecs, decimal_msecs);
+		action->handler, whole_msecs, decimal_msecs);
 }
 
 static int nmi_handle(unsigned int type, struct pt_regs *regs)
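For reference, the duration check that now runs directly from nmi_handle() turns a nanosecond delta into the "%lld.%03d msecs" figure using do_div(). A small user-space sketch of the same arithmetic follows; do_div() in the kernel returns the remainder and turns its argument into the quotient, so plain / and % stand in for it here, and the 12345678 ns sample value is made up for illustration.

/* Illustration only: the millisecond formatting done by nmi_check_duration(). */
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	uint64_t duration = 12345678;			/* ns, example value */
	uint64_t whole_msecs = duration / (1000 * 1000);
	unsigned int remainder_ns = duration % (1000 * 1000);
	unsigned int decimal_msecs = remainder_ns / 1000;

	/* Prints "... took too long to run: 12.345 msecs" for the sample value. */
	printf("INFO: NMI handler took too long to run: %" PRIu64 ".%03u msecs\n",
	       whole_msecs, decimal_msecs);
	return 0;
}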
@@ -142,11 +146,7 @@ static int nmi_handle(unsigned int type, struct pt_regs *regs)
 		delta = sched_clock() - delta;
 		trace_nmi_handler(a->handler, (int)delta, thishandled);
 
-		if (delta < nmi_longest_ns || delta < a->max_duration)
-			continue;
-
-		a->max_duration = delta;
-		irq_work_queue(&a->irq_work);
+		nmi_check_duration(a, delta);
 	}
 
 	rcu_read_unlock();
@@ -164,8 +164,6 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
 	if (!action->handler)
 		return -EINVAL;
 
-	init_irq_work(&action->irq_work, nmi_max_handler);
-
 	raw_spin_lock_irqsave(&desc->lock, flags);
 
 	/*
@@ -56,6 +56,8 @@
 #include <asm/mpx.h>
 #include <asm/vm86.h>
 #include <asm/umip.h>
+#include <asm/insn.h>
+#include <asm/insn-eval.h>
 
 #ifdef CONFIG_X86_64
 #include <asm/x86_init.h>
@@ -518,11 +520,57 @@ exit_trap:
 	do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, 0, NULL);
 }
 
-dotraplinkage void
-do_general_protection(struct pt_regs *regs, long error_code)
+enum kernel_gp_hint {
+	GP_NO_HINT,
+	GP_NON_CANONICAL,
+	GP_CANONICAL
+};
+
+/*
+ * When an uncaught #GP occurs, try to determine the memory address accessed by
+ * the instruction and return that address to the caller. Also, try to figure
+ * out whether any part of the access to that address was non-canonical.
+ */
+static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs,
+						 unsigned long *addr)
 {
-	const char *desc = "general protection fault";
+	u8 insn_buf[MAX_INSN_SIZE];
+	struct insn insn;
+
+	if (probe_kernel_read(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
+		return GP_NO_HINT;
+
+	kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
+	insn_get_modrm(&insn);
+	insn_get_sib(&insn);
+
+	*addr = (unsigned long)insn_get_addr_ref(&insn, regs);
+	if (*addr == -1UL)
+		return GP_NO_HINT;
+
+#ifdef CONFIG_X86_64
+	/*
+	 * Check that:
+	 *  - the operand is not in the kernel half
+	 *  - the last byte of the operand is not in the user canonical half
+	 */
+	if (*addr < ~__VIRTUAL_MASK &&
+	    *addr + insn.opnd_bytes - 1 > __VIRTUAL_MASK)
+		return GP_NON_CANONICAL;
+#endif
+
+	return GP_CANONICAL;
+}
+
+#define GPFSTR "general protection fault"
+
+dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code)
+{
+	char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
+	enum kernel_gp_hint hint = GP_NO_HINT;
 	struct task_struct *tsk;
+	unsigned long gp_addr;
+	int ret;
 
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
 	cond_local_irq_enable(regs);
@@ -539,34 +587,56 @@ do_general_protection(struct pt_regs *regs, long error_code)
 	}
 
 	tsk = current;
-	if (!user_mode(regs)) {
-		if (fixup_exception(regs, X86_TRAP_GP, error_code, 0))
-			return;
 
+	if (user_mode(regs)) {
 		tsk->thread.error_code = error_code;
 		tsk->thread.trap_nr = X86_TRAP_GP;
 
-		/*
-		 * To be potentially processing a kprobe fault and to
-		 * trust the result from kprobe_running(), we have to
-		 * be non-preemptible.
-		 */
-		if (!preemptible() && kprobe_running() &&
-		    kprobe_fault_handler(regs, X86_TRAP_GP))
-			return;
+		show_signal(tsk, SIGSEGV, "", desc, regs, error_code);
+		force_sig(SIGSEGV);
 
-		if (notify_die(DIE_GPF, desc, regs, error_code,
-			       X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
-			die(desc, regs, error_code);
 		return;
 	}
 
+	if (fixup_exception(regs, X86_TRAP_GP, error_code, 0))
+		return;
+
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_nr = X86_TRAP_GP;
 
-	show_signal(tsk, SIGSEGV, "", desc, regs, error_code);
+	/*
+	 * To be potentially processing a kprobe fault and to trust the result
+	 * from kprobe_running(), we have to be non-preemptible.
+	 */
+	if (!preemptible() &&
+	    kprobe_running() &&
+	    kprobe_fault_handler(regs, X86_TRAP_GP))
+		return;
+
+	ret = notify_die(DIE_GPF, desc, regs, error_code, X86_TRAP_GP, SIGSEGV);
+	if (ret == NOTIFY_STOP)
+		return;
+
+	if (error_code)
+		snprintf(desc, sizeof(desc), "segment-related " GPFSTR);
+	else
+		hint = get_kernel_gp_address(regs, &gp_addr);
+
+	if (hint != GP_NO_HINT)
+		snprintf(desc, sizeof(desc), GPFSTR ", %s 0x%lx",
+			 (hint == GP_NON_CANONICAL) ? "probably for non-canonical address"
+						    : "maybe for address",
+			 gp_addr);
+
+	/*
+	 * KASAN is interested only in the non-canonical case, clear it
+	 * otherwise.
+	 */
+	if (hint != GP_NON_CANONICAL)
+		gp_addr = 0;
+
+	die_addr(desc, regs, error_code, gp_addr);
 
-	force_sig(SIGSEGV);
 }
 NOKPROBE_SYMBOL(do_general_protection);
 
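The reworked do_general_protection() builds the oops string at runtime instead of using a fixed literal: desc starts out as GPFSTR and, when a hint address could be recovered, is rewritten by snprintf() with the canonical/non-canonical wording. A standalone sketch of that string handling follows; GPFSTR, the buffer sizing and the hint phrases come from the patch, while the sample address, the non_canonical flag and main() are made up for demonstration.

/* Illustration only: how the #GP oops string is assembled. */
#include <stdio.h>

#define GPFSTR "general protection fault"

int main(void)
{
	/* Same sizing idea as the patch: GPFSTR + hint text + "0x" + hex address. */
	char desc[sizeof(GPFSTR) + 50 + 2 * sizeof(unsigned long) + 1] = GPFSTR;
	unsigned long gp_addr = 0xdead000000000000UL;	/* made-up sample */
	int non_canonical = 1;

	snprintf(desc, sizeof(desc), GPFSTR ", %s 0x%lx",
		 non_canonical ? "probably for non-canonical address"
			       : "maybe for address",
		 gp_addr);

	/* e.g. "general protection fault, probably for non-canonical address 0xdead..." */
	puts(desc);
	return 0;
}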
@@ -155,7 +155,7 @@ static bool check_seg_overrides(struct insn *insn, int regoff)
  */
 static int resolve_default_seg(struct insn *insn, struct pt_regs *regs, int off)
 {
-	if (user_64bit_mode(regs))
+	if (any_64bit_mode(regs))
 		return INAT_SEG_REG_IGNORE;
 	/*
 	 * Resolve the default segment register as described in Section 3.7.4
@@ -266,7 +266,7 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff)
 	 * which may be invalid at this point.
 	 */
 	if (regoff == offsetof(struct pt_regs, ip)) {
-		if (user_64bit_mode(regs))
+		if (any_64bit_mode(regs))
 			return INAT_SEG_REG_IGNORE;
 		else
 			return INAT_SEG_REG_CS;
@@ -289,7 +289,7 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff)
 	 * In long mode, segment override prefixes are ignored, except for
 	 * overrides for FS and GS.
 	 */
-	if (user_64bit_mode(regs)) {
+	if (any_64bit_mode(regs)) {
 		if (idx != INAT_SEG_REG_FS &&
 		    idx != INAT_SEG_REG_GS)
 			idx = INAT_SEG_REG_IGNORE;
@@ -646,23 +646,27 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx)
 		 */
 		return (unsigned long)(sel << 4);
 
-	if (user_64bit_mode(regs)) {
+	if (any_64bit_mode(regs)) {
 		/*
 		 * Only FS or GS will have a base address, the rest of
 		 * the segments' bases are forced to 0.
 		 */
 		unsigned long base;
 
-		if (seg_reg_idx == INAT_SEG_REG_FS)
+		if (seg_reg_idx == INAT_SEG_REG_FS) {
 			rdmsrl(MSR_FS_BASE, base);
-		else if (seg_reg_idx == INAT_SEG_REG_GS)
+		} else if (seg_reg_idx == INAT_SEG_REG_GS) {
 			/*
 			 * swapgs was called at the kernel entry point. Thus,
 			 * MSR_KERNEL_GS_BASE will have the user-space GS base.
 			 */
-			rdmsrl(MSR_KERNEL_GS_BASE, base);
-		else
+			if (user_mode(regs))
+				rdmsrl(MSR_KERNEL_GS_BASE, base);
+			else
+				rdmsrl(MSR_GS_BASE, base);
+		} else {
 			base = 0;
+		}
 
 		return base;
 	}
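The swapgs comment above is about which MSR holds the user-space GS base while kernel code runs. From user space, the same per-thread FS/GS bases can be observed with arch_prctl(); here is a minimal sketch, assuming an x86-64 Linux system. ARCH_GET_FS/ARCH_GET_GS and SYS_arch_prctl are standard Linux interfaces; on typical glibc setups the FS base points at the TLS block and the GS base is usually 0.

/* Illustration only: read this thread's FS and GS base on x86-64 Linux. */
#define _GNU_SOURCE
#include <asm/prctl.h>		/* ARCH_GET_FS, ARCH_GET_GS */
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	unsigned long fs_base = 0, gs_base = 0;

	if (syscall(SYS_arch_prctl, ARCH_GET_FS, &fs_base) ||
	    syscall(SYS_arch_prctl, ARCH_GET_GS, &gs_base))
		perror("arch_prctl");

	printf("FS base: %#lx\nGS base: %#lx\n", fs_base, gs_base);
	return 0;
}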
@@ -703,7 +707,7 @@ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx)
 	if (sel < 0)
 		return 0;
 
-	if (user_64bit_mode(regs) || v8086_mode(regs))
+	if (any_64bit_mode(regs) || v8086_mode(regs))
 		return -1L;
 
 	if (!sel)
@@ -948,7 +952,7 @@ static int get_eff_addr_modrm(struct insn *insn, struct pt_regs *regs,
 	 * following instruction.
 	 */
 	if (*regoff == -EDOM) {
-		if (user_64bit_mode(regs))
+		if (any_64bit_mode(regs))
 			tmp = regs->ip + insn->length;
 		else
 			tmp = 0;
@@ -1250,7 +1254,7 @@ static void __user *get_addr_ref_32(struct insn *insn, struct pt_regs *regs)
 	 * After computed, the effective address is treated as an unsigned
 	 * quantity.
	 */
-	if (!user_64bit_mode(regs) && ((unsigned int)eff_addr > seg_limit))
+	if (!any_64bit_mode(regs) && ((unsigned int)eff_addr > seg_limit))
 		goto out;
 
 	/*
@@ -288,23 +288,6 @@ static void __init kasan_shallow_populate_pgds(void *start, void *end)
 	} while (pgd++, addr = next, addr != (unsigned long)end);
 }
 
-#ifdef CONFIG_KASAN_INLINE
-static int kasan_die_handler(struct notifier_block *self,
-			     unsigned long val,
-			     void *data)
-{
-	if (val == DIE_GPF) {
-		pr_emerg("CONFIG_KASAN_INLINE enabled\n");
-		pr_emerg("GPF could be caused by NULL-ptr deref or user memory access\n");
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block kasan_die_notifier = {
-	.notifier_call = kasan_die_handler,
-};
-#endif
-
 void __init kasan_early_init(void)
 {
 	int i;
@@ -341,10 +324,6 @@ void __init kasan_init(void)
 	int i;
 	void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
 
-#ifdef CONFIG_KASAN_INLINE
-	register_die_notifier(&kasan_die_notifier);
-#endif
-
 	memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));
 
 	/*
@@ -228,4 +228,10 @@ static inline void kasan_release_vmalloc(unsigned long start,
 					 unsigned long free_region_end) {}
 #endif
 
+#ifdef CONFIG_KASAN_INLINE
+void kasan_non_canonical_hook(unsigned long addr);
+#else /* CONFIG_KASAN_INLINE */
+static inline void kasan_non_canonical_hook(unsigned long addr) { }
+#endif /* CONFIG_KASAN_INLINE */
+
 #endif /* LINUX_KASAN_H */
@@ -512,3 +512,43 @@ void __kasan_report(unsigned long addr, size_t size, bool is_write, unsigned lon
 
 	end_report(&flags);
 }
+
+#ifdef CONFIG_KASAN_INLINE
+/*
+ * With CONFIG_KASAN_INLINE, accesses to bogus pointers (outside the high
+ * canonical half of the address space) cause out-of-bounds shadow memory reads
+ * before the actual access. For addresses in the low canonical half of the
+ * address space, as well as most non-canonical addresses, that out-of-bounds
+ * shadow memory access lands in the non-canonical part of the address space.
+ * Help the user figure out what the original bogus pointer was.
+ */
+void kasan_non_canonical_hook(unsigned long addr)
+{
+	unsigned long orig_addr;
+	const char *bug_type;
+
+	if (addr < KASAN_SHADOW_OFFSET)
+		return;
+
+	orig_addr = (addr - KASAN_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT;
+	/*
+	 * For faults near the shadow address for NULL, we can be fairly certain
+	 * that this is a KASAN shadow memory access.
+	 * For faults that correspond to shadow for low canonical addresses, we
+	 * can still be pretty sure - that shadow region is a fairly narrow
+	 * chunk of the non-canonical address space.
+	 * But faults that look like shadow for non-canonical addresses are a
+	 * really large chunk of the address space. In that case, we still
+	 * print the decoded address, but make it clear that this is not
+	 * necessarily what's actually going on.
+	 */
+	if (orig_addr < PAGE_SIZE)
+		bug_type = "null-ptr-deref";
+	else if (orig_addr < TASK_SIZE)
+		bug_type = "probably user-memory-access";
+	else
+		bug_type = "maybe wild-memory-access";
+	pr_alert("KASAN: %s in range [0x%016lx-0x%016lx]\n", bug_type,
+		 orig_addr, orig_addr + KASAN_SHADOW_MASK);
+}
+#endif
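kasan_non_canonical_hook() simply inverts KASAN's address-to-shadow mapping, shadow = (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET, to recover the pointer the instrumented code was really checking. A worked sketch of that arithmetic follows; the KASAN_SHADOW_OFFSET value and the mem_to_shadow()/shadow_to_mem() helpers are assumptions mirroring a typical x86-64 generic-KASAN configuration, not something this patch defines.

/* Illustration only: the shadow-mapping arithmetic the hook inverts. */
#include <stdio.h>

#define KASAN_SHADOW_SCALE_SHIFT 3			/* 8 bytes of memory per shadow byte */
#define KASAN_SHADOW_OFFSET	 0xdffffc0000000000UL	/* assumed x86-64 value */

static unsigned long mem_to_shadow(unsigned long addr)
{
	return (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET;
}

static unsigned long shadow_to_mem(unsigned long shadow)
{
	return (shadow - KASAN_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT;
}

int main(void)
{
	unsigned long bogus = 0x10;		/* NULL->field style access */
	unsigned long shadow = mem_to_shadow(bogus);

	/*
	 * The shadow of a bogus low pointer is itself non-canonical, so the
	 * inline instrumentation faults with #GP before KASAN can report;
	 * the hook decodes the original pointer back from the fault address.
	 */
	printf("access %#lx -> shadow %#lx -> decoded back to %#lx\n",
	       bogus, shadow, shadow_to_mem(shadow));
	return 0;
}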