Merge branch 'x86-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull misc x86 updates from Ingo Molnar:
 "Misc changes:

   - Enhance #GP fault printouts by distinguishing between canonical and
     non-canonical address faults, and also add KASAN fault decoding.

   - Fix/enhance the x86 NMI handler by putting the duration check into
     a direct function call instead of an irq_work which we know to be
     broken in some cases.

   - Clean up do_general_protection() a bit"

* 'x86-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/nmi: Remove irq_work from the long duration NMI handler
  x86/traps: Cleanup do_general_protection()
  x86/kasan: Print original address on #GP
  x86/dumpstack: Introduce die_addr() for die() with #GP fault address
  x86/traps: Print address on #GP
  x86/insn-eval: Add support for 64-bit kernel mode
This commit is contained in:
Linus Torvalds 2020-01-28 12:28:06 -08:00
commit f6170f0afb
10 changed files with 198 additions and 64 deletions

View File

@ -33,6 +33,7 @@ enum show_regs_mode {
};
extern void die(const char *, struct pt_regs *,long);
void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr);
extern int __must_check __die(const char *, struct pt_regs *, long);
extern void show_stack_regs(struct pt_regs *regs);
extern void __show_regs(struct pt_regs *regs, enum show_regs_mode);

View File

@ -41,7 +41,6 @@ struct nmiaction {
struct list_head list;
nmi_handler_t handler;
u64 max_duration;
struct irq_work irq_work;
unsigned long flags;
const char *name;
};

View File

@ -159,6 +159,19 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
#endif
}
/*
* Determine whether the register set came from any context that is running in
* 64-bit mode.
*/
static inline bool any_64bit_mode(struct pt_regs *regs)
{
#ifdef CONFIG_X86_64
return !user_mode(regs) || user_64bit_mode(regs);
#else
return false;
#endif
}
#ifdef CONFIG_X86_64
#define current_user_stack_pointer() current_pt_regs()->sp
#define compat_user_stack_pointer() current_pt_regs()->sp

View File

@ -365,7 +365,7 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
}
NOKPROBE_SYMBOL(oops_end);
int __die(const char *str, struct pt_regs *regs, long err)
static void __die_header(const char *str, struct pt_regs *regs, long err)
{
const char *pr = "";
@ -384,7 +384,11 @@ int __die(const char *str, struct pt_regs *regs, long err)
IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "",
IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
(boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "");
}
NOKPROBE_SYMBOL(__die_header);
static int __die_body(const char *str, struct pt_regs *regs, long err)
{
show_regs(regs);
print_modules();
@ -394,6 +398,13 @@ int __die(const char *str, struct pt_regs *regs, long err)
return 0;
}
NOKPROBE_SYMBOL(__die_body);
int __die(const char *str, struct pt_regs *regs, long err)
{
__die_header(str, regs, err);
return __die_body(str, regs, err);
}
NOKPROBE_SYMBOL(__die);
/*
@ -410,6 +421,19 @@ void die(const char *str, struct pt_regs *regs, long err)
oops_end(flags, regs, sig);
}
void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr)
{
unsigned long flags = oops_begin();
int sig = SIGSEGV;
__die_header(str, regs, err);
if (gp_addr)
kasan_non_canonical_hook(gp_addr);
if (__die_body(str, regs, err))
sig = 0;
oops_end(flags, regs, sig);
}
void show_regs(struct pt_regs *regs)
{
show_regs_print_info(KERN_DEFAULT);

View File

@ -104,18 +104,22 @@ static int __init nmi_warning_debugfs(void)
}
fs_initcall(nmi_warning_debugfs);
static void nmi_max_handler(struct irq_work *w)
static void nmi_check_duration(struct nmiaction *action, u64 duration)
{
struct nmiaction *a = container_of(w, struct nmiaction, irq_work);
u64 whole_msecs = READ_ONCE(action->max_duration);
int remainder_ns, decimal_msecs;
u64 whole_msecs = READ_ONCE(a->max_duration);
if (duration < nmi_longest_ns || duration < action->max_duration)
return;
action->max_duration = duration;
remainder_ns = do_div(whole_msecs, (1000 * 1000));
decimal_msecs = remainder_ns / 1000;
printk_ratelimited(KERN_INFO
"INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n",
a->handler, whole_msecs, decimal_msecs);
action->handler, whole_msecs, decimal_msecs);
}
static int nmi_handle(unsigned int type, struct pt_regs *regs)
@ -142,11 +146,7 @@ static int nmi_handle(unsigned int type, struct pt_regs *regs)
delta = sched_clock() - delta;
trace_nmi_handler(a->handler, (int)delta, thishandled);
if (delta < nmi_longest_ns || delta < a->max_duration)
continue;
a->max_duration = delta;
irq_work_queue(&a->irq_work);
nmi_check_duration(a, delta);
}
rcu_read_unlock();
@ -164,8 +164,6 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
if (!action->handler)
return -EINVAL;
init_irq_work(&action->irq_work, nmi_max_handler);
raw_spin_lock_irqsave(&desc->lock, flags);
/*

View File

@ -56,6 +56,8 @@
#include <asm/mpx.h>
#include <asm/vm86.h>
#include <asm/umip.h>
#include <asm/insn.h>
#include <asm/insn-eval.h>
#ifdef CONFIG_X86_64
#include <asm/x86_init.h>
@ -518,11 +520,57 @@ exit_trap:
do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, 0, NULL);
}
dotraplinkage void
do_general_protection(struct pt_regs *regs, long error_code)
enum kernel_gp_hint {
GP_NO_HINT,
GP_NON_CANONICAL,
GP_CANONICAL
};
/*
* When an uncaught #GP occurs, try to determine the memory address accessed by
* the instruction and return that address to the caller. Also, try to figure
* out whether any part of the access to that address was non-canonical.
*/
static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs,
unsigned long *addr)
{
const char *desc = "general protection fault";
u8 insn_buf[MAX_INSN_SIZE];
struct insn insn;
if (probe_kernel_read(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
return GP_NO_HINT;
kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
insn_get_modrm(&insn);
insn_get_sib(&insn);
*addr = (unsigned long)insn_get_addr_ref(&insn, regs);
if (*addr == -1UL)
return GP_NO_HINT;
#ifdef CONFIG_X86_64
/*
* Check that:
* - the operand is not in the kernel half
* - the last byte of the operand is not in the user canonical half
*/
if (*addr < ~__VIRTUAL_MASK &&
*addr + insn.opnd_bytes - 1 > __VIRTUAL_MASK)
return GP_NON_CANONICAL;
#endif
return GP_CANONICAL;
}
#define GPFSTR "general protection fault"
dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code)
{
char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
enum kernel_gp_hint hint = GP_NO_HINT;
struct task_struct *tsk;
unsigned long gp_addr;
int ret;
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
cond_local_irq_enable(regs);
@ -539,34 +587,56 @@ do_general_protection(struct pt_regs *regs, long error_code)
}
tsk = current;
if (!user_mode(regs)) {
if (fixup_exception(regs, X86_TRAP_GP, error_code, 0))
return;
if (user_mode(regs)) {
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_GP;
/*
* To be potentially processing a kprobe fault and to
* trust the result from kprobe_running(), we have to
* be non-preemptible.
*/
if (!preemptible() && kprobe_running() &&
kprobe_fault_handler(regs, X86_TRAP_GP))
return;
show_signal(tsk, SIGSEGV, "", desc, regs, error_code);
force_sig(SIGSEGV);
if (notify_die(DIE_GPF, desc, regs, error_code,
X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
die(desc, regs, error_code);
return;
}
if (fixup_exception(regs, X86_TRAP_GP, error_code, 0))
return;
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_GP;
show_signal(tsk, SIGSEGV, "", desc, regs, error_code);
/*
* To be potentially processing a kprobe fault and to trust the result
* from kprobe_running(), we have to be non-preemptible.
*/
if (!preemptible() &&
kprobe_running() &&
kprobe_fault_handler(regs, X86_TRAP_GP))
return;
ret = notify_die(DIE_GPF, desc, regs, error_code, X86_TRAP_GP, SIGSEGV);
if (ret == NOTIFY_STOP)
return;
if (error_code)
snprintf(desc, sizeof(desc), "segment-related " GPFSTR);
else
hint = get_kernel_gp_address(regs, &gp_addr);
if (hint != GP_NO_HINT)
snprintf(desc, sizeof(desc), GPFSTR ", %s 0x%lx",
(hint == GP_NON_CANONICAL) ? "probably for non-canonical address"
: "maybe for address",
gp_addr);
/*
* KASAN is interested only in the non-canonical case, clear it
* otherwise.
*/
if (hint != GP_NON_CANONICAL)
gp_addr = 0;
die_addr(desc, regs, error_code, gp_addr);
force_sig(SIGSEGV);
}
NOKPROBE_SYMBOL(do_general_protection);

View File

@ -155,7 +155,7 @@ static bool check_seg_overrides(struct insn *insn, int regoff)
*/
static int resolve_default_seg(struct insn *insn, struct pt_regs *regs, int off)
{
if (user_64bit_mode(regs))
if (any_64bit_mode(regs))
return INAT_SEG_REG_IGNORE;
/*
* Resolve the default segment register as described in Section 3.7.4
@ -266,7 +266,7 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff)
* which may be invalid at this point.
*/
if (regoff == offsetof(struct pt_regs, ip)) {
if (user_64bit_mode(regs))
if (any_64bit_mode(regs))
return INAT_SEG_REG_IGNORE;
else
return INAT_SEG_REG_CS;
@ -289,7 +289,7 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff)
* In long mode, segment override prefixes are ignored, except for
* overrides for FS and GS.
*/
if (user_64bit_mode(regs)) {
if (any_64bit_mode(regs)) {
if (idx != INAT_SEG_REG_FS &&
idx != INAT_SEG_REG_GS)
idx = INAT_SEG_REG_IGNORE;
@ -646,23 +646,27 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx)
*/
return (unsigned long)(sel << 4);
if (user_64bit_mode(regs)) {
if (any_64bit_mode(regs)) {
/*
* Only FS or GS will have a base address, the rest of
* the segments' bases are forced to 0.
*/
unsigned long base;
if (seg_reg_idx == INAT_SEG_REG_FS)
if (seg_reg_idx == INAT_SEG_REG_FS) {
rdmsrl(MSR_FS_BASE, base);
else if (seg_reg_idx == INAT_SEG_REG_GS)
} else if (seg_reg_idx == INAT_SEG_REG_GS) {
/*
* swapgs was called at the kernel entry point. Thus,
* MSR_KERNEL_GS_BASE will have the user-space GS base.
*/
rdmsrl(MSR_KERNEL_GS_BASE, base);
else
if (user_mode(regs))
rdmsrl(MSR_KERNEL_GS_BASE, base);
else
rdmsrl(MSR_GS_BASE, base);
} else {
base = 0;
}
return base;
}
@ -703,7 +707,7 @@ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx)
if (sel < 0)
return 0;
if (user_64bit_mode(regs) || v8086_mode(regs))
if (any_64bit_mode(regs) || v8086_mode(regs))
return -1L;
if (!sel)
@ -948,7 +952,7 @@ static int get_eff_addr_modrm(struct insn *insn, struct pt_regs *regs,
* following instruction.
*/
if (*regoff == -EDOM) {
if (user_64bit_mode(regs))
if (any_64bit_mode(regs))
tmp = regs->ip + insn->length;
else
tmp = 0;
@ -1250,7 +1254,7 @@ static void __user *get_addr_ref_32(struct insn *insn, struct pt_regs *regs)
* After computed, the effective address is treated as an unsigned
* quantity.
*/
if (!user_64bit_mode(regs) && ((unsigned int)eff_addr > seg_limit))
if (!any_64bit_mode(regs) && ((unsigned int)eff_addr > seg_limit))
goto out;
/*

View File

@ -288,23 +288,6 @@ static void __init kasan_shallow_populate_pgds(void *start, void *end)
} while (pgd++, addr = next, addr != (unsigned long)end);
}
#ifdef CONFIG_KASAN_INLINE
static int kasan_die_handler(struct notifier_block *self,
unsigned long val,
void *data)
{
if (val == DIE_GPF) {
pr_emerg("CONFIG_KASAN_INLINE enabled\n");
pr_emerg("GPF could be caused by NULL-ptr deref or user memory access\n");
}
return NOTIFY_OK;
}
static struct notifier_block kasan_die_notifier = {
.notifier_call = kasan_die_handler,
};
#endif
void __init kasan_early_init(void)
{
int i;
@ -341,10 +324,6 @@ void __init kasan_init(void)
int i;
void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
#ifdef CONFIG_KASAN_INLINE
register_die_notifier(&kasan_die_notifier);
#endif
memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));
/*

View File

@ -228,4 +228,10 @@ static inline void kasan_release_vmalloc(unsigned long start,
unsigned long free_region_end) {}
#endif
#ifdef CONFIG_KASAN_INLINE
void kasan_non_canonical_hook(unsigned long addr);
#else /* CONFIG_KASAN_INLINE */
static inline void kasan_non_canonical_hook(unsigned long addr) { }
#endif /* CONFIG_KASAN_INLINE */
#endif /* LINUX_KASAN_H */

View File

@ -512,3 +512,43 @@ void __kasan_report(unsigned long addr, size_t size, bool is_write, unsigned lon
end_report(&flags);
}
#ifdef CONFIG_KASAN_INLINE
/*
* With CONFIG_KASAN_INLINE, accesses to bogus pointers (outside the high
* canonical half of the address space) cause out-of-bounds shadow memory reads
* before the actual access. For addresses in the low canonical half of the
* address space, as well as most non-canonical addresses, that out-of-bounds
* shadow memory access lands in the non-canonical part of the address space.
* Help the user figure out what the original bogus pointer was.
*/
void kasan_non_canonical_hook(unsigned long addr)
{
unsigned long orig_addr;
const char *bug_type;
if (addr < KASAN_SHADOW_OFFSET)
return;
orig_addr = (addr - KASAN_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT;
/*
* For faults near the shadow address for NULL, we can be fairly certain
* that this is a KASAN shadow memory access.
* For faults that correspond to shadow for low canonical addresses, we
* can still be pretty sure - that shadow region is a fairly narrow
* chunk of the non-canonical address space.
* But faults that look like shadow for non-canonical addresses are a
* really large chunk of the address space. In that case, we still
* print the decoded address, but make it clear that this is not
* necessarily what's actually going on.
*/
if (orig_addr < PAGE_SIZE)
bug_type = "null-ptr-deref";
else if (orig_addr < TASK_SIZE)
bug_type = "probably user-memory-access";
else
bug_type = "maybe wild-memory-access";
pr_alert("KASAN: %s in range [0x%016lx-0x%016lx]\n", bug_type,
orig_addr, orig_addr + KASAN_SHADOW_MASK);
}
#endif