 - A couple of SEV-ES fixes and robustifications: verify usermode stack
 pointer in NMI is not coming from the syscall gap, correctly track IRQ
 states in the #VC handler and access user insn bytes atomically in same
 handler as latter cannot sleep.
 
 - Balance 32-bit fast syscall exit path to do the proper work on exit
 and thus not confuse audit and ptrace frameworks.
 
 - Two fixes for the ORC unwinder going "off the rails" into KASAN
 redzones and when ORC data is missing.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmBN+ksACgkQEsHwGGHe
 VUpYtBAAj199n50ipP2x+jjgCueIytMqwCCRozrgZ8JF0Al83piVfjhuAYQpfvD8
 cKCxN/jSEF0YoUg/grBTPLG6f0J4B2GoekSlSc3ljnuhBby4iJ9B4YgE7qym6tuT
 G/mBOuAo2HBzvB70i1BYPN6mrA+6SG1d4tIhRLGKHCz+hQm8yYnJYVbiOkLBECeP
 0QOOpX6hR5ytOOCRqwD/O5YIdZD8NvlA4sQE522Mrw/4PWz9XcS2kwpOQFHoRsFL
 if3t2yLMiGMfV0dyUCMoGZl0NqpnIZynoNdVPq/bllTW5obnmh6z8Eir44PzJmVJ
 ftVZTcReRqm5ObgwZh0g1H7CRjKe0xU9FyJHRmQl3Xb5g3wRAm3OkMJ2hQcOUPy9
 VOB4vp7kbDg3MmGJe2xOtsEeAyVHGzTaWlmZ0moxjJXiLTjUy69eelmvLepypO3P
 Bo/xpjn9hN7L9ptKv1exsSatQRN7KWTCxtz+NBJgC4pEpkdtDBkaWunIKeauPTZ2
 CAJJrp2sn7i5/CKPOuhjbQ+nSTMptpuZQxTDrjVUO0/6qs4ffQT3O+WXRV1bQ07v
 ObRqi0hIYgm4vSiBfVRfxOU+Zrx0j3kny4/xUs6CIjMjrjIp4RgBbqvZ95ZMooMi
 yeyZdVfzQ7PRaam5J2V3IHxKz7554hvMl5Zf4zAdl0qcQw3YZ0o=
 =rw8S
 -----END PGP SIGNATURE-----

Merge tag 'x86_urgent_for_v5.12_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Borislav Petkov:

 - A couple of SEV-ES fixes and robustifications: verify usermode stack
   pointer in NMI is not coming from the syscall gap, correctly track
   IRQ states in the #VC handler and access user insn bytes atomically
   in same handler as latter cannot sleep.

 - Balance 32-bit fast syscall exit path to do the proper work on exit
   and thus not confuse audit and ptrace frameworks.

 - Two fixes for the ORC unwinder going "off the rails" into KASAN
   redzones and when ORC data is missing.

* tag 'x86_urgent_for_v5.12_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/sev-es: Use __copy_from_user_inatomic()
  x86/sev-es: Correctly track IRQ states in runtime #VC handler
  x86/sev-es: Check regs->sp is trusted before adjusting #VC IST stack
  x86/sev-es: Introduce ip_within_syscall_gap() helper
  x86/entry: Fix entry/exit mismatch on failed fast 32-bit syscalls
  x86/unwind/orc: Silence warnings caused by missing ORC data
  x86/unwind/orc: Disable KASAN checking in the ORC unwinder, part 2
This commit is contained in:
Linus Torvalds 2021-03-14 12:48:10 -07:00
commit 0a7c10df49
9 changed files with 99 additions and 29 deletions

View File

@ -128,7 +128,8 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
regs->ax = -EFAULT; regs->ax = -EFAULT;
instrumentation_end(); instrumentation_end();
syscall_exit_to_user_mode(regs); local_irq_disable();
irqentry_exit_to_user_mode(regs);
return false; return false;
} }

View File

@ -210,6 +210,8 @@ SYM_CODE_START(entry_SYSCALL_compat)
/* Switch to the kernel stack */ /* Switch to the kernel stack */
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
/* Construct struct pt_regs on stack */ /* Construct struct pt_regs on stack */
pushq $__USER32_DS /* pt_regs->ss */ pushq $__USER32_DS /* pt_regs->ss */
pushq %r8 /* pt_regs->sp */ pushq %r8 /* pt_regs->sp */

View File

@ -23,6 +23,8 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx);
int insn_get_code_seg_params(struct pt_regs *regs); int insn_get_code_seg_params(struct pt_regs *regs);
int insn_fetch_from_user(struct pt_regs *regs, int insn_fetch_from_user(struct pt_regs *regs,
unsigned char buf[MAX_INSN_SIZE]); unsigned char buf[MAX_INSN_SIZE]);
int insn_fetch_from_user_inatomic(struct pt_regs *regs,
unsigned char buf[MAX_INSN_SIZE]);
bool insn_decode(struct insn *insn, struct pt_regs *regs, bool insn_decode(struct insn *insn, struct pt_regs *regs,
unsigned char buf[MAX_INSN_SIZE], int buf_size); unsigned char buf[MAX_INSN_SIZE], int buf_size);

View File

@ -25,6 +25,7 @@ void __end_SYSENTER_singlestep_region(void);
void entry_SYSENTER_compat(void); void entry_SYSENTER_compat(void);
void __end_entry_SYSENTER_compat(void); void __end_entry_SYSENTER_compat(void);
void entry_SYSCALL_compat(void); void entry_SYSCALL_compat(void);
void entry_SYSCALL_compat_safe_stack(void);
void entry_INT80_compat(void); void entry_INT80_compat(void);
#ifdef CONFIG_XEN_PV #ifdef CONFIG_XEN_PV
void xen_entry_INT80_compat(void); void xen_entry_INT80_compat(void);

View File

@ -94,6 +94,8 @@ struct pt_regs {
#include <asm/paravirt_types.h> #include <asm/paravirt_types.h>
#endif #endif
#include <asm/proto.h>
struct cpuinfo_x86; struct cpuinfo_x86;
struct task_struct; struct task_struct;
@ -175,6 +177,19 @@ static inline bool any_64bit_mode(struct pt_regs *regs)
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
#define current_user_stack_pointer() current_pt_regs()->sp #define current_user_stack_pointer() current_pt_regs()->sp
#define compat_user_stack_pointer() current_pt_regs()->sp #define compat_user_stack_pointer() current_pt_regs()->sp
static inline bool ip_within_syscall_gap(struct pt_regs *regs)
{
bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack);
#ifdef CONFIG_IA32_EMULATION
ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat &&
regs->ip < (unsigned long)entry_SYSCALL_compat_safe_stack);
#endif
return ret;
}
#endif #endif
static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)

View File

@ -121,8 +121,18 @@ static void __init setup_vc_stacks(int cpu)
cea_set_pte((void *)vaddr, pa, PAGE_KERNEL); cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
} }
static __always_inline bool on_vc_stack(unsigned long sp) static __always_inline bool on_vc_stack(struct pt_regs *regs)
{ {
unsigned long sp = regs->sp;
/* User-mode RSP is not trusted */
if (user_mode(regs))
return false;
/* SYSCALL gap still has user-mode RSP */
if (ip_within_syscall_gap(regs))
return false;
return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC))); return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
} }
@ -144,7 +154,7 @@ void noinstr __sev_es_ist_enter(struct pt_regs *regs)
old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]); old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
/* Make room on the IST stack */ /* Make room on the IST stack */
if (on_vc_stack(regs->sp)) if (on_vc_stack(regs))
new_ist = ALIGN_DOWN(regs->sp, 8) - sizeof(old_ist); new_ist = ALIGN_DOWN(regs->sp, 8) - sizeof(old_ist);
else else
new_ist = old_ist - sizeof(old_ist); new_ist = old_ist - sizeof(old_ist);
@ -248,7 +258,7 @@ static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
int res; int res;
if (user_mode(ctxt->regs)) { if (user_mode(ctxt->regs)) {
res = insn_fetch_from_user(ctxt->regs, buffer); res = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
if (!res) { if (!res) {
ctxt->fi.vector = X86_TRAP_PF; ctxt->fi.vector = X86_TRAP_PF;
ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER; ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
@ -1248,13 +1258,12 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
{ {
struct sev_es_runtime_data *data = this_cpu_read(runtime_data); struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
irqentry_state_t irq_state;
struct ghcb_state state; struct ghcb_state state;
struct es_em_ctxt ctxt; struct es_em_ctxt ctxt;
enum es_result result; enum es_result result;
struct ghcb *ghcb; struct ghcb *ghcb;
lockdep_assert_irqs_disabled();
/* /*
* Handle #DB before calling into !noinstr code to avoid recursive #DB. * Handle #DB before calling into !noinstr code to avoid recursive #DB.
*/ */
@ -1263,6 +1272,8 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
return; return;
} }
irq_state = irqentry_nmi_enter(regs);
lockdep_assert_irqs_disabled();
instrumentation_begin(); instrumentation_begin();
/* /*
@ -1325,6 +1336,7 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
out: out:
instrumentation_end(); instrumentation_end();
irqentry_nmi_exit(regs, irq_state);
return; return;

View File

@ -694,8 +694,7 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r
* In the SYSCALL entry path the RSP value comes from user-space - don't * In the SYSCALL entry path the RSP value comes from user-space - don't
* trust it and switch to the current kernel stack * trust it and switch to the current kernel stack
*/ */
if (regs->ip >= (unsigned long)entry_SYSCALL_64 && if (ip_within_syscall_gap(regs)) {
regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack) {
sp = this_cpu_read(cpu_current_top_of_stack); sp = this_cpu_read(cpu_current_top_of_stack);
goto sync; goto sync;
} }

View File

@ -13,7 +13,7 @@
#define orc_warn_current(args...) \ #define orc_warn_current(args...) \
({ \ ({ \
if (state->task == current) \ if (state->task == current && !state->error) \
orc_warn(args); \ orc_warn(args); \
}) })
@ -367,8 +367,8 @@ static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
if (!stack_access_ok(state, addr, sizeof(struct pt_regs))) if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
return false; return false;
*ip = regs->ip; *ip = READ_ONCE_NOCHECK(regs->ip);
*sp = regs->sp; *sp = READ_ONCE_NOCHECK(regs->sp);
return true; return true;
} }
@ -380,8 +380,8 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr
if (!stack_access_ok(state, addr, IRET_FRAME_SIZE)) if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
return false; return false;
*ip = regs->ip; *ip = READ_ONCE_NOCHECK(regs->ip);
*sp = regs->sp; *sp = READ_ONCE_NOCHECK(regs->sp);
return true; return true;
} }
@ -402,12 +402,12 @@ static bool get_reg(struct unwind_state *state, unsigned int reg_off,
return false; return false;
if (state->full_regs) { if (state->full_regs) {
*val = ((unsigned long *)state->regs)[reg]; *val = READ_ONCE_NOCHECK(((unsigned long *)state->regs)[reg]);
return true; return true;
} }
if (state->prev_regs) { if (state->prev_regs) {
*val = ((unsigned long *)state->prev_regs)[reg]; *val = READ_ONCE_NOCHECK(((unsigned long *)state->prev_regs)[reg]);
return true; return true;
} }

View File

@ -1415,6 +1415,25 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs)
} }
} }
static unsigned long insn_get_effective_ip(struct pt_regs *regs)
{
unsigned long seg_base = 0;
/*
* If not in user-space long mode, a custom code segment could be in
* use. This is true in protected mode (if the process defined a local
* descriptor table), or virtual-8086 mode. In most of the cases
* seg_base will be zero as in USER_CS.
*/
if (!user_64bit_mode(regs)) {
seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
if (seg_base == -1L)
return 0;
}
return seg_base + regs->ip;
}
/** /**
* insn_fetch_from_user() - Copy instruction bytes from user-space memory * insn_fetch_from_user() - Copy instruction bytes from user-space memory
* @regs: Structure with register values as seen when entering kernel mode * @regs: Structure with register values as seen when entering kernel mode
@ -1431,24 +1450,43 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs)
*/ */
int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]) int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE])
{ {
unsigned long seg_base = 0; unsigned long ip;
int not_copied; int not_copied;
/* ip = insn_get_effective_ip(regs);
* If not in user-space long mode, a custom code segment could be in if (!ip)
* use. This is true in protected mode (if the process defined a local return 0;
* descriptor table), or virtual-8086 mode. In most of the cases
* seg_base will be zero as in USER_CS.
*/
if (!user_64bit_mode(regs)) {
seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
if (seg_base == -1L)
return 0;
}
not_copied = copy_from_user(buf, (void __user *)ip, MAX_INSN_SIZE);
not_copied = copy_from_user(buf, (void __user *)(seg_base + regs->ip), return MAX_INSN_SIZE - not_copied;
MAX_INSN_SIZE); }
/**
* insn_fetch_from_user_inatomic() - Copy instruction bytes from user-space memory
* while in atomic code
* @regs: Structure with register values as seen when entering kernel mode
* @buf: Array to store the fetched instruction
*
* Gets the linear address of the instruction and copies the instruction bytes
* to the buf. This function must be used in atomic context.
*
* Returns:
*
* Number of instruction bytes copied.
*
* 0 if nothing was copied.
*/
int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE])
{
unsigned long ip;
int not_copied;
ip = insn_get_effective_ip(regs);
if (!ip)
return 0;
not_copied = __copy_from_user_inatomic(buf, (void __user *)ip, MAX_INSN_SIZE);
return MAX_INSN_SIZE - not_copied; return MAX_INSN_SIZE - not_copied;
} }