Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 asm updates from Thomas Gleixner:
 "The low-level and ASM code updates for x86:

   - Make stack trace unwinding more reliable

   - ASM instruction updates for better code generation

   - Various cleanups"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/entry/64: Add two more instruction suffixes
  x86/asm/64: Use 32-bit XOR to zero registers
  x86/build/vdso: Simplify 'cmd_vdso2c'
  x86/build/vdso: Remove unused vdso-syms.lds
  x86/stacktrace: Enable HAVE_RELIABLE_STACKTRACE for the ORC unwinder
  x86/unwind/orc: Detect the end of the stack
  x86/stacktrace: Do not fail for ORC with regs on stack
  x86/stacktrace: Clarify the reliable success paths
  x86/stacktrace: Remove STACKTRACE_DUMP_ONCE
  x86/stacktrace: Do not unwind after user regs
  x86/asm: Use CC_SET/CC_OUT in percpu_cmpxchg8b_double() to micro-optimize code generation
This commit is contained in:
Linus Torvalds 2018-08-13 13:35:26 -07:00
commit f24d6f2654
28 changed files with 91 additions and 100 deletions

View File

@ -180,7 +180,7 @@ config X86
select HAVE_PERF_USER_STACK_DUMP
select HAVE_RCU_TABLE_FREE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION
select HAVE_RELIABLE_STACKTRACE if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION
select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR
select HAVE_STACK_VALIDATION if X86_64
select HAVE_RSEQ

View File

@ -75,7 +75,7 @@
* %r9
*/
__load_partial:
xor %r9, %r9
xor %r9d, %r9d
pxor MSG, MSG
mov LEN, %r8

View File

@ -66,7 +66,7 @@
* %r9
*/
__load_partial:
xor %r9, %r9
xor %r9d, %r9d
pxor MSG0, MSG0
pxor MSG1, MSG1

View File

@ -59,7 +59,7 @@
* %r9
*/
__load_partial:
xor %r9, %r9
xor %r9d, %r9d
pxor MSG, MSG
mov LEN, %r8

View File

@ -258,7 +258,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
.macro GCM_INIT Iv SUBKEY AAD AADLEN
mov \AADLEN, %r11
mov %r11, AadLen(%arg2) # ctx_data.aad_length = aad_length
xor %r11, %r11
xor %r11d, %r11d
mov %r11, InLen(%arg2) # ctx_data.in_length = 0
mov %r11, PBlockLen(%arg2) # ctx_data.partial_block_length = 0
mov %r11, PBlockEncKey(%arg2) # ctx_data.partial_block_enc_key = 0
@ -286,7 +286,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
movdqu HashKey(%arg2), %xmm13
add %arg5, InLen(%arg2)
xor %r11, %r11 # initialise the data pointer offset as zero
xor %r11d, %r11d # initialise the data pointer offset as zero
PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation
sub %r11, %arg5 # sub partial block data used
@ -702,7 +702,7 @@ _no_extra_mask_1_\@:
# GHASH computation for the last <16 Byte block
GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
xor %rax,%rax
xor %eax, %eax
mov %rax, PBlockLen(%arg2)
jmp _dec_done_\@
@ -737,7 +737,7 @@ _no_extra_mask_2_\@:
# GHASH computation for the last <16 Byte block
GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
xor %rax,%rax
xor %eax, %eax
mov %rax, PBlockLen(%arg2)
jmp _encode_done_\@

View File

@ -463,7 +463,7 @@ _get_AAD_rest_final\@:
_get_AAD_done\@:
# initialize the data pointer offset as zero
xor %r11, %r11
xor %r11d, %r11d
# start AES for num_initial_blocks blocks
mov arg5, %rax # rax = *Y0
@ -1770,7 +1770,7 @@ _get_AAD_rest_final\@:
_get_AAD_done\@:
# initialize the data pointer offset as zero
xor %r11, %r11
xor %r11d, %r11d
# start AES for num_initial_blocks blocks
mov arg5, %rax # rax = *Y0

View File

@ -113,7 +113,7 @@ ENDPROC(__morus1280_update_zero)
* %r9
*/
__load_partial:
xor %r9, %r9
xor %r9d, %r9d
vpxor MSG, MSG, MSG
mov %rcx, %r8

View File

@ -235,7 +235,7 @@ ENDPROC(__morus1280_update_zero)
* %r9
*/
__load_partial:
xor %r9, %r9
xor %r9d, %r9d
pxor MSG_LO, MSG_LO
pxor MSG_HI, MSG_HI

View File

@ -113,7 +113,7 @@ ENDPROC(__morus640_update_zero)
* %r9
*/
__load_partial:
xor %r9, %r9
xor %r9d, %r9d
pxor MSG, MSG
mov %rcx, %r8

View File

@ -96,7 +96,7 @@
# cleanup workspace
mov $8, %ecx
mov %rsp, %rdi
xor %rax, %rax
xor %eax, %eax
rep stosq
mov %rbp, %rsp # deallocate workspace

View File

@ -92,7 +92,7 @@ END(native_usergs_sysret64)
.endm
.macro TRACE_IRQS_IRETQ_DEBUG
bt $9, EFLAGS(%rsp) /* interrupts off? */
btl $9, EFLAGS(%rsp) /* interrupts off? */
jnc 1f
TRACE_IRQS_ON_DEBUG
1:
@ -408,6 +408,7 @@ ENTRY(ret_from_fork)
1:
/* kernel thread */
UNWIND_HINT_EMPTY
movq %r12, %rdi
CALL_NOSPEC %rbx
/*
@ -701,7 +702,7 @@ retint_kernel:
#ifdef CONFIG_PREEMPT
/* Interrupts are off */
/* Check if we need preemption */
bt $9, EFLAGS(%rsp) /* were interrupts off? */
btl $9, EFLAGS(%rsp) /* were interrupts off? */
jnc 1f
0: cmpl $0, PER_CPU_VAR(__preempt_count)
jnz 1f

View File

@ -58,9 +58,7 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(src
hostprogs-y += vdso2c
quiet_cmd_vdso2c = VDSO2C $@
define cmd_vdso2c
$(obj)/vdso2c $< $(<:%.dbg=%) $@
endef
cmd_vdso2c = $(obj)/vdso2c $< $(<:%.dbg=%) $@
$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
$(call if_changed,vdso2c)

View File

@ -88,6 +88,7 @@ struct orc_entry {
unsigned sp_reg:4;
unsigned bp_reg:4;
unsigned type:2;
unsigned end:1;
} __packed;
/*
@ -101,6 +102,7 @@ struct unwind_hint {
s16 sp_offset;
u8 sp_reg;
u8 type;
u8 end;
};
#endif /* __ASSEMBLY__ */

View File

@ -450,9 +450,10 @@ do { \
bool __ret; \
typeof(pcp1) __o1 = (o1), __n1 = (n1); \
typeof(pcp2) __o2 = (o2), __n2 = (n2); \
asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \
: "=a" (__ret), "+m" (pcp1), "+m" (pcp2), "+d" (__o2) \
: "b" (__n1), "c" (__n2), "a" (__o1)); \
asm volatile("cmpxchg8b "__percpu_arg(1) \
CC_SET(z) \
: CC_OUT(z) (__ret), "+m" (pcp1), "+m" (pcp2), "+a" (__o1), "+d" (__o2) \
: "b" (__n1), "c" (__n2)); \
__ret; \
})

View File

@ -26,7 +26,7 @@
* the debuginfo as necessary. It will also warn if it sees any
* inconsistencies.
*/
.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL
.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL end=0
#ifdef CONFIG_STACK_VALIDATION
.Lunwind_hint_ip_\@:
.pushsection .discard.unwind_hints
@ -35,12 +35,14 @@
.short \sp_offset
.byte \sp_reg
.byte \type
.byte \end
.balign 4
.popsection
#endif
.endm
.macro UNWIND_HINT_EMPTY
UNWIND_HINT sp_reg=ORC_REG_UNDEFINED
UNWIND_HINT sp_reg=ORC_REG_UNDEFINED end=1
.endm
.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 iret=0
@ -86,19 +88,21 @@
#else /* !__ASSEMBLY__ */
#define UNWIND_HINT(sp_reg, sp_offset, type) \
#define UNWIND_HINT(sp_reg, sp_offset, type, end) \
"987: \n\t" \
".pushsection .discard.unwind_hints\n\t" \
/* struct unwind_hint */ \
".long 987b - .\n\t" \
".short " __stringify(sp_offset) "\n\t" \
".short " __stringify(sp_offset) "\n\t" \
".byte " __stringify(sp_reg) "\n\t" \
".byte " __stringify(type) "\n\t" \
".byte " __stringify(end) "\n\t" \
".balign 4 \n\t" \
".popsection\n\t"
#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE)
#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE, 0)
#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE)
#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE, 0)
#endif /* __ASSEMBLY__ */

View File

@ -235,7 +235,7 @@ ENTRY(secondary_startup_64)
* address given in m16:64.
*/
pushq $.Lafter_lret # put return address on stack for unwinder
xorq %rbp, %rbp # clear frame pointer
xorl %ebp, %ebp # clear frame pointer
movq initial_code(%rip), %rax
pushq $__KERNEL_CS # set correct cs
pushq %rax # target address in negative space

View File

@ -20,7 +20,7 @@ DEF_NATIVE(, mov64, "mov %rdi, %rax");
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)");
DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %rax, %rax");
DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %eax, %eax");
#endif
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)

View File

@ -81,16 +81,6 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE
#define STACKTRACE_DUMP_ONCE(task) ({ \
static bool __section(.data.unlikely) __dumped; \
\
if (!__dumped) { \
__dumped = true; \
WARN_ON(1); \
show_stack(task, NULL); \
} \
})
static int __always_inline
__save_stack_trace_reliable(struct stack_trace *trace,
struct task_struct *task)
@ -99,30 +89,25 @@ __save_stack_trace_reliable(struct stack_trace *trace,
struct pt_regs *regs;
unsigned long addr;
for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state);
for (unwind_start(&state, task, NULL, NULL);
!unwind_done(&state) && !unwind_error(&state);
unwind_next_frame(&state)) {
regs = unwind_get_entry_regs(&state, NULL);
if (regs) {
/* Success path for user tasks */
if (user_mode(regs))
goto success;
/*
* Kernel mode registers on the stack indicate an
* in-kernel interrupt or exception (e.g., preemption
* or a page fault), which can make frame pointers
* unreliable.
*/
if (!user_mode(regs))
return -EINVAL;
/*
* The last frame contains the user mode syscall
* pt_regs. Skip it and finish the unwind.
*/
unwind_next_frame(&state);
if (!unwind_done(&state)) {
STACKTRACE_DUMP_ONCE(task);
if (IS_ENABLED(CONFIG_FRAME_POINTER))
return -EINVAL;
}
break;
}
addr = unwind_get_return_address(&state);
@ -132,21 +117,22 @@ __save_stack_trace_reliable(struct stack_trace *trace,
* generated code which __kernel_text_address() doesn't know
* about.
*/
if (!addr) {
STACKTRACE_DUMP_ONCE(task);
if (!addr)
return -EINVAL;
}
if (save_stack_address(trace, addr, false))
return -EINVAL;
}
/* Check for stack corruption */
if (unwind_error(&state)) {
STACKTRACE_DUMP_ONCE(task);
if (unwind_error(&state))
return -EINVAL;
}
/* Success path for non-user tasks, i.e. kthreads and idle tasks */
if (!(task->flags & (PF_KTHREAD | PF_IDLE)))
return -EINVAL;
success:
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;

View File

@ -198,7 +198,7 @@ static int orc_sort_cmp(const void *_a, const void *_b)
* whitelisted .o files which didn't get objtool generation.
*/
orc_a = cur_orc_table + (a - cur_orc_ip_table);
return orc_a->sp_reg == ORC_REG_UNDEFINED ? -1 : 1;
return orc_a->sp_reg == ORC_REG_UNDEFINED && !orc_a->end ? -1 : 1;
}
#ifdef CONFIG_MODULES
@ -352,7 +352,7 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr
bool unwind_next_frame(struct unwind_state *state)
{
unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
unsigned long ip_p, sp, orig_ip = state->ip, prev_sp = state->sp;
enum stack_type prev_type = state->stack_info.type;
struct orc_entry *orc;
bool indirect = false;
@ -363,9 +363,9 @@ bool unwind_next_frame(struct unwind_state *state)
/* Don't let modules unload while we're reading their ORC data. */
preempt_disable();
/* Have we reached the end? */
/* End-of-stack check for user tasks: */
if (state->regs && user_mode(state->regs))
goto done;
goto the_end;
/*
* Find the orc_entry associated with the text address.
@ -374,9 +374,16 @@ bool unwind_next_frame(struct unwind_state *state)
* calls and calls to noreturn functions.
*/
orc = orc_find(state->signal ? state->ip : state->ip - 1);
if (!orc || orc->sp_reg == ORC_REG_UNDEFINED)
goto done;
orig_ip = state->ip;
if (!orc)
goto err;
/* End-of-stack check for kernel threads: */
if (orc->sp_reg == ORC_REG_UNDEFINED) {
if (!orc->end)
goto err;
goto the_end;
}
/* Find the previous frame's stack: */
switch (orc->sp_reg) {
@ -402,7 +409,7 @@ bool unwind_next_frame(struct unwind_state *state)
if (!state->regs || !state->full_regs) {
orc_warn("missing regs for base reg R10 at ip %pB\n",
(void *)state->ip);
goto done;
goto err;
}
sp = state->regs->r10;
break;
@ -411,7 +418,7 @@ bool unwind_next_frame(struct unwind_state *state)
if (!state->regs || !state->full_regs) {
orc_warn("missing regs for base reg R13 at ip %pB\n",
(void *)state->ip);
goto done;
goto err;
}
sp = state->regs->r13;
break;
@ -420,7 +427,7 @@ bool unwind_next_frame(struct unwind_state *state)
if (!state->regs || !state->full_regs) {
orc_warn("missing regs for base reg DI at ip %pB\n",
(void *)state->ip);
goto done;
goto err;
}
sp = state->regs->di;
break;
@ -429,7 +436,7 @@ bool unwind_next_frame(struct unwind_state *state)
if (!state->regs || !state->full_regs) {
orc_warn("missing regs for base reg DX at ip %pB\n",
(void *)state->ip);
goto done;
goto err;
}
sp = state->regs->dx;
break;
@ -437,12 +444,12 @@ bool unwind_next_frame(struct unwind_state *state)
default:
orc_warn("unknown SP base reg %d for ip %pB\n",
orc->sp_reg, (void *)state->ip);
goto done;
goto err;
}
if (indirect) {
if (!deref_stack_reg(state, sp, &sp))
goto done;
goto err;
}
/* Find IP, SP and possibly regs: */
@ -451,7 +458,7 @@ bool unwind_next_frame(struct unwind_state *state)
ip_p = sp - sizeof(long);
if (!deref_stack_reg(state, ip_p, &state->ip))
goto done;
goto err;
state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
state->ip, (void *)ip_p);
@ -465,7 +472,7 @@ bool unwind_next_frame(struct unwind_state *state)
if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
orc_warn("can't dereference registers at %p for ip %pB\n",
(void *)sp, (void *)orig_ip);
goto done;
goto err;
}
state->regs = (struct pt_regs *)sp;
@ -477,7 +484,7 @@ bool unwind_next_frame(struct unwind_state *state)
if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
orc_warn("can't dereference iret registers at %p for ip %pB\n",
(void *)sp, (void *)orig_ip);
goto done;
goto err;
}
state->regs = (void *)sp - IRET_FRAME_OFFSET;
@ -500,18 +507,18 @@ bool unwind_next_frame(struct unwind_state *state)
case ORC_REG_PREV_SP:
if (!deref_stack_reg(state, sp + orc->bp_offset, &state->bp))
goto done;
goto err;
break;
case ORC_REG_BP:
if (!deref_stack_reg(state, state->bp + orc->bp_offset, &state->bp))
goto done;
goto err;
break;
default:
orc_warn("unknown BP base reg %d for ip %pB\n",
orc->bp_reg, (void *)orig_ip);
goto done;
goto err;
}
/* Prevent a recursive loop due to bad ORC data: */
@ -520,13 +527,16 @@ bool unwind_next_frame(struct unwind_state *state)
state->sp <= prev_sp) {
orc_warn("stack going in the wrong direction? ip=%pB\n",
(void *)orig_ip);
goto done;
goto err;
}
preempt_enable();
return true;
done:
err:
state->error = true;
the_end:
preempt_enable();
state->stack_info.type = STACK_TYPE_UNKNOWN;
return false;

View File

@ -256,7 +256,7 @@ ENTRY(__memcpy_mcsafe)
/* Copy successful. Return zero */
.L_done_memcpy_trap:
xorq %rax, %rax
xorl %eax, %eax
ret
ENDPROC(__memcpy_mcsafe)
EXPORT_SYMBOL_GPL(__memcpy_mcsafe)

View File

@ -137,7 +137,7 @@ ENTRY(restore_registers)
/* Saved in save_processor_state. */
lgdt saved_context_gdt_desc(%rax)
xorq %rax, %rax
xorl %eax, %eax
/* tell the hibernation core that we've just restored the memory */
movq %rax, in_suspend(%rip)

View File

@ -1,2 +1 @@
vdso-syms.lds
vdso.lds

View File

@ -53,22 +53,6 @@ $(vobjs): KBUILD_CFLAGS += $(CFL)
CFLAGS_REMOVE_vdso-note.o = -pg -fprofile-arcs -ftest-coverage
CFLAGS_REMOVE_um_vdso.o = -pg -fprofile-arcs -ftest-coverage
targets += vdso-syms.lds
extra-$(VDSO64-y) += vdso-syms.lds
#
# Match symbols in the DSO that look like VDSO*; produce a file of constants.
#
sed-vdsosym := -e 's/^00*/0/' \
-e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p'
quiet_cmd_vdsosym = VDSOSYM $@
define cmd_vdsosym
$(NM) $< | LC_ALL=C sed -n $(sed-vdsosym) | LC_ALL=C sort > $@
endef
$(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE
$(call if_changed,vdsosym)
#
# The DSO images are built using a special linker script.
#

View File

@ -88,6 +88,7 @@ struct orc_entry {
unsigned sp_reg:4;
unsigned bp_reg:4;
unsigned type:2;
unsigned end:1;
} __packed;
/*
@ -101,6 +102,7 @@ struct unwind_hint {
s16 sp_offset;
u8 sp_reg;
u8 type;
u8 end;
};
#endif /* __ASSEMBLY__ */

View File

@ -1157,6 +1157,7 @@ static int read_unwind_hints(struct objtool_file *file)
cfa->offset = hint->sp_offset;
insn->state.type = hint->type;
insn->state.end = hint->end;
}
return 0;

View File

@ -31,7 +31,7 @@ struct insn_state {
int stack_size;
unsigned char type;
bool bp_scratch;
bool drap;
bool drap, end;
int drap_reg, drap_offset;
struct cfi_reg vals[CFI_NUM_REGS];
};

View File

@ -203,7 +203,8 @@ int orc_dump(const char *_objname)
print_reg(orc[i].bp_reg, orc[i].bp_offset);
printf(" type:%s\n", orc_type_name(orc[i].type));
printf(" type:%s end:%d\n",
orc_type_name(orc[i].type), orc[i].end);
}
elf_end(elf);

View File

@ -31,6 +31,8 @@ int create_orc(struct objtool_file *file)
struct cfi_reg *cfa = &insn->state.cfa;
struct cfi_reg *bp = &insn->state.regs[CFI_BP];
orc->end = insn->state.end;
if (cfa->base == CFI_UNDEFINED) {
orc->sp_reg = ORC_REG_UNDEFINED;
continue;