diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index a0533e672496..435ca24c5e1d 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -18,7 +18,7 @@
 
 /* Handles exceptions in both to and from, but doesn't do access_ok */
 __must_check unsigned long
-copy_user_generic_unrolled(void *to, const void *from, unsigned len);
+rep_movs_alternative(void *to, const void *from, unsigned len);
 
 static __always_inline __must_check unsigned long
 copy_user_generic(void *to, const void *from, unsigned long len)
@@ -26,16 +26,16 @@ copy_user_generic(void *to, const void *from, unsigned long len)
 	stac();
 	/*
 	 * If CPU has FSRM feature, use 'rep movs'.
-	 * Otherwise, use copy_user_generic_unrolled.
+	 * Otherwise, use rep_movs_alternative.
 	 */
 	asm volatile(
 		"1:\n\t"
 		ALTERNATIVE("rep movsb",
-			    "call copy_user_generic_unrolled", ALT_NOT(X86_FEATURE_FSRM))
+			    "call rep_movs_alternative", ALT_NOT(X86_FEATURE_FSRM))
 		"2:\n"
 		_ASM_EXTABLE_UA(1b, 2b)
 		:"+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT
-		: : "memory", "rax", "rdx", "r8", "r9", "r10", "r11");
+		: : "memory", "rax", "r8", "r9", "r10", "r11");
 	clac();
 	return len;
 }
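(Aside, not part of the patch: the inline asm above leans on a single contract. The byte count goes in where 'rep movsb' expects it, and the number of bytes NOT copied comes back, 0 on success, whether the ALTERNATIVE is patched to a bare 'rep movsb' on FSRM CPUs or left as a call to rep_movs_alternative. The user-space sketch below restates that contract in C; every model_* name is invented for this note.)

#include <stddef.h>
#include <string.h>

/* Stand-in for the patched-in 'rep movsb' or the call to rep_movs_alternative. */
static size_t model_user_copy(void *to, const void *from, size_t len)
{
	memcpy(to, from, len);		/* no faults can happen in this model */
	return 0;			/* bytes left uncopied */
}

/* Rough shape of how a copy_from_user()-style wrapper consumes the result. */
static size_t model_copy_from_user(void *dst, const void *src, size_t len)
{
	size_t left = model_user_copy(dst, src, len);

	if (left)			/* partial copy: zero what was not copied */
		memset((char *)dst + (len - left), 0, left);
	return left;			/* callers treat nonzero as -EFAULT */
}

Because both the 'rep movsb' form and the call form take and return the count the same way, the call site needs no register shuffling when the alternative is applied.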
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 16a743f11b11..85e6c45b1ca9 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -17,6 +17,113 @@
 #include
 #include
 
+/*
+ * rep_movs_alternative - memory copy with exception handling.
+ * This version is for CPUs that don't have FSRM (Fast Short Rep Movs)
+ *
+ * Input:
+ * rdi destination
+ * rsi source
+ * rcx count
+ *
+ * Output:
+ * rcx uncopied bytes or 0 if successful.
+ *
+ * NOTE! The calling convention is very intentionally the same as
+ * for 'rep movs', so that we can rewrite the function call with
+ * just a plain 'rep movs' on machines that have FSRM. But to make
+ * it simpler for us, we can clobber rsi/rdi and rax/r8-r11 freely.
+ */
+SYM_FUNC_START(rep_movs_alternative)
+	cmpq $64,%rcx
+	jae .Lunrolled
+
+	cmp $8,%ecx
+	jae .Lword
+
+	testl %ecx,%ecx
+	je .Lexit
+
+.Lcopy_user_tail:
+0:	movb (%rsi),%al
+1:	movb %al,(%rdi)
+	inc %rdi
+	inc %rsi
+	dec %rcx
+	jne .Lcopy_user_tail
+.Lexit:
+	RET
+
+	_ASM_EXTABLE_UA( 0b, .Lexit)
+	_ASM_EXTABLE_UA( 1b, .Lexit)
+
+	.p2align 4
+.Lword:
+2:	movq (%rsi),%rax
+3:	movq %rax,(%rdi)
+	addq $8,%rsi
+	addq $8,%rdi
+	sub $8,%ecx
+	je .Lexit
+	cmp $8,%ecx
+	jae .Lword
+	jmp .Lcopy_user_tail
+
+	_ASM_EXTABLE_UA( 2b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA( 3b, .Lcopy_user_tail)
+
+	.p2align 4
+.Lunrolled:
+10:	movq (%rsi),%r8
+11:	movq 8(%rsi),%r9
+12:	movq 16(%rsi),%r10
+13:	movq 24(%rsi),%r11
+14:	movq %r8,(%rdi)
+15:	movq %r9,8(%rdi)
+16:	movq %r10,16(%rdi)
+17:	movq %r11,24(%rdi)
+20:	movq 32(%rsi),%r8
+21:	movq 40(%rsi),%r9
+22:	movq 48(%rsi),%r10
+23:	movq 56(%rsi),%r11
+24:	movq %r8,32(%rdi)
+25:	movq %r9,40(%rdi)
+26:	movq %r10,48(%rdi)
+27:	movq %r11,56(%rdi)
+	addq $64,%rsi
+	addq $64,%rdi
+	subq $64,%rcx
+	cmpq $64,%rcx
+	jae .Lunrolled
+	cmpl $8,%ecx
+	jae .Lword
+	testl %ecx,%ecx
+	jne .Lcopy_user_tail
+	RET
+
+	_ASM_EXTABLE_UA(10b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(11b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(12b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(13b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(14b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(15b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(16b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(17b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(20b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(21b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(22b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(23b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(24b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(25b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(26b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(27b, .Lcopy_user_tail)
+SYM_FUNC_END(rep_movs_alternative)
+EXPORT_SYMBOL(rep_movs_alternative)
+
+/*
+ * The uncached copy needs to align the destination for
+ * movnti and friends.
+ */
 .macro ALIGN_DESTINATION
 	/* check for bad alignment of destination */
 	movl %edi,%ecx
@@ -37,171 +144,6 @@
 	_ASM_EXTABLE_CPY(101b, .Lcopy_user_handle_align)
 	.endm
 
-/*
- * copy_user_generic_unrolled - memory copy with exception handling.
- * This version is for CPUs like P4 that don't have efficient micro
- * code for rep movsq
- *
- * Input:
- * rdi destination
- * rsi source
- * rcx count
- *
- * Output:
- * rcx uncopied bytes or 0 if successful.
- *
- * NOTE! The calling convention is very intentionally the same as
- * for 'rep movs', so that we can rewrite the function call with
- * just a plain 'rep movs' on machines that have FSRM.
- *
- * HOWEVER! This function ends up having a lot of the code common
- * with __copy_user_nocache(), which is a normal C function, and
- * has a similar calling convention, but gets the 'count' in %rdx,
- * and returns the result in %rax.
- *
- * To share as much code as possible, we end up returning the
- * result in *both* %rcx/%rax, and we also move the initial count
- * into %rdx.
- *
- * We can clobber rdx/rsi/rdi and r8-r11
- */
-SYM_FUNC_START(copy_user_generic_unrolled)
-	movl %ecx,%edx
-	cmpl $8,%ecx
-	jb .Lcopy_user_short_string_bytes
-	ALIGN_DESTINATION
-	movl %edx,%ecx
-	andl $63,%edx
-	shrl $6,%ecx
-	jz copy_user_short_string
-1:	movq (%rsi),%r8
-2:	movq 1*8(%rsi),%r9
-3:	movq 2*8(%rsi),%r10
-4:	movq 3*8(%rsi),%r11
-5:	movq %r8,(%rdi)
-6:	movq %r9,1*8(%rdi)
-7:	movq %r10,2*8(%rdi)
-8:	movq %r11,3*8(%rdi)
-9:	movq 4*8(%rsi),%r8
-10:	movq 5*8(%rsi),%r9
-11:	movq 6*8(%rsi),%r10
-12:	movq 7*8(%rsi),%r11
-13:	movq %r8,4*8(%rdi)
-14:	movq %r9,5*8(%rdi)
-15:	movq %r10,6*8(%rdi)
-16:	movq %r11,7*8(%rdi)
-	leaq 64(%rsi),%rsi
-	leaq 64(%rdi),%rdi
-	decl %ecx
-	jnz 1b
-	jmp copy_user_short_string
-
-30:	shll $6,%ecx
-	addl %ecx,%edx
-	jmp .Lcopy_user_handle_tail
-
-	_ASM_EXTABLE_CPY(1b, 30b)
-	_ASM_EXTABLE_CPY(2b, 30b)
-	_ASM_EXTABLE_CPY(3b, 30b)
-	_ASM_EXTABLE_CPY(4b, 30b)
-	_ASM_EXTABLE_CPY(5b, 30b)
-	_ASM_EXTABLE_CPY(6b, 30b)
-	_ASM_EXTABLE_CPY(7b, 30b)
-	_ASM_EXTABLE_CPY(8b, 30b)
-	_ASM_EXTABLE_CPY(9b, 30b)
-	_ASM_EXTABLE_CPY(10b, 30b)
-	_ASM_EXTABLE_CPY(11b, 30b)
-	_ASM_EXTABLE_CPY(12b, 30b)
-	_ASM_EXTABLE_CPY(13b, 30b)
-	_ASM_EXTABLE_CPY(14b, 30b)
-	_ASM_EXTABLE_CPY(15b, 30b)
-	_ASM_EXTABLE_CPY(16b, 30b)
-SYM_FUNC_END(copy_user_generic_unrolled)
-EXPORT_SYMBOL(copy_user_generic_unrolled)
-
-/*
- * Try to copy last bytes and clear the rest if needed.
- * Since protection fault in copy_from/to_user is not a normal situation,
- * it is not necessary to optimize tail handling.
- * Don't try to copy the tail if machine check happened
- *
- * Input:
- * eax trap number written by ex_handler_copy()
- * rdi destination
- * rsi source
- * rdx count
- *
- * Output:
- * eax uncopied bytes or 0 if successful.
- */
-SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
-	cmp $X86_TRAP_MC,%eax
-	je 3f
-
-	movl %edx,%ecx
-1:	rep movsb
-2:	mov %ecx,%eax
-	RET
-
-3:
-	movl %edx,%eax
-	movl %edx,%ecx
-	RET
-
-	_ASM_EXTABLE_CPY(1b, 2b)
-
-.Lcopy_user_handle_align:
-	addl %ecx,%edx		/* ecx is zerorest also */
-	jmp .Lcopy_user_handle_tail
-
-SYM_CODE_END(.Lcopy_user_handle_tail)
-
-/*
- * Finish memcpy of less than 64 bytes. #AC should already be set.
- *
- * Input:
- * rdi destination
- * rsi source
- * rdx count (< 64)
- *
- * Output:
- * eax uncopied bytes or 0 if successful.
- */
-SYM_CODE_START_LOCAL(copy_user_short_string)
-	movl %edx,%ecx
-	andl $7,%edx
-	shrl $3,%ecx
-	jz .Lcopy_user_short_string_bytes
-18:	movq (%rsi),%r8
-19:	movq %r8,(%rdi)
-	leaq 8(%rsi),%rsi
-	leaq 8(%rdi),%rdi
-	decl %ecx
-	jnz 18b
-.Lcopy_user_short_string_bytes:
-	andl %edx,%edx
-	jz 23f
-	movl %edx,%ecx
-21:	movb (%rsi),%al
-22:	movb %al,(%rdi)
-	incq %rsi
-	incq %rdi
-	decl %ecx
-	jnz 21b
-23:	xor %eax,%eax
-	xor %ecx,%ecx
-	RET
-
-40:	leal (%rdx,%rcx,8),%edx
-	jmp 60f
-50:	movl %ecx,%edx		/* ecx is zerorest also */
-60:	jmp .Lcopy_user_handle_tail
-
-	_ASM_EXTABLE_CPY(18b, 40b)
-	_ASM_EXTABLE_CPY(19b, 40b)
-	_ASM_EXTABLE_CPY(21b, 50b)
-	_ASM_EXTABLE_CPY(22b, 50b)
-SYM_CODE_END(copy_user_short_string)
 
 /*
  * copy_user_nocache - Uncached memory copy with exception handling
@@ -346,5 +288,40 @@ SYM_FUNC_START(__copy_user_nocache)
 	_ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
 	_ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
 	_ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
+
+/*
+ * Try to copy last bytes and clear the rest if needed.
+ * Since protection fault in copy_from/to_user is not a normal situation,
+ * it is not necessary to optimize tail handling.
+ * Don't try to copy the tail if machine check happened
+ *
+ * Input:
+ * eax trap number written by ex_handler_copy()
+ * rdi destination
+ * rsi source
+ * rdx count
+ *
+ * Output:
+ * eax uncopied bytes or 0 if successful.
+ */
+.Lcopy_user_handle_tail:
+	cmp $X86_TRAP_MC,%eax
+	je 3f
+
+	movl %edx,%ecx
+1:	rep movsb
+2:	mov %ecx,%eax
+	RET
+
+3:
+	movl %edx,%eax
+	RET
+
+	_ASM_EXTABLE_CPY(1b, 2b)
+
+.Lcopy_user_handle_align:
+	addl %ecx,%edx		/* ecx is zerorest also */
+	jmp .Lcopy_user_handle_tail
+
 SYM_FUNC_END(__copy_user_nocache)
 EXPORT_SYMBOL(__copy_user_nocache)
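(Aside, not part of the patch: the new rep_movs_alternative above uses a three-tier strategy: 64-byte unrolled blocks while at least 64 bytes remain, then 8-byte words, then a byte-at-a-time tail, returning the uncopied count. The portable C sketch below mirrors that control flow; the model_rep_movs name is invented for this note, and real faults, which are what produce a nonzero return in the assembly, cannot happen here.)

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static size_t model_rep_movs(void *to, const void *from, size_t len)
{
	unsigned char *d = to;
	const unsigned char *s = from;

	/* .Lunrolled: 64 bytes per iteration while at least 64 remain. */
	while (len >= 64) {
		uint64_t tmp[8];

		memcpy(tmp, s, 64);	/* eight 64-bit loads in the asm (r8-r11, twice) */
		memcpy(d, tmp, 64);	/* eight 64-bit stores */
		s += 64;
		d += 64;
		len -= 64;
	}

	/* .Lword: 8 bytes at a time while at least 8 remain. */
	while (len >= 8) {
		uint64_t tmp;

		memcpy(&tmp, s, 8);	/* one 64-bit load (rax) */
		memcpy(d, &tmp, 8);
		s += 8;
		d += 8;
		len -= 8;
	}

	/* .Lcopy_user_tail: finish byte by byte. */
	while (len) {
		*d++ = *s++;
		len--;
	}

	return len;	/* uncopied bytes: always 0 here, nonzero only after a fault in the asm */
}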
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index ac96c9939cd1..50ed63f701f1 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1285,7 +1285,7 @@ static const char *uaccess_safe_builtin[] = {
 	"copy_mc_enhanced_fast_string",
 	"ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */
 	"rep_stos_alternative",
-	"copy_user_generic_unrolled",
+	"rep_movs_alternative",
 	"__copy_user_nocache",
 	NULL
 };
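(Aside, not part of the patch: the relocated .Lcopy_user_handle_tail in the copy_user_64.S hunk above applies a simple policy for the nocache path: on a machine check, do not touch the memory again and report the whole remaining count; on an ordinary fault, retry the tail with 'rep movsb' so the caller learns exactly how many bytes were left. The sketch below models that decision; the enum, the byte_ok callback and the model_* names are invented for illustration.)

#include <stddef.h>

enum model_trap { MODEL_TRAP_PF, MODEL_TRAP_MC };

/*
 * Remaining bytes go in, uncopied bytes come out (mirrors %rdx in, %eax out).
 * byte_ok() stands in for the exception-table fixup on the 'rep movsb'.
 */
static size_t model_handle_tail(unsigned char *dst, const unsigned char *src,
				size_t remaining, enum model_trap trap,
				int (*byte_ok)(size_t i))
{
	size_t i;

	if (trap == MODEL_TRAP_MC)	/* machine check: don't touch the data again */
		return remaining;

	for (i = 0; i < remaining; i++) {
		if (!byte_ok(i))	/* a second fault stops the retry */
			break;
		dst[i] = src[i];
	}
	return remaining - i;		/* uncopied bytes, like %eax */
}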