mirror of https://github.com/torvalds/linux.git
14e4ec9c3e
Up until now, FSRM was perceived as an improvement over ERMS and was therefore made dependent on the latter. However, there are AMD BIOSes out there which allow either feature to be disabled on its own, which prevented kernels from booting because the CMP disappeared and broke the logic in the memmove() function. The same thing can happen in some VM migration scenarios. Patch in the proper sequences depending on which features are enabled.

Reported-by: Daniel Verkamp <dverkamp@chromium.org>
Reported-by: Jiri Slaby <jirislaby@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/Y/yK0dyzI0MMdTie@zn.tnic
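For reference, a sketch (using the CHECK_LEN and MEMMOVE_BYTES macros defined in the file below) of the three sequences the alternatives framework can patch in at .Lmemmove_begin_forward:

    default: cmp $0x20, %rdx; jb 1f
    ERMS:    cmp $0x20, %rdx; jb 1f; movq %rdx, %rcx; rep movsb; RET
    FSRM:    movq %rdx, %rcx; rep movsb; RET

Each sequence is complete on its own, so disabling one feature while keeping the other can no longer make the length check (the CMP) vanish.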
217 lines
3.7 KiB
x86-64 assembly
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Normally compiler builtins are used, but sometimes the compiler calls out
 * of line code. Based on asm-i386/string.h.
 *
 * This assembly file is re-written from memmove_64.c file.
 * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
 */
#include <linux/linkage.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/export.h>

#undef memmove

.section .noinstr.text, "ax"

/*
 * Implement memmove(). This can handle overlap between src and dst.
 *
 * Input:
 * rdi: dest
 * rsi: src
 * rdx: count
 *
 * Output:
 * rax: dest
 */
SYM_FUNC_START(__memmove)

        mov %rdi, %rax

        /* Decide forward/backward copy mode */
        cmp %rdi, %rsi
        jge .Lmemmove_begin_forward
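        /*
         * src < dest here: a forward copy is only safe when the source
         * block ends at or before dest (src + count <= dest); otherwise
         * the regions overlap and we copy backward, starting at 2.
         */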
        mov %rsi, %r8
        add %rdx, %r8
        cmp %rdi, %r8
        jg 2f

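/*
 * Three alternative entry sequences, patched in at boot: a bare length
 * check (default), the length check followed by "rep movsb" (ERMS), or
 * "rep movsb" alone (FSRM). Each sequence is self-contained, so the
 * length check cannot disappear when only one of the two features is
 * enabled.
 */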
#define CHECK_LEN       cmp $0x20, %rdx; jb 1f
#define MEMMOVE_BYTES   movq %rdx, %rcx; rep movsb; RET
.Lmemmove_begin_forward:
        ALTERNATIVE_2 __stringify(CHECK_LEN), \
                      __stringify(CHECK_LEN; MEMMOVE_BYTES), X86_FEATURE_ERMS, \
                      __stringify(MEMMOVE_BYTES), X86_FEATURE_FSRM

        /*
         * The movsq instruction has a high startup latency,
         * so we handle small sizes with general-purpose registers.
         */
        cmp $680, %rdx
        jb 3f
        /*
         * The movsq instruction is only good for the aligned case.
         */

        cmpb %dil, %sil
        je 4f
3:
        sub $0x20, %rdx
        /*
         * We gobble 32 bytes forward in each loop.
         */
5:
        sub $0x20, %rdx
        movq 0*8(%rsi), %r11
        movq 1*8(%rsi), %r10
        movq 2*8(%rsi), %r9
        movq 3*8(%rsi), %r8
        leaq 4*8(%rsi), %rsi

        movq %r11, 0*8(%rdi)
        movq %r10, 1*8(%rdi)
        movq %r9, 2*8(%rdi)
        movq %r8, 3*8(%rdi)
        leaq 4*8(%rdi), %rdi
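        /*
         * movq/leaq above do not modify the flags, so this still tests
         * the borrow from the "sub $0x20, %rdx" at the top of the loop.
         */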
        jae 5b
        addq $0x20, %rdx
        jmp 1f
        /*
         * Handle data forward by movsq.
         */
        .p2align 4
4:
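        /*
         * Save the last qword of src and the address of the last qword
         * of dest before "rep movsq" runs: the store afterwards covers
         * the count % 8 tail bytes that the qword copy leaves behind,
         * and the early load keeps it correct for closely overlapping
         * buffers.
         */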
        movq %rdx, %rcx
        movq -8(%rsi, %rdx), %r11
        lea -8(%rdi, %rdx), %r10
        shrq $3, %rcx
        rep movsq
        movq %r11, (%r10)
        jmp 13f
.Lmemmove_end_forward:

        /*
         * Handle data backward by movsq.
         */
        .p2align 4
7:
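        /*
         * Mirror image of the forward movsq case: save the first qword
         * of src and the address of the first qword of dest, copy
         * count / 8 qwords back to front with the direction flag set,
         * then store the saved qword to cover the count % 8 head bytes.
         */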
        movq %rdx, %rcx
        movq (%rsi), %r11
        movq %rdi, %r10
        leaq -8(%rsi, %rdx), %rsi
        leaq -8(%rdi, %rdx), %rdi
        shrq $3, %rcx
        std
        rep movsq
        cld
        movq %r11, (%r10)
        jmp 13f

        /*
         * Start to prepare for backward copy.
         */
        .p2align 4
2:
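        /*
         * Same size policy as the forward path: copies below 32 bytes
         * go to the tail code at 1, copies below 680 bytes (or larger
         * ones whose src/dest low address bytes differ) use the 32-byte
         * backward loop via 6, and the rest use the backward movsq code
         * at 7.
         */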
        cmp $0x20, %rdx
        jb 1f
        cmp $680, %rdx
        jb 6f
        cmp %dil, %sil
        je 7b
6:
        /*
         * Calculate copy position to tail.
         */
        addq %rdx, %rsi
        addq %rdx, %rdi
        subq $0x20, %rdx
        /*
         * We gobble 32 bytes backward in each loop.
         */
8:
        subq $0x20, %rdx
        movq -1*8(%rsi), %r11
        movq -2*8(%rsi), %r10
        movq -3*8(%rsi), %r9
        movq -4*8(%rsi), %r8
        leaq -4*8(%rsi), %rsi

        movq %r11, -1*8(%rdi)
        movq %r10, -2*8(%rdi)
        movq %r9, -3*8(%rdi)
        movq %r8, -4*8(%rdi)
        leaq -4*8(%rdi), %rdi
        jae 8b
        /*
         * Calculate copy position to head.
         */
        addq $0x20, %rdx
        subq %rdx, %rsi
        subq %rdx, %rdi
1:
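        /*
         * At most 31 bytes remain. Each bracket below loads from both
         * ends of the region before storing anything, so it stays
         * correct even when src and dest overlap.
         */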
        cmpq $16, %rdx
        jb 9f
        /*
         * Move data from 16 bytes to 31 bytes.
         */
        movq 0*8(%rsi), %r11
        movq 1*8(%rsi), %r10
        movq -2*8(%rsi, %rdx), %r9
        movq -1*8(%rsi, %rdx), %r8
        movq %r11, 0*8(%rdi)
        movq %r10, 1*8(%rdi)
        movq %r9, -2*8(%rdi, %rdx)
        movq %r8, -1*8(%rdi, %rdx)
        jmp 13f
        .p2align 4
9:
        cmpq $8, %rdx
        jb 10f
        /*
         * Move data from 8 bytes to 15 bytes.
         */
        movq 0*8(%rsi), %r11
        movq -1*8(%rsi, %rdx), %r10
        movq %r11, 0*8(%rdi)
        movq %r10, -1*8(%rdi, %rdx)
        jmp 13f
10:
        cmpq $4, %rdx
        jb 11f
        /*
         * Move data from 4 bytes to 7 bytes.
         */
        movl (%rsi), %r11d
        movl -4(%rsi, %rdx), %r10d
        movl %r11d, (%rdi)
        movl %r10d, -4(%rdi, %rdx)
        jmp 13f
11:
        cmp $2, %rdx
        jb 12f
        /*
         * Move data from 2 bytes to 3 bytes.
         */
        movw (%rsi), %r11w
        movw -2(%rsi, %rdx), %r10w
        movw %r11w, (%rdi)
        movw %r10w, -2(%rdi, %rdx)
        jmp 13f
12:
        cmp $1, %rdx
        jb 13f
        /*
         * Move data for 1 byte.
         */
        movb (%rsi), %r11b
        movb %r11b, (%rdi)
13:
        RET
SYM_FUNC_END(__memmove)
EXPORT_SYMBOL(__memmove)

SYM_FUNC_ALIAS(memmove, __memmove)
EXPORT_SYMBOL(memmove)