x86, mem: memset_64.S: Optimize memset by enhanced REP MOVSB/STOSB
Support memset() with enhanced rep stosb. On processors supporting enhanced REP MOVSB/STOSB, the alternative memset_c_e function using enhanced rep stosb overrides the fast string alternative memset_c and the original function. Signed-off-by: Fenghua Yu <fenghua.yu@intel.com> Link: http://lkml.kernel.org/r/1305671358-14478-10-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
This commit is contained in:
parent
057e05c1d6
commit
2f19e06ac3
@ -2,9 +2,13 @@
|
|||||||
|
|
||||||
#include <linux/linkage.h>
|
#include <linux/linkage.h>
|
||||||
#include <asm/dwarf2.h>
|
#include <asm/dwarf2.h>
|
||||||
|
#include <asm/cpufeature.h>
|
||||||
|
#include <asm/alternative-asm.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* ISO C memset - set a memory block to a byte value.
|
* ISO C memset - set a memory block to a byte value. This function uses fast
|
||||||
|
* string to get better performance than the original function. The code is
|
||||||
|
* simpler and shorter than the original function as well.
|
||||||
*
|
*
|
||||||
* rdi destination
|
* rdi destination
|
||||||
* rsi value (char)
|
* rsi value (char)
|
||||||
@ -31,6 +35,28 @@
|
|||||||
.Lmemset_e:
|
.Lmemset_e:
|
||||||
.previous
|
.previous
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ISO C memset - set a memory block to a byte value. This function uses
|
||||||
|
* enhanced rep stosb to override the fast string function.
|
||||||
|
* The code is simpler and shorter than the fast string function as well.
|
||||||
|
*
|
||||||
|
* rdi destination
|
||||||
|
* rsi value (char)
|
||||||
|
* rdx count (bytes)
|
||||||
|
*
|
||||||
|
* rax original destination
|
||||||
|
*/
|
||||||
|
.section .altinstr_replacement, "ax", @progbits
|
||||||
|
.Lmemset_c_e:
|
||||||
|
movq %rdi,%r9
|
||||||
|
movb %sil,%al
|
||||||
|
movl %edx,%ecx
|
||||||
|
rep stosb
|
||||||
|
movq %r9,%rax
|
||||||
|
ret
|
||||||
|
.Lmemset_e_e:
|
||||||
|
.previous
|
||||||
|
|
||||||
ENTRY(memset)
|
ENTRY(memset)
|
||||||
ENTRY(__memset)
|
ENTRY(__memset)
|
||||||
CFI_STARTPROC
|
CFI_STARTPROC
|
||||||
@ -112,16 +138,20 @@ ENTRY(__memset)
|
|||||||
ENDPROC(memset)
|
ENDPROC(memset)
|
||||||
ENDPROC(__memset)
|
ENDPROC(__memset)
|
||||||
|
|
||||||
/* Some CPUs run faster using the string instructions.
|
/* Some CPUs support enhanced REP MOVSB/STOSB feature.
|
||||||
It is also a lot simpler. Use this when possible */
|
* It is recommended to use this when possible.
|
||||||
|
*
|
||||||
#include <asm/cpufeature.h>
|
* If enhanced REP MOVSB/STOSB feature is not available, use fast string
|
||||||
|
* instructions.
|
||||||
|
*
|
||||||
|
* Otherwise, use original memset function.
|
||||||
|
*
|
||||||
|
* In .altinstructions section, ERMS feature is placed after REG_GOOD
|
||||||
|
* feature to implement the right patch order.
|
||||||
|
*/
|
||||||
.section .altinstructions,"a"
|
.section .altinstructions,"a"
|
||||||
.align 8
|
altinstruction_entry memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
|
||||||
.quad memset
|
.Lfinal-memset,.Lmemset_e-.Lmemset_c
|
||||||
.quad .Lmemset_c
|
altinstruction_entry memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
|
||||||
.word X86_FEATURE_REP_GOOD
|
.Lfinal-memset,.Lmemset_e_e-.Lmemset_c_e
|
||||||
.byte .Lfinal - memset
|
|
||||||
.byte .Lmemset_e - .Lmemset_c
|
|
||||||
.previous
|
.previous
|
||||||
|
Loading…
Reference in New Issue
Block a user