x86/i386: Use less assembly in strlen(), speed things up a bit

The current i386 strlen() hardcodes a NOT/DEC sequence in its inline
assembly. DEC is said to be suboptimal on Core2, so keep only the
REPNE SCASB sequence in assembly and let the compiler generate the rest.

The difference in the generated code is shown below (MCORE2=y):

	<strlen>:
		push   %edi
		mov    $0xffffffff,%ecx
		mov    %eax,%edi
		xor    %eax,%eax
		repnz scas %es:(%edi),%al
		not    %ecx

	-	dec    %ecx
	-	mov    %ecx,%eax
	+	lea    -0x1(%ecx),%eax

		pop    %edi
		ret

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jan Beulich <JBeulich@suse.com>
Link: http://lkml.kernel.org/r/20111211181319.GA17097@p183.telecom.by
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Alexey Dobriyan 2011-12-11 21:13:19 +03:00 committed by Ingo Molnar
parent 79f1ddd064
commit 890890cb8e

View File

@ -164,15 +164,13 @@ EXPORT_SYMBOL(strchr);
size_t strlen(const char *s) size_t strlen(const char *s)
{ {
int d0; int d0;
int res; size_t res;
asm volatile("repne\n\t" asm volatile("repne\n\t"
"scasb\n\t" "scasb"
"notl %0\n\t"
"decl %0"
: "=c" (res), "=&D" (d0) : "=c" (res), "=&D" (d0)
: "1" (s), "a" (0), "0" (0xffffffffu) : "1" (s), "a" (0), "0" (0xffffffffu)
: "memory"); : "memory");
return res; return ~res - 1;
} }
EXPORT_SYMBOL(strlen); EXPORT_SYMBOL(strlen);
#endif #endif