linux/arch/powerpc/lib/Makefile
Anton Blanchard 15c2d45d17 powerpc: Add 64bit optimised memcmp
I noticed ksm spending quite a lot of time in memcmp on a large
KVM box. The current memcmp loop is very unoptimised - byte at a
time compares with no loop unrolling. We can do much much better.

Optimise the loop in a few ways:

- Unroll the byte at a time loop

- For large (at least 32 byte) comparisons that are also 8 byte
  aligned, use an unrolled modulo scheduled loop using 8 byte
  loads. This is similar to our glibc memcmp.

A simple microbenchmark testing 10000000 iterations of an 8192 byte
memcmp was used to measure the performance:

baseline:	29.93 s

modified:	 1.70 s

Just over 17x faster.

v2: Incorporated some suggestions from Segher:

- Use andi. instead of rdlicl.

- Convert bdnzt eq, to bdnz. It's just duplicating the earlier compare
  and was a relic from a previous version.

- Don't use cr5, we have plans to use that CR field for fast local
  atomics.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2015-01-23 14:02:55 +11:00

42 lines
1.0 KiB
Makefile

#
# Makefile for ppc-specific library files..
#
subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
CFLAGS_REMOVE_code-patching.o = -pg
CFLAGS_REMOVE_feature-fixups.o = -pg
obj-y := string.o alloc.o \
crtsavres.o ppc_ksyms.o
obj-$(CONFIG_PPC32) += div64.o copy_32.o
obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \
usercopy_64.o mem_64.o hweight_64.o \
copyuser_power7.o string_64.o copypage_power7.o \
memcmp_64.o
ifeq ($(CONFIG_GENERIC_CSUM),)
obj-y += checksum_$(CONFIG_WORD_SIZE).o
obj-$(CONFIG_PPC64) += checksum_wrappers_64.o
endif
obj-$(CONFIG_PPC64) += memcpy_power7.o memcpy_64.o
obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o
ifeq ($(CONFIG_PPC64),y)
obj-$(CONFIG_SMP) += locks.o
obj-$(CONFIG_ALTIVEC) += vmx-helper.o
endif
obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
obj-y += code-patching.o
obj-y += feature-fixups.o
obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o
obj-$(CONFIG_ALTIVEC) += xor_vmx.o
CFLAGS_xor_vmx.o += -maltivec -mabi=altivec