powerpc: Pair loads and stores in copy_4k_page

A number of our chips like loads and stores to be paired. A small kernel
module testcase shows the improvement of pairing loads and stores in
copy_4k_page:

POWER6: +9%
POWER7: +1.5%

#include <linux/module.h>
#include <linux/mm.h>

#define ITERATIONS 10000000

static int __init copypage_init(void)
{
	struct timespec before, after;
	unsigned long i;
	struct page *destpage, *srcpage;
	char *dest, *src;

	destpage = alloc_page(GFP_KERNEL);
	srcpage = alloc_page(GFP_KERNEL);

	dest = page_address(destpage);
	src = page_address(srcpage);

	getnstimeofday(&before);

	for (i = 0; i < ITERATIONS; i++)
		copy_4K_page(dest, src);

	getnstimeofday(&after);

	free_page((unsigned long)dest);
	free_page((unsigned long)src);

	printk(KERN_DEBUG "copy_4K_page loop took %lu ns\n",
		(after.tv_sec - before.tv_sec) * NSEC_PER_SEC +
		(after.tv_nsec - before.tv_nsec));

	return 0;
}

static void __exit copypage_exit(void)
{
}

module_init(copypage_init)
module_exit(copypage_exit)
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Anton Blanchard");

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
This commit is contained in:
Anton Blanchard 2010-02-10 18:07:54 +00:00 committed by Benjamin Herrenschmidt
parent 5a0e9b5718
commit 63e6c5b810

View File

@ -43,62 +43,62 @@ END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ)
ld r7,16(r4)
ldu r8,24(r4)
1: std r5,8(r3)
ld r9,8(r4)
std r6,16(r3)
ld r9,8(r4)
ld r10,16(r4)
std r7,24(r3)
ld r11,24(r4)
std r8,32(r3)
ld r11,24(r4)
ld r12,32(r4)
std r9,40(r3)
ld r5,40(r4)
std r10,48(r3)
ld r5,40(r4)
ld r6,48(r4)
std r11,56(r3)
ld r7,56(r4)
std r12,64(r3)
ld r7,56(r4)
ld r8,64(r4)
std r5,72(r3)
ld r9,72(r4)
std r6,80(r3)
ld r9,72(r4)
ld r10,80(r4)
std r7,88(r3)
ld r11,88(r4)
std r8,96(r3)
ld r11,88(r4)
ld r12,96(r4)
std r9,104(r3)
ld r5,104(r4)
std r10,112(r3)
ld r5,104(r4)
ld r6,112(r4)
std r11,120(r3)
ld r7,120(r4)
stdu r12,128(r3)
ld r7,120(r4)
ldu r8,128(r4)
bdnz 1b
std r5,8(r3)
ld r9,8(r4)
std r6,16(r3)
ld r9,8(r4)
ld r10,16(r4)
std r7,24(r3)
ld r11,24(r4)
std r8,32(r3)
ld r11,24(r4)
ld r12,32(r4)
std r9,40(r3)
ld r5,40(r4)
std r10,48(r3)
ld r5,40(r4)
ld r6,48(r4)
std r11,56(r3)
ld r7,56(r4)
std r12,64(r3)
ld r7,56(r4)
ld r8,64(r4)
std r5,72(r3)
ld r9,72(r4)
std r6,80(r3)
ld r9,72(r4)
ld r10,80(r4)
std r7,88(r3)
ld r11,88(r4)
std r8,96(r3)
ld r11,88(r4)
ld r12,96(r4)
std r9,104(r3)
std r10,112(r3)