mirror of
https://github.com/torvalds/linux.git
synced 2024-11-13 15:41:39 +00:00
microblaze: Add loop unrolling for PAGE in copy_tofrom_user
Increase performance by loop unrolling. Signed-off-by: Michal Simek <monstr@monstr.eu>
This commit is contained in:
parent
782d491fc2
commit
ebe211254b
@ -10,6 +10,7 @@
|
||||
|
||||
#include <linux/errno.h>
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/page.h>
|
||||
|
||||
/*
|
||||
* int __strncpy_user(char *to, char *from, int len);
|
||||
@ -102,6 +103,49 @@ __strnlen_user:
|
||||
.section __ex_table,"a"
|
||||
.word 1b,4b
|
||||
|
||||
/* Loop unrolling for __copy_tofrom_user */
|
||||
#define COPY(offset) \
|
||||
1: lwi r4 , r6, 0x0000 + offset; \
|
||||
2: lwi r19, r6, 0x0004 + offset; \
|
||||
3: lwi r20, r6, 0x0008 + offset; \
|
||||
4: lwi r21, r6, 0x000C + offset; \
|
||||
5: lwi r22, r6, 0x0010 + offset; \
|
||||
6: lwi r23, r6, 0x0014 + offset; \
|
||||
7: lwi r24, r6, 0x0018 + offset; \
|
||||
8: lwi r25, r6, 0x001C + offset; \
|
||||
9: swi r4 , r5, 0x0000 + offset; \
|
||||
10: swi r19, r5, 0x0004 + offset; \
|
||||
11: swi r20, r5, 0x0008 + offset; \
|
||||
12: swi r21, r5, 0x000C + offset; \
|
||||
13: swi r22, r5, 0x0010 + offset; \
|
||||
14: swi r23, r5, 0x0014 + offset; \
|
||||
15: swi r24, r5, 0x0018 + offset; \
|
||||
16: swi r25, r5, 0x001C + offset; \
|
||||
.section __ex_table,"a"; \
|
||||
.word 1b, 0f; \
|
||||
.word 2b, 0f; \
|
||||
.word 3b, 0f; \
|
||||
.word 4b, 0f; \
|
||||
.word 5b, 0f; \
|
||||
.word 6b, 0f; \
|
||||
.word 7b, 0f; \
|
||||
.word 8b, 0f; \
|
||||
.word 9b, 0f; \
|
||||
.word 10b, 0f; \
|
||||
.word 11b, 0f; \
|
||||
.word 12b, 0f; \
|
||||
.word 13b, 0f; \
|
||||
.word 14b, 0f; \
|
||||
.word 15b, 0f; \
|
||||
.word 16b, 0f; \
|
||||
.text
|
||||
|
||||
#define COPY_80(offset) \
|
||||
COPY(0x00 + offset);\
|
||||
COPY(0x20 + offset);\
|
||||
COPY(0x40 + offset);\
|
||||
COPY(0x60 + offset);
|
||||
|
||||
/*
|
||||
* int __copy_tofrom_user(char *to, char *from, int len)
|
||||
* Return:
|
||||
@ -126,6 +170,10 @@ __copy_tofrom_user:
|
||||
bneid r3, bu1 /* if r3 is not zero then byte copying */
|
||||
or r3, r0, r0
|
||||
|
||||
rsubi r3, r7, PAGE_SIZE /* detect PAGE_SIZE */
|
||||
beqid r3, page;
|
||||
or r3, r0, r0
|
||||
|
||||
w1: lw r4, r6, r3 /* at least one 4 byte copy */
|
||||
w2: sw r4, r5, r3
|
||||
addik r7, r7, -4
|
||||
@ -140,6 +188,42 @@ w2: sw r4, r5, r3
|
||||
.word w2, 0f;
|
||||
.text
|
||||
|
||||
.align 4 /* Alignment is important to keep icache happy */
|
||||
page: /* Create room on stack and save registers for storign values */
|
||||
addik r1, r1, -32
|
||||
swi r19, r1, 4
|
||||
swi r20, r1, 8
|
||||
swi r21, r1, 12
|
||||
swi r22, r1, 16
|
||||
swi r23, r1, 20
|
||||
swi r24, r1, 24
|
||||
swi r25, r1, 28
|
||||
loop: /* r4, r19, r20, r21, r22, r23, r24, r25 are used for storing values */
|
||||
/* Loop unrolling to get performance boost */
|
||||
COPY_80(0x000);
|
||||
COPY_80(0x080);
|
||||
COPY_80(0x100);
|
||||
COPY_80(0x180);
|
||||
/* copy loop */
|
||||
addik r6, r6, 0x200
|
||||
addik r7, r7, -0x200
|
||||
bneid r7, loop
|
||||
addik r5, r5, 0x200
|
||||
/* Restore register content */
|
||||
lwi r19, r1, 4
|
||||
lwi r20, r1, 8
|
||||
lwi r21, r1, 12
|
||||
lwi r22, r1, 16
|
||||
lwi r23, r1, 20
|
||||
lwi r24, r1, 24
|
||||
lwi r25, r1, 28
|
||||
addik r1, r1, 32
|
||||
/* return back */
|
||||
addik r3, r7, 0
|
||||
rtsd r15, 8
|
||||
nop
|
||||
|
||||
.align 4 /* Alignment is important to keep icache happy */
|
||||
bu1: lbu r4,r6,r3
|
||||
bu2: sb r4,r5,r3
|
||||
addik r7,r7,-1
|
||||
|
Loading…
Reference in New Issue
Block a user