powerpc/vdso32: inline __get_datapage()
__get_datapage() is only a few instructions that retrieve the address of the page where the kernel stores data for the VDSO.

By inlining this function into its users, a bl/blr pair and a mflr/mtlr pair are avoided, plus a few register moves.

The improvement is noticeable (about 55 nsec/call on an 8xx).

vdsotest before the patch:
gettimeofday: vdso: 731 nsec/call
clock-gettime-realtime-coarse: vdso: 668 nsec/call
clock-gettime-monotonic-coarse: vdso: 745 nsec/call

vdsotest after the patch:
gettimeofday: vdso: 677 nsec/call
clock-gettime-realtime-coarse: vdso: 613 nsec/call
clock-gettime-monotonic-coarse: vdso: 690 nsec/call

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/c39ef7f3dfa25356b01e211d539671f279086c09.1575273217.git.christophe.leroy@c-s.fr
This commit is contained in:
parent
654abc69ef
commit
ec0895f08f
@ -118,6 +118,16 @@ struct vdso_data {
|
|||||||
|
|
||||||
extern struct vdso_data *vdso_data;
|
extern struct vdso_data *vdso_data;
|
||||||
|
|
||||||
|
#else /* __ASSEMBLY__ */
|
||||||
|
|
||||||
|
.macro get_datapage ptr, tmp /* Inline replacement for __get_datapage(): leaves the VDSO data page address in ptr. tmp is a scratch register and is overwritten. LR is overwritten too, so callers save it first (the users in this commit do mflr r12 before and mtlr r12 after). */
|
||||||
|
bcl 20, 31, .+4 /* branch-and-link to the very next instruction: LR = address of the mflr below */
|
||||||
|
mflr \ptr /* ptr = run-time address of this mflr instruction */
|
||||||
|
addi \ptr, \ptr, (__kernel_datapage_offset - (.-4))@l /* here .-4 is the mflr above, so ptr = run-time address of __kernel_datapage_offset */
|
||||||
|
lwz \tmp, 0(\ptr) /* tmp = 32-bit word stored at __kernel_datapage_offset */
|
||||||
|
add \ptr, \tmp, \ptr /* ptr = address of __kernel_datapage_offset + the word stored there */
|
||||||
|
.endm
|
||||||
|
|
||||||
#endif /* __ASSEMBLY__ */
|
#endif /* __ASSEMBLY__ */
|
||||||
|
|
||||||
#endif /* __KERNEL__ */
|
#endif /* __KERNEL__ */
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
#include <asm/processor.h>
|
#include <asm/processor.h>
|
||||||
#include <asm/ppc_asm.h>
|
#include <asm/ppc_asm.h>
|
||||||
#include <asm/vdso.h>
|
#include <asm/vdso.h>
|
||||||
|
#include <asm/vdso_datapage.h>
|
||||||
#include <asm/asm-offsets.h>
|
#include <asm/asm-offsets.h>
|
||||||
|
|
||||||
.text
|
.text
|
||||||
@ -24,14 +25,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
|
|||||||
.cfi_startproc
|
.cfi_startproc
|
||||||
mflr r12
|
mflr r12
|
||||||
.cfi_register lr,r12
|
.cfi_register lr,r12
|
||||||
mr r11,r3
|
get_datapage r10, r0
|
||||||
bl __get_datapage@local
|
|
||||||
mtlr r12
|
mtlr r12
|
||||||
mr r10,r3
|
|
||||||
|
|
||||||
lwz r7,CFG_DCACHE_BLOCKSZ(r10)
|
lwz r7,CFG_DCACHE_BLOCKSZ(r10)
|
||||||
addi r5,r7,-1
|
addi r5,r7,-1
|
||||||
andc r6,r11,r5 /* round low to line bdy */
|
andc r6,r3,r5 /* round low to line bdy */
|
||||||
subf r8,r6,r4 /* compute length */
|
subf r8,r6,r4 /* compute length */
|
||||||
add r8,r8,r5 /* ensure we get enough */
|
add r8,r8,r5 /* ensure we get enough */
|
||||||
lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
|
lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
|
||||||
@ -48,7 +47,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
|
|||||||
|
|
||||||
lwz r7,CFG_ICACHE_BLOCKSZ(r10)
|
lwz r7,CFG_ICACHE_BLOCKSZ(r10)
|
||||||
addi r5,r7,-1
|
addi r5,r7,-1
|
||||||
andc r6,r11,r5 /* round low to line bdy */
|
andc r6,r3,r5 /* round low to line bdy */
|
||||||
subf r8,r6,r4 /* compute length */
|
subf r8,r6,r4 /* compute length */
|
||||||
add r8,r8,r5
|
add r8,r8,r5
|
||||||
lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
|
lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
|
||||||
|
@ -10,35 +10,13 @@
|
|||||||
#include <asm/asm-offsets.h>
|
#include <asm/asm-offsets.h>
|
||||||
#include <asm/unistd.h>
|
#include <asm/unistd.h>
|
||||||
#include <asm/vdso.h>
|
#include <asm/vdso.h>
|
||||||
|
#include <asm/vdso_datapage.h>
|
||||||
|
|
||||||
.text
|
.text
|
||||||
.global __kernel_datapage_offset;
|
.global __kernel_datapage_offset;
|
||||||
__kernel_datapage_offset:
|
__kernel_datapage_offset:
|
||||||
.long 0
|
.long 0
|
||||||
|
|
||||||
V_FUNCTION_BEGIN(__get_datapage) /* Out-of-line helper removed by this commit. Returns in r3 the address of __kernel_datapage_offset plus the word stored there; overwrites r0 and leaves LR as it was on entry. */
|
|
||||||
.cfi_startproc
|
|
||||||
/* We don't want that exposed or overridable as we want other objects
|
|
||||||
* to be able to bl directly to here
|
|
||||||
*/
|
|
||||||
.protected __get_datapage
|
|
||||||
.hidden __get_datapage
|
|
||||||
|
|
||||||
mflr r0 /* r0 = caller's return address, saved because the bcl below overwrites LR */
|
|
||||||
.cfi_register lr,r0 /* unwind info: the return address now lives in r0 */
|
|
||||||
|
|
||||||
bcl 20,31,data_page_branch /* branch-and-link to the next instruction: LR = address of data_page_branch */
|
|
||||||
data_page_branch:
|
|
||||||
mflr r3 /* r3 = run-time address of data_page_branch */
|
|
||||||
mtlr r0 /* put the caller's return address back in LR for the final blr */
|
|
||||||
addi r3, r3, __kernel_datapage_offset-data_page_branch /* r3 = run-time address of __kernel_datapage_offset */
|
|
||||||
lwz r0,0(r3) /* r0 = 32-bit word stored at __kernel_datapage_offset */
|
|
||||||
.cfi_restore lr /* unwind info: the return address is back in LR */
|
|
||||||
add r3,r0,r3 /* r3 = address of __kernel_datapage_offset + the word stored there */
|
|
||||||
blr /* return to the caller with the result in r3 */
|
|
||||||
.cfi_endproc
|
|
||||||
V_FUNCTION_END(__get_datapage)
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
|
* void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
|
||||||
*
|
*
|
||||||
@ -53,7 +31,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
|
|||||||
mflr r12
|
mflr r12
|
||||||
.cfi_register lr,r12
|
.cfi_register lr,r12
|
||||||
mr r4,r3
|
mr r4,r3
|
||||||
bl __get_datapage@local
|
get_datapage r3, r0
|
||||||
mtlr r12
|
mtlr r12
|
||||||
addi r3,r3,CFG_SYSCALL_MAP32
|
addi r3,r3,CFG_SYSCALL_MAP32
|
||||||
cmpli cr0,r4,0
|
cmpli cr0,r4,0
|
||||||
@ -75,7 +53,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
|
|||||||
.cfi_startproc
|
.cfi_startproc
|
||||||
mflr r12
|
mflr r12
|
||||||
.cfi_register lr,r12
|
.cfi_register lr,r12
|
||||||
bl __get_datapage@local
|
get_datapage r3, r0
|
||||||
lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
|
lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
|
||||||
lwz r3,CFG_TB_TICKS_PER_SEC(r3)
|
lwz r3,CFG_TB_TICKS_PER_SEC(r3)
|
||||||
mtlr r12
|
mtlr r12
|
||||||
|
@ -9,6 +9,7 @@
|
|||||||
#include <asm/processor.h>
|
#include <asm/processor.h>
|
||||||
#include <asm/ppc_asm.h>
|
#include <asm/ppc_asm.h>
|
||||||
#include <asm/vdso.h>
|
#include <asm/vdso.h>
|
||||||
|
#include <asm/vdso_datapage.h>
|
||||||
#include <asm/asm-offsets.h>
|
#include <asm/asm-offsets.h>
|
||||||
#include <asm/unistd.h>
|
#include <asm/unistd.h>
|
||||||
|
|
||||||
@ -33,8 +34,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
|
|||||||
|
|
||||||
mr r10,r3 /* r10 saves tv */
|
mr r10,r3 /* r10 saves tv */
|
||||||
mr r11,r4 /* r11 saves tz */
|
mr r11,r4 /* r11 saves tz */
|
||||||
bl __get_datapage@local /* get data page */
|
get_datapage r9, r0
|
||||||
mr r9, r3 /* datapage ptr in r9 */
|
|
||||||
cmplwi r10,0 /* check if tv is NULL */
|
cmplwi r10,0 /* check if tv is NULL */
|
||||||
beq 3f
|
beq 3f
|
||||||
lis r7,1000000@ha /* load up USEC_PER_SEC */
|
lis r7,1000000@ha /* load up USEC_PER_SEC */
|
||||||
@ -80,8 +80,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
|
|||||||
mflr r12 /* r12 saves lr */
|
mflr r12 /* r12 saves lr */
|
||||||
.cfi_register lr,r12
|
.cfi_register lr,r12
|
||||||
mr r11,r4 /* r11 saves tp */
|
mr r11,r4 /* r11 saves tp */
|
||||||
bl __get_datapage@local /* get data page */
|
get_datapage r9, r0
|
||||||
mr r9,r3 /* datapage ptr in r9 */
|
|
||||||
lis r7,NSEC_PER_SEC@h /* want nanoseconds */
|
lis r7,NSEC_PER_SEC@h /* want nanoseconds */
|
||||||
ori r7,r7,NSEC_PER_SEC@l
|
ori r7,r7,NSEC_PER_SEC@l
|
||||||
beq cr5, .Lcoarse_clocks
|
beq cr5, .Lcoarse_clocks
|
||||||
@ -206,7 +205,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
|
|||||||
|
|
||||||
mflr r12
|
mflr r12
|
||||||
.cfi_register lr,r12
|
.cfi_register lr,r12
|
||||||
bl __get_datapage@local /* get data page */
|
get_datapage r3, r0
|
||||||
lwz r5, CLOCK_HRTIMER_RES(r3)
|
lwz r5, CLOCK_HRTIMER_RES(r3)
|
||||||
mtlr r12
|
mtlr r12
|
||||||
li r3,0
|
li r3,0
|
||||||
@ -240,8 +239,7 @@ V_FUNCTION_BEGIN(__kernel_time)
|
|||||||
.cfi_register lr,r12
|
.cfi_register lr,r12
|
||||||
|
|
||||||
mr r11,r3 /* r11 holds t */
|
mr r11,r3 /* r11 holds t */
|
||||||
bl __get_datapage@local
|
get_datapage r9, r0
|
||||||
mr r9, r3 /* datapage ptr in r9 */
|
|
||||||
|
|
||||||
lwz r3,STAMP_XTIME_SEC+LOPART(r9)
|
lwz r3,STAMP_XTIME_SEC+LOPART(r9)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user