mirror of
https://github.com/torvalds/linux.git
synced 2024-12-31 23:31:29 +00:00
[IA64] Schedule fp-clearing insns at least 6 cycles after reading ar.bsp.
Decreases syscall overhead by approximately 6 cycles.

Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
This commit is contained in:
parent 9ec1a7ad43
commit 3c79c8b1d9
@@ -705,15 +705,15 @@ ENTRY(ia64_leave_syscall)
|
|||||||
// start restoring the state saved on the kernel stack (struct pt_regs):
|
// start restoring the state saved on the kernel stack (struct pt_regs):
|
||||||
ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
|
ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
|
||||||
ld8 r11=[r3],PT(CR_IIP)-PT(R11)
|
ld8 r11=[r3],PT(CR_IIP)-PT(R11)
|
||||||
mov f6=f0 // clear f6
|
nop.i 0
|
||||||
;;
|
;;
|
||||||
invala // M0|1 invalidate ALAT
|
invala // M0|1 invalidate ALAT
|
||||||
rsm psr.i | psr.ic // M2 initiate turning off of interrupt and interruption collection
|
rsm psr.i | psr.ic // M2 initiate turning off of interrupt and interruption collection
|
||||||
mov f9=f0 // clear f9
|
nop.i 0
|
||||||
|
|
||||||
ld8 r29=[r2],16 // load cr.ipsr
|
ld8 r29=[r2],16 // load cr.ipsr
|
||||||
ld8 r28=[r3],16 // load cr.iip
|
ld8 r28=[r3],16 // load cr.iip
|
||||||
mov f8=f0 // clear f8
|
mov r22=r0 // clear r22
|
||||||
;;
|
;;
|
||||||
ld8 r30=[r2],16 // M0|1 load cr.ifs
|
ld8 r30=[r2],16 // M0|1 load cr.ifs
|
||||||
ld8 r25=[r3],16 // M0|1 load ar.unat
|
ld8 r25=[r3],16 // M0|1 load ar.unat
|
||||||
@@ -721,15 +721,15 @@ ENTRY(ia64_leave_syscall)
|
|||||||
;;
|
;;
|
||||||
ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs
|
ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs
|
||||||
(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
|
(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
|
||||||
mov f10=f0 // clear f10
|
(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
|
||||||
;;
|
;;
|
||||||
ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0
|
ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0
|
||||||
ld8 r27=[r3],PT(PR)-PT(AR_RSC) // load ar.rsc
|
ld8 r27=[r3],PT(PR)-PT(AR_RSC) // load ar.rsc
|
||||||
mov f11=f0 // clear f11
|
mov f6=f0 // clear f6
|
||||||
;;
|
;;
|
||||||
ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // load ar.rnat (may be garbage)
|
ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // load ar.rnat (may be garbage)
|
||||||
ld8 r31=[r3],PT(R1)-PT(PR) // load predicates
|
ld8 r31=[r3],PT(R1)-PT(PR) // load predicates
|
||||||
(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
|
mov f7=f0 // clear f7
|
||||||
;;
|
;;
|
||||||
ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // load ar.fpsr
|
ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // load ar.fpsr
|
||||||
ld8.fill r1=[r3],16 // load r1
|
ld8.fill r1=[r3],16 // load r1
|
||||||
@@ -737,24 +737,29 @@ ENTRY(ia64_leave_syscall)
|
|||||||
;;
|
;;
|
||||||
srlz.d // M0 ensure interruption collection is off
|
srlz.d // M0 ensure interruption collection is off
|
||||||
ld8.fill r13=[r3],16
|
ld8.fill r13=[r3],16
|
||||||
mov f7=f0 // clear f7
|
mov f8=f0 // clear f8
|
||||||
;;
|
;;
|
||||||
ld8.fill r12=[r2] // restore r12 (sp)
|
ld8.fill r12=[r2] // restore r12 (sp)
|
||||||
mov.m ar.ssd=r0 // M2 clear ar.ssd
|
mov.m ar.ssd=r0 // M2 clear ar.ssd
|
||||||
mov r22=r0 // clear r22
|
mov b6=r18 // I0 restore b6
|
||||||
|
|
||||||
|
nop.m 0
|
||||||
|
mov f9=f0 // clear f9
|
||||||
|
shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
|
||||||
|
;;
|
||||||
ld8.fill r15=[r3] // restore r15
|
ld8.fill r15=[r3] // restore r15
|
||||||
(pUStk) st1 [r14]=r17
|
(pUStk) st1 [r14]=r17
|
||||||
addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0
|
addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0
|
||||||
;;
|
;;
|
||||||
(pUStk) ld4 r17=[r3] // r17 = cpu_data->phys_stacked_size_p8
|
(pUStk) ld4 r17=[r3] // r17 = cpu_data->phys_stacked_size_p8
|
||||||
mov.m ar.csd=r0 // M2 clear ar.csd
|
mov.m ar.csd=r0 // M2 clear ar.csd
|
||||||
mov b6=r18 // I0 restore b6
|
mov f10=f0 // clear f10
|
||||||
;;
|
;;
|
||||||
mov r14=r0 // clear r14
|
mov r14=r0 // clear r14
|
||||||
shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
|
mov f11=f0 // clear f11
|
||||||
(pKStk) br.cond.dpnt.many skip_rbs_switch
|
(pKStk) br.cond.dpnt.many skip_rbs_switch
|
||||||
|
|
||||||
|
|
||||||
mov.m ar.ccv=r0 // clear ar.ccv
|
mov.m ar.ccv=r0 // clear ar.ccv
|
||||||
(pNonSys) br.cond.dpnt.many dont_preserve_current_frame
|
(pNonSys) br.cond.dpnt.many dont_preserve_current_frame
|
||||||
br.cond.sptk.many rbs_switch
|
br.cond.sptk.many rbs_switch
|
||||||
|
Loading…
Reference in New Issue
Block a user