mirror of
https://github.com/torvalds/linux.git
synced 2024-11-15 16:41:58 +00:00
00b65985fb
Instead of pinning per-cpu TLB into a DTR, use DTC. This will free up one TLB entry for application, or even kernel if access pattern to per-cpu data area has high temporal locality. Since per-cpu is mapped at the top of region 7 address, we just need to add special case in alt_dtlb_miss. The physical address of per-cpu data is already conveniently stored in IA64_KR(PER_CPU_DATA). Latency for alt_dtlb_miss is not affected as we can hide all the latency. It was measured that alt_dtlb_miss handler has 23 cycles latency before and after the patch. The performance effect is massive for applications that put lots of tlb pressure on CPU. Workload environment like database online transaction processing or application uses tera-byte of memory would benefit the most. Measurement with industry standard database benchmark shown an upward of 1.6% gain. While smaller workloads like cpu, java also showing small improvement. Signed-off-by: Ken Chen <kenneth.w.chen@intel.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
1079 lines
26 KiB
ArmAsm
1079 lines
26 KiB
ArmAsm
//
|
|
// assembly portion of the IA64 MCA handling
|
|
//
|
|
// Mods by cfleck to integrate into kernel build
|
|
// 00/03/15 davidm Added various stop bits to get a clean compile
|
|
//
|
|
// 00/03/29 cfleck Added code to save INIT handoff state in pt_regs format, switch to temp
|
|
// kstack, switch modes, jump to C INIT handler
|
|
//
|
|
// 02/01/04 J.Hall <jenna.s.hall@intel.com>
|
|
// Before entering virtual mode code:
|
|
// 1. Check for TLB CPU error
|
|
// 2. Restore current thread pointer to kr6
|
|
// 3. Move stack ptr 16 bytes to conform to C calling convention
|
|
//
|
|
// 04/11/12 Russ Anderson <rja@sgi.com>
|
|
// Added per cpu MCA/INIT stack save areas.
|
|
//
|
|
// 12/08/05 Keith Owens <kaos@sgi.com>
|
|
// Use per cpu MCA/INIT stacks for all data.
|
|
//
|
|
#include <linux/threads.h>
|
|
|
|
#include <asm/asmmacro.h>
|
|
#include <asm/pgtable.h>
|
|
#include <asm/processor.h>
|
|
#include <asm/mca_asm.h>
|
|
#include <asm/mca.h>
|
|
|
|
#include "entry.h"
|
|
|
|
#define GET_IA64_MCA_DATA(reg) \
|
|
GET_THIS_PADDR(reg, ia64_mca_data) \
|
|
;; \
|
|
ld8 reg=[reg]
|
|
|
|
.global ia64_do_tlb_purge
|
|
.global ia64_os_mca_dispatch
|
|
.global ia64_os_init_dispatch_monarch
|
|
.global ia64_os_init_dispatch_slave
|
|
|
|
.text
|
|
.align 16
|
|
|
|
//StartMain////////////////////////////////////////////////////////////////////
|
|
|
|
/*
|
|
* Just the TLB purge part is moved to a separate function
|
|
* so we can re-use the code for cpu hotplug code as well
|
|
* Caller should now setup b1, so we can branch once the
|
|
* tlb flush is complete.
|
|
*/
|
|
|
|
ia64_do_tlb_purge:
|
|
#define O(member) IA64_CPUINFO_##member##_OFFSET
|
|
|
|
GET_THIS_PADDR(r2, cpu_info) // load phys addr of cpu_info into r2
|
|
;;
|
|
addl r17=O(PTCE_STRIDE),r2
|
|
addl r2=O(PTCE_BASE),r2
|
|
;;
|
|
ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base
|
|
ld4 r19=[r2],4 // r19=ptce_count[0]
|
|
ld4 r21=[r17],4 // r21=ptce_stride[0]
|
|
;;
|
|
ld4 r20=[r2] // r20=ptce_count[1]
|
|
ld4 r22=[r17] // r22=ptce_stride[1]
|
|
mov r24=0
|
|
;;
|
|
adds r20=-1,r20
|
|
;;
|
|
#undef O
|
|
|
|
2:
|
|
cmp.ltu p6,p7=r24,r19
|
|
(p7) br.cond.dpnt.few 4f
|
|
mov ar.lc=r20
|
|
3:
|
|
ptc.e r18
|
|
;;
|
|
add r18=r22,r18
|
|
br.cloop.sptk.few 3b
|
|
;;
|
|
add r18=r21,r18
|
|
add r24=1,r24
|
|
;;
|
|
br.sptk.few 2b
|
|
4:
|
|
srlz.i // srlz.i implies srlz.d
|
|
;;
|
|
|
|
// Now purge addresses formerly mapped by TR registers
|
|
// 1. Purge ITR&DTR for kernel.
|
|
movl r16=KERNEL_START
|
|
mov r18=KERNEL_TR_PAGE_SHIFT<<2
|
|
;;
|
|
ptr.i r16, r18
|
|
ptr.d r16, r18
|
|
;;
|
|
srlz.i
|
|
;;
|
|
srlz.d
|
|
;;
|
|
// 3. Purge ITR for PAL code.
|
|
GET_THIS_PADDR(r2, ia64_mca_pal_base)
|
|
;;
|
|
ld8 r16=[r2]
|
|
mov r18=IA64_GRANULE_SHIFT<<2
|
|
;;
|
|
ptr.i r16,r18
|
|
;;
|
|
srlz.i
|
|
;;
|
|
// 4. Purge DTR for stack.
|
|
mov r16=IA64_KR(CURRENT_STACK)
|
|
;;
|
|
shl r16=r16,IA64_GRANULE_SHIFT
|
|
movl r19=PAGE_OFFSET
|
|
;;
|
|
add r16=r19,r16
|
|
mov r18=IA64_GRANULE_SHIFT<<2
|
|
;;
|
|
ptr.d r16,r18
|
|
;;
|
|
srlz.i
|
|
;;
|
|
// Now branch away to caller.
|
|
br.sptk.many b1
|
|
;;
|
|
|
|
//EndMain//////////////////////////////////////////////////////////////////////
|
|
|
|
//StartMain////////////////////////////////////////////////////////////////////
|
|
|
|
ia64_os_mca_dispatch:
|
|
// Serialize all MCA processing
|
|
mov r3=1;;
|
|
LOAD_PHYSICAL(p0,r2,ia64_mca_serialize);;
|
|
ia64_os_mca_spin:
|
|
xchg4 r4=[r2],r3;;
|
|
cmp.ne p6,p0=r4,r0
|
|
(p6) br ia64_os_mca_spin
|
|
|
|
mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack
|
|
LOAD_PHYSICAL(p0,r2,1f) // return address
|
|
mov r19=1 // All MCA events are treated as monarch (for now)
|
|
br.sptk ia64_state_save // save the state that is not in minstate
|
|
1:
|
|
|
|
GET_IA64_MCA_DATA(r2)
|
|
// Using MCA stack, struct ia64_sal_os_state, variable proc_state_param
|
|
;;
|
|
add r3=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET+SOS(PROC_STATE_PARAM), r2
|
|
;;
|
|
ld8 r18=[r3] // Get processor state parameter on existing PALE_CHECK.
|
|
;;
|
|
tbit.nz p6,p7=r18,60
|
|
(p7) br.spnt done_tlb_purge_and_reload
|
|
|
|
// The following code purges TC and TR entries. Then reload all TC entries.
|
|
// Purge percpu data TC entries.
|
|
begin_tlb_purge_and_reload:
|
|
movl r18=ia64_reload_tr;;
|
|
LOAD_PHYSICAL(p0,r18,ia64_reload_tr);;
|
|
mov b1=r18;;
|
|
br.sptk.many ia64_do_tlb_purge;;
|
|
|
|
ia64_reload_tr:
|
|
// Finally reload the TR registers.
|
|
// 1. Reload DTR/ITR registers for kernel.
|
|
mov r18=KERNEL_TR_PAGE_SHIFT<<2
|
|
movl r17=KERNEL_START
|
|
;;
|
|
mov cr.itir=r18
|
|
mov cr.ifa=r17
|
|
mov r16=IA64_TR_KERNEL
|
|
mov r19=ip
|
|
movl r18=PAGE_KERNEL
|
|
;;
|
|
dep r17=0,r19,0, KERNEL_TR_PAGE_SHIFT
|
|
;;
|
|
or r18=r17,r18
|
|
;;
|
|
itr.i itr[r16]=r18
|
|
;;
|
|
itr.d dtr[r16]=r18
|
|
;;
|
|
srlz.i
|
|
srlz.d
|
|
;;
|
|
// 3. Reload ITR for PAL code.
|
|
GET_THIS_PADDR(r2, ia64_mca_pal_pte)
|
|
;;
|
|
ld8 r18=[r2] // load PAL PTE
|
|
;;
|
|
GET_THIS_PADDR(r2, ia64_mca_pal_base)
|
|
;;
|
|
ld8 r16=[r2] // load PAL vaddr
|
|
mov r19=IA64_GRANULE_SHIFT<<2
|
|
;;
|
|
mov cr.itir=r19
|
|
mov cr.ifa=r16
|
|
mov r20=IA64_TR_PALCODE
|
|
;;
|
|
itr.i itr[r20]=r18
|
|
;;
|
|
srlz.i
|
|
;;
|
|
// 4. Reload DTR for stack.
|
|
mov r16=IA64_KR(CURRENT_STACK)
|
|
;;
|
|
shl r16=r16,IA64_GRANULE_SHIFT
|
|
movl r19=PAGE_OFFSET
|
|
;;
|
|
add r18=r19,r16
|
|
movl r20=PAGE_KERNEL
|
|
;;
|
|
add r16=r20,r16
|
|
mov r19=IA64_GRANULE_SHIFT<<2
|
|
;;
|
|
mov cr.itir=r19
|
|
mov cr.ifa=r18
|
|
mov r20=IA64_TR_CURRENT_STACK
|
|
;;
|
|
itr.d dtr[r20]=r16
|
|
;;
|
|
srlz.d
|
|
|
|
done_tlb_purge_and_reload:
|
|
|
|
// switch to per cpu MCA stack
|
|
mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack
|
|
LOAD_PHYSICAL(p0,r2,1f) // return address
|
|
br.sptk ia64_new_stack
|
|
1:
|
|
|
|
// everything saved, now we can set the kernel registers
|
|
mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack
|
|
LOAD_PHYSICAL(p0,r2,1f) // return address
|
|
br.sptk ia64_set_kernel_registers
|
|
1:
|
|
|
|
// This must be done in physical mode
|
|
GET_IA64_MCA_DATA(r2)
|
|
;;
|
|
mov r7=r2
|
|
|
|
// Enter virtual mode from physical mode
|
|
VIRTUAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_begin, r4)
|
|
|
|
// This code returns to SAL via SOS r2, in general SAL has no unwind
|
|
// data. To get a clean termination when backtracing the C MCA/INIT
|
|
// handler, set a dummy return address of 0 in this routine. That
|
|
// requires that ia64_os_mca_virtual_begin be a global function.
|
|
ENTRY(ia64_os_mca_virtual_begin)
|
|
.prologue
|
|
.save rp,r0
|
|
.body
|
|
|
|
mov ar.rsc=3 // set eager mode for C handler
|
|
mov r2=r7 // see GET_IA64_MCA_DATA above
|
|
;;
|
|
|
|
// Call virtual mode handler
|
|
alloc r14=ar.pfs,0,0,3,0
|
|
;;
|
|
DATA_PA_TO_VA(r2,r7)
|
|
;;
|
|
add out0=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_PT_REGS_OFFSET, r2
|
|
add out1=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SWITCH_STACK_OFFSET, r2
|
|
add out2=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET, r2
|
|
br.call.sptk.many b0=ia64_mca_handler
|
|
|
|
// Revert back to physical mode before going back to SAL
|
|
PHYSICAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_end, r4)
|
|
ia64_os_mca_virtual_end:
|
|
|
|
END(ia64_os_mca_virtual_begin)
|
|
|
|
// switch back to previous stack
|
|
alloc r14=ar.pfs,0,0,0,0 // remove the MCA handler frame
|
|
mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack
|
|
LOAD_PHYSICAL(p0,r2,1f) // return address
|
|
br.sptk ia64_old_stack
|
|
1:
|
|
|
|
mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack
|
|
LOAD_PHYSICAL(p0,r2,1f) // return address
|
|
br.sptk ia64_state_restore // restore the SAL state
|
|
1:
|
|
|
|
mov b0=r12 // SAL_CHECK return address
|
|
|
|
// release lock
|
|
LOAD_PHYSICAL(p0,r3,ia64_mca_serialize);;
|
|
st4.rel [r3]=r0
|
|
|
|
br b0
|
|
|
|
//EndMain//////////////////////////////////////////////////////////////////////
|
|
|
|
//StartMain////////////////////////////////////////////////////////////////////
|
|
|
|
//
|
|
// SAL to OS entry point for INIT on all processors. This has been defined for
|
|
// registration purposes with SAL as a part of ia64_mca_init. Monarch and
|
|
// slave INIT have identical processing, except for the value of the
|
|
// sos->monarch flag in r19.
|
|
//
|
|
|
|
ia64_os_init_dispatch_monarch:
|
|
mov r19=1 // Bow, bow, ye lower middle classes!
|
|
br.sptk ia64_os_init_dispatch
|
|
|
|
ia64_os_init_dispatch_slave:
|
|
mov r19=0 // <igor>yeth, mathter</igor>
|
|
|
|
ia64_os_init_dispatch:
|
|
|
|
mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack
|
|
LOAD_PHYSICAL(p0,r2,1f) // return address
|
|
br.sptk ia64_state_save // save the state that is not in minstate
|
|
1:
|
|
|
|
// switch to per cpu INIT stack
|
|
mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack
|
|
LOAD_PHYSICAL(p0,r2,1f) // return address
|
|
br.sptk ia64_new_stack
|
|
1:
|
|
|
|
// everything saved, now we can set the kernel registers
|
|
mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack
|
|
LOAD_PHYSICAL(p0,r2,1f) // return address
|
|
br.sptk ia64_set_kernel_registers
|
|
1:
|
|
|
|
// This must be done in physical mode
|
|
GET_IA64_MCA_DATA(r2)
|
|
;;
|
|
mov r7=r2
|
|
|
|
// Enter virtual mode from physical mode
|
|
VIRTUAL_MODE_ENTER(r2, r3, ia64_os_init_virtual_begin, r4)
|
|
|
|
// This code returns to SAL via SOS r2, in general SAL has no unwind
|
|
// data. To get a clean termination when backtracing the C MCA/INIT
|
|
// handler, set a dummy return address of 0 in this routine. That
|
|
// requires that ia64_os_init_virtual_begin be a global function.
|
|
ENTRY(ia64_os_init_virtual_begin)
|
|
.prologue
|
|
.save rp,r0
|
|
.body
|
|
|
|
mov ar.rsc=3 // set eager mode for C handler
|
|
mov r2=r7 // see GET_IA64_MCA_DATA above
|
|
;;
|
|
|
|
// Call virtual mode handler
|
|
alloc r14=ar.pfs,0,0,3,0
|
|
;;
|
|
DATA_PA_TO_VA(r2,r7)
|
|
;;
|
|
add out0=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_PT_REGS_OFFSET, r2
|
|
add out1=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_SWITCH_STACK_OFFSET, r2
|
|
add out2=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_SOS_OFFSET, r2
|
|
br.call.sptk.many b0=ia64_init_handler
|
|
|
|
// Revert back to physical mode before going back to SAL
|
|
PHYSICAL_MODE_ENTER(r2, r3, ia64_os_init_virtual_end, r4)
|
|
ia64_os_init_virtual_end:
|
|
|
|
END(ia64_os_init_virtual_begin)
|
|
|
|
mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack
|
|
LOAD_PHYSICAL(p0,r2,1f) // return address
|
|
br.sptk ia64_state_restore // restore the SAL state
|
|
1:
|
|
|
|
// switch back to previous stack
|
|
alloc r14=ar.pfs,0,0,0,0 // remove the INIT handler frame
|
|
mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack
|
|
LOAD_PHYSICAL(p0,r2,1f) // return address
|
|
br.sptk ia64_old_stack
|
|
1:
|
|
|
|
mov b0=r12 // SAL_CHECK return address
|
|
br b0
|
|
|
|
//EndMain//////////////////////////////////////////////////////////////////////
|
|
|
|
// common defines for the stubs
|
|
#define ms r4
|
|
#define regs r5
|
|
#define temp1 r2 /* careful, it overlaps with input registers */
|
|
#define temp2 r3 /* careful, it overlaps with input registers */
|
|
#define temp3 r7
|
|
#define temp4 r14
|
|
|
|
|
|
//++
|
|
// Name:
|
|
// ia64_state_save()
|
|
//
|
|
// Stub Description:
|
|
//
|
|
// Save the state that is not in minstate. This is sensitive to the layout of
|
|
// struct ia64_sal_os_state in mca.h.
|
|
//
|
|
// r2 contains the return address, r3 contains either
|
|
// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET.
|
|
//
|
|
// The OS to SAL section of struct ia64_sal_os_state is set to a default
|
|
// value of cold boot (MCA) or warm boot (INIT) and return to the same
|
|
// context. ia64_sal_os_state is also used to hold some registers that
|
|
// need to be saved and restored across the stack switches.
|
|
//
|
|
// Most input registers to this stub come from PAL/SAL
|
|
// r1 os gp, physical
|
|
// r8 pal_proc entry point
|
|
// r9 sal_proc entry point
|
|
// r10 sal gp
|
|
// r11 MCA - rendevzous state, INIT - reason code
|
|
// r12 sal return address
|
|
// r17 pal min_state
|
|
// r18 processor state parameter
|
|
// r19 monarch flag, set by the caller of this routine
|
|
//
|
|
// In addition to the SAL to OS state, this routine saves all the
|
|
// registers that appear in struct pt_regs and struct switch_stack,
|
|
// excluding those that are already in the PAL minstate area. This
|
|
// results in a partial pt_regs and switch_stack, the C code copies the
|
|
// remaining registers from PAL minstate to pt_regs and switch_stack. The
|
|
// resulting structures contain all the state of the original process when
|
|
// MCA/INIT occurred.
|
|
//
|
|
//--
|
|
|
|
ia64_state_save:
|
|
add regs=MCA_SOS_OFFSET, r3
|
|
add ms=MCA_SOS_OFFSET+8, r3
|
|
mov b0=r2 // save return address
|
|
cmp.eq p1,p2=IA64_MCA_CPU_MCA_STACK_OFFSET, r3
|
|
;;
|
|
GET_IA64_MCA_DATA(temp2)
|
|
;;
|
|
add temp1=temp2, regs // struct ia64_sal_os_state on MCA or INIT stack
|
|
add temp2=temp2, ms // struct ia64_sal_os_state+8 on MCA or INIT stack
|
|
;;
|
|
mov regs=temp1 // save the start of sos
|
|
st8 [temp1]=r1,16 // os_gp
|
|
st8 [temp2]=r8,16 // pal_proc
|
|
;;
|
|
st8 [temp1]=r9,16 // sal_proc
|
|
st8 [temp2]=r11,16 // rv_rc
|
|
mov r11=cr.iipa
|
|
;;
|
|
st8 [temp1]=r18 // proc_state_param
|
|
st8 [temp2]=r19 // monarch
|
|
mov r6=IA64_KR(CURRENT)
|
|
add temp1=SOS(SAL_RA), regs
|
|
add temp2=SOS(SAL_GP), regs
|
|
;;
|
|
st8 [temp1]=r12,16 // sal_ra
|
|
st8 [temp2]=r10,16 // sal_gp
|
|
mov r12=cr.isr
|
|
;;
|
|
st8 [temp1]=r17,16 // pal_min_state
|
|
st8 [temp2]=r6,16 // prev_IA64_KR_CURRENT
|
|
mov r6=IA64_KR(CURRENT_STACK)
|
|
;;
|
|
st8 [temp1]=r6,16 // prev_IA64_KR_CURRENT_STACK
|
|
st8 [temp2]=r0,16 // prev_task, starts off as NULL
|
|
mov r6=cr.ifa
|
|
;;
|
|
st8 [temp1]=r12,16 // cr.isr
|
|
st8 [temp2]=r6,16 // cr.ifa
|
|
mov r12=cr.itir
|
|
;;
|
|
st8 [temp1]=r12,16 // cr.itir
|
|
st8 [temp2]=r11,16 // cr.iipa
|
|
mov r12=cr.iim
|
|
;;
|
|
st8 [temp1]=r12 // cr.iim
|
|
(p1) mov r12=IA64_MCA_COLD_BOOT
|
|
(p2) mov r12=IA64_INIT_WARM_BOOT
|
|
mov r6=cr.iha
|
|
add temp1=SOS(OS_STATUS), regs
|
|
;;
|
|
st8 [temp2]=r6 // cr.iha
|
|
add temp2=SOS(CONTEXT), regs
|
|
st8 [temp1]=r12 // os_status, default is cold boot
|
|
mov r6=IA64_MCA_SAME_CONTEXT
|
|
;;
|
|
st8 [temp2]=r6 // context, default is same context
|
|
|
|
// Save the pt_regs data that is not in minstate. The previous code
|
|
// left regs at sos.
|
|
add regs=MCA_PT_REGS_OFFSET-MCA_SOS_OFFSET, regs
|
|
;;
|
|
add temp1=PT(B6), regs
|
|
mov temp3=b6
|
|
mov temp4=b7
|
|
add temp2=PT(B7), regs
|
|
;;
|
|
st8 [temp1]=temp3,PT(AR_CSD)-PT(B6) // save b6
|
|
st8 [temp2]=temp4,PT(AR_SSD)-PT(B7) // save b7
|
|
mov temp3=ar.csd
|
|
mov temp4=ar.ssd
|
|
cover // must be last in group
|
|
;;
|
|
st8 [temp1]=temp3,PT(AR_UNAT)-PT(AR_CSD) // save ar.csd
|
|
st8 [temp2]=temp4,PT(AR_PFS)-PT(AR_SSD) // save ar.ssd
|
|
mov temp3=ar.unat
|
|
mov temp4=ar.pfs
|
|
;;
|
|
st8 [temp1]=temp3,PT(AR_RNAT)-PT(AR_UNAT) // save ar.unat
|
|
st8 [temp2]=temp4,PT(AR_BSPSTORE)-PT(AR_PFS) // save ar.pfs
|
|
mov temp3=ar.rnat
|
|
mov temp4=ar.bspstore
|
|
;;
|
|
st8 [temp1]=temp3,PT(LOADRS)-PT(AR_RNAT) // save ar.rnat
|
|
st8 [temp2]=temp4,PT(AR_FPSR)-PT(AR_BSPSTORE) // save ar.bspstore
|
|
mov temp3=ar.bsp
|
|
;;
|
|
sub temp3=temp3, temp4 // ar.bsp - ar.bspstore
|
|
mov temp4=ar.fpsr
|
|
;;
|
|
shl temp3=temp3,16 // compute ar.rsc to be used for "loadrs"
|
|
;;
|
|
st8 [temp1]=temp3,PT(AR_CCV)-PT(LOADRS) // save loadrs
|
|
st8 [temp2]=temp4,PT(F6)-PT(AR_FPSR) // save ar.fpsr
|
|
mov temp3=ar.ccv
|
|
;;
|
|
st8 [temp1]=temp3,PT(F7)-PT(AR_CCV) // save ar.ccv
|
|
stf.spill [temp2]=f6,PT(F8)-PT(F6)
|
|
;;
|
|
stf.spill [temp1]=f7,PT(F9)-PT(F7)
|
|
stf.spill [temp2]=f8,PT(F10)-PT(F8)
|
|
;;
|
|
stf.spill [temp1]=f9,PT(F11)-PT(F9)
|
|
stf.spill [temp2]=f10
|
|
;;
|
|
stf.spill [temp1]=f11
|
|
|
|
// Save the switch_stack data that is not in minstate nor pt_regs. The
|
|
// previous code left regs at pt_regs.
|
|
add regs=MCA_SWITCH_STACK_OFFSET-MCA_PT_REGS_OFFSET, regs
|
|
;;
|
|
add temp1=SW(F2), regs
|
|
add temp2=SW(F3), regs
|
|
;;
|
|
stf.spill [temp1]=f2,32
|
|
stf.spill [temp2]=f3,32
|
|
;;
|
|
stf.spill [temp1]=f4,32
|
|
stf.spill [temp2]=f5,32
|
|
;;
|
|
stf.spill [temp1]=f12,32
|
|
stf.spill [temp2]=f13,32
|
|
;;
|
|
stf.spill [temp1]=f14,32
|
|
stf.spill [temp2]=f15,32
|
|
;;
|
|
stf.spill [temp1]=f16,32
|
|
stf.spill [temp2]=f17,32
|
|
;;
|
|
stf.spill [temp1]=f18,32
|
|
stf.spill [temp2]=f19,32
|
|
;;
|
|
stf.spill [temp1]=f20,32
|
|
stf.spill [temp2]=f21,32
|
|
;;
|
|
stf.spill [temp1]=f22,32
|
|
stf.spill [temp2]=f23,32
|
|
;;
|
|
stf.spill [temp1]=f24,32
|
|
stf.spill [temp2]=f25,32
|
|
;;
|
|
stf.spill [temp1]=f26,32
|
|
stf.spill [temp2]=f27,32
|
|
;;
|
|
stf.spill [temp1]=f28,32
|
|
stf.spill [temp2]=f29,32
|
|
;;
|
|
stf.spill [temp1]=f30,SW(B2)-SW(F30)
|
|
stf.spill [temp2]=f31,SW(B3)-SW(F31)
|
|
mov temp3=b2
|
|
mov temp4=b3
|
|
;;
|
|
st8 [temp1]=temp3,16 // save b2
|
|
st8 [temp2]=temp4,16 // save b3
|
|
mov temp3=b4
|
|
mov temp4=b5
|
|
;;
|
|
st8 [temp1]=temp3,SW(AR_LC)-SW(B4) // save b4
|
|
st8 [temp2]=temp4 // save b5
|
|
mov temp3=ar.lc
|
|
;;
|
|
st8 [temp1]=temp3 // save ar.lc
|
|
|
|
// FIXME: Some proms are incorrectly accessing the minstate area as
|
|
// cached data. The C code uses region 6, uncached virtual. Ensure
|
|
// that there is no cache data lying around for the first 1K of the
|
|
// minstate area.
|
|
// Remove this code in September 2006, that gives platforms a year to
|
|
// fix their proms and get their customers updated.
|
|
|
|
add r1=32*1,r17
|
|
add r2=32*2,r17
|
|
add r3=32*3,r17
|
|
add r4=32*4,r17
|
|
add r5=32*5,r17
|
|
add r6=32*6,r17
|
|
add r7=32*7,r17
|
|
;;
|
|
fc r17
|
|
fc r1
|
|
fc r2
|
|
fc r3
|
|
fc r4
|
|
fc r5
|
|
fc r6
|
|
fc r7
|
|
add r17=32*8,r17
|
|
add r1=32*8,r1
|
|
add r2=32*8,r2
|
|
add r3=32*8,r3
|
|
add r4=32*8,r4
|
|
add r5=32*8,r5
|
|
add r6=32*8,r6
|
|
add r7=32*8,r7
|
|
;;
|
|
fc r17
|
|
fc r1
|
|
fc r2
|
|
fc r3
|
|
fc r4
|
|
fc r5
|
|
fc r6
|
|
fc r7
|
|
add r17=32*8,r17
|
|
add r1=32*8,r1
|
|
add r2=32*8,r2
|
|
add r3=32*8,r3
|
|
add r4=32*8,r4
|
|
add r5=32*8,r5
|
|
add r6=32*8,r6
|
|
add r7=32*8,r7
|
|
;;
|
|
fc r17
|
|
fc r1
|
|
fc r2
|
|
fc r3
|
|
fc r4
|
|
fc r5
|
|
fc r6
|
|
fc r7
|
|
add r17=32*8,r17
|
|
add r1=32*8,r1
|
|
add r2=32*8,r2
|
|
add r3=32*8,r3
|
|
add r4=32*8,r4
|
|
add r5=32*8,r5
|
|
add r6=32*8,r6
|
|
add r7=32*8,r7
|
|
;;
|
|
fc r17
|
|
fc r1
|
|
fc r2
|
|
fc r3
|
|
fc r4
|
|
fc r5
|
|
fc r6
|
|
fc r7
|
|
|
|
br.sptk b0
|
|
|
|
//EndStub//////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
//++
|
|
// Name:
|
|
// ia64_state_restore()
|
|
//
|
|
// Stub Description:
|
|
//
|
|
// Restore the SAL/OS state. This is sensitive to the layout of struct
|
|
// ia64_sal_os_state in mca.h.
|
|
//
|
|
// r2 contains the return address, r3 contains either
|
|
// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET.
|
|
//
|
|
// In addition to the SAL to OS state, this routine restores all the
|
|
// registers that appear in struct pt_regs and struct switch_stack,
|
|
// excluding those in the PAL minstate area.
|
|
//
|
|
//--
|
|
|
|
ia64_state_restore:
|
|
// Restore the switch_stack data that is not in minstate nor pt_regs.
|
|
add regs=MCA_SWITCH_STACK_OFFSET, r3
|
|
mov b0=r2 // save return address
|
|
;;
|
|
GET_IA64_MCA_DATA(temp2)
|
|
;;
|
|
add regs=temp2, regs
|
|
;;
|
|
add temp1=SW(F2), regs
|
|
add temp2=SW(F3), regs
|
|
;;
|
|
ldf.fill f2=[temp1],32
|
|
ldf.fill f3=[temp2],32
|
|
;;
|
|
ldf.fill f4=[temp1],32
|
|
ldf.fill f5=[temp2],32
|
|
;;
|
|
ldf.fill f12=[temp1],32
|
|
ldf.fill f13=[temp2],32
|
|
;;
|
|
ldf.fill f14=[temp1],32
|
|
ldf.fill f15=[temp2],32
|
|
;;
|
|
ldf.fill f16=[temp1],32
|
|
ldf.fill f17=[temp2],32
|
|
;;
|
|
ldf.fill f18=[temp1],32
|
|
ldf.fill f19=[temp2],32
|
|
;;
|
|
ldf.fill f20=[temp1],32
|
|
ldf.fill f21=[temp2],32
|
|
;;
|
|
ldf.fill f22=[temp1],32
|
|
ldf.fill f23=[temp2],32
|
|
;;
|
|
ldf.fill f24=[temp1],32
|
|
ldf.fill f25=[temp2],32
|
|
;;
|
|
ldf.fill f26=[temp1],32
|
|
ldf.fill f27=[temp2],32
|
|
;;
|
|
ldf.fill f28=[temp1],32
|
|
ldf.fill f29=[temp2],32
|
|
;;
|
|
ldf.fill f30=[temp1],SW(B2)-SW(F30)
|
|
ldf.fill f31=[temp2],SW(B3)-SW(F31)
|
|
;;
|
|
ld8 temp3=[temp1],16 // restore b2
|
|
ld8 temp4=[temp2],16 // restore b3
|
|
;;
|
|
mov b2=temp3
|
|
mov b3=temp4
|
|
ld8 temp3=[temp1],SW(AR_LC)-SW(B4) // restore b4
|
|
ld8 temp4=[temp2] // restore b5
|
|
;;
|
|
mov b4=temp3
|
|
mov b5=temp4
|
|
ld8 temp3=[temp1] // restore ar.lc
|
|
;;
|
|
mov ar.lc=temp3
|
|
|
|
// Restore the pt_regs data that is not in minstate. The previous code
|
|
// left regs at switch_stack.
|
|
add regs=MCA_PT_REGS_OFFSET-MCA_SWITCH_STACK_OFFSET, regs
|
|
;;
|
|
add temp1=PT(B6), regs
|
|
add temp2=PT(B7), regs
|
|
;;
|
|
ld8 temp3=[temp1],PT(AR_CSD)-PT(B6) // restore b6
|
|
ld8 temp4=[temp2],PT(AR_SSD)-PT(B7) // restore b7
|
|
;;
|
|
mov b6=temp3
|
|
mov b7=temp4
|
|
ld8 temp3=[temp1],PT(AR_UNAT)-PT(AR_CSD) // restore ar.csd
|
|
ld8 temp4=[temp2],PT(AR_PFS)-PT(AR_SSD) // restore ar.ssd
|
|
;;
|
|
mov ar.csd=temp3
|
|
mov ar.ssd=temp4
|
|
ld8 temp3=[temp1] // restore ar.unat
|
|
add temp1=PT(AR_CCV)-PT(AR_UNAT), temp1
|
|
ld8 temp4=[temp2],PT(AR_FPSR)-PT(AR_PFS) // restore ar.pfs
|
|
;;
|
|
mov ar.unat=temp3
|
|
mov ar.pfs=temp4
|
|
// ar.rnat, ar.bspstore, loadrs are restore in ia64_old_stack.
|
|
ld8 temp3=[temp1],PT(F6)-PT(AR_CCV) // restore ar.ccv
|
|
ld8 temp4=[temp2],PT(F7)-PT(AR_FPSR) // restore ar.fpsr
|
|
;;
|
|
mov ar.ccv=temp3
|
|
mov ar.fpsr=temp4
|
|
ldf.fill f6=[temp1],PT(F8)-PT(F6)
|
|
ldf.fill f7=[temp2],PT(F9)-PT(F7)
|
|
;;
|
|
ldf.fill f8=[temp1],PT(F10)-PT(F8)
|
|
ldf.fill f9=[temp2],PT(F11)-PT(F9)
|
|
;;
|
|
ldf.fill f10=[temp1]
|
|
ldf.fill f11=[temp2]
|
|
|
|
// Restore the SAL to OS state. The previous code left regs at pt_regs.
|
|
add regs=MCA_SOS_OFFSET-MCA_PT_REGS_OFFSET, regs
|
|
;;
|
|
add temp1=SOS(SAL_RA), regs
|
|
add temp2=SOS(SAL_GP), regs
|
|
;;
|
|
ld8 r12=[temp1],16 // sal_ra
|
|
ld8 r9=[temp2],16 // sal_gp
|
|
;;
|
|
ld8 r22=[temp1],16 // pal_min_state, virtual
|
|
ld8 r13=[temp2],16 // prev_IA64_KR_CURRENT
|
|
;;
|
|
ld8 r16=[temp1],16 // prev_IA64_KR_CURRENT_STACK
|
|
ld8 r20=[temp2],16 // prev_task
|
|
;;
|
|
ld8 temp3=[temp1],16 // cr.isr
|
|
ld8 temp4=[temp2],16 // cr.ifa
|
|
;;
|
|
mov cr.isr=temp3
|
|
mov cr.ifa=temp4
|
|
ld8 temp3=[temp1],16 // cr.itir
|
|
ld8 temp4=[temp2],16 // cr.iipa
|
|
;;
|
|
mov cr.itir=temp3
|
|
mov cr.iipa=temp4
|
|
ld8 temp3=[temp1] // cr.iim
|
|
ld8 temp4=[temp2] // cr.iha
|
|
add temp1=SOS(OS_STATUS), regs
|
|
add temp2=SOS(CONTEXT), regs
|
|
;;
|
|
mov cr.iim=temp3
|
|
mov cr.iha=temp4
|
|
dep r22=0,r22,62,1 // pal_min_state, physical, uncached
|
|
mov IA64_KR(CURRENT)=r13
|
|
ld8 r8=[temp1] // os_status
|
|
ld8 r10=[temp2] // context
|
|
|
|
/* Wire IA64_TR_CURRENT_STACK to the stack that we are resuming to. To
|
|
* avoid any dependencies on the algorithm in ia64_switch_to(), just
|
|
* purge any existing CURRENT_STACK mapping and insert the new one.
|
|
*
|
|
* r16 contains prev_IA64_KR_CURRENT_STACK, r13 contains
|
|
* prev_IA64_KR_CURRENT, these values may have been changed by the C
|
|
* code. Do not use r8, r9, r10, r22, they contain values ready for
|
|
* the return to SAL.
|
|
*/
|
|
|
|
mov r15=IA64_KR(CURRENT_STACK) // physical granule mapped by IA64_TR_CURRENT_STACK
|
|
;;
|
|
shl r15=r15,IA64_GRANULE_SHIFT
|
|
;;
|
|
dep r15=-1,r15,61,3 // virtual granule
|
|
mov r18=IA64_GRANULE_SHIFT<<2 // for cr.itir.ps
|
|
;;
|
|
ptr.d r15,r18
|
|
;;
|
|
srlz.d
|
|
|
|
extr.u r19=r13,61,3 // r13 = prev_IA64_KR_CURRENT
|
|
shl r20=r16,IA64_GRANULE_SHIFT // r16 = prev_IA64_KR_CURRENT_STACK
|
|
movl r21=PAGE_KERNEL // page properties
|
|
;;
|
|
mov IA64_KR(CURRENT_STACK)=r16
|
|
cmp.ne p6,p0=RGN_KERNEL,r19 // new stack is in the kernel region?
|
|
or r21=r20,r21 // construct PA | page properties
|
|
(p6) br.spnt 1f // the dreaded cpu 0 idle task in region 5:(
|
|
;;
|
|
mov cr.itir=r18
|
|
mov cr.ifa=r13
|
|
mov r20=IA64_TR_CURRENT_STACK
|
|
;;
|
|
itr.d dtr[r20]=r21
|
|
;;
|
|
srlz.d
|
|
1:
|
|
|
|
br.sptk b0
|
|
|
|
//EndStub//////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
//++
|
|
// Name:
|
|
// ia64_new_stack()
|
|
//
|
|
// Stub Description:
|
|
//
|
|
// Switch to the MCA/INIT stack.
|
|
//
|
|
// r2 contains the return address, r3 contains either
|
|
// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET.
|
|
//
|
|
// On entry RBS is still on the original stack, this routine switches RBS
|
|
// to use the MCA/INIT stack.
|
|
//
|
|
// On entry, sos->pal_min_state is physical, on exit it is virtual.
|
|
//
|
|
//--
|
|
|
|
ia64_new_stack:
|
|
add regs=MCA_PT_REGS_OFFSET, r3
|
|
add temp2=MCA_SOS_OFFSET+SOS(PAL_MIN_STATE), r3
|
|
mov b0=r2 // save return address
|
|
GET_IA64_MCA_DATA(temp1)
|
|
invala
|
|
;;
|
|
add temp2=temp2, temp1 // struct ia64_sal_os_state.pal_min_state on MCA or INIT stack
|
|
add regs=regs, temp1 // struct pt_regs on MCA or INIT stack
|
|
;;
|
|
// Address of minstate area provided by PAL is physical, uncacheable.
|
|
// Convert to Linux virtual address in region 6 for C code.
|
|
ld8 ms=[temp2] // pal_min_state, physical
|
|
;;
|
|
dep temp1=-1,ms,62,2 // set region 6
|
|
mov temp3=IA64_RBS_OFFSET-MCA_PT_REGS_OFFSET
|
|
;;
|
|
st8 [temp2]=temp1 // pal_min_state, virtual
|
|
|
|
add temp4=temp3, regs // start of bspstore on new stack
|
|
;;
|
|
mov ar.bspstore=temp4 // switch RBS to MCA/INIT stack
|
|
;;
|
|
flushrs // must be first in group
|
|
br.sptk b0
|
|
|
|
//EndStub//////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
//++
|
|
// Name:
|
|
// ia64_old_stack()
|
|
//
|
|
// Stub Description:
|
|
//
|
|
// Switch to the old stack.
|
|
//
|
|
// r2 contains the return address, r3 contains either
|
|
// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET.
|
|
//
|
|
// On entry, pal_min_state is virtual, on exit it is physical.
|
|
//
|
|
// On entry RBS is on the MCA/INIT stack, this routine switches RBS
|
|
// back to the previous stack.
|
|
//
|
|
// The psr is set to all zeroes. SAL return requires either all zeroes or
|
|
// just psr.mc set. Leaving psr.mc off allows INIT to be issued if this
|
|
// code does not perform correctly.
|
|
//
|
|
// The dirty registers at the time of the event were flushed to the
|
|
// MCA/INIT stack in ia64_pt_regs_save(). Restore the dirty registers
|
|
// before reverting to the previous bspstore.
|
|
//--
|
|
|
|
ia64_old_stack:
|
|
add regs=MCA_PT_REGS_OFFSET, r3
|
|
mov b0=r2 // save return address
|
|
GET_IA64_MCA_DATA(temp2)
|
|
LOAD_PHYSICAL(p0,temp1,1f)
|
|
;;
|
|
mov cr.ipsr=r0
|
|
mov cr.ifs=r0
|
|
mov cr.iip=temp1
|
|
;;
|
|
invala
|
|
rfi
|
|
1:
|
|
|
|
add regs=regs, temp2 // struct pt_regs on MCA or INIT stack
|
|
;;
|
|
add temp1=PT(LOADRS), regs
|
|
;;
|
|
ld8 temp2=[temp1],PT(AR_BSPSTORE)-PT(LOADRS) // restore loadrs
|
|
;;
|
|
ld8 temp3=[temp1],PT(AR_RNAT)-PT(AR_BSPSTORE) // restore ar.bspstore
|
|
mov ar.rsc=temp2
|
|
;;
|
|
loadrs
|
|
ld8 temp4=[temp1] // restore ar.rnat
|
|
;;
|
|
mov ar.bspstore=temp3 // back to old stack
|
|
;;
|
|
mov ar.rnat=temp4
|
|
;;
|
|
|
|
br.sptk b0
|
|
|
|
//EndStub//////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
//++
|
|
// Name:
|
|
// ia64_set_kernel_registers()
|
|
//
|
|
// Stub Description:
|
|
//
|
|
// Set the registers that are required by the C code in order to run on an
|
|
// MCA/INIT stack.
|
|
//
|
|
// r2 contains the return address, r3 contains either
|
|
// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET.
|
|
//
|
|
//--
|
|
|
|
ia64_set_kernel_registers:
|
|
add temp3=MCA_SP_OFFSET, r3
|
|
mov b0=r2 // save return address
|
|
GET_IA64_MCA_DATA(temp1)
|
|
;;
|
|
add r12=temp1, temp3 // kernel stack pointer on MCA/INIT stack
|
|
add r13=temp1, r3 // set current to start of MCA/INIT stack
|
|
add r20=temp1, r3 // physical start of MCA/INIT stack
|
|
;;
|
|
DATA_PA_TO_VA(r12,temp2)
|
|
DATA_PA_TO_VA(r13,temp3)
|
|
;;
|
|
mov IA64_KR(CURRENT)=r13
|
|
|
|
/* Wire IA64_TR_CURRENT_STACK to the MCA/INIT handler stack. To avoid
|
|
* any dependencies on the algorithm in ia64_switch_to(), just purge
|
|
* any existing CURRENT_STACK mapping and insert the new one.
|
|
*/
|
|
|
|
mov r16=IA64_KR(CURRENT_STACK) // physical granule mapped by IA64_TR_CURRENT_STACK
|
|
;;
|
|
shl r16=r16,IA64_GRANULE_SHIFT
|
|
;;
|
|
dep r16=-1,r16,61,3 // virtual granule
|
|
mov r18=IA64_GRANULE_SHIFT<<2 // for cr.itir.ps
|
|
;;
|
|
ptr.d r16,r18
|
|
;;
|
|
srlz.d
|
|
|
|
shr.u r16=r20,IA64_GRANULE_SHIFT // r20 = physical start of MCA/INIT stack
|
|
movl r21=PAGE_KERNEL // page properties
|
|
;;
|
|
mov IA64_KR(CURRENT_STACK)=r16
|
|
or r21=r20,r21 // construct PA | page properties
|
|
;;
|
|
mov cr.itir=r18
|
|
mov cr.ifa=r13
|
|
mov r20=IA64_TR_CURRENT_STACK
|
|
|
|
movl r17=FPSR_DEFAULT
|
|
;;
|
|
mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value
|
|
;;
|
|
itr.d dtr[r20]=r21
|
|
;;
|
|
srlz.d
|
|
|
|
br.sptk b0
|
|
|
|
//EndStub//////////////////////////////////////////////////////////////////////
|
|
|
|
#undef ms
|
|
#undef regs
|
|
#undef temp1
|
|
#undef temp2
|
|
#undef temp3
|
|
#undef temp4
|
|
|
|
|
|
// Support function for mca.c, it is here to avoid using inline asm. Given the
|
|
// address of an rnat slot, if that address is below the current ar.bspstore
|
|
// then return the contents of that slot, otherwise return the contents of
|
|
// ar.rnat.
|
|
GLOBAL_ENTRY(ia64_get_rnat)
|
|
alloc r14=ar.pfs,1,0,0,0
|
|
mov ar.rsc=0
|
|
;;
|
|
mov r14=ar.bspstore
|
|
;;
|
|
cmp.lt p6,p7=in0,r14
|
|
;;
|
|
(p6) ld8 r8=[in0]
|
|
(p7) mov r8=ar.rnat
|
|
mov ar.rsc=3
|
|
br.ret.sptk.many rp
|
|
END(ia64_get_rnat)
|