- /* If the user disables ASM (for example, to avoid bugs in the ASM code), do not compile it. */
- #if !defined(MD_ST_NO_ASM)
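- /*
-  * Illustrative note (assumption, not taken from this file): defining the
-  * guard skips all of the assembly below, in which case the library is
-  * expected to fall back to setjmp()/longjmp() based context switching,
-  * selected elsewhere (presumably in md.h). A build would simply define
-  * the macro, e.g.
-  *
-  *     cc -DMD_ST_NO_ASM -c md.S
-  */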
- /*
- * Portions created by SGI are Copyright (C) 2000 Silicon Graphics, Inc.
- * All Rights Reserved.
- */
- #if defined(__ia64__)
- /****************************************************************/
- /*
- * The internal __jmp_buf layout is different from one used
- * by setjmp()/longjmp().
- *
- * Offset Description
- * ------ -----------
- * 0x000 stack pointer (r12)
- * 0x008 gp (r1)
- * 0x010 caller's unat
- * 0x018 fpsr
- * 0x020 r4
- * 0x028 r5
- * 0x030 r6
- * 0x038 r7
- * 0x040 rp (b0)
- * 0x048 b1
- * 0x050 b2
- * 0x058 b3
- * 0x060 b4
- * 0x068 b5
- * 0x070 ar.pfs
- * 0x078 ar.lc
- * 0x080 pr
- * 0x088 ar.bsp
- * 0x090 ar.unat
- * 0x098 &__jmp_buf
- * 0x0a0 ar.rsc
- * 0x0a8 ar.rnat
- * 0x0b0 f2
- * 0x0c0 f3
- * 0x0d0 f4
- * 0x0e0 f5
- * 0x0f0 f16
- * 0x100 f17
- * 0x110 f18
- * 0x120 f19
- * 0x130 f20
- * 0x140 f21
- * 0x150 f22
- * 0x160 f23
- * 0x170 f24
- * 0x180 f25
- * 0x190 f26
- * 0x1a0 f27
- * 0x1b0 f28
- * 0x1c0 f29
- * 0x1d0 f30
- * 0x1e0 f31
- *
- * Note that the address of __jmp_buf is saved but not used: we assume
- * that the jmp_buf data structure is never moved around in memory.
- */
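- /*
-  * Illustrative C view (assumption, not part of the original sources):
-  * with the offsets above, the last slot (f31 at 0x1e0) is a 16-byte
-  * stf.spill/ldf.fill target, so the buffer must span at least 0x1f0
-  * bytes and be 16-byte aligned, roughly:
-  *
-  *     typedef long long __jmp_buf[0x1f0 / sizeof(long long)]
-  *             __attribute__((aligned(16)));
-  *
-  * The typedef name and attribute are for illustration only; the real
-  * definition lives in the headers, not here.
-  */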
- /*
- * Implemented according to "IA-64 Software Conventions and Runtime
- * Architecture Guide", Chapter 10: "Context Management".
- */
- .text
- .psr abi64
- .psr lsb
- .lsb
- /* _st_md_cxt_save(__jmp_buf env) */
- .align 32
- .global _st_md_cxt_save
- .proc _st_md_cxt_save
- _st_md_cxt_save:
- alloc r14 = ar.pfs,1,0,0,0
- mov r16 = ar.unat
- ;;
- mov r17 = ar.fpsr
- mov r2 = in0
- add r3 = 8,in0
- ;;
- st8.spill.nta [r2] = sp,16 // r12 (sp)
- ;;
- st8.spill.nta [r3] = gp,16 // r1 (gp)
- ;;
- st8.nta [r2] = r16,16 // save caller's unat
- st8.nta [r3] = r17,16 // save fpsr
- add r8 = 0xb0,in0
- ;;
- st8.spill.nta [r2] = r4,16 // r4
- ;;
- st8.spill.nta [r3] = r5,16 // r5
- add r9 = 0xc0,in0
- ;;
- stf.spill.nta [r8] = f2,32
- stf.spill.nta [r9] = f3,32
- mov r15 = rp
- ;;
- stf.spill.nta [r8] = f4,32
- stf.spill.nta [r9] = f5,32
- mov r17 = b1
- ;;
- stf.spill.nta [r8] = f16,32
- stf.spill.nta [r9] = f17,32
- mov r18 = b2
- ;;
- stf.spill.nta [r8] = f18,32
- stf.spill.nta [r9] = f19,32
- mov r19 = b3
- ;;
- stf.spill.nta [r8] = f20,32
- stf.spill.nta [r9] = f21,32
- mov r20 = b4
- ;;
- stf.spill.nta [r8] = f22,32
- stf.spill.nta [r9] = f23,32
- mov r21 = b5
- ;;
- stf.spill.nta [r8] = f24,32
- stf.spill.nta [r9] = f25,32
- mov r22 = ar.lc
- ;;
- stf.spill.nta [r8] = f26,32
- stf.spill.nta [r9] = f27,32
- mov r24 = pr
- ;;
- stf.spill.nta [r8] = f28,32
- stf.spill.nta [r9] = f29,32
- ;;
- stf.spill.nta [r8] = f30
- stf.spill.nta [r9] = f31
- st8.spill.nta [r2] = r6,16 // r6
- ;;
- st8.spill.nta [r3] = r7,16 // r7
- ;;
- mov r23 = ar.bsp
- mov r25 = ar.unat
- st8.nta [r2] = r15,16 // b0
- st8.nta [r3] = r17,16 // b1
- ;;
- st8.nta [r2] = r18,16 // b2
- st8.nta [r3] = r19,16 // b3
- mov r26 = ar.rsc
- ;;
- st8.nta [r2] = r20,16 // b4
- st8.nta [r3] = r21,16 // b5
- ;;
- st8.nta [r2] = r14,16 // ar.pfs
- st8.nta [r3] = r22,16 // ar.lc
- ;;
- st8.nta [r2] = r24,16 // pr
- st8.nta [r3] = r23,16 // ar.bsp
- ;;
- st8.nta [r2] = r25,16 // ar.unat
- st8.nta [r3] = in0,16 // &__jmp_buf (just in case)
- ;;
- st8.nta [r2] = r26 // ar.rsc
- ;;
- flushrs // flush dirty regs to backing store
- ;;
- and r27 = ~0x3,r26 // clear ar.rsc.mode
- ;;
- mov ar.rsc = r27 // put RSE in enforced lazy mode
- ;;
- mov r28 = ar.rnat
- ;;
- st8.nta [r3] = r28 // ar.rnat
- mov ar.rsc = r26 // restore ar.rsc
- ;;
- mov r8 = 0
- br.ret.sptk.few b0
- .endp _st_md_cxt_save
- /****************************************************************/
- /* _st_md_cxt_restore(__jmp_buf env, int val) */
- .global _st_md_cxt_restore
- .proc _st_md_cxt_restore
- _st_md_cxt_restore:
- alloc r8 = ar.pfs,2,0,0,0
- add r2 = 0x88,in0 // r2 <- &jmpbuf.ar_bsp
- mov r16 = ar.rsc
- ;;
- flushrs // flush dirty regs to backing store
- ;;
- and r17 = ~0x3,r16 // clear ar.rsc.mode
- ;;
- mov ar.rsc = r17 // put RSE in enforced lazy mode
- ;;
- invala // invalidate the ALAT
- ;;
- ld8 r23 = [r2],8 // r23 <- jmpbuf.ar_bsp
- ;;
- mov ar.bspstore = r23 // write BSPSTORE
- ld8 r25 = [r2],24 // r25 <- jmpbuf.ar_unat
- ;;
- ld8 r26 = [r2],-8 // r26 <- jmpbuf.ar_rnat
- ;;
- mov ar.rnat = r26 // write RNAT
- ld8 r27 = [r2] // r27 <- jmpbuf.ar_rsc
- ;;
- mov ar.rsc = r27 // write RSE control
- mov r2 = in0
- ;;
- mov ar.unat = r25 // write ar.unat
- add r3 = 8,in0
- ;;
- ld8.fill.nta sp = [r2],16 // r12 (sp)
- ld8.fill.nta gp = [r3],16 // r1 (gp)
- ;;
- ld8.nta r16 = [r2],16 // caller's unat
- ld8.nta r17 = [r3],16 // fpsr
- ;;
- ld8.fill.nta r4 = [r2],16 // r4
- ld8.fill.nta r5 = [r3],16 // r5
- ;;
- ld8.fill.nta r6 = [r2],16 // r6
- ld8.fill.nta r7 = [r3],16 // r7
- ;;
- mov ar.unat = r16 // restore caller's unat
- mov ar.fpsr = r17 // restore fpsr
- ;;
- ld8.nta r16 = [r2],16 // b0
- ld8.nta r17 = [r3],16 // b1
- ;;
- ld8.nta r18 = [r2],16 // b2
- ld8.nta r19 = [r3],16 // b3
- ;;
- ld8.nta r20 = [r2],16 // b4
- ld8.nta r21 = [r3],16 // b5
- ;;
- ld8.nta r11 = [r2],16 // ar.pfs
- ld8.nta r22 = [r3],72 // ar.lc
- ;;
- ld8.nta r24 = [r2],48 // pr
- mov b0 = r16
- ;;
- ldf.fill.nta f2 = [r2],32
- ldf.fill.nta f3 = [r3],32
- mov b1 = r17
- ;;
- ldf.fill.nta f4 = [r2],32
- ldf.fill.nta f5 = [r3],32
- mov b2 = r18
- ;;
- ldf.fill.nta f16 = [r2],32
- ldf.fill.nta f17 = [r3],32
- mov b3 = r19
- ;;
- ldf.fill.nta f18 = [r2],32
- ldf.fill.nta f19 = [r3],32
- mov b4 = r20
- ;;
- ldf.fill.nta f20 = [r2],32
- ldf.fill.nta f21 = [r3],32
- mov b5 = r21
- ;;
- ldf.fill.nta f22 = [r2],32
- ldf.fill.nta f23 = [r3],32
- mov ar.lc = r22
- ;;
- ldf.fill.nta f24 = [r2],32
- ldf.fill.nta f25 = [r3],32
- cmp.eq p6,p7 = 0,in1
- ;;
- ldf.fill.nta f26 = [r2],32
- ldf.fill.nta f27 = [r3],32
- mov ar.pfs = r11
- ;;
- ldf.fill.nta f28 = [r2],32
- ldf.fill.nta f29 = [r3],32
- ;;
- ldf.fill.nta f30 = [r2]
- ldf.fill.nta f31 = [r3]
- (p6) mov r8 = 1
- (p7) mov r8 = in1
- mov pr = r24,-1
- br.ret.sptk.few b0
- .endp _st_md_cxt_restore
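- /*
-  * Illustrative usage sketch (hypothetical caller, not part of this file).
-  * The pair behaves like setjmp()/longjmp(): _st_md_cxt_save() returns 0
-  * when it captures the context, and _st_md_cxt_restore(env, val) never
-  * returns to its caller; instead the earlier _st_md_cxt_save() appears
-  * to return a second time with val (or 1 if val was 0):
-  *
-  *     extern int  _st_md_cxt_save(__jmp_buf env);
-  *     extern void _st_md_cxt_restore(__jmp_buf env, int val);
-  *
-  *     __jmp_buf env;
-  *     if (_st_md_cxt_save(env) == 0) {
-  *         _st_md_cxt_restore(env, 7);   // does not return
-  *     } else {
-  *         // resumed here; _st_md_cxt_save() "returned" 7
-  *     }
-  *
-  * The prototypes are transcribed from the comments in this file; the
-  * real declarations and the __jmp_buf typedef live in the headers.
-  */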
- /****************************************************************/
- #elif defined(__i386__)
- /****************************************************************/
- /*
- * Internal __jmp_buf layout
- */
- #define JB_BX 0
- #define JB_SI 1
- #define JB_DI 2
- #define JB_BP 3
- #define JB_SP 4
- #define JB_PC 5
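- /*
-  * Illustrative C view (assumption): six 32-bit slots, 24 bytes in total:
-  *
-  *     typedef int __jmp_buf[6];   // [ebx, esi, edi, ebp, esp, pc]
-  *
-  * Illustration only; the real typedef lives in the headers, not here.
-  */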
- .file "md.S"
- .text
- /* _st_md_cxt_save(__jmp_buf env) */
- .globl _st_md_cxt_save
- .type _st_md_cxt_save, @function
- .align 16
- _st_md_cxt_save:
- movl 4(%esp), %eax
- /*
- * Save registers.
- */
- movl %ebx, (JB_BX*4)(%eax)
- movl %esi, (JB_SI*4)(%eax)
- movl %edi, (JB_DI*4)(%eax)
- /* Save SP */
- leal 4(%esp), %ecx
- movl %ecx, (JB_SP*4)(%eax)
- /* Save PC we are returning to */
- movl 0(%esp), %ecx
- movl %ecx, (JB_PC*4)(%eax)
- /* Save caller frame pointer */
- movl %ebp, (JB_BP*4)(%eax)
- xorl %eax, %eax
- ret
- .size _st_md_cxt_save, .-_st_md_cxt_save
- /****************************************************************/
- /* _st_md_cxt_restore(__jmp_buf env, int val) */
- .globl _st_md_cxt_restore
- .type _st_md_cxt_restore, @function
- .align 16
- _st_md_cxt_restore:
- /* First argument is jmp_buf */
- movl 4(%esp), %ecx
- /* Second argument is return value */
- movl 8(%esp), %eax
- /* Set the return address */
- movl (JB_PC*4)(%ecx), %edx
- /*
- * Restore registers.
- */
- movl (JB_BX*4)(%ecx), %ebx
- movl (JB_SI*4)(%ecx), %esi
- movl (JB_DI*4)(%ecx), %edi
- movl (JB_BP*4)(%ecx), %ebp
- movl (JB_SP*4)(%ecx), %esp
- testl %eax, %eax
- jnz 1f
- incl %eax
- /* Jump to saved PC */
- 1: jmp *%edx
- .size _st_md_cxt_restore, .-_st_md_cxt_restore
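- /*
-  * The testl/jnz/incl sequence above is the usual setjmp() return-value
-  * fixup; in C terms (illustrative only):
-  *
-  *     return val ? val : 1;   // never let the "second return" be 0
-  */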
- /****************************************************************/
- #elif defined(__amd64__) || defined(__x86_64__)
- /****************************************************************/
- /*
- * Internal __jmp_buf layout
- */
- #define JB_RBX 0
- #define JB_RBP 1
- #define JB_R12 2
- #define JB_R13 3
- #define JB_R14 4
- #define JB_R15 5
- #define JB_RSP 6
- #define JB_PC 7
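- /*
-  * Illustrative C view (assumption): eight 64-bit slots, 64 bytes in total:
-  *
-  *     typedef long long __jmp_buf[8];   // [rbx, rbp, r12..r15, rsp, pc]
-  *
-  * Illustration only; the real typedef lives in the headers, not here.
-  */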
- .file "md.S"
- .text
- /* _st_md_cxt_save(__jmp_buf env) */
- .globl _st_md_cxt_save
- .type _st_md_cxt_save, @function
- .align 16
- _st_md_cxt_save:
- /*
- * Save registers.
- */
- movq %rbx, (JB_RBX*8)(%rdi)
- movq %rbp, (JB_RBP*8)(%rdi)
- movq %r12, (JB_R12*8)(%rdi)
- movq %r13, (JB_R13*8)(%rdi)
- movq %r14, (JB_R14*8)(%rdi)
- movq %r15, (JB_R15*8)(%rdi)
- /* Save SP */
- leaq 8(%rsp), %rdx
- movq %rdx, (JB_RSP*8)(%rdi)
- /* Save PC we are returning to */
- movq (%rsp), %rax
- movq %rax, (JB_PC*8)(%rdi)
- xorq %rax, %rax
- ret
- .size _st_md_cxt_save, .-_st_md_cxt_save
- /****************************************************************/
- /* _st_md_cxt_restore(__jmp_buf env, int val) */
- .globl _st_md_cxt_restore
- .type _st_md_cxt_restore, @function
- .align 16
- _st_md_cxt_restore:
- /*
- * Restore registers.
- */
- movq (JB_RBX*8)(%rdi), %rbx
- movq (JB_RBP*8)(%rdi), %rbp
- movq (JB_R12*8)(%rdi), %r12
- movq (JB_R13*8)(%rdi), %r13
- movq (JB_R14*8)(%rdi), %r14
- movq (JB_R15*8)(%rdi), %r15
- /* Set return value */
- test %esi, %esi
- mov $01, %eax
- cmove %eax, %esi
- mov %esi, %eax
- movq (JB_PC*8)(%rdi), %rdx
- movq (JB_RSP*8)(%rdi), %rsp
- /* Jump to saved PC */
- jmpq *%rdx
- .size _st_md_cxt_restore, .-_st_md_cxt_restore
- /****************************************************************/
- #elif defined(__aarch64__)
- /****************************************************************/
- /* https://github.com/ossrs/srs/issues/1282#issuecomment-445539513 */
- #define JB_X19 0
- #define JB_X20 1
- #define JB_X21 2
- #define JB_X22 3
- #define JB_X23 4
- #define JB_X24 5
- #define JB_X25 6
- #define JB_X26 7
- #define JB_X27 8
- #define JB_X28 9
- #define JB_X29 10
- #define JB_LR 11
- #define JB_SP 13
- #define JB_D8 14
- #define JB_D9 15
- #define JB_D10 16
- #define JB_D11 17
- #define JB_D12 18
- #define JB_D13 19
- #define JB_D14 20
- #define JB_D15 21
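- /*
-  * Illustrative C view (assumption): the highest index used is JB_D15 = 21
-  * and index 12 is left unused (JB_LR is 11, JB_SP is 13), so the buffer
-  * needs at least 22 eight-byte slots:
-  *
-  *     typedef unsigned long long __jmp_buf[22];
-  *
-  * Illustration only; the real typedef lives in the headers, not here.
-  */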
- .file "md.S"
- .text
- /* _st_md_cxt_save(__jmp_buf env) */
- .globl _st_md_cxt_save
- .type _st_md_cxt_save, %function
- .align 4
- _st_md_cxt_save:
- stp x19, x20, [x0, #JB_X19<<3]
- stp x21, x22, [x0, #JB_X21<<3]
- stp x23, x24, [x0, #JB_X23<<3]
- stp x25, x26, [x0, #JB_X25<<3]
- stp x27, x28, [x0, #JB_X27<<3]
- stp x29, x30, [x0, #JB_X29<<3]
- stp d8, d9, [x0, #JB_D8<<3]
- stp d10, d11, [x0, #JB_D10<<3]
- stp d12, d13, [x0, #JB_D12<<3]
- stp d14, d15, [x0, #JB_D14<<3]
- mov x2, sp
- str x2, [x0, #JB_SP<<3]
- mov x0, #0
- ret
- .size _st_md_cxt_save, .-_st_md_cxt_save
- /****************************************************************/
- /* _st_md_cxt_restore(__jmp_buf env, int val) */
- .globl _st_md_cxt_restore
- .type _st_md_cxt_restore, %function
- .align 4
- _st_md_cxt_restore:
- ldp x19, x20, [x0, #JB_X19<<3]
- ldp x21, x22, [x0, #JB_X21<<3]
- ldp x23, x24, [x0, #JB_X23<<3]
- ldp x25, x26, [x0, #JB_X25<<3]
- ldp x27, x28, [x0, #JB_X27<<3]
- ldp x29, x30, [x0, #JB_X29<<3]
- ldp d8, d9, [x0, #JB_D8<<3]
- ldp d10, d11, [x0, #JB_D10<<3]
- ldp d12, d13, [x0, #JB_D12<<3]
- ldp d14, d15, [x0, #JB_D14<<3]
- ldr x5, [x0, #JB_SP<<3]
- mov sp, x5
- cmp x1, #0
- mov x0, #1
- csel x0, x1, x0, ne
- /* Use br instead of ret because ret is guaranteed to mispredict */
- br x30
- .size _st_md_cxt_restore, .-_st_md_cxt_restore
- /****************************************************************/
- #elif defined(__arm__)
- /****************************************************************/
- /* https://github.com/ossrs/srs/issues/1282#issuecomment-445539513 */
- /* Register list for a ldm/stm instruction to load/store
- the general registers from a __jmp_buf. */
- # define JMP_BUF_REGLIST {v1-v6, sl, fp, sp, lr}
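- /*
-  * Illustrative size note (assumption): the list above covers v1-v6
-  * (r4-r9), sl (r10), fp (r11), sp and lr -- 10 words, 40 bytes.  With
-  * __VFP_FP__ another 64 bytes follow for d8-d15, and with __IWMMXT__ a
-  * further 6 x 8 = 48 bytes for wr10-wr15, so this layout uses up to
-  * 40 + 64 + 48 = 152 bytes of the buffer.
-  */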
- .file "md.S"
- .text
- /* _st_md_cxt_save(__jmp_buf env) */
- .globl _st_md_cxt_save
- .type _st_md_cxt_save, %function
- .align 2
- _st_md_cxt_save:
- mov ip, r0
- /* Save registers */
- stmia ip!, JMP_BUF_REGLIST
- #ifdef __VFP_FP__
- /* Store the VFP registers. */
- /* Following instruction is vstmia ip!, {d8-d15}. */
- stc p11, cr8, [ip], #64
- #endif
- #ifdef __IWMMXT__
- /* Save the call-preserved iWMMXt registers. */
- /* Following instructions are wstrd wr10, [ip], #8 (etc.) */
- stcl p1, cr10, [r12], #8
- stcl p1, cr11, [r12], #8
- stcl p1, cr12, [r12], #8
- stcl p1, cr13, [r12], #8
- stcl p1, cr14, [r12], #8
- stcl p1, cr15, [r12], #8
- #endif
- mov r0, #0
- bx lr
- .size _st_md_cxt_save, .-_st_md_cxt_save
- /****************************************************************/
- /* _st_md_cxt_restore(__jmp_buf env, int val) */
- .globl _st_md_cxt_restore
- .type _st_md_cxt_restore, %function
- .align 2
- _st_md_cxt_restore:
- mov ip, r0
- /* Restore registers */
- ldmia ip!, JMP_BUF_REGLIST
- #ifdef __VFP_FP__
- /* Restore the VFP registers. */
- /* Following instruction is vldmia ip!, {d8-d15}. */
- ldc p11, cr8, [r12], #64
- #endif
- #ifdef __IWMMXT__
- /* Restore the call-preserved iWMMXt registers. */
- /* Following instructions are wldrd wr10, [ip], #8 (etc.) */
- ldcl p1, cr10, [r12], #8
- ldcl p1, cr11, [r12], #8
- ldcl p1, cr12, [r12], #8
- ldcl p1, cr13, [r12], #8
- ldcl p1, cr14, [r12], #8
- ldcl p1, cr15, [r12], #8
- #endif
- movs r0, r1 /* get the return value in place */
- moveq r0, #1 /* can't let setjmp() return zero! */
- bx lr
- .size _st_md_cxt_restore, .-_st_md_cxt_restore
- /****************************************************************/
- #endif
- #endif