/*
 * md.S - machine-dependent context save/restore routines for State Threads.
 *
 * Provides _st_md_cxt_save() / _st_md_cxt_restore(), a minimal
 * setjmp/longjmp replacement, for each supported architecture.
 */
  1. /* If user disable the ASM, such as avoiding bugs in ASM, donot compile it. */
  2. #if !defined(MD_ST_NO_ASM)
  3. /*
  4. * Portions created by SGI are Copyright (C) 2000 Silicon Graphics, Inc.
  5. * All Rights Reserved.
  6. */
  7. #if defined(__ia64__)
  8. /****************************************************************/
  9. /*
  10. * The internal __jmp_buf layout is different from one used
  11. * by setjmp()/longjmp().
  12. *
  13. * Offset Description
  14. * ------ -----------
  15. * 0x000 stack pointer (r12)
  16. * 0x008 gp (r1)
  17. * 0x010 caller's unat
  18. * 0x018 fpsr
  19. * 0x020 r4
  20. * 0x028 r5
  21. * 0x030 r6
  22. * 0x038 r7
  23. * 0x040 rp (b0)
  24. * 0x048 b1
  25. * 0x050 b2
  26. * 0x058 b3
  27. * 0x060 b4
  28. * 0x068 b5
  29. * 0x070 ar.pfs
  30. * 0x078 ar.lc
  31. * 0x080 pr
  32. * 0x088 ar.bsp
  33. * 0x090 ar.unat
  34. * 0x098 &__jmp_buf
  35. * 0x0a0 ar.rsc
  36. * 0x0a8 ar.rnat
  37. * 0x0b0 f2
  38. * 0x0c0 f3
  39. * 0x0d0 f4
  40. * 0x0e0 f5
  41. * 0x0f0 f16
  42. * 0x100 f17
  43. * 0x110 f18
  44. * 0x120 f19
  45. * 0x130 f20
  46. * 0x130 f21
  47. * 0x140 f22
  48. * 0x150 f23
  49. * 0x160 f24
  50. * 0x170 f25
  51. * 0x180 f26
  52. * 0x190 f27
  53. * 0x1a0 f28
  54. * 0x1b0 f29
  55. * 0x1c0 f30
  56. * 0x1d0 f31
  57. *
  58. * Note that the address of __jmp_buf is saved but not used: we assume
  59. * that the jmp_buf data structure is never moved around in memory.
  60. */
  61. /*
  62. * Implemented according to "IA-64 Software Conventions and Runtime
  63. * Architecture Guide", Chapter 10: "Context Management".
  64. */
  65. .text
  66. .psr abi64
  67. .psr lsb
  68. .lsb
  69. /* _st_md_cxt_save(__jmp_buf env) */
  70. .align 32
  71. .global _st_md_cxt_save
  72. .proc _st_md_cxt_save
  73. _st_md_cxt_save:
  74. alloc r14 = ar.pfs,1,0,0,0
  75. mov r16 = ar.unat
  76. ;;
  77. mov r17 = ar.fpsr
  78. mov r2 = in0
  79. add r3 = 8,in0
  80. ;;
  81. st8.spill.nta [r2] = sp,16 // r12 (sp)
  82. ;;
  83. st8.spill.nta [r3] = gp,16 // r1 (gp)
  84. ;;
  85. st8.nta [r2] = r16,16 // save caller's unat
  86. st8.nta [r3] = r17,16 // save fpsr
  87. add r8 = 0xb0,in0
  88. ;;
  89. st8.spill.nta [r2] = r4,16 // r4
  90. ;;
  91. st8.spill.nta [r3] = r5,16 // r5
  92. add r9 = 0xc0,in0
  93. ;;
  94. stf.spill.nta [r8] = f2,32
  95. stf.spill.nta [r9] = f3,32
  96. mov r15 = rp
  97. ;;
  98. stf.spill.nta [r8] = f4,32
  99. stf.spill.nta [r9] = f5,32
  100. mov r17 = b1
  101. ;;
  102. stf.spill.nta [r8] = f16,32
  103. stf.spill.nta [r9] = f17,32
  104. mov r18 = b2
  105. ;;
  106. stf.spill.nta [r8] = f18,32
  107. stf.spill.nta [r9] = f19,32
  108. mov r19 = b3
  109. ;;
  110. stf.spill.nta [r8] = f20,32
  111. stf.spill.nta [r9] = f21,32
  112. mov r20 = b4
  113. ;;
  114. stf.spill.nta [r8] = f22,32
  115. stf.spill.nta [r9] = f23,32
  116. mov r21 = b5
  117. ;;
  118. stf.spill.nta [r8] = f24,32
  119. stf.spill.nta [r9] = f25,32
  120. mov r22 = ar.lc
  121. ;;
  122. stf.spill.nta [r8] = f26,32
  123. stf.spill.nta [r9] = f27,32
  124. mov r24 = pr
  125. ;;
  126. stf.spill.nta [r8] = f28,32
  127. stf.spill.nta [r9] = f29,32
  128. ;;
  129. stf.spill.nta [r8] = f30
  130. stf.spill.nta [r9] = f31
  131. st8.spill.nta [r2] = r6,16 // r6
  132. ;;
  133. st8.spill.nta [r3] = r7,16 // r7
  134. ;;
  135. mov r23 = ar.bsp
  136. mov r25 = ar.unat
  137. st8.nta [r2] = r15,16 // b0
  138. st8.nta [r3] = r17,16 // b1
  139. ;;
  140. st8.nta [r2] = r18,16 // b2
  141. st8.nta [r3] = r19,16 // b3
  142. mov r26 = ar.rsc
  143. ;;
  144. st8.nta [r2] = r20,16 // b4
  145. st8.nta [r3] = r21,16 // b5
  146. ;;
  147. st8.nta [r2] = r14,16 // ar.pfs
  148. st8.nta [r3] = r22,16 // ar.lc
  149. ;;
  150. st8.nta [r2] = r24,16 // pr
  151. st8.nta [r3] = r23,16 // ar.bsp
  152. ;;
  153. st8.nta [r2] = r25,16 // ar.unat
  154. st8.nta [r3] = in0,16 // &__jmp_buf (just in case)
  155. ;;
  156. st8.nta [r2] = r26 // ar.rsc
  157. ;;
  158. flushrs // flush dirty regs to backing store
  159. ;;
  160. and r27 = ~0x3,r26 // clear ar.rsc.mode
  161. ;;
  162. mov ar.rsc = r27 // put RSE in enforced lazy mode
  163. ;;
  164. mov r28 = ar.rnat
  165. ;;
  166. st8.nta [r3] = r28 // ar.rnat
  167. mov ar.rsc = r26 // restore ar.rsc
  168. ;;
  169. mov r8 = 0
  170. br.ret.sptk.few b0
  171. .endp _st_md_cxt_save
  172. /****************************************************************/
  173. /* _st_md_cxt_restore(__jmp_buf env, int val) */
  174. .global _st_md_cxt_restore
  175. .proc _st_md_cxt_restore
  176. _st_md_cxt_restore:
  177. alloc r8 = ar.pfs,2,0,0,0
  178. add r2 = 0x88,in0 // r2 <- &jmpbuf.ar_bsp
  179. mov r16 = ar.rsc
  180. ;;
  181. flushrs // flush dirty regs to backing store
  182. ;;
  183. and r17 = ~0x3,r16 // clear ar.rsc.mode
  184. ;;
  185. mov ar.rsc = r17 // put RSE in enforced lazy mode
  186. ;;
  187. invala // invalidate the ALAT
  188. ;;
  189. ld8 r23 = [r2],8 // r23 <- jmpbuf.ar_bsp
  190. ;;
  191. mov ar.bspstore = r23 // write BSPSTORE
  192. ld8 r25 = [r2],24 // r25 <- jmpbuf.ar_unat
  193. ;;
  194. ld8 r26 = [r2],-8 // r26 <- jmpbuf.ar_rnat
  195. ;;
  196. mov ar.rnat = r26 // write RNAT
  197. ld8 r27 = [r2] // r27 <- jmpbuf.ar_rsc
  198. ;;
  199. mov ar.rsc = r27 // write RSE control
  200. mov r2 = in0
  201. ;;
  202. mov ar.unat = r25 // write ar.unat
  203. add r3 = 8,in0
  204. ;;
  205. ld8.fill.nta sp = [r2],16 // r12 (sp)
  206. ld8.fill.nta gp = [r3],16 // r1 (gp)
  207. ;;
  208. ld8.nta r16 = [r2],16 // caller's unat
  209. ld8.nta r17 = [r3],16 // fpsr
  210. ;;
  211. ld8.fill.nta r4 = [r2],16 // r4
  212. ld8.fill.nta r5 = [r3],16 // r5
  213. ;;
  214. ld8.fill.nta r6 = [r2],16 // r6
  215. ld8.fill.nta r7 = [r3],16 // r7
  216. ;;
  217. mov ar.unat = r16 // restore caller's unat
  218. mov ar.fpsr = r17 // restore fpsr
  219. ;;
  220. ld8.nta r16 = [r2],16 // b0
  221. ld8.nta r17 = [r3],16 // b1
  222. ;;
  223. ld8.nta r18 = [r2],16 // b2
  224. ld8.nta r19 = [r3],16 // b3
  225. ;;
  226. ld8.nta r20 = [r2],16 // b4
  227. ld8.nta r21 = [r3],16 // b5
  228. ;;
  229. ld8.nta r11 = [r2],16 // ar.pfs
  230. ld8.nta r22 = [r3],72 // ar.lc
  231. ;;
  232. ld8.nta r24 = [r2],48 // pr
  233. mov b0 = r16
  234. ;;
  235. ldf.fill.nta f2 = [r2],32
  236. ldf.fill.nta f3 = [r3],32
  237. mov b1 = r17
  238. ;;
  239. ldf.fill.nta f4 = [r2],32
  240. ldf.fill.nta f5 = [r3],32
  241. mov b2 = r18
  242. ;;
  243. ldf.fill.nta f16 = [r2],32
  244. ldf.fill.nta f17 = [r3],32
  245. mov b3 = r19
  246. ;;
  247. ldf.fill.nta f18 = [r2],32
  248. ldf.fill.nta f19 = [r3],32
  249. mov b4 = r20
  250. ;;
  251. ldf.fill.nta f20 = [r2],32
  252. ldf.fill.nta f21 = [r3],32
  253. mov b5 = r21
  254. ;;
  255. ldf.fill.nta f22 = [r2],32
  256. ldf.fill.nta f23 = [r3],32
  257. mov ar.lc = r22
  258. ;;
  259. ldf.fill.nta f24 = [r2],32
  260. ldf.fill.nta f25 = [r3],32
  261. cmp.eq p6,p7 = 0,in1
  262. ;;
  263. ldf.fill.nta f26 = [r2],32
  264. ldf.fill.nta f27 = [r3],32
  265. mov ar.pfs = r11
  266. ;;
  267. ldf.fill.nta f28 = [r2],32
  268. ldf.fill.nta f29 = [r3],32
  269. ;;
  270. ldf.fill.nta f30 = [r2]
  271. ldf.fill.nta f31 = [r3]
  272. (p6) mov r8 = 1
  273. (p7) mov r8 = in1
  274. mov pr = r24,-1
  275. br.ret.sptk.few b0
  276. .endp _st_md_cxt_restore
  277. /****************************************************************/
  278. #elif defined(__i386__)
  279. /****************************************************************/
  280. /*
  281. * Internal __jmp_buf layout
  282. */
  283. #define JB_BX 0
  284. #define JB_SI 1
  285. #define JB_DI 2
  286. #define JB_BP 3
  287. #define JB_SP 4
  288. #define JB_PC 5
  289. .file "md.S"
  290. .text
  291. /* _st_md_cxt_save(__jmp_buf env) */
  292. .globl _st_md_cxt_save
  293. .type _st_md_cxt_save, @function
  294. .align 16
  295. _st_md_cxt_save:
  296. movl 4(%esp), %eax
  297. /*
  298. * Save registers.
  299. */
  300. movl %ebx, (JB_BX*4)(%eax)
  301. movl %esi, (JB_SI*4)(%eax)
  302. movl %edi, (JB_DI*4)(%eax)
  303. /* Save SP */
  304. leal 4(%esp), %ecx
  305. movl %ecx, (JB_SP*4)(%eax)
  306. /* Save PC we are returning to */
  307. movl 0(%esp), %ecx
  308. movl %ecx, (JB_PC*4)(%eax)
  309. /* Save caller frame pointer */
  310. movl %ebp, (JB_BP*4)(%eax)
  311. xorl %eax, %eax
  312. ret
  313. .size _st_md_cxt_save, .-_st_md_cxt_save
  314. /****************************************************************/
  315. /* _st_md_cxt_restore(__jmp_buf env, int val) */
  316. .globl _st_md_cxt_restore
  317. .type _st_md_cxt_restore, @function
  318. .align 16
  319. _st_md_cxt_restore:
  320. /* First argument is jmp_buf */
  321. movl 4(%esp), %ecx
  322. /* Second argument is return value */
  323. movl 8(%esp), %eax
  324. /* Set the return address */
  325. movl (JB_PC*4)(%ecx), %edx
  326. /*
  327. * Restore registers.
  328. */
  329. movl (JB_BX*4)(%ecx), %ebx
  330. movl (JB_SI*4)(%ecx), %esi
  331. movl (JB_DI*4)(%ecx), %edi
  332. movl (JB_BP*4)(%ecx), %ebp
  333. movl (JB_SP*4)(%ecx), %esp
  334. testl %eax, %eax
  335. jnz 1f
  336. incl %eax
  337. /* Jump to saved PC */
  338. 1: jmp *%edx
  339. .size _st_md_cxt_restore, .-_st_md_cxt_restore
  340. /****************************************************************/
  341. #elif defined(__amd64__) || defined(__x86_64__)
  342. /****************************************************************/
  343. /*
  344. * Internal __jmp_buf layout
  345. */
  346. #define JB_RBX 0
  347. #define JB_RBP 1
  348. #define JB_R12 2
  349. #define JB_R13 3
  350. #define JB_R14 4
  351. #define JB_R15 5
  352. #define JB_RSP 6
  353. #define JB_PC 7
  354. .file "md.S"
  355. .text
  356. /* _st_md_cxt_save(__jmp_buf env) */
  357. .globl _st_md_cxt_save
  358. .type _st_md_cxt_save, @function
  359. .align 16
  360. _st_md_cxt_save:
  361. /*
  362. * Save registers.
  363. */
  364. movq %rbx, (JB_RBX*8)(%rdi)
  365. movq %rbp, (JB_RBP*8)(%rdi)
  366. movq %r12, (JB_R12*8)(%rdi)
  367. movq %r13, (JB_R13*8)(%rdi)
  368. movq %r14, (JB_R14*8)(%rdi)
  369. movq %r15, (JB_R15*8)(%rdi)
  370. /* Save SP */
  371. leaq 8(%rsp), %rdx
  372. movq %rdx, (JB_RSP*8)(%rdi)
  373. /* Save PC we are returning to */
  374. movq (%rsp), %rax
  375. movq %rax, (JB_PC*8)(%rdi)
  376. xorq %rax, %rax
  377. ret
  378. .size _st_md_cxt_save, .-_st_md_cxt_save
  379. /****************************************************************/
  380. /* _st_md_cxt_restore(__jmp_buf env, int val) */
  381. .globl _st_md_cxt_restore
  382. .type _st_md_cxt_restore, @function
  383. .align 16
  384. _st_md_cxt_restore:
  385. /*
  386. * Restore registers.
  387. */
  388. movq (JB_RBX*8)(%rdi), %rbx
  389. movq (JB_RBP*8)(%rdi), %rbp
  390. movq (JB_R12*8)(%rdi), %r12
  391. movq (JB_R13*8)(%rdi), %r13
  392. movq (JB_R14*8)(%rdi), %r14
  393. movq (JB_R15*8)(%rdi), %r15
  394. /* Set return value */
  395. test %esi, %esi
  396. mov $01, %eax
  397. cmove %eax, %esi
  398. mov %esi, %eax
  399. movq (JB_PC*8)(%rdi), %rdx
  400. movq (JB_RSP*8)(%rdi), %rsp
  401. /* Jump to saved PC */
  402. jmpq *%rdx
  403. .size _st_md_cxt_restore, .-_st_md_cxt_restore
  404. /****************************************************************/
  405. #elif defined(__aarch64__)
  406. /****************************************************************/
  407. /* https://github.com/ossrs/srs/issues/1282#issuecomment-445539513 */
  408. #define JB_X19 0
  409. #define JB_X20 1
  410. #define JB_X21 2
  411. #define JB_X22 3
  412. #define JB_X23 4
  413. #define JB_X24 5
  414. #define JB_X25 6
  415. #define JB_X26 7
  416. #define JB_X27 8
  417. #define JB_X28 9
  418. #define JB_X29 10
  419. #define JB_LR 11
  420. #define JB_SP 13
  421. #define JB_D8 14
  422. #define JB_D9 15
  423. #define JB_D10 16
  424. #define JB_D11 17
  425. #define JB_D12 18
  426. #define JB_D13 19
  427. #define JB_D14 20
  428. #define JB_D15 21
  429. .file "md.S"
  430. .text
  431. /* _st_md_cxt_save(__jmp_buf env) */
  432. .globl _st_md_cxt_save
  433. .type _st_md_cxt_save, %function
  434. .align 4
  435. _st_md_cxt_save:
  436. stp x19, x20, [x0, #JB_X19<<3]
  437. stp x21, x22, [x0, #JB_X21<<3]
  438. stp x23, x24, [x0, #JB_X23<<3]
  439. stp x25, x26, [x0, #JB_X25<<3]
  440. stp x27, x28, [x0, #JB_X27<<3]
  441. stp x29, x30, [x0, #JB_X29<<3]
  442. stp d8, d9, [x0, #JB_D8<<3]
  443. stp d10, d11, [x0, #JB_D10<<3]
  444. stp d12, d13, [x0, #JB_D12<<3]
  445. stp d14, d15, [x0, #JB_D14<<3]
  446. mov x2, sp
  447. str x2, [x0, #JB_SP<<3]
  448. mov x0, #0
  449. ret
  450. .size _st_md_cxt_save, .-_st_md_cxt_save
  451. /****************************************************************/
  452. /* _st_md_cxt_restore(__jmp_buf env, int val) */
  453. .globl _st_md_cxt_restore
  454. .type _st_md_cxt_restore, %function
  455. .align 4
  456. _st_md_cxt_restore:
  457. ldp x19, x20, [x0, #JB_X19<<3]
  458. ldp x21, x22, [x0, #JB_X21<<3]
  459. ldp x23, x24, [x0, #JB_X23<<3]
  460. ldp x25, x26, [x0, #JB_X25<<3]
  461. ldp x27, x28, [x0, #JB_X27<<3]
  462. ldp x29, x30, [x0, #JB_X29<<3]
  463. ldp d8, d9, [x0, #JB_D8<<3]
  464. ldp d10, d11, [x0, #JB_D10<<3]
  465. ldp d12, d13, [x0, #JB_D12<<3]
  466. ldp d14, d15, [x0, #JB_D14<<3]
  467. ldr x5, [x0, #JB_SP<<3]
  468. mov sp, x5
  469. cmp x1, #0
  470. mov x0, #1
  471. csel x0, x1, x0, ne
  472. /* Use br instead of ret because ret is guaranteed to mispredict */
  473. br x30
  474. .size _st_md_cxt_restore, .-_st_md_cxt_restore
  475. /****************************************************************/
  476. #elif defined(__arm__)
  477. /****************************************************************/
  478. /* https://github.com/ossrs/srs/issues/1282#issuecomment-445539513 */
  479. /* Register list for a ldm/stm instruction to load/store
  480. the general registers from a __jmp_buf. */
  481. # define JMP_BUF_REGLIST {v1-v6, sl, fp, sp, lr}
  482. .file "md.S"
  483. .text
  484. /* _st_md_cxt_save(__jmp_buf env) */
  485. .globl _st_md_cxt_save
  486. .type _st_md_cxt_save, %function
  487. .align 2
  488. _st_md_cxt_save:
  489. mov ip, r0
  490. /* Save registers */
  491. stmia ip!, JMP_BUF_REGLIST
  492. #ifdef __VFP_FP__
  493. /* Store the VFP registers. */
  494. /* Following instruction is vstmia ip!, {d8-d15}. */
  495. stc p11, cr8, [ip], #64
  496. #endif
  497. #ifdef __IWMMXT__
  498. /* Save the call-preserved iWMMXt registers. */
  499. /* Following instructions are wstrd wr10, [ip], #8 (etc.) */
  500. stcl p1, cr10, [r12], #8
  501. stcl p1, cr11, [r12], #8
  502. stcl p1, cr12, [r12], #8
  503. stcl p1, cr13, [r12], #8
  504. stcl p1, cr14, [r12], #8
  505. stcl p1, cr15, [r12], #8
  506. #endif
  507. mov r0, #0
  508. bx lr
  509. .size _st_md_cxt_save, .-_st_md_cxt_save
  510. /****************************************************************/
  511. /* _st_md_cxt_restore(__jmp_buf env, int val) */
  512. .globl _st_md_cxt_restore
  513. .type _st_md_cxt_restore, %function
  514. .align 2
  515. _st_md_cxt_restore:
  516. mov ip, r0
  517. /* Restore registers */
  518. ldmia ip!, JMP_BUF_REGLIST
  519. #ifdef __VFP_FP__
  520. /* Restore the VFP registers. */
  521. /* Following instruction is vldmia ip!, {d8-d15}. */
  522. ldc p11, cr8, [r12], #64
  523. #endif
  524. #ifdef __IWMMXT__
  525. /* Restore the call-preserved iWMMXt registers. */
  526. /* Following instructions are wldrd wr10, [ip], #8 (etc.) */
  527. ldcl p1, cr10, [r12], #8
  528. ldcl p1, cr11, [r12], #8
  529. ldcl p1, cr12, [r12], #8
  530. ldcl p1, cr13, [r12], #8
  531. ldcl p1, cr14, [r12], #8
  532. ldcl p1, cr15, [r12], #8
  533. #endif
  534. movs r0, r1 /* get the return value in place */
  535. moveq r0, #1 /* can't let setjmp() return zero! */
  536. bx lr
  537. .size _st_md_cxt_restore, .-_st_md_cxt_restore
  538. /****************************************************************/
  539. #endif
  540. #endif