lbnalpha.s 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. /*
  2. * Copyright (c) 1995 Colin Plumb. All rights reserved.
  3. * For licensing and other legal details, see the file legal.c.
  4. *
  5. * DEC Alpha 64-bit math primitives. These use 64-bit words
  6. * unless otherwise noted.
  7. *
  8. * The DEC assembler apparently does some instruction scheduling,
  9. * but I tried to do some of my own, and tried to spread things
  10. * out over the register file to give the assembler more room
  11. * to schedule things.
  12. *
  13. * Alpha OSF/1 register usage conventions:
  14. * r0 - v0 - Temp, holds integer return value
  15. * r1..r8 - t0..t7 - Temp, trashed by procedure call
  16. * r9..r14 - s0..s5 - Saved across procedure calls
  17. * r15 - s6/FP - Frame pointer, saved across procedure calls
  18. * r16..r21 - a0..a5 - Argument registers, all trashed by procedure call
  19. * r22..r25 - t8..t11 - Temp, trashed by procedure call
  20. * r26 - ra - Return address
  21. * r27 - t12/pv - Procedure value, trashed by procedure call
  22. * r28 - at - Assembler temp, trashed by procedure call
  23. * r29 - gp - Global pointer
  24. * r30 - sp - Stack pointer
  25. * r31 - zero - hardwired to zero
  26. */
  .text
  .align 4
  .globl lbnMulN1_64
  /*
   * The trailing operand on .ent is carried over unchanged from the
   * original source.  NOTE(review): presumably the assembler's optional
   * lex-level operand -- confirm against the assembler manual.
   */
  .ent lbnMulN1_64 2
/*
 * lbnMulN1_64 -- multiply a multi-word number by a single 64-bit word.
 *
 * Stores the product of in[0..len-1] and k into out[0..len]; the final
 * carry word is written one word past the last low product word, so
 * len+1 words of out are written in total.
 *
 * Arguments:
 *   $16 = out   pointer to the result words
 *   $17 = in    pointer to the multiplicand words
 *   $18 = len   32-bit word count (counted down with subl)
 *   $19 = k     64-bit multiplier
 *
 * Scratch registers:
 *   $0 = carry word into the next position
 *   $1 = low half of the current product (then low half + carry)
 *   $2 = high half of the current product
 *   $3 = current input word
 *
 * The first word is multiplied and stored before len is tested, so len
 * is assumed to be at least 1.  The loop body is unrolled twice, with an
 * exit test after each copy.  Instruction order is kept exactly as
 * hand-scheduled in the original.
 */
lbnMulN1_64:
        ldq     $3,0($17)               /* $3 = in[0] */
        subl    $18,1,$18               /* One word consumed */
        mulq    $3,$19,$1               /* $1 = low 64 bits of in[0]*k */
        umulh   $3,$19,$0               /* $0 = high 64 bits -> first carry word */
        stq     $1,0($16)               /* out[0] = low half */
        beq     $18,lbnMulN1_64_done    /* Single-word input: only the carry is left */

lbnMulN1_64_loop:
        /* --- first copy of the unrolled body --- */
        ldq     $3,8($17)               /* Fetch the next input word ... */
        addq    $17,8,$17               /* ... and step the input pointer */
        mulq    $3,$19,$1               /* Low half of this product */
        subl    $18,1,$18               /* Count this word */
        umulh   $3,$19,$2               /* High half of this product */
        addq    $0,$1,$1                /* Low half + incoming carry word */
        stq     $1,8($16)               /* Store the finished result word */
        cmpult  $1,$0,$0                /* $0 = 1 if that addition wrapped */
        addq    $16,8,$16               /* Step the output pointer */
        addq    $2,$0,$0                /* New carry word = high half + wrap bit */
        beq     $18,lbnMulN1_64_done    /* Leave mid-iteration when the count hits zero */

        /* --- second copy of the unrolled body --- */
        ldq     $3,8($17)               /* Fetch the next input word ... */
        addq    $17,8,$17               /* ... and step the input pointer */
        mulq    $3,$19,$1               /* Low half of this product */
        subl    $18,1,$18               /* Count this word */
        umulh   $3,$19,$2               /* High half of this product */
        addq    $0,$1,$1                /* Low half + incoming carry word */
        stq     $1,8($16)               /* Store the finished result word */
        cmpult  $1,$0,$0                /* $0 = 1 if that addition wrapped */
        addq    $16,8,$16               /* Step the output pointer */
        addq    $2,$0,$0                /* New carry word = high half + wrap bit */
        bne     $18,lbnMulN1_64_loop    /* More words remain: go around again */

lbnMulN1_64_done:
        stq     $0,8($16)               /* Top word of the result = final carry word */
        ret     $31,($26),1             /* Return through $26; hint 1 = procedure return (software convention) */
  .end lbnMulN1_64
  .text
  .align 4
  .globl lbnMulAdd1_64
  .ent lbnMulAdd1_64 2
/*
 * lbnMulAdd1_64 -- multiply-accumulate by a single 64-bit word.
 *
 * Adds in[0..len-1] * k into out[0..len-1] in place.  The carry word
 * that falls off the top is left in $0 (the integer return register)
 * and is not stored to memory.
 *
 * Arguments:
 *   $16 = out   pointer to the accumulator words (read and written)
 *   $17 = in    pointer to the multiplicand words
 *   $18 = len   32-bit word count (counted down with subl)
 *   $19 = k     64-bit multiplier
 *
 * Scratch registers:
 *   $0 = carry word into the next position; return value at exit
 *   $1 = low half of the current product (then low half + carry)
 *   $2 = high half of the current product
 *   $3 = current input word
 *   $4 = current output word
 *   $5 = carry bit out of the add into the output word
 *
 * The first word is processed before len is tested, so len is assumed
 * to be at least 1.  The loop body is unrolled twice, with an exit test
 * after each copy.  Instruction order is kept exactly as hand-scheduled
 * in the original.
 */
lbnMulAdd1_64:
        ldq     $3,0($17)               /* $3 = in[0] */
        subl    $18,1,$18               /* One word consumed */
        mulq    $3,$19,$1               /* $1 = low 64 bits of in[0]*k */
        ldq     $4,0($16)               /* $4 = out[0] */
        umulh   $3,$19,$2               /* $2 = high 64 bits of in[0]*k */
        addq    $4,$1,$4                /* out[0] + low half */
        cmpult  $4,$1,$5                /* $5 = carry bit out of that add */
        stq     $4,0($16)               /* Write back out[0] */
        addq    $5,$2,$0                /* Carry word = high half + carry bit */
        beq     $18,lbnMulAdd1_64_done  /* Single-word input: finished */

lbnMulAdd1_64_loop:
        /* --- first copy of the unrolled body --- */
        ldq     $3,8($17)               /* Fetch the next input word ... */
        addq    $17,8,$17               /* ... and step the input pointer */
        ldq     $4,8($16)               /* Fetch the matching output word */
        mulq    $3,$19,$1               /* Low half of this product */
        subl    $18,1,$18               /* Count this word */
        addq    $0,$1,$1                /* Low half + incoming carry word */
        umulh   $3,$19,$2               /* High half of this product */
        cmpult  $1,$0,$0                /* $0 = 1 if the carry-word add wrapped */
        addq    $4,$1,$4                /* Accumulate into the output word */
        cmpult  $4,$1,$5                /* $5 = 1 if the accumulate wrapped */
        stq     $4,8($16)               /* Write the output word back */
        addq    $5,$0,$5                /* Sum of the two wrap bits */
        addq    $16,8,$16               /* Step the output pointer */
        addq    $5,$2,$0                /* New carry word = high half + wrap bits */
        beq     $18,lbnMulAdd1_64_done  /* Leave mid-iteration when the count hits zero */

        /* --- second copy of the unrolled body --- */
        ldq     $3,8($17)               /* Fetch the next input word ... */
        addq    $17,8,$17               /* ... and step the input pointer */
        ldq     $4,8($16)               /* Fetch the matching output word */
        mulq    $3,$19,$1               /* Low half of this product */
        subl    $18,1,$18               /* Count this word */
        addq    $0,$1,$1                /* Low half + incoming carry word */
        umulh   $3,$19,$2               /* High half of this product */
        cmpult  $1,$0,$0                /* $0 = 1 if the carry-word add wrapped */
        addq    $4,$1,$4                /* Accumulate into the output word */
        cmpult  $4,$1,$5                /* $5 = 1 if the accumulate wrapped */
        stq     $4,8($16)               /* Write the output word back */
        addq    $5,$0,$5                /* Sum of the two wrap bits */
        addq    $16,8,$16               /* Step the output pointer */
        addq    $5,$2,$0                /* New carry word = high half + wrap bits */
        bne     $18,lbnMulAdd1_64_loop  /* More words remain: go around again */

lbnMulAdd1_64_done:
        ret     $31,($26),1             /* Return through $26 with the carry word in $0 */
  .end lbnMulAdd1_64
  .text
  .align 4
  .globl lbnMulSub1_64
  .ent lbnMulSub1_64 2
/*
 * lbnMulSub1_64 -- multiply-subtract by a single 64-bit word.
 *
 * Subtracts in[0..len-1] * k from out[0..len-1] in place.  The borrow
 * word that falls off the top is left in $0 (the integer return
 * register) and is not stored to memory.
 *
 * Arguments:
 *   $16 = out   pointer to the words being reduced (read and written)
 *   $17 = in    pointer to the multiplicand words
 *   $18 = len   32-bit word count (counted down with subl)
 *   $19 = k     64-bit multiplier
 *
 * Scratch registers:
 *   $0 = carry/borrow word into the next position; return value at exit
 *   $1 = low half of the current product (then low half + carry)
 *   $2 = high half of the current product
 *   $3 = current input word
 *   $4 = current output word
 *   $5 = borrow bit out of the subtract from the output word
 *
 * The first word is processed before len is tested, so len is assumed
 * to be at least 1.  The loop body is unrolled twice, with an exit test
 * after each copy.  Instruction order is kept exactly as hand-scheduled
 * in the original.
 */
lbnMulSub1_64:
        ldq     $3,0($17)               /* $3 = in[0] */
        subl    $18,1,$18               /* One word consumed */
        mulq    $3,$19,$1               /* $1 = low 64 bits of in[0]*k */
        ldq     $4,0($16)               /* $4 = out[0] */
        umulh   $3,$19,$2               /* $2 = high 64 bits of in[0]*k */
        cmpult  $4,$1,$5                /* $5 = 1 if out[0] < low half (subtract will borrow) */
        subq    $4,$1,$4                /* out[0] - low half */
        addq    $5,$2,$0                /* Carry word = high half + borrow bit */
        stq     $4,0($16)               /* Write back out[0] */
        beq     $18,lbnMulSub1_64_done  /* Single-word input: finished */

lbnMulSub1_64_loop:
        /* --- first copy of the unrolled body --- */
        ldq     $3,8($17)               /* Fetch the next input word ... */
        addq    $17,8,$17               /* ... and step the input pointer */
        ldq     $4,8($16)               /* Fetch the matching output word */
        mulq    $3,$19,$1               /* Low half of this product */
        subl    $18,1,$18               /* Count this word */
        addq    $0,$1,$1                /* Low half + incoming carry word */
        umulh   $3,$19,$2               /* High half of this product */
        cmpult  $1,$0,$0                /* $0 = 1 if the carry-word add wrapped */
        cmpult  $4,$1,$5                /* $5 = 1 if the subtract below will borrow */
        subq    $4,$1,$4                /* Subtract from the output word */
        addq    $5,$0,$5                /* Wrap bit + borrow bit */
        stq     $4,8($16)               /* Write the output word back */
        addq    $5,$2,$0                /* New carry word = high half + those bits */
        addq    $16,8,$16               /* Step the output pointer */
        beq     $18,lbnMulSub1_64_done  /* Leave mid-iteration when the count hits zero */

        /* --- second copy of the unrolled body --- */
        ldq     $3,8($17)               /* Fetch the next input word ... */
        addq    $17,8,$17               /* ... and step the input pointer */
        ldq     $4,8($16)               /* Fetch the matching output word */
        mulq    $3,$19,$1               /* Low half of this product */
        subl    $18,1,$18               /* Count this word */
        addq    $0,$1,$1                /* Low half + incoming carry word */
        umulh   $3,$19,$2               /* High half of this product */
        cmpult  $1,$0,$0                /* $0 = 1 if the carry-word add wrapped */
        cmpult  $4,$1,$5                /* $5 = 1 if the subtract below will borrow */
        subq    $4,$1,$4                /* Subtract from the output word */
        addq    $5,$0,$5                /* Wrap bit + borrow bit */
        stq     $4,8($16)               /* Write the output word back */
        addq    $5,$2,$0                /* New carry word = high half + those bits */
        addq    $16,8,$16               /* Step the output pointer */
        bne     $18,lbnMulSub1_64_loop  /* More words remain: go around again */

lbnMulSub1_64_done:
        ret     $31,($26),1             /* Return through $26 with the borrow word in $0 */
  .end lbnMulSub1_64