rc4-s390x.pl 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. #! /usr/bin/env perl
  2. # Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.
  3. #
  4. # Licensed under the OpenSSL license (the "License"). You may not use
  5. # this file except in compliance with the License. You can obtain a copy
  6. # in the file LICENSE in the source distribution or at
  7. # https://www.openssl.org/source/license.html
  8. #
  9. # ====================================================================
  10. # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
  11. # project. The module is, however, dual licensed under OpenSSL and
  12. # CRYPTOGAMS licenses depending on where you obtain it. For further
  13. # details see http://www.openssl.org/~appro/cryptogams/.
  14. # ====================================================================
  15. #
  16. # February 2009
  17. #
  18. # Performance is 2x of gcc 3.4.6 on z10. Coding "secret" is to
  19. # "cluster" Address Generation Interlocks, so that one pipeline stall
  20. # resolves several dependencies.
  21. # November 2010.
  22. #
  23. # Adapt for -m31 build. If kernel supports what's called "highgprs"
  24. # feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
  25. # instructions and achieve "64-bit" performance even in 31-bit legacy
  26. # application context. The feature is not specific to any particular
  27. # processor, as long as it's "z-CPU". The latter implies that the code
  28. # remains z/Architecture specific. On z990 it was measured to perform
  29. # 50% better than code generated by gcc 4.3.
  # Command line: the first argument is the build flavour; the output file is
  # the first later argument that looks like "name.ext".
  $flavour = shift;

  # A flavour containing "31" or "32" selects a 4-byte size_t and the plain
  # stm/lm mnemonics; anything else selects 8 bytes and the "g" forms.
  if ($flavour =~ /3[12]/) {
      $SIZE_T = 4;
      $g      = "";
  } else {
      $SIZE_T = 8;
      $g      = "g";
  }

  # Consume arguments until one looks like a file name or the list runs out.
  while (($output = shift) && ($output !~ /\w[\w\-]*\.\w+$/)) {}

  # Redirect STDOUT only when an output file was actually named; without one
  # the generated assembly goes to the inherited STDOUT.  The three-argument
  # form keeps the file name from being parsed as part of the mode, and a
  # failure to create the file is fatal instead of being silently ignored.
  if ($output) {
      open(STDOUT, '>', $output) or die "can't open $output: $!";
  }

  $rp = "%r14";    # register the generated routines return through ("br")
  $sp = "%r15";    # base register for the register save/restore slots

  # All generated assembly is accumulated in $code and printed at the end.
  $code = <<___;
.text
___
  # void RC4(RC4_KEY *key,size_t len,const void *inp,void *out)
  #
  # Appends the assembly for RC4 to $code.  The generated routine reads one
  # byte each at offsets 0 and 1 of *key (the two running indices) and treats
  # the bytes from offset 2 onwards as the byte-wide state table.  It
  # processes len/8 eight-byte groups in an unrolled loop, then the remaining
  # len%8 bytes one at a time, and finally writes both indices back.
  {
      # Registers named after the prototype's arguments.
      ($key, $len, $inp, $out) = ("%r2", "%r3", "%r4", "%r5");

      # $acc collects keystream bytes, $cnt counts eight-byte groups.
      ($acc, $cnt) = ("%r0", "%r1");

      # Two-register sets for the X index and the table byte at X; they swap
      # roles after every unrolled step.  $YY is the Y index; $TY holds the
      # table byte at Y and then the index of the keystream byte.
      @XX = ("%r6", "%r7");
      @TX = ("%r8", "%r9");
      ($YY, $TY) = ("%r10", "%r11");

      # Entry point: store %r6-%r11 at 6*SIZE_T off the stack pointer.
      $code .= <<___;
.globl RC4
.type RC4,\@function
.align 64
RC4:
stm${g} %r6,%r11,6*$SIZE_T($sp)
___

      # 31/32-bit flavours only: widen the 32-bit length to 64 bits.
      if ($flavour =~ /3[12]/) {
          $code .= <<___;
llgfr $len,$len
___
      }

      # Load both indices, advance X by one modulo 256, preload the table
      # byte at X, and skip the unrolled loop when len/8 is zero.
      $code .= <<___;
llgc $XX[0],0($key)
llgc $YY,1($key)
la $XX[0],1($XX[0])
nill $XX[0],0xff
srlg $cnt,$len,3
ltgr $cnt,$cnt
llgc $TX[0],2($XX[0],$key)
jz .Lshort
j .Loop8
.align 64
.Loop8:
___

      # Eight unrolled steps per pass through .Loop8.
      for my $i (0 .. 7) {
          # Y += table[X] (mod 256); compute the next X (mod 256).
          $code .= <<___;
la $YY,0($YY,$TX[0]) # $i
nill $YY,255
la $XX[1],1($XX[0])
nill $XX[1],255
___

          # Fetch the keystream byte selected by the previous step: step 1
          # starts $acc with it, later steps shift $acc left and insert it.
          # Step 0 has no previous step, so it emits nothing here.
          if ($i == 1) {
              $code .= <<___;
llgc $acc,2($TY,$key)
___
          } elsif ($i > 1) {
              $code .= <<___;
sllg $acc,$acc,8
ic $acc,2($TY,$key)
___
          }

          # Exchange table[X] and table[Y] and preload table[next X]; when
          # next X equals Y the preloaded register is set from $TX[0]
          # instead.  $TY ends up as (table[X] + table[Y]) mod 256.
          $code .= <<___;
llgc $TY,2($YY,$key)
stc $TX[0],2($YY,$key)
llgc $TX[1],2($XX[1],$key)
stc $TY,2($XX[0],$key)
cr $XX[1],$YY
jne .Lcmov$i
la $TX[1],0($TX[0])
.Lcmov$i:
la $TY,0($TY,$TX[0])
nill $TY,255
___

          # Rotate both register sets so "next" becomes "current".  Eight
          # rotations of a two-element list restore the starting order, so
          # the code after the loop sees the same assignment as before it.
          @TX = (@TX[1 .. $#TX], $TX[0]);
          @XX = (@XX[1 .. $#XX], $XX[0]);
      }

      # Finish the eight-byte group: insert the last keystream byte, XOR
      # with eight input bytes, store, advance both pointers and repeat
      # while $cnt is non-zero.  .Lshort/.Loop1 then handle len & 7 bytes
      # one at a time, and .Lexit writes back X-1 and Y, reloads %r6-%r11
      # and returns.
      $code .= <<___;
lg $TX[1],0($inp)
sllg $acc,$acc,8
la $inp,8($inp)
ic $acc,2($TY,$key)
xgr $acc,$TX[1]
stg $acc,0($out)
la $out,8($out)
brctg $cnt,.Loop8
.Lshort:
lghi $acc,7
ngr $len,$acc
jz .Lexit
j .Loop1
.align 16
.Loop1:
la $YY,0($YY,$TX[0])
nill $YY,255
llgc $TY,2($YY,$key)
stc $TX[0],2($YY,$key)
stc $TY,2($XX[0],$key)
ar $TY,$TX[0]
ahi $XX[0],1
nill $TY,255
nill $XX[0],255
llgc $acc,0($inp)
la $inp,1($inp)
llgc $TY,2($TY,$key)
llgc $TX[0],2($XX[0],$key)
xr $acc,$TY
stc $acc,0($out)
la $out,1($out)
brct $len,.Loop1
.Lexit:
ahi $XX[0],-1
stc $XX[0],0($key)
stc $YY,1($key)
lm${g} %r6,%r11,6*$SIZE_T($sp)
br $rp
.size RC4,.-RC4
.string "RC4 for s390x, CRYPTOGAMS by <appro\@openssl.org>"
___
  }
  # void RC4_set_key(RC4_KEY *key,unsigned int len,const void *inp)
  #
  # Appends the assembly for RC4_set_key to $code.  The generated routine
  # zeroes the two bytes at offsets 0 and 1 of *key, fills the 256-byte
  # table at offset 2 with 0..255, then makes one pass over the table that
  # mixes in the len input bytes, restarting from the first input byte each
  # time len of them have been consumed.
  {
      # Registers named after the prototype's arguments.
      ($key, $len, $inp) = ("%r2", "%r3", "%r4");

      # Loop counter and running table index.
      ($cnt, $idx) = ("%r0", "%r1");

      # Table byte and input byte currently being combined.
      ($acc, $dat) = ("%r5", "%r6");

      # Table position (counts up from -256 to 0) and input position.
      ($ikey, $iinp) = ("%r7", "%r8");

      # Entry point: store %r6-%r8, set up a count of 256 and clear the
      # halfword at offset 0 of *key.
      $code .= <<___;
.globl RC4_set_key
.type RC4_set_key,\@function
.align 64
RC4_set_key:
stm${g} %r6,%r8,6*$SIZE_T($sp)
lhi $cnt,256
la $idx,0
sth $idx,0($key)
___

      # First loop: table[i] = i for all 256 entries.
      $code .= <<___;
.align 4
.L1stloop:
stc $idx,2($idx,$key)
la $idx,1($idx)
brct $cnt,.L1stloop
___

      # Reset the counters for the mixing pass.
      $code .= <<___;
lghi $ikey,-256
lr $cnt,$len
la $iinp,0
la $idx,0
___

      # Second loop: idx = (idx + table[pos] + inp[iinp]) mod 256, then
      # exchange table[pos] and table[idx].  The pass ends when the low
      # eight bits of $ikey reach zero; when $cnt runs out first, the input
      # position wraps back to the start.
      $code .= <<___;
.align 16
.L2ndloop:
llgc $acc,2+256($ikey,$key)
llgc $dat,0($iinp,$inp)
la $idx,0($idx,$acc)
la $ikey,1($ikey)
la $idx,0($idx,$dat)
nill $idx,255
la $iinp,1($iinp)
tml $ikey,255
llgc $dat,2($idx,$key)
stc $dat,2+256-1($ikey,$key)
stc $acc,2($idx,$key)
jz .Ldone
brct $cnt,.L2ndloop
lr $cnt,$len
la $iinp,0
j .L2ndloop
___

      # Exit: reload %r6-%r8 and return.
      $code .= <<___;
.Ldone:
lm${g} %r6,%r8,6*$SIZE_T($sp)
br $rp
.size RC4_set_key,.-RC4_set_key
___
  }
  # const char *RC4_options()
  #
  # Appends the assembly for RC4_options to $code: the routine loads the
  # address of a constant string placed in .rodata into %r2 and returns.
  # The .align directive is emitted before the .Loptions label so that any
  # padding the assembler inserts lands in front of the label and the label
  # always marks the first byte of the string.
  $code .= <<___;
.globl RC4_options
.type RC4_options,\@function
.align 16
RC4_options:
larl %r2,.Loptions
br $rp
.size RC4_options,.-RC4_options
.section .rodata
.align 8
.Loptions:
.string "rc4(8x,char)"
___

  # Emit everything accumulated so far.  Closing STDOUT explicitly makes a
  # failed flush of the buffered output fatal instead of going unnoticed.
  print $code;
  close STDOUT or die "error closing STDOUT: $!"; # force flush