poly1305-s390x.pl 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. #! /usr/bin/env perl
  2. # Copyright 2016-2020 The OpenSSL Project Authors. All Rights Reserved.
  3. #
  4. # Licensed under the OpenSSL license (the "License"). You may not use
  5. # this file except in compliance with the License. You can obtain a copy
  6. # in the file LICENSE in the source distribution or at
  7. # https://www.openssl.org/source/license.html
  8. #
  9. # ====================================================================
  10. # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
  11. # project. The module is, however, dual licensed under OpenSSL and
  12. # CRYPTOGAMS licenses depending on where you obtain it. For further
  13. # details see http://www.openssl.org/~appro/cryptogams/.
  14. # ====================================================================
  15. #
  16. # This module implements Poly1305 hash for s390x.
  17. #
  18. # June 2015
  19. #
# ~6.6/2.3 cpb on z10/z196+, >2x improvement over compiler-generated
# code. With older compilers the improvement coefficient is >3x, because
# in that case base 2^64 and base 2^32 implementations are compared.
#
# As a side note, z13 enables vector base 2^26 implementation...
  25. $flavour = shift;
  26. if ($flavour =~ /3[12]/) {
  27. $SIZE_T=4;
  28. $g="";
  29. } else {
  30. $SIZE_T=8;
  31. $g="g";
  32. }
  33. while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
  34. open STDOUT,">$output";
  35. $sp="%r15";
  36. my ($ctx,$inp,$len,$padbit) = map("%r$_",(2..5));
# Emit the .text directive and poly1305_init.
#
# As used by the code below, $ctx (%r2) points at the context and $inp (%r3)
# at the key. The emitted routine:
#   - stores zero to the three 64-bit words at offsets 0, 8 and 16 of $ctx
#     (the hash value);
#   - if $inp is zero, skips the key setup entirely;
#   - otherwise loads 16 key bytes byte-reversed into %r4/%r5, builds the
#     masks 0x0ffffffc0fffffff and 0x0ffffffc0ffffffc in %r0/%r1, ANDs them
#     into the two key words and stores the result at offsets 32 and 40;
#   - returns with %r2 set to 0.
# %r0, %r1, %r4 and %r5 are used as scratch and are not preserved.
$code.=<<___;
.text
.globl poly1305_init
.type poly1305_init,\@function
.align 16
poly1305_init:
lghi %r0,0
lghi %r1,-1
stg %r0,0($ctx) # zero hash value
stg %r0,8($ctx)
stg %r0,16($ctx)
cl${g}r $inp,%r0
je .Lno_key
lrvg %r4,0($inp) # load little-endian key
lrvg %r5,8($inp)
nihl %r1,0xffc0 # 0xffffffc0ffffffff
srlg %r0,%r1,4 # 0x0ffffffc0fffffff
srlg %r1,%r1,4
nill %r1,0xfffc # 0x0ffffffc0ffffffc
ngr %r4,%r0
ngr %r5,%r1
stg %r4,32($ctx)
stg %r5,40($ctx)
.Lno_key:
lghi %r2,0
br %r14
.size poly1305_init,.-poly1305_init
___
{
# Emit poly1305_blocks.
#
# Arguments as used below: $ctx (%r2) context, $inp (%r3) input pointer,
# $len (%r4) byte count, $padbit (%r5). $len is shifted right by 4 to give
# the number of 16-byte loop iterations, so any trailing partial block is
# not consumed here; a zero count returns immediately without touching the
# stack. $padbit is zero-extended from 32 bits and added (with carry) into
# the top hash word $h2 on every iteration.
#
# Work registers are %r6..%r14. ($d0hi,$d0lo), ($d1hi,$d1lo), ($t0,$h0) and
# ($t1,$h1) are the register pairs named by the mlgr multiplies; per the
# inline comments each product lands in hi:lo of its pair. $h2 is %r14, the
# register the routine returns through, so %r6..%r14 are saved at
# 6*$SIZE_T($sp) on entry and reloaded before "br %r14".
my ($d0hi,$d0lo,$d1hi,$d1lo,$t0,$h0,$t1,$h1,$h2) = map("%r$_",(6..14));
# Key words $r0/$r1 live in %r0/%r1. $s1 (= r1 + r1>>2) is %r2, the same
# register as $ctx, which is why $ctx is parked at 2*$SIZE_T($sp) before the
# loop and reloaded afterwards to store the hash back at offsets 0, 8, 16.
my ($r0,$r1,$s1) = map("%r$_",(0..2));
# The srl${g} below is written with two operands; in the 64-bit build it is
# rewritten to the three-operand srlg form by the substitution at the end of
# this script.
$code.=<<___;
.globl poly1305_blocks
.type poly1305_blocks,\@function
.align 16
poly1305_blocks:
srl${g} $len,4 # fixed-up in 64-bit build
lghi %r0,0
cl${g}r $len,%r0
je .Lno_data
stm${g} %r6,%r14,`6*$SIZE_T`($sp)
llgfr $padbit,$padbit # clear upper half, much needed with
# non-64-bit ABI
lg $r0,32($ctx) # load key
lg $r1,40($ctx)
lg $h0,0($ctx) # load hash value
lg $h1,8($ctx)
lg $h2,16($ctx)
st$g $ctx,`2*$SIZE_T`($sp) # off-load $ctx
srlg $s1,$r1,2
algr $s1,$r1 # s1 = r1 + r1>>2
j .Loop
.align 16
.Loop:
lrvg $d0lo,0($inp) # load little-endian input
lrvg $d1lo,8($inp)
la $inp,16($inp)
algr $d0lo,$h0 # accumulate input
alcgr $d1lo,$h1
lgr $h0,$d0lo
mlgr $d0hi,$r0 # h0*r0 -> $d0hi:$d0lo
lgr $h1,$d1lo
mlgr $d1hi,$s1 # h1*5*r1 -> $d1hi:$d1lo
mlgr $t0,$r1 # h0*r1 -> $t0:$h0
mlgr $t1,$r0 # h1*r0 -> $t1:$h1
alcgr $h2,$padbit
algr $d0lo,$d1lo
lgr $d1lo,$h2
alcgr $d0hi,$d1hi
lghi $d1hi,0
algr $h1,$h0
alcgr $t1,$t0
msgr $d1lo,$s1 # h2*s1
msgr $h2,$r0 # h2*r0
algr $h1,$d1lo
alcgr $t1,$d1hi # $d1hi is zero
algr $h1,$d0hi
alcgr $h2,$t1
lghi $h0,-4 # final reduction step
ngr $h0,$h2
srlg $t0,$h2,2
algr $h0,$t0
lghi $t1,3
ngr $h2,$t1
algr $h0,$d0lo
alcgr $h1,$d1hi # $d1hi is still zero
alcgr $h2,$d1hi # $d1hi is still zero
brct$g $len,.Loop
l$g $ctx,`2*$SIZE_T`($sp) # restore $ctx
stg $h0,0($ctx) # store hash value
stg $h1,8($ctx)
stg $h2,16($ctx)
lm${g} %r6,%r14,`6*$SIZE_T`($sp)
.Lno_data:
br %r14
.size poly1305_blocks,.-poly1305_blocks
___
}
{
# Emit poly1305_emit, followed by the identification .string.
#
# The second and third argument registers are renamed for this routine:
# $mac (%r3) is where the 16-byte result is written and $nonce (%r4) points
# at the two 64-bit nonce words. $ctx (%r2) is the context; once the hash
# has been loaded from it, %r2 is reused to hold the second nonce word.
my ($mac,$nonce)=($inp,$len);
# Hash words and their copies occupy %r5..%r9; the emitted code saves and
# restores only %r6..%r9.
my ($h0,$h1,$h2,$d0,$d1)=map("%r$_",(5..9));
# Flow of the emitted code:
#   1. load h0,h1,h2 from offsets 0, 8, 16 of $ctx and keep copies of
#      h0,h1 in $d0,$d1;
#   2. add 5 to h0 and ripple the carry through h1 and h2;
#   3. form %r1 = 0 - (h2>>2) and %r0 = ~%r1, then take the sum where %r1
#      has bits set and the saved copy where %r0 has bits set, i.e. keep
#      the sum only if the addition carried past bit 1 of h2;
#   4. load the nonce words, rotate each by 32 bits, add them with carry;
#   5. store the two result words byte-reversed at offsets 0 and 8 of $mac.
$code.=<<___;
.globl poly1305_emit
.type poly1305_emit,\@function
.align 16
poly1305_emit:
stm${g} %r6,%r9,`6*$SIZE_T`($sp)
lg $h0,0($ctx)
lg $h1,8($ctx)
lg $h2,16($ctx)
lghi %r0,5
lghi %r1,0
lgr $d0,$h0
lgr $d1,$h1
algr $h0,%r0 # compare to modulus
alcgr $h1,%r1
alcgr $h2,%r1
srlg $h2,$h2,2 # did it borrow/carry?
slgr %r1,$h2 # 0-$h2>>2
lg $h2,0($nonce) # load nonce
lghi %r0,-1
lg $ctx,8($nonce)
xgr %r0,%r1 # ~%r1
ngr $h0,%r1
ngr $d0,%r0
ngr $h1,%r1
ngr $d1,%r0
ogr $h0,$d0
rllg $d0,$h2,32 # flip nonce words
ogr $h1,$d1
rllg $d1,$ctx,32
algr $h0,$d0 # accumulate nonce
alcgr $h1,$d1
strvg $h0,0($mac) # write little-endian result
strvg $h1,8($mac)
lm${g} %r6,%r9,`6*$SIZE_T`($sp)
br %r14
.size poly1305_emit,.-poly1305_emit
.string "Poly1305 for s390x, CRYPTOGAMS by <appro\@openssl.org>"
___
}
  178. $code =~ s/\`([^\`]*)\`/eval $1/gem;
  179. $code =~ s/\b(srlg\s+)(%r[0-9]+\s*,)\s*([0-9]+)/$1$2$2$3/gm;
  180. print $code;
  181. close STDOUT or die "error closing STDOUT: $!";