123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227 |
- #! /usr/bin/env perl
- # Copyright 2016-2020 The OpenSSL Project Authors. All Rights Reserved.
- #
- # Licensed under the OpenSSL license (the "License"). You may not use
- # this file except in compliance with the License. You can obtain a copy
- # in the file LICENSE in the source distribution or at
- # https://www.openssl.org/source/license.html
- #
- # ====================================================================
- # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
- # project. The module is, however, dual licensed under OpenSSL and
- # CRYPTOGAMS licenses depending on where you obtain it. For further
- # details see http://www.openssl.org/~appro/cryptogams/.
- # ====================================================================
- #
- # This module implements Poly1305 hash for s390x.
- #
- # June 2015
- #
- # ~6.6/2.3 cpb on z10/z196+, >2x improvement over compiler-generated
- # code. For older compilers the improvement coefficient is >3x, because
- # in that case base 2^64 and base 2^32 implementations are compared.
- #
- # On a side note, z13 enables a vector base 2^26 implementation...
- # Command line: <flavour> [other args ...] [<output file>].
- # A flavour containing "31" or "32" selects 4-byte SIZE_T and unsuffixed
- # mnemonics; anything else selects 8-byte SIZE_T and the "g"-suffixed
- # mnemonics (see the ${g} interpolations in the assembly templates).
- $flavour = shift;
- if ($flavour =~ /3[12]/) {
- $SIZE_T=4;
- $g="";
- } else {
- $SIZE_T=8;
- $g="g";
- }
- # Skip arguments until one looks like a file name with an extension;
- # $output is left false when no such argument exists.
- while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
- # Redirect STDOUT only when an output file was named, and fail loudly if
- # it cannot be created. Previously a failed open was ignored and the
- # assembly was silently written to the inherited STDOUT.
- if ($output) {
- open(STDOUT,">",$output) or die "can't open $output: $!";
- }
- $sp="%r15"; # base register for the register save slots addressed below (the stack pointer in the s390x ELF ABI)
- my ($ctx,$inp,$len,$padbit) = map("%r$_",(2..5)); # %r2..%r5: incoming arguments of the routines, in this order
- $code.=<<___; # poly1305_init: zero the hash at 0..16($ctx); if $inp is non-zero, store the byte-reversed, masked key at 32/40($ctx); return 0 in %r2
- .text
- .globl poly1305_init
- .type poly1305_init,\@function
- .align 16
- poly1305_init:
- lghi %r0,0
- lghi %r1,-1
- stg %r0,0($ctx) # zero hash value
- stg %r0,8($ctx)
- stg %r0,16($ctx)
- cl${g}r $inp,%r0
- je .Lno_key
- lrvg %r4,0($inp) # load little-endian key
- lrvg %r5,8($inp)
- nihl %r1,0xffc0 # 0xffffffc0ffffffff
- srlg %r0,%r1,4 # 0x0ffffffc0fffffff
- srlg %r1,%r1,4
- nill %r1,0xfffc # 0x0ffffffc0ffffffc
- ngr %r4,%r0
- ngr %r5,%r1
- stg %r4,32($ctx)
- stg %r5,40($ctx)
- .Lno_key:
- lghi %r2,0
- br %r14
- .size poly1305_init,.-poly1305_init
- ___
- { # lexical scope for the register names used only by poly1305_blocks
- my ($d0hi,$d0lo,$d1hi,$d1lo,$t0,$h0,$t1,$h1,$h2) = map("%r$_",(6..14)); # %r6..%r14; order matters: mlgr works on even/odd pairs ($d0hi:$d0lo, $d1hi:$d1lo, $t0:$h0, $t1:$h1); $h2 is %r14, restored by lm before "br %r14"
- my ($r0,$r1,$s1) = map("%r$_",(0..2)); # %r0,%r1: key words from 32/40($ctx); $s1 is %r2 and so overwrites $ctx, which is spilled to the stack first
- $code.=<<___; # poly1305_blocks: for each of $len>>4 16-byte blocks, add the byte-reversed block and $padbit to the hash at 0..16($ctx), multiply by the key and reduce; returns at once when $len>>4 is zero
- .globl poly1305_blocks
- .type poly1305_blocks,\@function
- .align 16
- poly1305_blocks:
- srl${g} $len,4 # fixed-up in 64-bit build
- lghi %r0,0
- cl${g}r $len,%r0
- je .Lno_data
- stm${g} %r6,%r14,`6*$SIZE_T`($sp)
- llgfr $padbit,$padbit # clear upper half, much needed with
- # non-64-bit ABI
- lg $r0,32($ctx) # load key
- lg $r1,40($ctx)
- lg $h0,0($ctx) # load hash value
- lg $h1,8($ctx)
- lg $h2,16($ctx)
- st$g $ctx,`2*$SIZE_T`($sp) # off-load $ctx
- srlg $s1,$r1,2
- algr $s1,$r1 # s1 = r1 + r1>>2
- j .Loop
- .align 16
- .Loop:
- lrvg $d0lo,0($inp) # load little-endian input
- lrvg $d1lo,8($inp)
- la $inp,16($inp)
- algr $d0lo,$h0 # accumulate input
- alcgr $d1lo,$h1
- lgr $h0,$d0lo
- mlgr $d0hi,$r0 # h0*r0 -> $d0hi:$d0lo
- lgr $h1,$d1lo
- mlgr $d1hi,$s1 # h1*5*r1 -> $d1hi:$d1lo
- mlgr $t0,$r1 # h0*r1 -> $t0:$h0
- mlgr $t1,$r0 # h1*r0 -> $t1:$h1
- alcgr $h2,$padbit
- algr $d0lo,$d1lo
- lgr $d1lo,$h2
- alcgr $d0hi,$d1hi
- lghi $d1hi,0
- algr $h1,$h0
- alcgr $t1,$t0
- msgr $d1lo,$s1 # h2*s1
- msgr $h2,$r0 # h2*r0
- algr $h1,$d1lo
- alcgr $t1,$d1hi # $d1hi is zero
- algr $h1,$d0hi
- alcgr $h2,$t1
- lghi $h0,-4 # final reduction step
- ngr $h0,$h2
- srlg $t0,$h2,2
- algr $h0,$t0
- lghi $t1,3
- ngr $h2,$t1
- algr $h0,$d0lo
- alcgr $h1,$d1hi # $d1hi is still zero
- alcgr $h2,$d1hi # $d1hi is still zero
- brct$g $len,.Loop
- l$g $ctx,`2*$SIZE_T`($sp) # restore $ctx
- stg $h0,0($ctx) # store hash value
- stg $h1,8($ctx)
- stg $h2,16($ctx)
- lm${g} %r6,%r14,`6*$SIZE_T`($sp)
- .Lno_data:
- br %r14
- .size poly1305_blocks,.-poly1305_blocks
- ___
- } # end of poly1305_blocks scope
- { # lexical scope for the register names used only by poly1305_emit
- my ($mac,$nonce)=($inp,$len); # second/third arguments (%r3,%r4): $mac receives the 16 output bytes, $nonce is read as two 64-bit words
- my ($h0,$h1,$h2,$d0,$d1)=map("%r$_",(5..9)); # %r5..%r9; the routine saves/restores only %r6..%r9, %r5 is simply overwritten
- $code.=<<___; # poly1305_emit: compute hash+5; mask 0-(top limb>>2) selects hash+5 or the original hash without branching; add the word-swapped nonce and store the low 128 bits byte-reversed to $mac
- .globl poly1305_emit
- .type poly1305_emit,\@function
- .align 16
- poly1305_emit:
- stm${g} %r6,%r9,`6*$SIZE_T`($sp)
- lg $h0,0($ctx)
- lg $h1,8($ctx)
- lg $h2,16($ctx)
- lghi %r0,5
- lghi %r1,0
- lgr $d0,$h0
- lgr $d1,$h1
- algr $h0,%r0 # compare to modulus
- alcgr $h1,%r1
- alcgr $h2,%r1
- srlg $h2,$h2,2 # did it borrow/carry?
- slgr %r1,$h2 # 0-$h2>>2
- lg $h2,0($nonce) # load nonce
- lghi %r0,-1
- lg $ctx,8($nonce)
- xgr %r0,%r1 # ~%r1
- ngr $h0,%r1
- ngr $d0,%r0
- ngr $h1,%r1
- ngr $d1,%r0
- ogr $h0,$d0
- rllg $d0,$h2,32 # flip nonce words
- ogr $h1,$d1
- rllg $d1,$ctx,32
- algr $h0,$d0 # accumulate nonce
- alcgr $h1,$d1
- strvg $h0,0($mac) # write little-endian result
- strvg $h1,8($mac)
- lm${g} %r6,%r9,`6*$SIZE_T`($sp)
- br %r14
- .size poly1305_emit,.-poly1305_emit
- .string "Poly1305 for s390x, CRYPTOGAMS by <appro\@openssl.org>"
- ___
- } # end of poly1305_emit scope
- # Post-process the accumulated assembly text in place, then emit it.
- for ($code) {
- # Replace every `...` span with the value of the Perl expression inside
- # it (e.g. 6*$SIZE_T for the register save-area offsets).
- s/\`([^\`]*)\`/eval $1/gem;
- # A two-operand "srlg %rN,imm" (what srl${g} expands to in the 64-bit
- # build) is rewritten to the three-operand form "srlg %rN,%rN,imm".
- s/\b(srlg\s+)(%r[0-9]+\s*,)\s*([0-9]+)/$1$2$2$3/gm;
- print;
- }
- # Buffered write errors only surface at close, so check it.
- close STDOUT or die "error closing STDOUT: $!";
|