12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748 |
- #! /usr/bin/env perl
- # Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
- #
- # Licensed under the OpenSSL license (the "License"). You may not use
- # this file except in compliance with the License. You can obtain a copy
- # in the file LICENSE in the source distribution or at
- # https://www.openssl.org/source/license.html
- #
- # ====================================================================
- # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
- # project. The module is, however, dual licensed under OpenSSL and
- # CRYPTOGAMS licenses depending on where you obtain it. For further
- # details see http://www.openssl.org/~appro/cryptogams/.
- # ====================================================================
- #
- # April 2010
- #
- # The module implements "4-bit" GCM GHASH function and underlying
- # single multiplication operation in GF(2^128). "4-bit" means that it
- # uses 256 bytes per-key table [+128 bytes shared table]. On PA-7100LC
- # it processes one byte in 19.6 cycles, which is more than twice as
- # fast as code generated by gcc 3.2. PA-RISC 2.0 loop is scheduled for
- # 8 cycles, but measured performance on PA-8600 system is ~9 cycles per
- # processed byte. This is ~2.2x faster than 64-bit code generated by
- # vendor compiler (which used to be very hard to beat:-).
- #
- # Special thanks to polarhome.com for providing HP-UX account.
# Command line: <flavour> [<output file>].
#   $flavour - build flavour string; a value containing "64" selects the
#              64-bit settings chosen further down.
#   $output  - file that receives the generated assembly.  When it is
#              omitted the text goes to the inherited STDOUT.
$flavour = shift;
$output = shift;
# Three-argument open keeps characters such as '>' or '|' in the file name
# from being taken as an open mode, and a failure is reported right here
# instead of surfacing later as a missing or empty output file.
if (defined($output) && length($output)) {
    open(STDOUT, '>', $output)
	or die "can't open $output: $!";
}
# ABI-dependent parameters.  A flavour containing "64" selects the first
# row, anything else the second one.
#   $LEVEL        - argument of the .LEVEL directive
#   $SIZE_T       - size in bytes of one register save slot
#   $FRAME_MARKER - bytes of frame marker included in $FRAME
#   $SAVED_RP     - distance below %sp at which the prologue stores %r2
#   $PUSH/$PUSHMA - store mnemonics used by the prologues
#   $POP/$POPMB   - load mnemonics used by the epilogues
#   $NREGS        - ENTRY_GR value quoted in gcm_gmult_4bit's .CALLINFO
($LEVEL, $SIZE_T, $FRAME_MARKER, $SAVED_RP,
 $PUSH, $PUSHMA, $POP, $POPMB, $NREGS) =
    ($flavour =~ /64/)
	? ("2.0W", 8, 80, 16, "std", "std,ma", "ldd", "ldd,mb", 6)
	: ("1.0",  4, 48, 20, "stw", "stwm",   "ldw", "ldwm",   11);
# (a commented-out alternative for the second $LEVEL was "\n\t.ALLOW\t2.0")

# Ten register-sized save slots plus the frame marker
# [+ argument transfer].
$FRAME = $FRAME_MARKER + 10*$SIZE_T;
################# volatile registers
# argument block
($Xi,$Htbl,$inp,$len)=map("%r$_",(26,25,24,23));
# variables; $Hhh is just a second name for the $Htbl register
$Hhh=$Htbl;
($Hll,$Zhh,$Zll,$cnt)=map("%r$_",(22,21,20,19));
($rem_4bit,$rem,$mask0xf0)=map("%r$_",(28,29,31));
################# preserved registers
($Thh,$Tll,$nlo,$nhi,$byte)=map("%r$_",(1..5));
# six more names, assigned only when SIZE_T is 4
($Zhl,$Zlh,$Hhl,$Hlh,$Thl,$Tlh)=map("%r$_",(6..11)) if ($SIZE_T==4);
$rem2="%r6";	# used in PA-RISC 2.0 code
- $code.=<<___; # start of gcm_gmult_4bit: level/section/export directives and the prologue that stores %r2-%r6
- .LEVEL $LEVEL
- .SPACE \$TEXT\$
- .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
- .EXPORT gcm_gmult_4bit,ENTRY,ARGW0=GR,ARGW1=GR
- .ALIGN 64
- gcm_gmult_4bit
- .PROC
- .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
- .ENTRY
- $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
- $PUSHMA %r3,$FRAME(%sp)
- $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
- $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
- $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
- ___
- $code.=<<___ if ($SIZE_T==4); # SIZE_T==4 only: the prologue also stores %r7-%r11
- $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
- $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
- $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
- $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
- $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
- ___
- $code.=<<___; # blr/andcm/ldo form the address of L$rem_4bit relative to L$pic_gmult in $rem_4bit; also $len = $inp + $len and $mask0xf0 = 0xf0
- blr %r0,$rem_4bit
- ldi 3,$rem
- L\$pic_gmult
- andcm $rem_4bit,$rem,$rem_4bit
- addl $inp,$len,$len
- ldo L\$rem_4bit-L\$pic_gmult($rem_4bit),$rem_4bit
- ldi 0xf0,$mask0xf0
- ___
- $code.=<<___ if ($SIZE_T==4); # SIZE_T==4 only: branch to L$parisc1_gmult placed right after an extrd,u,*= test; NOTE(review): presumably a run-time PA-RISC 1.x/2.0 probe - confirm
- ldi 31,$rem
- mtctl $rem,%cr11
- extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0
- b L\$parisc1_gmult
- nop
- ___
- $code.=<<___; # 64-bit register path (ldd/shrpd/extrd/depd): starts with Xi bytes 15 and 14, loops with $cnt counting down from 13, two Htbl lookups plus L$rem_4bit lookups per byte, then stores Z to Xi
- ldb 15($Xi),$nlo
- ldo 8($Htbl),$Hll
- and $mask0xf0,$nlo,$nhi
- depd,z $nlo,59,4,$nlo
- ldd $nlo($Hll),$Zll
- ldd $nlo($Hhh),$Zhh
- depd,z $Zll,60,4,$rem
- shrpd $Zhh,$Zll,4,$Zll
- extrd,u $Zhh,59,60,$Zhh
- ldb 14($Xi),$nlo
- ldd $nhi($Hll),$Tll
- ldd $nhi($Hhh),$Thh
- and $mask0xf0,$nlo,$nhi
- depd,z $nlo,59,4,$nlo
- xor $Tll,$Zll,$Zll
- xor $Thh,$Zhh,$Zhh
- ldd $rem($rem_4bit),$rem
- b L\$oop_gmult_pa2
- ldi 13,$cnt
- .ALIGN 8
- L\$oop_gmult_pa2
- xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
- depd,z $Zll,60,4,$rem
- shrpd $Zhh,$Zll,4,$Zll
- extrd,u $Zhh,59,60,$Zhh
- ldd $nlo($Hll),$Tll
- ldd $nlo($Hhh),$Thh
- xor $Tll,$Zll,$Zll
- xor $Thh,$Zhh,$Zhh
- ldd $rem($rem_4bit),$rem
- xor $rem,$Zhh,$Zhh
- depd,z $Zll,60,4,$rem
- ldbx $cnt($Xi),$nlo
- shrpd $Zhh,$Zll,4,$Zll
- extrd,u $Zhh,59,60,$Zhh
- ldd $nhi($Hll),$Tll
- ldd $nhi($Hhh),$Thh
- and $mask0xf0,$nlo,$nhi
- depd,z $nlo,59,4,$nlo
- ldd $rem($rem_4bit),$rem
- xor $Tll,$Zll,$Zll
- addib,uv -1,$cnt,L\$oop_gmult_pa2
- xor $Thh,$Zhh,$Zhh
- xor $rem,$Zhh,$Zhh
- depd,z $Zll,60,4,$rem
- shrpd $Zhh,$Zll,4,$Zll
- extrd,u $Zhh,59,60,$Zhh
- ldd $nlo($Hll),$Tll
- ldd $nlo($Hhh),$Thh
- xor $Tll,$Zll,$Zll
- xor $Thh,$Zhh,$Zhh
- ldd $rem($rem_4bit),$rem
- xor $rem,$Zhh,$Zhh
- depd,z $Zll,60,4,$rem
- shrpd $Zhh,$Zll,4,$Zll
- extrd,u $Zhh,59,60,$Zhh
- ldd $nhi($Hll),$Tll
- ldd $nhi($Hhh),$Thh
- xor $Tll,$Zll,$Zll
- xor $Thh,$Zhh,$Zhh
- ldd $rem($rem_4bit),$rem
- xor $rem,$Zhh,$Zhh
- std $Zll,8($Xi)
- std $Zhh,0($Xi)
- ___
- $code.=<<___ if ($SIZE_T==4); # SIZE_T==4 only: branch to L$done_gmult, then the L$parisc1_gmult path, which keeps Z, H and T as four 32-bit words each (ldwx/shrpw/extru/zdep) and stores the four Z words to Xi
- b L\$done_gmult
- nop
- L\$parisc1_gmult
- ldb 15($Xi),$nlo
- ldo 12($Htbl),$Hll
- ldo 8($Htbl),$Hlh
- ldo 4($Htbl),$Hhl
- and $mask0xf0,$nlo,$nhi
- zdep $nlo,27,4,$nlo
- ldwx $nlo($Hll),$Zll
- ldwx $nlo($Hlh),$Zlh
- ldwx $nlo($Hhl),$Zhl
- ldwx $nlo($Hhh),$Zhh
- zdep $Zll,28,4,$rem
- ldb 14($Xi),$nlo
- ldwx $rem($rem_4bit),$rem
- shrpw $Zlh,$Zll,4,$Zll
- ldwx $nhi($Hll),$Tll
- shrpw $Zhl,$Zlh,4,$Zlh
- ldwx $nhi($Hlh),$Tlh
- shrpw $Zhh,$Zhl,4,$Zhl
- ldwx $nhi($Hhl),$Thl
- extru $Zhh,27,28,$Zhh
- ldwx $nhi($Hhh),$Thh
- xor $rem,$Zhh,$Zhh
- and $mask0xf0,$nlo,$nhi
- zdep $nlo,27,4,$nlo
- xor $Tll,$Zll,$Zll
- ldwx $nlo($Hll),$Tll
- xor $Tlh,$Zlh,$Zlh
- ldwx $nlo($Hlh),$Tlh
- xor $Thl,$Zhl,$Zhl
- b L\$oop_gmult_pa1
- ldi 13,$cnt
- .ALIGN 8
- L\$oop_gmult_pa1
- zdep $Zll,28,4,$rem
- ldwx $nlo($Hhl),$Thl
- xor $Thh,$Zhh,$Zhh
- ldwx $rem($rem_4bit),$rem
- shrpw $Zlh,$Zll,4,$Zll
- ldwx $nlo($Hhh),$Thh
- shrpw $Zhl,$Zlh,4,$Zlh
- ldbx $cnt($Xi),$nlo
- xor $Tll,$Zll,$Zll
- ldwx $nhi($Hll),$Tll
- shrpw $Zhh,$Zhl,4,$Zhl
- xor $Tlh,$Zlh,$Zlh
- ldwx $nhi($Hlh),$Tlh
- extru $Zhh,27,28,$Zhh
- xor $Thl,$Zhl,$Zhl
- ldwx $nhi($Hhl),$Thl
- xor $rem,$Zhh,$Zhh
- zdep $Zll,28,4,$rem
- xor $Thh,$Zhh,$Zhh
- ldwx $nhi($Hhh),$Thh
- shrpw $Zlh,$Zll,4,$Zll
- ldwx $rem($rem_4bit),$rem
- shrpw $Zhl,$Zlh,4,$Zlh
- shrpw $Zhh,$Zhl,4,$Zhl
- and $mask0xf0,$nlo,$nhi
- extru $Zhh,27,28,$Zhh
- zdep $nlo,27,4,$nlo
- xor $Tll,$Zll,$Zll
- ldwx $nlo($Hll),$Tll
- xor $Tlh,$Zlh,$Zlh
- ldwx $nlo($Hlh),$Tlh
- xor $rem,$Zhh,$Zhh
- addib,uv -1,$cnt,L\$oop_gmult_pa1
- xor $Thl,$Zhl,$Zhl
- zdep $Zll,28,4,$rem
- ldwx $nlo($Hhl),$Thl
- xor $Thh,$Zhh,$Zhh
- ldwx $rem($rem_4bit),$rem
- shrpw $Zlh,$Zll,4,$Zll
- ldwx $nlo($Hhh),$Thh
- shrpw $Zhl,$Zlh,4,$Zlh
- xor $Tll,$Zll,$Zll
- ldwx $nhi($Hll),$Tll
- shrpw $Zhh,$Zhl,4,$Zhl
- xor $Tlh,$Zlh,$Zlh
- ldwx $nhi($Hlh),$Tlh
- extru $Zhh,27,28,$Zhh
- xor $rem,$Zhh,$Zhh
- xor $Thl,$Zhl,$Zhl
- ldwx $nhi($Hhl),$Thl
- xor $Thh,$Zhh,$Zhh
- ldwx $nhi($Hhh),$Thh
- zdep $Zll,28,4,$rem
- ldwx $rem($rem_4bit),$rem
- shrpw $Zlh,$Zll,4,$Zll
- shrpw $Zhl,$Zlh,4,$Zlh
- shrpw $Zhh,$Zhl,4,$Zhl
- extru $Zhh,27,28,$Zhh
- xor $Tll,$Zll,$Zll
- xor $Tlh,$Zlh,$Zlh
- xor $rem,$Zhh,$Zhh
- stw $Zll,12($Xi)
- xor $Thl,$Zhl,$Zhl
- stw $Zlh,8($Xi)
- xor $Thh,$Zhh,$Zhh
- stw $Zhl,4($Xi)
- stw $Zhh,0($Xi)
- ___
- $code.=<<___; # L$done_gmult: the epilogue reloads %r2 and %r4-%r6 from the slots the prologue filled
- L\$done_gmult
- $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
- $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
- $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
- $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
- ___
- $code.=<<___ if ($SIZE_T==4); # SIZE_T==4 only: the epilogue also reloads %r7-%r11
- $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
- $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
- $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
- $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
- $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
- ___
- $code.=<<___; # gcm_gmult_4bit returns through %r2 and reloads %r3 with $POPMB; gcm_ghash_4bit then begins with its export line and the same %r2-%r6 prologue (its .CALLINFO quotes ENTRY_GR=11 literally rather than $NREGS)
- bv (%r2)
- .EXIT
- $POPMB -$FRAME(%sp),%r3
- .PROCEND
- .EXPORT gcm_ghash_4bit,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
- .ALIGN 64
- gcm_ghash_4bit
- .PROC
- .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=11
- .ENTRY
- $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
- $PUSHMA %r3,$FRAME(%sp)
- $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
- $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
- $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
- ___
- $code.=<<___ if ($SIZE_T==4); # SIZE_T==4 only: the prologue also stores %r7-%r11
- $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
- $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
- $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
- $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
- $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
- ___
- $code.=<<___; # blr/andcm/ldo form the address of L$rem_4bit relative to L$pic_ghash in $rem_4bit; $len = $inp + $len is the value the outer loops compare $inp against; $mask0xf0 = 0xf0
- blr %r0,$rem_4bit
- ldi 3,$rem
- L\$pic_ghash
- andcm $rem_4bit,$rem,$rem_4bit
- addl $inp,$len,$len
- ldo L\$rem_4bit-L\$pic_ghash($rem_4bit),$rem_4bit
- ldi 0xf0,$mask0xf0
- ___
- $code.=<<___ if ($SIZE_T==4); # SIZE_T==4 only: branch to L$parisc1_ghash placed right after an extrd,u,*= test; NOTE(review): presumably a run-time PA-RISC 1.x/2.0 probe - confirm
- ldi 31,$rem
- mtctl $rem,%cr11
- extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0
- b L\$parisc1_ghash
- nop
- ___
- $code.=<<___; # 64-bit register path of gcm_ghash_4bit: every Xi byte is XORed with the input byte at the same offset before the table lookups; after Z is stored to Xi, $inp advances by 16 and L$outer_ghash_pa2 repeats until $inp equals $len
- ldb 15($Xi),$nlo
- ldo 8($Htbl),$Hll
- L\$outer_ghash_pa2
- ldb 15($inp),$nhi
- xor $nhi,$nlo,$nlo
- and $mask0xf0,$nlo,$nhi
- depd,z $nlo,59,4,$nlo
- ldd $nlo($Hll),$Zll
- ldd $nlo($Hhh),$Zhh
- depd,z $Zll,60,4,$rem
- shrpd $Zhh,$Zll,4,$Zll
- extrd,u $Zhh,59,60,$Zhh
- ldb 14($Xi),$nlo
- ldb 14($inp),$byte
- ldd $nhi($Hll),$Tll
- ldd $nhi($Hhh),$Thh
- xor $byte,$nlo,$nlo
- and $mask0xf0,$nlo,$nhi
- depd,z $nlo,59,4,$nlo
- xor $Tll,$Zll,$Zll
- xor $Thh,$Zhh,$Zhh
- ldd $rem($rem_4bit),$rem
- b L\$oop_ghash_pa2
- ldi 13,$cnt
- .ALIGN 8
- L\$oop_ghash_pa2
- xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
- depd,z $Zll,60,4,$rem2
- shrpd $Zhh,$Zll,4,$Zll
- extrd,u $Zhh,59,60,$Zhh
- ldd $nlo($Hll),$Tll
- ldd $nlo($Hhh),$Thh
- xor $Tll,$Zll,$Zll
- xor $Thh,$Zhh,$Zhh
- ldbx $cnt($Xi),$nlo
- ldbx $cnt($inp),$byte
- depd,z $Zll,60,4,$rem
- shrpd $Zhh,$Zll,4,$Zll
- ldd $rem2($rem_4bit),$rem2
- xor $rem2,$Zhh,$Zhh
- xor $byte,$nlo,$nlo
- ldd $nhi($Hll),$Tll
- ldd $nhi($Hhh),$Thh
- and $mask0xf0,$nlo,$nhi
- depd,z $nlo,59,4,$nlo
- extrd,u $Zhh,59,60,$Zhh
- xor $Tll,$Zll,$Zll
- ldd $rem($rem_4bit),$rem
- addib,uv -1,$cnt,L\$oop_ghash_pa2
- xor $Thh,$Zhh,$Zhh
- xor $rem,$Zhh,$Zhh
- depd,z $Zll,60,4,$rem2
- shrpd $Zhh,$Zll,4,$Zll
- extrd,u $Zhh,59,60,$Zhh
- ldd $nlo($Hll),$Tll
- ldd $nlo($Hhh),$Thh
- xor $Tll,$Zll,$Zll
- xor $Thh,$Zhh,$Zhh
- depd,z $Zll,60,4,$rem
- shrpd $Zhh,$Zll,4,$Zll
- ldd $rem2($rem_4bit),$rem2
- xor $rem2,$Zhh,$Zhh
- ldd $nhi($Hll),$Tll
- ldd $nhi($Hhh),$Thh
- extrd,u $Zhh,59,60,$Zhh
- xor $Tll,$Zll,$Zll
- xor $Thh,$Zhh,$Zhh
- ldd $rem($rem_4bit),$rem
- xor $rem,$Zhh,$Zhh
- std $Zll,8($Xi)
- ldo 16($inp),$inp
- std $Zhh,0($Xi)
- cmpb,*<> $inp,$len,L\$outer_ghash_pa2
- copy $Zll,$nlo
- ___
- $code.=<<___ if ($SIZE_T==4); # SIZE_T==4 only: branch to L$done_ghash, then the L$parisc1_ghash path, which keeps Z, H and T as four 32-bit words each (ldwx/shrpw/extru/zdep), XORs Xi bytes with input bytes, and repeats L$outer_ghash_pa1 in 16-byte steps until $inp equals $len
- b L\$done_ghash
- nop
- L\$parisc1_ghash
- ldb 15($Xi),$nlo
- ldo 12($Htbl),$Hll
- ldo 8($Htbl),$Hlh
- ldo 4($Htbl),$Hhl
- L\$outer_ghash_pa1
- ldb 15($inp),$byte
- xor $byte,$nlo,$nlo
- and $mask0xf0,$nlo,$nhi
- zdep $nlo,27,4,$nlo
- ldwx $nlo($Hll),$Zll
- ldwx $nlo($Hlh),$Zlh
- ldwx $nlo($Hhl),$Zhl
- ldwx $nlo($Hhh),$Zhh
- zdep $Zll,28,4,$rem
- ldb 14($Xi),$nlo
- ldb 14($inp),$byte
- ldwx $rem($rem_4bit),$rem
- shrpw $Zlh,$Zll,4,$Zll
- ldwx $nhi($Hll),$Tll
- shrpw $Zhl,$Zlh,4,$Zlh
- ldwx $nhi($Hlh),$Tlh
- shrpw $Zhh,$Zhl,4,$Zhl
- ldwx $nhi($Hhl),$Thl
- extru $Zhh,27,28,$Zhh
- ldwx $nhi($Hhh),$Thh
- xor $byte,$nlo,$nlo
- xor $rem,$Zhh,$Zhh
- and $mask0xf0,$nlo,$nhi
- zdep $nlo,27,4,$nlo
- xor $Tll,$Zll,$Zll
- ldwx $nlo($Hll),$Tll
- xor $Tlh,$Zlh,$Zlh
- ldwx $nlo($Hlh),$Tlh
- xor $Thl,$Zhl,$Zhl
- b L\$oop_ghash_pa1
- ldi 13,$cnt
- .ALIGN 8
- L\$oop_ghash_pa1
- zdep $Zll,28,4,$rem
- ldwx $nlo($Hhl),$Thl
- xor $Thh,$Zhh,$Zhh
- ldwx $rem($rem_4bit),$rem
- shrpw $Zlh,$Zll,4,$Zll
- ldwx $nlo($Hhh),$Thh
- shrpw $Zhl,$Zlh,4,$Zlh
- ldbx $cnt($Xi),$nlo
- xor $Tll,$Zll,$Zll
- ldwx $nhi($Hll),$Tll
- shrpw $Zhh,$Zhl,4,$Zhl
- ldbx $cnt($inp),$byte
- xor $Tlh,$Zlh,$Zlh
- ldwx $nhi($Hlh),$Tlh
- extru $Zhh,27,28,$Zhh
- xor $Thl,$Zhl,$Zhl
- ldwx $nhi($Hhl),$Thl
- xor $rem,$Zhh,$Zhh
- zdep $Zll,28,4,$rem
- xor $Thh,$Zhh,$Zhh
- ldwx $nhi($Hhh),$Thh
- shrpw $Zlh,$Zll,4,$Zll
- ldwx $rem($rem_4bit),$rem
- shrpw $Zhl,$Zlh,4,$Zlh
- xor $byte,$nlo,$nlo
- shrpw $Zhh,$Zhl,4,$Zhl
- and $mask0xf0,$nlo,$nhi
- extru $Zhh,27,28,$Zhh
- zdep $nlo,27,4,$nlo
- xor $Tll,$Zll,$Zll
- ldwx $nlo($Hll),$Tll
- xor $Tlh,$Zlh,$Zlh
- ldwx $nlo($Hlh),$Tlh
- xor $rem,$Zhh,$Zhh
- addib,uv -1,$cnt,L\$oop_ghash_pa1
- xor $Thl,$Zhl,$Zhl
- zdep $Zll,28,4,$rem
- ldwx $nlo($Hhl),$Thl
- xor $Thh,$Zhh,$Zhh
- ldwx $rem($rem_4bit),$rem
- shrpw $Zlh,$Zll,4,$Zll
- ldwx $nlo($Hhh),$Thh
- shrpw $Zhl,$Zlh,4,$Zlh
- xor $Tll,$Zll,$Zll
- ldwx $nhi($Hll),$Tll
- shrpw $Zhh,$Zhl,4,$Zhl
- xor $Tlh,$Zlh,$Zlh
- ldwx $nhi($Hlh),$Tlh
- extru $Zhh,27,28,$Zhh
- xor $rem,$Zhh,$Zhh
- xor $Thl,$Zhl,$Zhl
- ldwx $nhi($Hhl),$Thl
- xor $Thh,$Zhh,$Zhh
- ldwx $nhi($Hhh),$Thh
- zdep $Zll,28,4,$rem
- ldwx $rem($rem_4bit),$rem
- shrpw $Zlh,$Zll,4,$Zll
- shrpw $Zhl,$Zlh,4,$Zlh
- shrpw $Zhh,$Zhl,4,$Zhl
- extru $Zhh,27,28,$Zhh
- xor $Tll,$Zll,$Zll
- xor $Tlh,$Zlh,$Zlh
- xor $rem,$Zhh,$Zhh
- stw $Zll,12($Xi)
- xor $Thl,$Zhl,$Zhl
- stw $Zlh,8($Xi)
- xor $Thh,$Zhh,$Zhh
- stw $Zhl,4($Xi)
- ldo 16($inp),$inp
- stw $Zhh,0($Xi)
- comb,<> $inp,$len,L\$outer_ghash_pa1
- copy $Zll,$nlo
- ___
- $code.=<<___; # L$done_ghash: the epilogue reloads %r2 and %r4-%r6 from the slots the prologue filled
- L\$done_ghash
- $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
- $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
- $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
- $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
- ___
- $code.=<<___ if ($SIZE_T==4); # SIZE_T==4 only: the epilogue also reloads %r7-%r11
- $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
- $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
- $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
- $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
- $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
- ___
- $code.=<<___; # gcm_ghash_4bit returns through %r2 and reloads %r3 with $POPMB; then the L$rem_4bit table (16 entries, each a 16-bit constant shifted left by 16 followed by a zero word) and the identification string; NOTE(review): the string says GRYPTOGAMS, likely meant CRYPTOGAMS - left untouched because it is emitted output
- bv (%r2)
- .EXIT
- $POPMB -$FRAME(%sp),%r3
- .PROCEND
- .ALIGN 64
- L\$rem_4bit
- .WORD `0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0
- .WORD `0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0
- .WORD `0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0
- .WORD `0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0
- .STRINGZ "GHASH for PA-RISC, GRYPTOGAMS by <appro\@openssl.org>"
- .ALIGN 64
- ___
- # Explicitly encode PA-RISC 2.0 instructions used in this module, so
- # that it can be compiled with .LEVEL 1.0. It should be noted that I
- # wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
- # directive...
my $ldd = sub {
    # Hand-encode an "ldd" instruction as a raw .WORD.
    #   $mod  - completer text that followed the mnemonic ("" or e.g. ",mb")
    #   $args - operand text
    # Returns the output line: a .WORD directive with the original
    # instruction appended as a comment, or the instruction itself when the
    # operands match neither supported form.
    my ($mod, $args) = @_;
    my $insn = "ldd$mod\t$args";
    my $word;

    if ($args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {
	# format 4: register index, %rX(%rB),%rT
	my ($index, $base, $target) = ($1, $2, $3);
	$word = (0x03<<26) | ($base<<21) | ($index<<16) | (3<<6) | $target;
    }
    elsif ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {
	# format 5: short displacement, d(%rB),%rT
	my ($disp, $base, $target) = ($1, $2, $3);
	$word  = (0x03<<26) | ($base<<21) | (1<<12) | (3<<6) | $target;
	$word |= (($disp&0xF)<<17) | (($disp&0x10)<<12);	# encode offset
	$word |= (1<<5)  if ($mod =~ /^,m/);
	$word |= (1<<13) if ($mod =~ /^,mb/);
    }

    return defined($word)
	? sprintf("\t.WORD\t0x%08x\t; %s", $word, $insn)
	: "\t".$insn;
};
my $std = sub {
    # Hand-encode an "std" instruction as a raw .WORD.
    #   $mod  - completer text that followed the mnemonic (not encoded)
    #   $args - operand text, expected as %rS,d(%rB)
    # Returns the .WORD line with the original instruction as a comment,
    # or the instruction itself when the operands do not match.
    my ($mod, $args) = @_;
    my $insn = "std$mod\t$args";

    return "\t".$insn
	unless ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/);

    # format 3 suffices
    my ($source, $disp, $base) = ($1, $2, $3);
    my $word = (0x1c<<26) | ($base<<21) | ($source<<16)
	     | (($disp&0x1FF8)<<1) | (($disp>>13)&1);
    return sprintf("\t.WORD\t0x%08x\t; %s", $word, $insn);
};
my $extrd = sub {
    # Hand-encode an "extrd" instruction as a raw .WORD.
    #   $mod  - completer text that followed the mnemonic
    #   $args - operand text
    # Only the ",u" completer is catered for; it is implied by the encoding.
    # Returns the .WORD line with the original instruction as a comment,
    # or the instruction itself when the operands match neither form.
    my ($mod, $args) = @_;
    my $insn = "extrd$mod\t$args";
    my $word;

    if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {
	# format 15: position given as an immediate
	my ($source, $pos, $width, $target) = ($1, $2, $3, $4);
	my $len = 32 - $width;
	$word  = (0x36<<26) | ($source<<21) | ($target<<16);
	$word |= (($pos&0x20)<<6) | (($pos&0x1f)<<5);	# encode pos
	$word |= (($len&0x20)<<7) | ($len&0x1f);	# encode len
    }
    elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) {
	# format 12: position taken from %sar
	my ($source, $width, $target) = ($1, $2, $3);
	my $len = 32 - $width;
	$word  = (0x34<<26) | ($source<<21) | ($target<<16) | (2<<11) | (1<<9);
	$word |= (($len&0x20)<<3) | ($len&0x1f);	# encode len
	$word |= (1<<13) if ($mod =~ /,\**=/);
    }

    return defined($word)
	? sprintf("\t.WORD\t0x%08x\t; %s", $word, $insn)
	: "\t".$insn;
};
my $shrpd = sub {
    # Hand-encode a "shrpd" instruction as a raw .WORD.
    #   $mod  - completer text that followed the mnemonic (not encoded)
    #   $args - operand text
    # Returns the .WORD line with the original instruction as a comment,
    # or the instruction itself when the operands match neither form.
    my ($mod, $args) = @_;
    my $insn = "shrpd$mod\t$args";
    my $word;

    if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) {
	# format 14: shift amount given as an immediate
	my ($first, $second, $shift, $target) = ($1, $2, $3, $4);
	my $cpos = 63 - $shift;
	$word  = (0x34<<26) | ($second<<21) | ($first<<16) | (1<<10) | $target;
	$word |= (($cpos&0x20)<<6) | (($cpos&0x1f)<<5);	# encode sa
    }
    elsif ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) {
	# format 11: shift amount taken from %sar
	my ($first, $second, $target) = ($1, $2, $3);
	$word = (0x34<<26) | ($second<<21) | ($first<<16) | (1<<9) | $target;
    }

    return defined($word)
	? sprintf("\t.WORD\t0x%08x\t; %s", $word, $insn)
	: "\t".$insn;
};
my $depd = sub {
    # Hand-encode a "depd" instruction as a raw .WORD.
    #   $mod  - completer text that followed the mnemonic
    #   $args - operand text, expected as %rS,pos,len,%rT
    # Only the ",z" completer is catered for; it is implied by the encoding.
    # Returns the .WORD line with the original instruction as a comment,
    # or the instruction itself when the operands do not match.
    my ($mod, $args) = @_;
    my $insn = "depd$mod\t$args";

    return "\t".$insn
	unless ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/);

    # format 16
    my ($source, $pos, $width, $target) = ($1, $2, $3, $4);
    my $cpos = 63 - $pos;
    my $len  = 32 - $width;
    my $word = (0x3c<<26) | ($target<<21) | ($source<<16);
    $word |= (($cpos&0x20)<<6) | (($cpos&0x1f)<<5);	# encode pos
    $word |= (($len&0x20)<<7)  | ($len&0x1f);		# encode len
    return sprintf("\t.WORD\t0x%08x\t; %s", $word, $insn);
};
# Route one parsed instruction to its hand encoder, if it has one.
#   $mnemonic - bare instruction name, e.g. "ldd"
#   $mod      - completer text that followed the mnemonic (may be empty)
#   $args     - operand text
# Returns the line to emit: the encoder's result for ldd, std, extrd,
# shrpd and depd, otherwise the instruction re-joined as
# "\t<mnemonic><mod>\t<args>".
sub assemble {
    my ($mnemonic,$mod,$args)=@_;
    # Explicit table instead of string-eval'ing "\$$mnemonic": nothing is
    # compiled per source line, and an instruction can never be confused
    # with an unrelated variable that merely shares its name.
    my %encoder_for = (
	ldd   => $ldd,
	std   => $std,
	extrd => $extrd,
	shrpd => $shrpd,
	depd  => $depd,
    );
    my $encoder = $encoder_for{$mnemonic};
    return ref($encoder) eq 'CODE'
	? $encoder->($mod,$args)
	: "\t$mnemonic$mod\t$args";
}
# Ask the compiler driver named by $ENV{CC} to run its assembler verbosely
# on an empty input; $gnuas is set when the reply mentions "GNU assembler".
my $as_banner = `$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`;
$gnuas = 1 if ($as_banner =~ /GNU assembler/);

# Post-process the accumulated text line by line and print it.
my $is_32bit  = ($SIZE_T==4);
my $gas_64bit = ($gnuas && $SIZE_T==8);
for my $line (split("\n",$code)) {
    # arithmetic written between backticks is computed here
    $line =~ s/\`([^\`]*)\`/eval $1/ge;

    if ($is_32bit) {
	# hand-encode what assemble() knows about, then rewrite the
	# "cmpb,*" spelling to "comb," and drop the first remaining ",*"
	$line =~ s/^\s+([a-z]+)([\S]*)\s+([\S]*)/assemble($1,$2,$3)/e;
	$line =~ s/cmpb,\*/comb,/;
	$line =~ s/,\*/,/;
    }

    if ($gas_64bit) {
	# directive spellings adjusted for GNU as in the 64-bit build
	$line =~ s/(\.LEVEL\s+2\.0)W/$1w/;
	$line =~ s/\.SPACE\s+\$TEXT\$/.text/;
	$line =~ s/\.SUBSPA.*//;
    }

    $line =~ s/\bbv\b/bve/ if ($SIZE_T==8);

    print $line,"\n";
}

# buffered write errors only show up when the handle is closed
close STDOUT or die "error closing STDOUT: $!";
|