des_enc.m4 46 KB


  1. ! Copyright 2000-2019 The OpenSSL Project Authors. All Rights Reserved.
  2. !
  3. ! Licensed under the OpenSSL license (the "License"). You may not use
  4. ! this file except in compliance with the License. You can obtain a copy
  5. ! in the file LICENSE in the source distribution or at
  6. ! https://www.openssl.org/source/license.html
  7. !
  8. ! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
  9. !
  10. ! Global registers 1 to 5 are used. This is the same as done by the
  11. ! cc compiler. The UltraSPARC load/store little endian feature is used.
  12. !
  13. ! Instruction grouping often refers to one CPU cycle.
  14. !
  15. ! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S
  16. !
  17. ! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S
  18. !
  19. ! Performance improvement according to './apps/openssl speed des'
  20. !
  21. ! 32-bit build:
  22. ! 23% faster than cc-5.2 -xarch=v8plus -xO5
  23. ! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5
  24. ! 64-bit build:
  25. ! 50% faster than cc-5.2 -xarch=v9 -xO5
  26. ! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5
  27. !
  28. .ident "des_enc.m4 2.1"
  29. .file "des_enc-sparc.S"
  30. #include <openssl/opensslconf.h>
  31. #if defined(__SUNPRO_C) && defined(__sparcv9)
  32. # define ABI64 /* They've said -xarch=v9 at command line */
  33. #elif defined(__GNUC__) && defined(__arch64__)
  34. # define ABI64 /* They've said -m64 at command line */
  35. #endif
  36. #ifdef ABI64
  37. .register %g2,#scratch
  38. .register %g3,#scratch
  39. # define FRAME -192 /* %sp adjustment passed to every save below */
  40. # define BIAS 2047 /* added to %sp when addressing stack slots */
  41. # define LDPTR ldx /* load used for pointer-sized values */
  42. # define STPTR stx /* store used for pointer-sized values */
  43. # define ARG0 128 /* offset of argument slot 0 from %sp+BIAS */
  44. # define ARGSZ 8 /* size of one argument slot in bytes */
  45. #else
  46. # define FRAME -96 /* %sp adjustment passed to every save below */
  47. # define BIAS 0 /* added to %sp when addressing stack slots */
  48. # define LDPTR ld /* load used for pointer-sized values */
  49. # define STPTR st /* store used for pointer-sized values */
  50. # define ARG0 68 /* offset of argument slot 0 from %sp+BIAS */
  51. # define ARGSZ 4 /* size of one argument slot in bytes */
  52. #endif
  53. #define LOOPS 7 /* start value moved into the loop counter out4 */
  54. #define global0 %g0
  55. #define global1 %g1
  56. #define global2 %g2
  57. #define global3 %g3
  58. #define global4 %g4
  59. #define global5 %g5
  60. #define local0 %l0
  61. #define local1 %l1
  62. #define local2 %l2
  63. #define local3 %l3
  64. #define local4 %l4
  65. #define local5 %l5
  66. #define local7 %l6 /* NOTE(review): local7 is %l6 and local6 is %l7, crossed naming - confirm intentional */
  67. #define local6 %l7
  68. #define in0 %i0
  69. #define in1 %i1
  70. #define in2 %i2
  71. #define in3 %i3
  72. #define in4 %i4
  73. #define in5 %i5
  74. #define in6 %i6
  75. #define in7 %i7
  76. #define out0 %o0
  77. #define out1 %o1
  78. #define out2 %o2
  79. #define out3 %o3
  80. #define out4 %o4
  81. #define out5 %o5
  82. #define out6 %o6
  83. #define out7 %o7
  84. #define stub stb /* byte store used by the store macros */
  85. changequote({,})
  86. ! Macro definitions:
  87. ! {ip_macro}
  88. !
  89. ! The logic used in initial and final permutations is the same as in
  90. ! the C code. The permutations are done with a clever shift, xor, and
  91. ! and-mask technique.
  92. !
  93. ! The macro also sets address sbox 2 to 5 in global 2 to 5 (computed from
  94. ! address sbox 1 expected in global1), address sbox 6 in local6, and address sbox 8 in out3.
  95. !
  96. ! Rotates the halves 3 left to bring the sbox bits in convenient positions.
  97. !
  98. ! Loads key first round from address in parameter 5 to out0, out1.
  99. !
  100. ! After the original LibDES initial permutation, the resulting left
  101. ! is in the variable initially used for right and vice versa. The macro
  102. ! implements the possibility to keep the halves in the original registers.
  103. !
  104. ! parameter 1 left
  105. ! parameter 2 right
  106. ! parameter 3 result left (modify in first round)
  107. ! parameter 4 result right (use in first round)
  108. ! parameter 5 key address
  109. ! parameter 6 1/2 for include encryption/decryption
  110. ! parameter 7 1 for move in1 to in3
  111. ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
  112. ! parameter 9 1 for load ks3 and ks2 to in4 and in3
  113. define(ip_macro, {
  114. ! {ip_macro}: initial permutation, sbox addresses, first round key
  115. ! $1 $2 $4 $3 $5 $6 $7 $8 $9
  116. ld [out2+256], local1 ! mask for swap step 1
  117. srl $2, 4, local4 ! swap step 1: distance 4
  118. xor local4, $1, local4
  119. ifelse($7,1,{mov in1, in3},{nop})
  120. ld [out2+260], local2 ! mask for swap step 2
  121. and local4, local1, local4
  122. ifelse($8,1,{mov in3, in4},{})
  123. ifelse($8,2,{mov in4, in3},{})
  124. ld [out2+280], out4 ! loop counter
  125. sll local4, 4, local1
  126. xor $1, local4, $1
  127. ld [out2+264], local3 ! mask for swap step 3
  128. srl $1, 16, local4 ! swap step 2: distance 16
  129. xor $2, local1, $2
  130. ifelse($9,1,{LDPTR KS3, in4},{})
  131. xor local4, $2, local4
  132. nop !sethi %hi(DES_SPtrans), global1 ! sbox addr
  133. ifelse($9,1,{LDPTR KS2, in3},{})
  134. and local4, local2, local4
  135. nop !or global1, %lo(DES_SPtrans), global1 ! sbox addr
  136. sll local4, 16, local1
  137. xor $2, local4, $2
  138. srl $2, 2, local4 ! swap step 3: distance 2
  139. xor $1, local1, $1
  140. sethi %hi(16711680), local5 ! 16711680 is 0x00ff0000
  141. xor local4, $1, local4
  142. and local4, local3, local4
  143. or local5, 255, local5 ! local5 = 0x00ff00ff, mask for swap step 4
  144. sll local4, 2, local2
  145. xor $1, local4, $1
  146. srl $1, 8, local4 ! swap step 4: distance 8
  147. xor $2, local2, $2
  148. xor local4, $2, local4
  149. add global1, 768, global4 ! address sbox 4
  150. and local4, local5, local4
  151. add global1, 1024, global5 ! address sbox 5
  152. ld [out2+272], local7 ! mask for swap step 5
  153. sll local4, 8, local1
  154. xor $2, local4, $2
  155. srl $2, 1, local4 ! swap step 5: distance 1
  156. xor $1, local1, $1
  157. ld [$5], out0 ! key 7531
  158. xor local4, $1, local4
  159. add global1, 256, global2 ! address sbox 2
  160. ld [$5+4], out1 ! key 8642
  161. and local4, local7, local4
  162. add global1, 512, global3 ! address sbox 3
  163. sll local4, 1, local1
  164. xor $1, local4, $1
  165. sll $1, 3, local3 ! start rotate 3 left of $1
  166. xor $2, local1, $2
  167. sll $2, 3, local2 ! start rotate 3 left of $2
  168. add global1, 1280, local6 ! address sbox 6
  169. srl $1, 29, local4
  170. add global1, 1792, out3 ! address sbox 8
  171. srl $2, 29, local1
  172. or local4, local3, $4 ! $4 = $1 rotated 3 left
  173. or local2, local1, $3 ! $3 = $2 rotated 3 left
  174. ifelse($6, 1, {
  175. ld [out2+284], local5 ! 0x0000FC00 used in the rounds
  176. or local2, local1, $3
  177. xor $4, out0, local1
  178. call .des_enc.1
  179. and local1, 252, local1 ! delay slot: sbox 1 offset for the first round
  180. },{})
  181. ifelse($6, 2, {
  182. ld [out2+284], local5 ! 0x0000FC00 used in the rounds
  183. or local2, local1, $3
  184. xor $4, out0, local1
  185. call .des_dec.1
  186. and local1, 252, local1 ! delay slot: sbox 1 offset for the first round
  187. },{})
  188. })
  189. ! {rounds_macro}
  190. !
  191. ! The logic used in the DES rounds is the same as in the C code,
  192. ! except that calculations for sbox 1 and sbox 5 begin before
  193. ! the previous round is finished.
  194. !
  195. ! In each round one half (work) is modified based on key and the
  196. ! other half (use).
  197. !
  198. ! In this version we do two rounds in a loop repeated 7 times
  199. ! and two rounds separately.
  200. !
  201. ! One half has the bits for the sboxes in the following positions:
  202. !
  203. ! 777777xx555555xx333333xx111111xx
  204. !
  205. ! 88xx666666xx444444xx222222xx8888
  206. !
  207. ! The bits for each sbox are xor-ed with the key bits for that box.
  208. ! The above xx bits are cleared, and the result used for lookup in
  209. ! the sbox table. Each sbox entry contains the 4 output bits permuted
  210. ! into 32 bits according to the P permutation.
  211. !
  212. ! In the description of DES, left and right are switched after
  213. ! each round, except after last round. In this code the original
  214. ! left and right are kept in the same register in all rounds, meaning
  215. ! that after the 16 rounds the result for right is in the register
  216. ! originally used for left.
  217. !
  218. ! parameter 1 first work (left in first round)
  219. ! parameter 2 first use (right in first round)
  220. ! parameter 3 enc/dec 1/-1
  221. ! parameter 4 loop label
  222. ! parameter 5 key address register
  223. ! parameter 6 optional address for key next encryption/decryption
  224. ! parameter 7 not empty for include retl
  225. !
  226. ! also compares in2 to 8
  227. define(rounds_macro, {
  228. ! {rounds_macro}: two rounds per loop pass, then two more rounds
  229. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  230. xor $2, out0, local1
  231. ld [out2+284], local5 ! 0x0000FC00
  232. ba $4
  233. and local1, 252, local1 ! delay slot: sbox 1 offset for the first round
  234. .align 32
  235. $4:
  236. ! local6 is address sbox 6
  237. ! out3 is address sbox 8
  238. ! out4 is loop counter
  239. ld [global1+local1], local1
  240. xor $2, out1, out1 ! 8642
  241. xor $2, out0, out0 ! 7531
  242. ! fmovs %f0, %f0 ! fxor used for alignment
  243. srl out1, 4, local0 ! rotate 4 right
  244. and out0, local5, local3 ! 3
  245. ! fmovs %f0, %f0
  246. ld [$5+$3*8], local7 ! key 7531 next round
  247. srl local3, 8, local3 ! 3
  248. and local0, 252, local2 ! 2
  249. ! fmovs %f0, %f0
  250. ld [global3+local3],local3 ! 3
  251. sll out1, 28, out1 ! rotate
  252. xor $1, local1, $1 ! 1 finished, local1 now sbox 7
  253. ld [global2+local2], local2 ! 2
  254. srl out0, 24, local1 ! 7
  255. or out1, local0, out1 ! rotate
  256. ldub [out2+local1], local1 ! 7 (and 0xFC)
  257. srl out1, 24, local0 ! 8
  258. and out1, local5, local4 ! 4
  259. ldub [out2+local0], local0 ! 8 (and 0xFC)
  260. srl local4, 8, local4 ! 4
  261. xor $1, local2, $1 ! 2 finished local2 now sbox 6
  262. ld [global4+local4],local4 ! 4
  263. srl out1, 16, local2 ! 6
  264. xor $1, local3, $1 ! 3 finished local3 now sbox 5
  265. ld [out3+local0],local0 ! 8
  266. and local2, 252, local2 ! 6
  267. add global1, 1536, local5 ! address sbox 7
  268. ld [local6+local2], local2 ! 6
  269. srl out0, 16, local3 ! 5
  270. xor $1, local4, $1 ! 4 finished
  271. ld [local5+local1],local1 ! 7
  272. and local3, 252, local3 ! 5
  273. xor $1, local0, $1 ! 8 finished
  274. ld [global5+local3],local3 ! 5
  275. xor $1, local2, $1 ! 6 finished
  276. subcc out4, 1, out4 ! sets the condition codes tested by bne at the loop end
  277. ld [$5+$3*8+4], out0 ! key 8642 next round
  278. xor $1, local7, local2 ! sbox 5 next round
  279. xor $1, local1, $1 ! 7 finished
  280. srl local2, 16, local2 ! sbox 5 next round
  281. xor $1, local3, $1 ! 5 finished
  282. ld [$5+$3*16+4], out1 ! key 8642 next round again
  283. and local2, 252, local2 ! sbox5 next round
  284. ! next round
  285. xor $1, local7, local7 ! 7531
  286. ld [global5+local2], local2 ! 5
  287. srl local7, 24, local3 ! 7
  288. xor $1, out0, out0 ! 8642
  289. ldub [out2+local3], local3 ! 7 (and 0xFC)
  290. srl out0, 4, local0 ! rotate 4 right
  291. and local7, 252, local1 ! 1
  292. sll out0, 28, out0 ! rotate
  293. xor $2, local2, $2 ! 5 finished local2 used
  294. srl local0, 8, local4 ! 4
  295. and local0, 252, local2 ! 2
  296. ld [local5+local3], local3 ! 7
  297. srl local0, 16, local5 ! 6
  298. or out0, local0, out0 ! rotate
  299. ld [global2+local2], local2 ! 2
  300. srl out0, 24, local0 ! 8
  301. ld [$5+$3*16], out0 ! key 7531 next round
  302. and local4, 252, local4 ! 4
  303. and local5, 252, local5 ! 6
  304. ld [global4+local4], local4 ! 4
  305. xor $2, local3, $2 ! 7 finished local3 used
  306. and local0, 252, local0 ! 8
  307. ld [local6+local5], local5 ! 6
  308. xor $2, local2, $2 ! 2 finished local2 now sbox 3
  309. srl local7, 8, local2 ! 3 start
  310. ld [out3+local0], local0 ! 8
  311. xor $2, local4, $2 ! 4 finished
  312. and local2, 252, local2 ! 3
  313. ld [global1+local1], local1 ! 1
  314. xor $2, local5, $2 ! 6 finished local5 used
  315. ld [global3+local2], local2 ! 3
  316. xor $2, local0, $2 ! 8 finished
  317. add $5, $3*16, $5 ! enc add 16, dec add -16 to key pointer (two 8-byte round keys)
  318. ld [out2+284], local5 ! 0x0000FC00
  319. xor $2, out0, local4 ! sbox 1 next round
  320. xor $2, local1, $2 ! 1 finished
  321. xor $2, local2, $2 ! 3 finished
  322. bne $4
  323. and local4, 252, local1 ! sbox 1 next round
  324. ! two rounds more:
  325. ld [global1+local1], local1 ! 1
  326. xor $2, out1, out1 ! 8642
  327. xor $2, out0, out0 ! 7531
  328. srl out1, 4, local0 ! rotate
  329. and out0, local5, local3 ! 3
  330. ld [$5+$3*8], local7 ! key 7531
  331. srl local3, 8, local3 ! 3
  332. and local0, 252, local2 ! 2
  333. ld [global3+local3],local3 ! 3
  334. sll out1, 28, out1 ! rotate
  335. xor $1, local1, $1 ! 1 finished, local1 now sbox 7
  336. ld [global2+local2], local2 ! 2
  337. srl out0, 24, local1 ! 7
  338. or out1, local0, out1 ! rotate
  339. ldub [out2+local1], local1 ! 7 (and 0xFC)
  340. srl out1, 24, local0 ! 8
  341. and out1, local5, local4 ! 4
  342. ldub [out2+local0], local0 ! 8 (and 0xFC)
  343. srl local4, 8, local4 ! 4
  344. xor $1, local2, $1 ! 2 finished local2 now sbox 6
  345. ld [global4+local4],local4 ! 4
  346. srl out1, 16, local2 ! 6
  347. xor $1, local3, $1 ! 3 finished local3 now sbox 5
  348. ld [out3+local0],local0 ! 8
  349. and local2, 252, local2 ! 6
  350. add global1, 1536, local5 ! address sbox 7
  351. ld [local6+local2], local2 ! 6
  352. srl out0, 16, local3 ! 5
  353. xor $1, local4, $1 ! 4 finished
  354. ld [local5+local1],local1 ! 7
  355. and local3, 252, local3 ! 5
  356. xor $1, local0, $1 ! 8 finished
  357. ld [global5+local3],local3 ! 5
  358. xor $1, local2, $1 ! 6 finished
  359. cmp in2, 8 ! condition codes left for the code after the expansion
  360. ifelse($6,{}, {}, {ld [out2+280], out4}) ! loop counter
  361. xor $1, local7, local2 ! sbox 5 next round
  362. xor $1, local1, $1 ! 7 finished
  363. ld [$5+$3*8+4], out0 ! key 8642
  364. srl local2, 16, local2 ! sbox 5 next round
  365. xor $1, local3, $1 ! 5 finished
  366. and local2, 252, local2 ! sbox 5 next round
  367. ! next round (two rounds more)
  368. xor $1, local7, local7 ! 7531
  369. ld [global5+local2], local2 ! 5
  370. srl local7, 24, local3 ! 7
  371. xor $1, out0, out0 ! 8642
  372. ldub [out2+local3], local3 ! 7 (and 0xFC)
  373. srl out0, 4, local0 ! rotate
  374. and local7, 252, local1 ! 1
  375. sll out0, 28, out0 ! rotate
  376. xor $2, local2, $2 ! 5 finished local2 used
  377. srl local0, 8, local4 ! 4
  378. and local0, 252, local2 ! 2
  379. ld [local5+local3], local3 ! 7
  380. srl local0, 16, local5 ! 6
  381. or out0, local0, out0 ! rotate
  382. ld [global2+local2], local2 ! 2
  383. srl out0, 24, local0 ! 8
  384. ifelse($6,{}, {}, {ld [$6], out0}) ! key next encryption/decryption
  385. and local4, 252, local4 ! 4
  386. and local5, 252, local5 ! 6
  387. ld [global4+local4], local4 ! 4
  388. xor $2, local3, $2 ! 7 finished local3 used
  389. and local0, 252, local0 ! 8
  390. ld [local6+local5], local5 ! 6
  391. xor $2, local2, $2 ! 2 finished local2 now sbox 3
  392. srl local7, 8, local2 ! 3 start
  393. ld [out3+local0], local0 ! 8
  394. xor $2, local4, $2 ! 4 finished
  395. and local2, 252, local2 ! 3
  396. ld [global1+local1], local1 ! 1
  397. xor $2, local5, $2 ! 6 finished local5 used
  398. ld [global3+local2], local2 ! 3
  399. srl $1, 3, local3 ! $1 shifted 3 right, left in local3 for the final permutation
  400. xor $2, local0, $2 ! 8 finished
  401. ifelse($6,{}, {}, {ld [$6+4], out1}) ! key next encryption/decryption
  402. sll $1, 29, local4 ! $1 shifted 29 left, left in local4 for the final permutation
  403. xor $2, local1, $2 ! 1 finished
  404. ifelse($7,{}, {}, {retl})
  405. xor $2, local2, $2 ! 3 finished (delay slot when retl is emitted)
  406. })
  407. ! {fp_macro}
  408. !
  409. ! parameter 1 right (original left)
  410. ! parameter 2 left (original right)
  411. ! parameter 3 1 for optional store to [in0]
  412. ! parameter 4 1 for load input/output address to local5/7
  413. !
  414. ! The final permutation logic switches the halves, meaning that
  415. ! left and right end up in the registers originally used.
  416. define(fp_macro, {
  417. ! {fp_macro}: undo the rotate, then final permutation in five swap steps
  418. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  419. ! initially undo the rotate 3 left done after initial permutation
  420. ! original left is received shifted 3 right and 29 left in local3/4
  421. sll $2, 29, local1
  422. or local3, local4, $1 ! $1 = original left rotated 3 right
  423. srl $2, 3, $2
  424. sethi %hi(0x55555555), local2
  425. or $2, local1, $2 ! $2 rotated 3 right
  426. or local2, %lo(0x55555555), local2 ! mask for swap step 1
  427. srl $2, 1, local3 ! swap step 1: distance 1
  428. sethi %hi(0x00ff00ff), local1
  429. xor local3, $1, local3
  430. or local1, %lo(0x00ff00ff), local1 ! mask for swap step 2
  431. and local3, local2, local3
  432. sethi %hi(0x33333333), local4
  433. sll local3, 1, local2
  434. xor $1, local3, $1
  435. srl $1, 8, local3 ! swap step 2: distance 8
  436. xor $2, local2, $2
  437. xor local3, $2, local3
  438. or local4, %lo(0x33333333), local4 ! mask for swap step 3
  439. and local3, local1, local3
  440. sethi %hi(0x0000ffff), local1
  441. sll local3, 8, local2
  442. xor $2, local3, $2
  443. srl $2, 2, local3 ! swap step 3: distance 2
  444. xor $1, local2, $1
  445. xor local3, $1, local3
  446. or local1, %lo(0x0000ffff), local1 ! mask for swap step 4
  447. and local3, local4, local3
  448. sethi %hi(0x0f0f0f0f), local4
  449. sll local3, 2, local2
  450. ifelse($4,1, {LDPTR INPUT, local5})
  451. xor $1, local3, $1
  452. ifelse($4,1, {LDPTR OUTPUT, local7})
  453. srl $1, 16, local3 ! swap step 4: distance 16
  454. xor $2, local2, $2
  455. xor local3, $2, local3
  456. or local4, %lo(0x0f0f0f0f), local4 ! mask for swap step 5
  457. and local3, local1, local3
  458. sll local3, 16, local2
  459. xor $2, local3, local1 ! updated $2 value is kept in local1 from here on
  460. srl local1, 4, local3 ! swap step 5: distance 4
  461. xor $1, local2, $1
  462. xor local3, $1, local3
  463. and local3, local4, local3
  464. sll local3, 4, local2
  465. xor $1, local3, $1 ! $1 finished
  466. ! optional store:
  467. ifelse($3,1, {st $1, [in0]})
  468. xor local1, local2, $2 ! $2 finished
  469. ifelse($3,1, {st $2, [in0+4]})
  470. })
  471. ! {fp_ip_macro}
  472. !
  473. ! Does initial permutation for next block mixed with
  474. ! final permutation for current block.
  475. !
  476. ! parameter 1 original left
  477. ! parameter 2 original right
  478. ! parameter 3 left ip
  479. ! parameter 4 right ip
  480. ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
  481. ! 2: mov in4 to in3
  482. !
  483. ! also adds -8 to length in2 and loads loop counter to out4
  484. define(fp_ip_macro, {
  485. ! {fp_ip_macro}: final permutation of $1/$2 interleaved with initial permutation of $3/$4
  486. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  487. define({temp1},{out4})
  488. define({temp2},{local3})
  489. define({ip1},{local1})
  490. define({ip2},{local2})
  491. define({ip4},{local4})
  492. define({ip5},{local5})
  493. ! $1 in local3, local4
  494. ld [out2+256], ip1 ! mask for distance 4
  495. sll out5, 29, temp1 ! fp: undo rotate 3 left of out5
  496. or local3, local4, $1 ! fp: $1 with the rotate undone
  497. srl out5, 3, $2
  498. ifelse($5,2,{mov in4, in3})
  499. ld [out2+272], ip5 ! mask for distance 1
  500. srl $4, 4, local0 ! ip: swap step distance 4
  501. or $2, temp1, $2 ! fp: $2 = out5 rotated 3 right
  502. srl $2, 1, temp1 ! fp: swap step distance 1
  503. xor temp1, $1, temp1
  504. and temp1, ip5, temp1
  505. xor local0, $3, local0
  506. sll temp1, 1, temp2
  507. xor $1, temp1, $1
  508. and local0, ip1, local0
  509. add in2, -8, in2 ! 8 fewer bytes of length remain
  510. sll local0, 4, local7
  511. xor $3, local0, $3
  512. ld [out2+268], ip4 ! mask for distance 8
  513. srl $1, 8, temp1 ! fp: swap step distance 8
  514. xor $2, temp2, $2
  515. ld [out2+260], ip2 ! mask for distance 16
  516. srl $3, 16, local0 ! ip: swap step distance 16
  517. xor $4, local7, $4
  518. xor temp1, $2, temp1
  519. xor local0, $4, local0
  520. and temp1, ip4, temp1
  521. and local0, ip2, local0
  522. sll temp1, 8, temp2
  523. xor $2, temp1, $2
  524. sll local0, 16, local7
  525. xor $4, local0, $4
  526. srl $2, 2, temp1 ! fp: swap step distance 2
  527. xor $1, temp2, $1
  528. ld [out2+264], temp2 ! ip3
  529. srl $4, 2, local0 ! ip: swap step distance 2
  530. xor $3, local7, $3
  531. xor temp1, $1, temp1
  532. xor local0, $3, local0
  533. and temp1, temp2, temp1
  534. and local0, temp2, local0
  535. sll temp1, 2, temp2
  536. xor $1, temp1, $1
  537. sll local0, 2, local7
  538. xor $3, local0, $3
  539. srl $1, 16, temp1 ! fp: swap step distance 16
  540. xor $2, temp2, $2
  541. srl $3, 8, local0 ! ip: swap step distance 8
  542. xor $4, local7, $4
  543. xor temp1, $2, temp1
  544. xor local0, $4, local0
  545. and temp1, ip2, temp1
  546. and local0, ip4, local0
  547. sll temp1, 16, temp2
  548. xor $2, temp1, local4 ! fp: updated $2 value is kept in local4 from here on
  549. sll local0, 8, local7
  550. xor $4, local0, $4
  551. srl $4, 1, local0 ! ip: swap step distance 1
  552. xor $3, local7, $3
  553. srl local4, 4, temp1 ! fp: swap step distance 4
  554. xor local0, $3, local0
  555. xor $1, temp2, $1
  556. and local0, ip5, local0
  557. sll local0, 1, local7
  558. xor temp1, $1, temp1
  559. xor $3, local0, $3
  560. xor $4, local7, $4
  561. sll $3, 3, local5 ! ip: start rotate 3 left of $3
  562. and temp1, ip1, temp1
  563. sll temp1, 4, temp2
  564. xor $1, temp1, $1 ! fp: $1 finished
  565. ifelse($5,1,{LDPTR KS2, in4})
  566. sll $4, 3, local2 ! ip: start rotate 3 left of $4
  567. xor local4, temp2, $2 ! fp: $2 finished
  568. ! reload since used as temporary:
  569. ld [out2+280], out4 ! loop counter
  570. srl $3, 29, local0
  571. ifelse($5,1,{add in4, 120, in4})
  572. ifelse($5,1,{LDPTR KS1, in3})
  573. srl $4, 29, local7
  574. or local0, local5, $4 ! ip: $4 = $3 rotated 3 left
  575. or local2, local7, $3 ! ip: $3 = $4 rotated 3 left
  576. })
  577. ! {load_little_endian}
  578. !
  579. ! parameter 1 address
  580. ! parameter 2 destination left
  581. ! parameter 3 destination right
  582. ! parameter 4 temporary
  583. ! parameter 5 label
  584. define(load_little_endian, {
  585. ! {load_little_endian}: byte-wise load of two 32-bit words from [$1] and [$1+4]
  586. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  587. ! first in memory to rightmost in register
  588. $5:
  589. ldub [$1+3], $2 ! byte 3 becomes the top byte of $2
  590. ldub [$1+2], $4
  591. sll $2, 8, $2
  592. or $2, $4, $2
  593. ldub [$1+1], $4
  594. sll $2, 8, $2
  595. or $2, $4, $2
  596. ldub [$1+0], $4
  597. sll $2, 8, $2
  598. or $2, $4, $2 ! $2 complete: bytes 0 to 3
  599. ldub [$1+3+4], $3 ! byte 7 becomes the top byte of $3
  600. ldub [$1+2+4], $4
  601. sll $3, 8, $3
  602. or $3, $4, $3
  603. ldub [$1+1+4], $4
  604. sll $3, 8, $3
  605. or $3, $4, $3
  606. ldub [$1+0+4], $4
  607. sll $3, 8, $3
  608. or $3, $4, $3 ! $3 complete: bytes 4 to 7
  609. $5a:
  610. })
  611. ! {load_little_endian_inc}
  612. !
  613. ! parameter 1 address
  614. ! parameter 2 destination left
  615. ! parameter 3 destination right
  616. ! parameter 4 temporary
  617. ! parameter 5 label
  618. !
  619. ! adds 8 to address
  620. define(load_little_endian_inc, {
  621. ! {load_little_endian_inc}: same byte-wise load, and $1 is advanced by 8
  622. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  623. ! first in memory to rightmost in register
  624. $5:
  625. ldub [$1+3], $2 ! byte 3 becomes the top byte of $2
  626. ldub [$1+2], $4
  627. sll $2, 8, $2
  628. or $2, $4, $2
  629. ldub [$1+1], $4
  630. sll $2, 8, $2
  631. or $2, $4, $2
  632. ldub [$1+0], $4
  633. sll $2, 8, $2
  634. or $2, $4, $2 ! $2 complete: bytes 0 to 3
  635. ldub [$1+3+4], $3 ! byte 7 becomes the top byte of $3
  636. add $1, 8, $1 ! advance the address; offsets below are corrected by -8
  637. ldub [$1+2+4-8], $4
  638. sll $3, 8, $3
  639. or $3, $4, $3
  640. ldub [$1+1+4-8], $4
  641. sll $3, 8, $3
  642. or $3, $4, $3
  643. ldub [$1+0+4-8], $4
  644. sll $3, 8, $3
  645. or $3, $4, $3 ! $3 complete: bytes 4 to 7
  646. $5a:
  647. })
  648. ! {load_n_bytes}
  649. !
  650. ! Loads 1 to 7 bytes little endian
  651. ! Remaining bytes are zeroed.
  652. !
  653. ! parameter 1 address
  654. ! parameter 2 length
  655. ! parameter 3 destination register left
  656. ! parameter 4 destination register right
  657. ! parameter 5 temp
  658. ! parameter 6 temp2
  659. ! parameter 7 label
  660. ! parameter 8 return label
  661. define(load_n_bytes, {
  662. ! {load_n_bytes}: jump into a fall-through chain so that only $2 bytes are loaded
  663. ! $1 $2 $5 $6 $7 $8 $7 $8 $9
  664. $7.0: call .+8 ! %o7 = address of this call
  665. sll $2, 2, $6 ! delay slot: length times 4 = byte offset into the jump table
  666. add %o7,$7.jmp.table-$7.0,$5 ! $5 = address of the jump table
  667. add $5, $6, $5 ! $5 = address of the table entry for this length
  668. mov 0, $4 ! clear destination for bytes 0 to 3
  669. ld [$5], $5 ! $5 = entry point offset relative to the call
  670. jmp %o7+$5
  671. mov 0, $3 ! delay slot: clear destination for bytes 4 to 6
  672. $7.7:
  673. ldub [$1+6], $5
  674. sll $5, 16, $5
  675. or $3, $5, $3
  676. $7.6:
  677. ldub [$1+5], $5
  678. sll $5, 8, $5
  679. or $3, $5, $3
  680. $7.5:
  681. ldub [$1+4], $5
  682. or $3, $5, $3 ! $3 now holds bytes 4 to 6
  683. $7.4:
  684. ldub [$1+3], $5
  685. sll $5, 24, $5
  686. or $4, $5, $4
  687. $7.3:
  688. ldub [$1+2], $5
  689. sll $5, 16, $5
  690. or $4, $5, $4
  691. $7.2:
  692. ldub [$1+1], $5
  693. sll $5, 8, $5
  694. or $4, $5, $4
  695. $7.1:
  696. ldub [$1+0], $5
  697. ba $8
  698. or $4, $5, $4 ! delay slot: $4 now holds bytes 0 to 3
  699. .align 4
  700. $7.jmp.table:
  701. .word 0 ! length 0 is not supported: offset 0 would jump back to the call
  702. .word $7.1-$7.0
  703. .word $7.2-$7.0
  704. .word $7.3-$7.0
  705. .word $7.4-$7.0
  706. .word $7.5-$7.0
  707. .word $7.6-$7.0
  708. .word $7.7-$7.0
  709. })
  710. ! {store_little_endian}
  711. !
  712. ! parameter 1 address
  713. ! parameter 2 source left
  714. ! parameter 3 source right
  715. ! parameter 4 temporary; parameter 5 label
  716. define(store_little_endian, {
  717. ! {store_little_endian}: byte-wise store of $2 to [$1] and $3 to [$1+4]
  718. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  719. ! rightmost in register to first in memory
  720. $5:
  721. and $2, 255, $4
  722. stub $4, [$1+0] ! low byte of $2
  723. srl $2, 8, $4
  724. and $4, 255, $4
  725. stub $4, [$1+1]
  726. srl $2, 16, $4
  727. and $4, 255, $4
  728. stub $4, [$1+2]
  729. srl $2, 24, $4 ! no mask needed: only the top byte remains
  730. stub $4, [$1+3]
  731. and $3, 255, $4
  732. stub $4, [$1+0+4] ! low byte of $3
  733. srl $3, 8, $4
  734. and $4, 255, $4
  735. stub $4, [$1+1+4]
  736. srl $3, 16, $4
  737. and $4, 255, $4
  738. stub $4, [$1+2+4]
  739. srl $3, 24, $4 ! no mask needed: only the top byte remains
  740. stub $4, [$1+3+4]
  741. $5a:
  742. })
  743. ! {store_n_bytes}
  744. !
  745. ! Stores 1 to 7 bytes little endian
  746. !
  747. ! parameter 1 address
  748. ! parameter 2 length
  749. ! parameter 3 source register left
  750. ! parameter 4 source register right
  751. ! parameter 5 temp
  752. ! parameter 6 temp2
  753. ! parameter 7 label
  754. ! parameter 8 return label
  755. define(store_n_bytes, {
  756. ! {store_n_bytes}: jump into a fall-through chain so that only $2 bytes are stored
  757. ! $1 $2 $5 $6 $7 $8 $7 $8 $9
  758. $7.0: call .+8 ! %o7 = address of this call
  759. sll $2, 2, $6 ! delay slot: length times 4 = byte offset into the jump table
  760. add %o7,$7.jmp.table-$7.0,$5 ! $5 = address of the jump table
  761. add $5, $6, $5 ! $5 = address of the table entry for this length
  762. ld [$5], $5 ! $5 = entry point offset relative to the call
  763. jmp %o7+$5
  764. nop
  765. $7.7:
  766. srl $3, 16, $5
  767. and $5, 0xff, $5
  768. stub $5, [$1+6]
  769. $7.6:
  770. srl $3, 8, $5
  771. and $5, 0xff, $5
  772. stub $5, [$1+5]
  773. $7.5:
  774. and $3, 0xff, $5
  775. stub $5, [$1+4] ! bytes 4 to 6 come from $3
  776. $7.4:
  777. srl $4, 24, $5
  778. stub $5, [$1+3]
  779. $7.3:
  780. srl $4, 16, $5
  781. and $5, 0xff, $5
  782. stub $5, [$1+2]
  783. $7.2:
  784. srl $4, 8, $5
  785. and $5, 0xff, $5
  786. stub $5, [$1+1]
  787. $7.1:
  788. and $4, 0xff, $5
  789. ba $8
  790. stub $5, [$1] ! delay slot: bytes 0 to 3 come from $4
  791. .align 4
  792. $7.jmp.table:
  793. .word 0 ! length 0 is not supported: offset 0 would jump back to the call
  794. .word $7.1-$7.0
  795. .word $7.2-$7.0
  796. .word $7.3-$7.0
  797. .word $7.4-$7.0
  798. .word $7.5-$7.0
  799. .word $7.6-$7.0
  800. .word $7.7-$7.0
  801. })
  802. define(testvalue,{1})
  803. define(register_init, {
  804. ! For test purposes: fill the working registers with one known value
  805. sethi %hi(testvalue), local0
  806. or local0, %lo(testvalue), local0 ! local0 = the fill value
  807. ifelse($1,{},{}, {mov local0, $1})
  808. ifelse($2,{},{}, {mov local0, $2})
  809. ifelse($3,{},{}, {mov local0, $3})
  810. ifelse($4,{},{}, {mov local0, $4})
  811. ifelse($5,{},{}, {mov local0, $5})
  812. ifelse($6,{},{}, {mov local0, $6})
  813. ifelse($7,{},{}, {mov local0, $7})
  814. ifelse($8,{},{}, {mov local0, $8})
  815. mov local0, local1 ! the registers below are always filled
  816. mov local0, local2
  817. mov local0, local3
  818. mov local0, local4
  819. mov local0, local5
  820. mov local0, local7
  821. mov local0, local6
  822. mov local0, out0
  823. mov local0, out1
  824. mov local0, out2
  825. mov local0, out3
  826. mov local0, out4
  827. mov local0, out5
  828. mov local0, global1
  829. mov local0, global2
  830. mov local0, global3
  831. mov local0, global4
  832. mov local0, global5
  833. })
  834. .section ".text"
  835. .align 32
  836. .des_enc:
  837. ! 16 encryption rounds on in5/out5 as a leaf routine (expansion ends with retl)
  838. ! key address in3
  839. ! loads key next encryption/decryption first round from [in4]
  840. rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl)
  841. .align 32
  842. .des_dec:
  843. ! implemented with out5 as first parameter to avoid
  844. ! register exchange in ede modes
  845. ! key address in4
  846. ! loads key next encryption/decryption first round from [in3]
  847. rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl)
  847. ! void DES_encrypt1(data, ks, enc)
  848. ! *******************************
  849. .align 32
  850. .global DES_encrypt1
  851. .type DES_encrypt1,#function
  852. DES_encrypt1:
  853. save %sp, FRAME, %sp
  854. sethi %hi(.PIC.DES_SPtrans-1f),global1
  855. or global1,%lo(.PIC.DES_SPtrans-1f),global1 ! global1 = distance from label 1 to .PIC.DES_SPtrans
  856. 1: call .+8 ! %o7 = address of label 1
  857. add %o7,global1,global1 ! delay slot: global1 = address of .PIC.DES_SPtrans
  858. sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
  859. ld [in0], in5 ! left
  860. cmp in2, 0 ! enc
  861. be .encrypt.dec
  862. ld [in0+4], out5 ! right
  863. ! parameter 6 1/2 for include encryption/decryption
  864. ! parameter 7 1 for move in1 to in3
  865. ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
  866. ip_macro(in5, out5, in5, out5, in3, 0, 1, 1) ! parameter 6 is 0: no call, rounds follow inline
  867. rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used
  868. fp_macro(in5, out5, 1) ! 1 for store to [in0]
  869. ret
  870. restore
  871. .encrypt.dec:
  872. add in1, 120, in3 ! use last subkey for first round
  873. ! parameter 6 1/2 for include encryption/decryption
  874. ! parameter 7 1 for move in1 to in3
  875. ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
  876. ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec, ks in4
  877. fp_macro(out5, in5, 1) ! 1 for store to [in0]
  878. ret
  879. restore
  880. .DES_encrypt1.end:
  881. .size DES_encrypt1,.DES_encrypt1.end-DES_encrypt1
  882. ! void DES_encrypt2(data, ks, enc)
  883. !*********************************
  884. ! encrypts/decrypts without initial/final permutation
  885. .align 32
  886. .global DES_encrypt2
  887. .type DES_encrypt2,#function
  888. DES_encrypt2:
  889. save %sp, FRAME, %sp
  890. sethi %hi(.PIC.DES_SPtrans-1f),global1
  891. or global1,%lo(.PIC.DES_SPtrans-1f),global1 ! global1 = distance from label 1 to .PIC.DES_SPtrans
  892. 1: call .+8 ! %o7 = address of label 1
  893. add %o7,global1,global1 ! delay slot: global1 = address of .PIC.DES_SPtrans
  894. sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
  895. ! Set sbox address 2 to 6 and 8 and rotate halves 3 left
  896. ! Errors caught by destest? Yes. Still? *NO*
  897. !sethi %hi(DES_SPtrans), global1 ! address sbox 1
  898. !or global1, %lo(DES_SPtrans), global1 ! sbox 1
  899. add global1, 256, global2 ! sbox 2
  900. add global1, 512, global3 ! sbox 3
  901. ld [in0], out5 ! right
  902. add global1, 768, global4 ! sbox 4
  903. add global1, 1024, global5 ! sbox 5
  904. ld [in0+4], in5 ! left
  905. add global1, 1280, local6 ! sbox 6
  906. add global1, 1792, out3 ! sbox 8
  907. ! rotate
  908. sll in5, 3, local5
  909. mov in1, in3 ! key address to in3
  910. sll out5, 3, local7
  911. srl in5, 29, in5
  912. srl out5, 29, out5
  913. add in5, local5, in5 ! the two parts have no bits in common so add acts as or
  914. add out5, local7, out5
  915. cmp in2, 0
  916. ! we use our own stackframe
  917. be .encrypt2.dec
  918. STPTR in0, [%sp+BIAS+ARG0+0*ARGSZ] ! delay slot: save the data pointer, in0 is reused below
  919. ld [in3], out0 ! key 7531 first round
  920. mov LOOPS, out4 ! loop counter
  921. ld [in3+4], out1 ! key 8642 first round
  922. sethi %hi(0x0000FC00), local5
  923. call .des_enc
  924. mov in3, in4 ! delay slot: next-key address for .des_enc
  925. ! rotate
  926. sll in5, 29, in0
  927. srl in5, 3, in5
  928. sll out5, 29, in1
  929. add in5, in0, in5 ! in5 rotated 3 right
  930. srl out5, 3, out5
  931. LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 ! restore the data pointer
  932. add out5, in1, out5 ! out5 rotated 3 right
  933. st in5, [in0]
  934. st out5, [in0+4]
  935. ret
  936. restore
  937. .encrypt2.dec:
  938. add in3, 120, in4 ! in4 = address of the last subkey
  939. ld [in4], out0 ! key 7531 first round
  940. mov LOOPS, out4 ! loop counter
  941. ld [in4+4], out1 ! key 8642 first round
  942. sethi %hi(0x0000FC00), local5
  943. mov in5, local1 ! left expected in out5
  944. mov out5, in5
  945. call .des_dec
  946. mov local1, out5 ! delay slot: completes the exchange of in5 and out5
  947. .encrypt2.finish:
  948. ! rotate
  949. sll in5, 29, in0
  950. srl in5, 3, in5
  951. sll out5, 29, in1
  952. add in5, in0, in5 ! in5 rotated 3 right
  953. srl out5, 3, out5
  954. LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 ! restore the data pointer
  955. add out5, in1, out5 ! out5 rotated 3 right
  956. st out5, [in0] ! halves stored in the opposite order from the enc path
  957. st in5, [in0+4]
  958. ret
  959. restore
  960. .DES_encrypt2.end:
  961. .size DES_encrypt2, .DES_encrypt2.end-DES_encrypt2
  962. ! void DES_encrypt3(data, ks1, ks2, ks3)
  963. ! **************************************
  964. .align 32
  965. .global DES_encrypt3
  966. .type DES_encrypt3,#function
  967. DES_encrypt3:
  968. save %sp, FRAME, %sp
  969. sethi %hi(.PIC.DES_SPtrans-1f),global1
  970. or global1,%lo(.PIC.DES_SPtrans-1f),global1 ! global1 = distance from label 1 to .PIC.DES_SPtrans
  971. 1: call .+8 ! %o7 = address of label 1
  972. add %o7,global1,global1 ! delay slot: global1 = address of .PIC.DES_SPtrans
  973. sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
  974. ld [in0], in5 ! left
  975. add in2, 120, in4 ! ks2
  976. ld [in0+4], out5 ! right
  977. mov in3, in2 ! save ks3
  978. ! parameter 6 1/2 for include encryption/decryption
  979. ! parameter 7 1 for mov in1 to in3
  980. ! parameter 8 1 for mov in3 to in4
  981. ! parameter 9 1 for load ks3 and ks2 to in4 and in3
  982. ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0)
  983. call .des_dec
  984. mov in2, in3 ! preload ks3
  985. call .des_enc
  986. nop
  987. fp_macro(in5, out5, 1)
  988. ret
  989. restore
  990. .DES_encrypt3.end:
  991. .size DES_encrypt3,.DES_encrypt3.end-DES_encrypt3
  992. ! void DES_decrypt3(data, ks1, ks2, ks3)
  993. ! **************************************
  994. .align 32
  995. .global DES_decrypt3
  996. .type DES_decrypt3,#function
  997. DES_decrypt3:
  998. save %sp, FRAME, %sp
  999. sethi %hi(.PIC.DES_SPtrans-1f),global1
  1000. or global1,%lo(.PIC.DES_SPtrans-1f),global1 ! global1 = distance from label 1 to .PIC.DES_SPtrans
  1001. 1: call .+8 ! %o7 = address of label 1
  1002. add %o7,global1,global1 ! delay slot: global1 = address of .PIC.DES_SPtrans
  1003. sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
  1004. ld [in0], in5 ! left
  1005. add in3, 120, in4 ! ks3
  1006. ld [in0+4], out5 ! right
  1007. mov in2, in3 ! ks2
  1008. ! parameter 6 1/2 for include encryption/decryption
  1009. ! parameter 7 1 for mov in1 to in3
  1010. ! parameter 8 1 for mov in3 to in4
  1011. ! parameter 9 1 for load ks3 and ks2 to in4 and in3
  1012. ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0)
  1013. call .des_enc
  1014. add in1, 120, in4 ! preload ks1
  1015. call .des_dec
  1016. nop
  1017. fp_macro(out5, in5, 1)
  1018. ret
  1019. restore
  1020. .DES_decrypt3.end:
  1021. .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3
  1022. ! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
  1023. ! *****************************************************************
        ! Chained processing of length bytes from input to output with a
        ! single key schedule. A nonzero enc takes the encryption path
        ! that follows, enc equal to zero branches to .ncbc.dec.
        ! Each block is combined with the chaining value by xor, and the
        ! last chaining value is stored back through the ivec pointer
        ! before returning. The decryption path returns without that
        ! store when length is zero or negative.
        ! Registers on entry as used below: in0 = input, in1 = output,
        ! in2 = length, in3 = schedule, in4 = ivec, in5 = enc.
  1024. .align 32
  1025. .global DES_ncbc_encrypt
  1026. .type DES_ncbc_encrypt,#function
  1027. DES_ncbc_encrypt:
  1028. save %sp, FRAME, %sp
        ! m4 names for three pointer-sized stack slots at argument
        ! positions 0, 1 and 4 of the frame created above. They are used
        ! to park the input, output and ivec pointers while their
        ! registers are reused.
  1029. define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] })
  1030. define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
  1031. define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] })
        ! Position independent address setup: call .+8 leaves the address
        ! of label 1 in %o7; adding the assembly-time distance
        ! .PIC.DES_SPtrans-1f gives the run-time address of the table.
  1032. sethi %hi(.PIC.DES_SPtrans-1f),global1
  1033. or global1,%lo(.PIC.DES_SPtrans-1f),global1
  1034. 1: call .+8
  1035. add %o7,global1,global1 ! delay slot: global1 = address of .PIC.DES_SPtrans
  1036. sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
  1037. cmp in5, 0 ! enc
  1038. be .ncbc.dec ! enc == 0 selects decryption
  1039. STPTR in4, IVEC ! delay slot, runs on both paths: park the ivec pointer
  1040. ! addr left right temp label
  1041. load_little_endian(in4, in5, out5, local3, .LLE1) ! iv
  1042. addcc in2, -8, in2 ! bytes missing when first block done
  1043. bl .ncbc.enc.seven.or.less ! fewer than 8 bytes in total
  1044. mov in3, in4 ! schedule
  1045. .ncbc.enc.next.block:
  1046. load_little_endian(in0, out4, global4, local3, .LLE2) ! block
        ! Entry point for a short final block: the partial-load code
        ! further down returns here with the bytes in global4 and out4.
  1047. .ncbc.enc.next.block_1:
  1048. xor in5, out4, in5 ! iv xor
  1049. xor out5, global4, out5 ! iv xor
  1050. ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
  1051. ip_macro(in5, out5, in5, out5, in3, 0, 0, 2)
        ! Loop entry used when the initial permutation of this block was
        ! already done together with the final permutation of the
        ! previous one.
  1052. .ncbc.enc.next.block_2:
  1053. !// call .des_enc ! compares in2 to 8
  1054. ! rounds inlined for alignment purposes
  1055. add global1, 768, global4 ! address sbox 4 since register used below
  1056. rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3
        ! The condition codes tested next come from the comparison of in2
        ! with 8 mentioned above, which happens inside the rounds.
  1057. bl .ncbc.enc.next.block_fp ! fewer than 8 bytes follow this block
  1058. add in0, 8, in0 ! input address
  1059. ! If 8 or more bytes are to be encrypted after this block,
  1060. ! we combine final permutation for this block with initial
  1061. ! permutation for next block. Load next block:
  1062. load_little_endian(in0, global3, global4, local5, .LLE12)
  1063. ! parameter 1 original left
  1064. ! parameter 2 original right
  1065. ! parameter 3 left ip
  1066. ! parameter 4 right ip
  1067. ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
  1068. ! 2: mov in4 to in3
  1069. !
  1070. ! also adds -8 to length in2 and loads loop counter to out4
  1071. fp_ip_macro(out0, out1, global3, global4, 2)
  1072. store_little_endian(in1, out0, out1, local3, .SLE10) ! block
  1073. ld [in3], out0 ! key 7531 first round next block
  1074. mov in5, local1 ! keep in5, it is overwritten on the next line
  1075. xor global3, out5, in5 ! iv xor next block
  1076. ld [in3+4], out1 ! key 8642
  1077. add global1, 512, global3 ! address sbox 3 since register used
  1078. xor global4, local1, out5 ! iv xor next block
  1079. ba .ncbc.enc.next.block_2
  1080. add in1, 8, in1 ! output address
        ! Path for the last full block and for a padded short block:
        ! plain final permutation, store, then check what is left.
  1081. .ncbc.enc.next.block_fp:
  1082. fp_macro(in5, out5)
  1083. store_little_endian(in1, in5, out5, local3, .SLE1) ! block
  1084. addcc in2, -8, in2 ! bytes missing when next block done
  1085. bpos .ncbc.enc.next.block ! at least one more full block
  1086. add in1, 8, in1 ! delay slot: advance the output address
        ! Here in2 holds the remaining byte count minus 8, so -8 or less
        ! means nothing is left and -7 to -1 means 1 to 7 bytes remain.
  1087. .ncbc.enc.seven.or.less:
  1088. cmp in2, -8
  1089. ble .ncbc.enc.finish
  1090. nop
  1091. add in2, 8, local1 ! bytes to load
  1092. ! addr, length, dest left, dest right, temp, temp2, label, ret label
  1093. load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1)
  1094. ! Loads 1 to 7 bytes little endian to global4, out4
        ! in5 and out5 hold the chaining value at this point; it is
        ! written through the ivec pointer parked at function entry.
  1095. .ncbc.enc.finish:
  1096. LDPTR IVEC, local4
  1097. store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec
  1098. ret
  1099. restore ! delay slot: release the register window
        ! Decryption path. The ivec pointer was parked in its stack slot
        ! by the delay slot of the branch that led here.
  1100. .ncbc.dec:
  1101. STPTR in0, INPUT ! park the input pointer, in0 is reused for the iv below
  1102. cmp in2, 0 ! length
  1103. add in3, 120, in3 ! decryption starts at the last subkey
  1104. LDPTR IVEC, local7 ! ivec
  1105. ble .ncbc.dec.finish ! length <= 0: return without touching ivec
  1106. mov in3, in4 ! schedule
  1107. STPTR in1, OUTPUT ! park the output pointer, in1 is reused for the iv below
  1108. mov in0, local5 ! input
  1109. load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec
  1110. .ncbc.dec.next.block:
  1111. load_little_endian(local5, in5, out5, local3, .LLE4) ! block
  1112. ! parameter 6 1/2 for include encryption/decryption
  1113. ! parameter 7 1 for mov in1 to in3
  1114. ! parameter 8 1 for mov in3 to in4
  1115. ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryption ks in4
  1116. fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7
  1117. ! in2 is bytes left to be stored
  1118. ! in2 is compared to 8 in the rounds
  1119. xor out5, in0, out4 ! iv xor
  1120. bl .ncbc.dec.seven.or.less ! fewer than 8 bytes left to store
  1121. xor in5, in1, global4 ! iv xor
  1122. ! Load ivec next block now, since input and output address might be the same.
  1123. load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! iv
  1124. store_little_endian(local7, out4, global4, local3, .SLE3)
  1125. STPTR local5, INPUT ! remember the input position for the next round
  1126. add local7, 8, local7
  1127. addcc in2, -8, in2
  1128. bg .ncbc.dec.next.block ! more bytes to store
  1129. STPTR local7, OUTPUT ! delay slot: remember the output position
        ! in0 and in1 hold the chaining value for a following call.
  1130. .ncbc.dec.store.iv:
  1131. LDPTR IVEC, local4 ! ivec
  1132. store_little_endian(local4, in0, in1, local5, .SLE4)
  1133. .ncbc.dec.finish:
  1134. ret
  1135. restore ! delay slot: release the register window
        ! Short final block: fetch the chaining value, store only in2
        ! bytes of the result and continue at .ncbc.dec.store.iv, which
        ! is passed as the last argument on the store line.
  1136. .ncbc.dec.seven.or.less:
  1137. load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec
  1138. store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv)
  1139. .DES_ncbc_encrypt.end:
  1140. .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt
  1141. ! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc)
  1142. ! **************************************************************************
        ! Chained processing of length bytes from input to output with
        ! three key schedules. A nonzero enc takes the encryption path
        ! (encryption with ks1, decryption with ks2, encryption with ks3
        ! per block); enc equal to zero branches to .ede3.dec, which runs
        ! the passes in the order ks3, ks2, ks1.
        ! The last chaining value is stored back through the ivec pointer
        ! before returning. The decryption path returns without that
        ! store when length is zero or negative.
        ! Registers on entry as used below: in0 = input, in1 = output,
        ! in2 = length, in3 = ks1, in4 = ks2, in5 = ks3. The ivec and enc
        ! arguments are read from argument positions 6 and 7 relative
        ! to %fp.
  1143. .align 32
  1144. .global DES_ede3_cbc_encrypt
  1145. .type DES_ede3_cbc_encrypt,#function
  1146. DES_ede3_cbc_encrypt:
  1147. save %sp, FRAME, %sp
        ! m4 names for three pointer-sized stack slots at argument
        ! positions 3, 4 and 5 of the frame created above. They keep the
        ! key schedule pointers while in3, in4 and in5 are reused.
  1148. define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] })
  1149. define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] })
  1150. define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] })
        ! Position independent address setup: call .+8 leaves the address
        ! of label 1 in %o7; adding the assembly-time distance
        ! .PIC.DES_SPtrans-1f gives the run-time address of the table.
  1151. sethi %hi(.PIC.DES_SPtrans-1f),global1
  1152. or global1,%lo(.PIC.DES_SPtrans-1f),global1
  1153. 1: call .+8
  1154. add %o7,global1,global1 ! delay slot: global1 = address of .PIC.DES_SPtrans
  1155. sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
  1156. LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc
  1157. LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
  1158. cmp local3, 0 ! enc
  1159. be .ede3.dec ! enc == 0 selects decryption
  1160. STPTR in4, KS2 ! delay slot, runs on both paths: park ks2 unmodified
  1161. STPTR in5, KS3 ! park ks3 before in5 is reused for the iv
  1162. load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec
  1163. addcc in2, -8, in2 ! bytes missing after next block
  1164. bl .ede3.enc.seven.or.less ! fewer than 8 bytes in total
  1165. STPTR in3, KS1 ! delay slot: park ks1
  1166. .ede3.enc.next.block:
  1167. load_little_endian(in0, out4, global4, local3, .LLE7)
        ! Entry point for a short final block: the partial-load code
        ! further down returns here with the bytes in global4 and out4.
  1168. .ede3.enc.next.block_1:
  1169. LDPTR KS2, in4
  1170. xor in5, out4, in5 ! iv xor
  1171. xor out5, global4, out5 ! iv xor
  1172. LDPTR KS1, in3
  1173. add in4, 120, in4 ! for decryption we use last subkey first
  1174. nop
  1175. ip_macro(in5, out5, in5, out5, in3)
        ! Loop entry used when the initial permutation of this block was
        ! already done together with the final permutation of the
        ! previous one.
  1176. .ede3.enc.next.block_2:
  1177. call .des_enc ! ks1 in3
  1178. nop
  1179. call .des_dec ! ks2 in4
  1180. LDPTR KS3, in3 ! delay slot: key pointer for the third pass
  1181. call .des_enc ! ks3 in3 compares in2 to 8
  1182. nop
  1183. bl .ede3.enc.next.block_fp ! fewer than 8 bytes follow this block
  1184. add in0, 8, in0 ! delay slot: advance the input address
  1185. ! If 8 or more bytes are to be encrypted after this block,
  1186. ! we combine final permutation for this block with initial
  1187. ! permutation for next block. Load next block:
  1188. load_little_endian(in0, global3, global4, local5, .LLE11)
  1189. ! parameter 1 original left
  1190. ! parameter 2 original right
  1191. ! parameter 3 left ip
  1192. ! parameter 4 right ip
  1193. ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
  1194. ! 2: mov in4 to in3
  1195. !
  1196. ! also adds -8 to length in2 and loads loop counter to out4
  1197. fp_ip_macro(out0, out1, global3, global4, 1)
  1198. store_little_endian(in1, out0, out1, local3, .SLE9) ! block
  1199. mov in5, local1 ! keep in5, it is overwritten on the next line
  1200. xor global3, out5, in5 ! iv xor next block
  1201. ld [in3], out0 ! key 7531
  1202. add global1, 512, global3 ! address sbox 3
  1203. xor global4, local1, out5 ! iv xor next block
  1204. ld [in3+4], out1 ! key 8642
  1205. add global1, 768, global4 ! address sbox 4
  1206. ba .ede3.enc.next.block_2
  1207. add in1, 8, in1 ! delay slot: advance the output address
        ! Path for the last full block and for a padded short block:
        ! plain final permutation, store, then check what is left.
  1208. .ede3.enc.next.block_fp:
  1209. fp_macro(in5, out5)
  1210. store_little_endian(in1, in5, out5, local3, .SLE5) ! block
  1211. addcc in2, -8, in2 ! bytes missing when next block done
  1212. bpos .ede3.enc.next.block ! at least one more full block
  1213. add in1, 8, in1 ! delay slot: advance the output address
        ! Here in2 holds the remaining byte count minus 8, so -8 or less
        ! means nothing is left and -7 to -1 means 1 to 7 bytes remain.
  1214. .ede3.enc.seven.or.less:
  1215. cmp in2, -8
  1216. ble .ede3.enc.finish
  1217. nop
  1218. add in2, 8, local1 ! bytes to load
  1219. ! addr, length, dest left, dest right, temp, temp2, label, ret label
  1220. load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1)
        ! in5 and out5 hold the chaining value at this point; the ivec
        ! pointer is fetched again from the caller frame.
  1221. .ede3.enc.finish:
  1222. LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
  1223. store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec
  1224. ret
  1225. restore ! delay slot: release the register window
        ! Decryption path. ks2 was parked unmodified by the delay slot of
        ! the branch that led here. The two stack slot names for the
        ! input and output pointers used below come from the m4
        ! definitions made inside DES_ncbc_encrypt.
  1226. .ede3.dec:
  1227. STPTR in0, INPUT ! park the input pointer, in0 is reused for the iv below
  1228. add in5, 120, in5 ! ks3 + 120: decryption starts at the last subkey
  1229. STPTR in1, OUTPUT ! park the output pointer, in1 is reused for the iv below
  1230. mov in0, local5 ! local5 = input address
  1231. add in3, 120, in3 ! ks1 + 120, also used for a decryption pass
  1232. STPTR in3, KS1
  1233. cmp in2, 0
  1234. ble .ede3.dec.finish ! length <= 0: return without touching ivec
  1235. STPTR in5, KS3 ! delay slot: park ks3 + 120
  1236. LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! iv
  1237. load_little_endian(local7, in0, in1, local3, .LLE8)
  1238. .ede3.dec.next.block:
  1239. load_little_endian(local5, in5, out5, local3, .LLE9)
  1240. ! parameter 6 1/2 for include encryption/decryption
  1241. ! parameter 7 1 for mov in1 to in3
  1242. ! parameter 8 1 for mov in3 to in4
  1243. ! parameter 9 1 for load ks3 and ks2 to in4 and in3
  1244. ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4
  1245. call .des_enc ! ks2 in3
  1246. LDPTR KS1, in4 ! delay slot: ks1 + 120 for the third pass
  1247. call .des_dec ! ks1 in4
  1248. nop
  1249. fp_macro(out5, in5, 0, 1) ! 1 for input and output address local5/7
  1250. ! in2 is bytes left to be stored
  1251. ! in2 is compared to 8 in the rounds
  1252. xor out5, in0, out4 ! iv xor
  1253. bl .ede3.dec.seven.or.less ! fewer than 8 bytes left to store
  1254. xor in5, in1, global4 ! delay slot: iv xor
        ! The next chaining value is fetched before the store because
        ! the same ordering in DES_ncbc_encrypt is explained by input
        ! and output possibly sharing an address.
  1255. load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block
  1256. store_little_endian(local7, out4, global4, local3, .SLE7) ! block
  1257. STPTR local5, INPUT ! remember the input position for the next round
  1258. addcc in2, -8, in2
  1259. add local7, 8, local7
  1260. bg .ede3.dec.next.block ! more bytes to store
  1261. STPTR local7, OUTPUT ! delay slot: remember the output position
        ! in0 and in1 hold the chaining value for a following call.
  1262. .ede3.dec.store.iv:
  1263. LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
  1264. store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec
  1265. .ede3.dec.finish:
  1266. ret
  1267. restore ! delay slot: release the register window
        ! Short final block: fetch the chaining value, store only in2
        ! bytes of the result and continue at .ede3.dec.store.iv, which
        ! is passed as the last argument on the store line.
  1268. .ede3.dec.seven.or.less:
  1269. load_little_endian_inc(local5, in0, in1, local3, .LLE14) ! iv
  1270. store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv)
  1271. .DES_ede3_cbc_encrypt.end:
  1272. .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt
  1273. .align 256
        ! .des_and is a local constant area addressed through out2 by the
        ! functions in this file. It holds a 256 byte lookup table
        ! followed by eight 32-bit words at byte offsets 256 to 284.
        ! The declared size is 288 bytes (256 + 8 * 4), so that the word
        ! at offset 284 is covered by the symbol as well.
  1274. .type .des_and,#object
  1275. .size .des_and,288
  1276. .des_and:
  1277. ! This table is used for AND 0xFC when it is known that register
  1278. ! bits 8-31 are zero. Makes it possible to do three arithmetic
  1279. ! operations in one cycle.
        ! Entry i holds i with its two lowest bits cleared, so every
        ! value appears four times in a row.
  1280. .byte 0, 0, 0, 0, 4, 4, 4, 4
  1281. .byte 8, 8, 8, 8, 12, 12, 12, 12
  1282. .byte 16, 16, 16, 16, 20, 20, 20, 20
  1283. .byte 24, 24, 24, 24, 28, 28, 28, 28
  1284. .byte 32, 32, 32, 32, 36, 36, 36, 36
  1285. .byte 40, 40, 40, 40, 44, 44, 44, 44
  1286. .byte 48, 48, 48, 48, 52, 52, 52, 52
  1287. .byte 56, 56, 56, 56, 60, 60, 60, 60
  1288. .byte 64, 64, 64, 64, 68, 68, 68, 68
  1289. .byte 72, 72, 72, 72, 76, 76, 76, 76
  1290. .byte 80, 80, 80, 80, 84, 84, 84, 84
  1291. .byte 88, 88, 88, 88, 92, 92, 92, 92
  1292. .byte 96, 96, 96, 96, 100, 100, 100, 100
  1293. .byte 104, 104, 104, 104, 108, 108, 108, 108
  1294. .byte 112, 112, 112, 112, 116, 116, 116, 116
  1295. .byte 120, 120, 120, 120, 124, 124, 124, 124
  1296. .byte 128, 128, 128, 128, 132, 132, 132, 132
  1297. .byte 136, 136, 136, 136, 140, 140, 140, 140
  1298. .byte 144, 144, 144, 144, 148, 148, 148, 148
  1299. .byte 152, 152, 152, 152, 156, 156, 156, 156
  1300. .byte 160, 160, 160, 160, 164, 164, 164, 164
  1301. .byte 168, 168, 168, 168, 172, 172, 172, 172
  1302. .byte 176, 176, 176, 176, 180, 180, 180, 180
  1303. .byte 184, 184, 184, 184, 188, 188, 188, 188
  1304. .byte 192, 192, 192, 192, 196, 196, 196, 196
  1305. .byte 200, 200, 200, 200, 204, 204, 204, 204
  1306. .byte 208, 208, 208, 208, 212, 212, 212, 212
  1307. .byte 216, 216, 216, 216, 220, 220, 220, 220
  1308. .byte 224, 224, 224, 224, 228, 228, 228, 228
  1309. .byte 232, 232, 232, 232, 236, 236, 236, 236
  1310. .byte 240, 240, 240, 240, 244, 244, 244, 244
  1311. .byte 248, 248, 248, 248, 252, 252, 252, 252
  1312. ! 5 numbers for initial/final permutation
  1313. .word 0x0f0f0f0f ! offset 256
  1314. .word 0x0000ffff ! 260
  1315. .word 0x33333333 ! 264
  1316. .word 0x00ff00ff ! 268
  1317. .word 0x55555555 ! 272
  1318. .word 0 ! 276
  1319. .word LOOPS ! 280, presumably the loop counter value loaded to out4 - TODO confirm in the macros
  1320. .word 0x0000FC00 ! 284, last word of the object (bytes 284 to 287)
  1321. .global DES_SPtrans
        ! DES_SPtrans: exported table of 8 groups of 64 words, 4 bytes
        ! each, which matches the 2048 bytes declared below. Group k
        ! starts at byte offset 256 * k.
        ! .PIC.DES_SPtrans is a second label for the same address; the
        ! functions in this file use it to compute the table address
        ! relative to the program counter and keep that address in
        ! global1.
        ! Code comments elsewhere in this file count from one: global1
        ! plus 512 is called sbox 3 and global1 plus 768 is called
        ! sbox 4, which are the groups marked nibble 2 and nibble 3 here.
  1322. .type DES_SPtrans,#object
  1323. .size DES_SPtrans,2048
  1324. .align 64
  1325. DES_SPtrans:
  1326. .PIC.DES_SPtrans:
  1327. ! nibble 0 (byte offset 0)
  1328. .word 0x02080800, 0x00080000, 0x02000002, 0x02080802
  1329. .word 0x02000000, 0x00080802, 0x00080002, 0x02000002
  1330. .word 0x00080802, 0x02080800, 0x02080000, 0x00000802
  1331. .word 0x02000802, 0x02000000, 0x00000000, 0x00080002
  1332. .word 0x00080000, 0x00000002, 0x02000800, 0x00080800
  1333. .word 0x02080802, 0x02080000, 0x00000802, 0x02000800
  1334. .word 0x00000002, 0x00000800, 0x00080800, 0x02080002
  1335. .word 0x00000800, 0x02000802, 0x02080002, 0x00000000
  1336. .word 0x00000000, 0x02080802, 0x02000800, 0x00080002
  1337. .word 0x02080800, 0x00080000, 0x00000802, 0x02000800
  1338. .word 0x02080002, 0x00000800, 0x00080800, 0x02000002
  1339. .word 0x00080802, 0x00000002, 0x02000002, 0x02080000
  1340. .word 0x02080802, 0x00080800, 0x02080000, 0x02000802
  1341. .word 0x02000000, 0x00000802, 0x00080002, 0x00000000
  1342. .word 0x00080000, 0x02000000, 0x02000802, 0x02080800
  1343. .word 0x00000002, 0x02080002, 0x00000800, 0x00080802
  1344. ! nibble 1 (byte offset 256)
  1345. .word 0x40108010, 0x00000000, 0x00108000, 0x40100000
  1346. .word 0x40000010, 0x00008010, 0x40008000, 0x00108000
  1347. .word 0x00008000, 0x40100010, 0x00000010, 0x40008000
  1348. .word 0x00100010, 0x40108000, 0x40100000, 0x00000010
  1349. .word 0x00100000, 0x40008010, 0x40100010, 0x00008000
  1350. .word 0x00108010, 0x40000000, 0x00000000, 0x00100010
  1351. .word 0x40008010, 0x00108010, 0x40108000, 0x40000010
  1352. .word 0x40000000, 0x00100000, 0x00008010, 0x40108010
  1353. .word 0x00100010, 0x40108000, 0x40008000, 0x00108010
  1354. .word 0x40108010, 0x00100010, 0x40000010, 0x00000000
  1355. .word 0x40000000, 0x00008010, 0x00100000, 0x40100010
  1356. .word 0x00008000, 0x40000000, 0x00108010, 0x40008010
  1357. .word 0x40108000, 0x00008000, 0x00000000, 0x40000010
  1358. .word 0x00000010, 0x40108010, 0x00108000, 0x40100000
  1359. .word 0x40100010, 0x00100000, 0x00008010, 0x40008000
  1360. .word 0x40008010, 0x00000010, 0x40100000, 0x00108000
  1361. ! nibble 2 (byte offset 512)
  1362. .word 0x04000001, 0x04040100, 0x00000100, 0x04000101
  1363. .word 0x00040001, 0x04000000, 0x04000101, 0x00040100
  1364. .word 0x04000100, 0x00040000, 0x04040000, 0x00000001
  1365. .word 0x04040101, 0x00000101, 0x00000001, 0x04040001
  1366. .word 0x00000000, 0x00040001, 0x04040100, 0x00000100
  1367. .word 0x00000101, 0x04040101, 0x00040000, 0x04000001
  1368. .word 0x04040001, 0x04000100, 0x00040101, 0x04040000
  1369. .word 0x00040100, 0x00000000, 0x04000000, 0x00040101
  1370. .word 0x04040100, 0x00000100, 0x00000001, 0x00040000
  1371. .word 0x00000101, 0x00040001, 0x04040000, 0x04000101
  1372. .word 0x00000000, 0x04040100, 0x00040100, 0x04040001
  1373. .word 0x00040001, 0x04000000, 0x04040101, 0x00000001
  1374. .word 0x00040101, 0x04000001, 0x04000000, 0x04040101
  1375. .word 0x00040000, 0x04000100, 0x04000101, 0x00040100
  1376. .word 0x04000100, 0x00000000, 0x04040001, 0x00000101
  1377. .word 0x04000001, 0x00040101, 0x00000100, 0x04040000
  1378. ! nibble 3 (byte offset 768)
  1379. .word 0x00401008, 0x10001000, 0x00000008, 0x10401008
  1380. .word 0x00000000, 0x10400000, 0x10001008, 0x00400008
  1381. .word 0x10401000, 0x10000008, 0x10000000, 0x00001008
  1382. .word 0x10000008, 0x00401008, 0x00400000, 0x10000000
  1383. .word 0x10400008, 0x00401000, 0x00001000, 0x00000008
  1384. .word 0x00401000, 0x10001008, 0x10400000, 0x00001000
  1385. .word 0x00001008, 0x00000000, 0x00400008, 0x10401000
  1386. .word 0x10001000, 0x10400008, 0x10401008, 0x00400000
  1387. .word 0x10400008, 0x00001008, 0x00400000, 0x10000008
  1388. .word 0x00401000, 0x10001000, 0x00000008, 0x10400000
  1389. .word 0x10001008, 0x00000000, 0x00001000, 0x00400008
  1390. .word 0x00000000, 0x10400008, 0x10401000, 0x00001000
  1391. .word 0x10000000, 0x10401008, 0x00401008, 0x00400000
  1392. .word 0x10401008, 0x00000008, 0x10001000, 0x00401008
  1393. .word 0x00400008, 0x00401000, 0x10400000, 0x10001008
  1394. .word 0x00001008, 0x10000000, 0x10000008, 0x10401000
  1395. ! nibble 4 (byte offset 1024)
  1396. .word 0x08000000, 0x00010000, 0x00000400, 0x08010420
  1397. .word 0x08010020, 0x08000400, 0x00010420, 0x08010000
  1398. .word 0x00010000, 0x00000020, 0x08000020, 0x00010400
  1399. .word 0x08000420, 0x08010020, 0x08010400, 0x00000000
  1400. .word 0x00010400, 0x08000000, 0x00010020, 0x00000420
  1401. .word 0x08000400, 0x00010420, 0x00000000, 0x08000020
  1402. .word 0x00000020, 0x08000420, 0x08010420, 0x00010020
  1403. .word 0x08010000, 0x00000400, 0x00000420, 0x08010400
  1404. .word 0x08010400, 0x08000420, 0x00010020, 0x08010000
  1405. .word 0x00010000, 0x00000020, 0x08000020, 0x08000400
  1406. .word 0x08000000, 0x00010400, 0x08010420, 0x00000000
  1407. .word 0x00010420, 0x08000000, 0x00000400, 0x00010020
  1408. .word 0x08000420, 0x00000400, 0x00000000, 0x08010420
  1409. .word 0x08010020, 0x08010400, 0x00000420, 0x00010000
  1410. .word 0x00010400, 0x08010020, 0x08000400, 0x00000420
  1411. .word 0x00000020, 0x00010420, 0x08010000, 0x08000020
  1412. ! nibble 5 (byte offset 1280)
  1413. .word 0x80000040, 0x00200040, 0x00000000, 0x80202000
  1414. .word 0x00200040, 0x00002000, 0x80002040, 0x00200000
  1415. .word 0x00002040, 0x80202040, 0x00202000, 0x80000000
  1416. .word 0x80002000, 0x80000040, 0x80200000, 0x00202040
  1417. .word 0x00200000, 0x80002040, 0x80200040, 0x00000000
  1418. .word 0x00002000, 0x00000040, 0x80202000, 0x80200040
  1419. .word 0x80202040, 0x80200000, 0x80000000, 0x00002040
  1420. .word 0x00000040, 0x00202000, 0x00202040, 0x80002000
  1421. .word 0x00002040, 0x80000000, 0x80002000, 0x00202040
  1422. .word 0x80202000, 0x00200040, 0x00000000, 0x80002000
  1423. .word 0x80000000, 0x00002000, 0x80200040, 0x00200000
  1424. .word 0x00200040, 0x80202040, 0x00202000, 0x00000040
  1425. .word 0x80202040, 0x00202000, 0x00200000, 0x80002040
  1426. .word 0x80000040, 0x80200000, 0x00202040, 0x00000000
  1427. .word 0x00002000, 0x80000040, 0x80002040, 0x80202000
  1428. .word 0x80200000, 0x00002040, 0x00000040, 0x80200040
  1429. ! nibble 6 (byte offset 1536)
  1430. .word 0x00004000, 0x00000200, 0x01000200, 0x01000004
  1431. .word 0x01004204, 0x00004004, 0x00004200, 0x00000000
  1432. .word 0x01000000, 0x01000204, 0x00000204, 0x01004000
  1433. .word 0x00000004, 0x01004200, 0x01004000, 0x00000204
  1434. .word 0x01000204, 0x00004000, 0x00004004, 0x01004204
  1435. .word 0x00000000, 0x01000200, 0x01000004, 0x00004200
  1436. .word 0x01004004, 0x00004204, 0x01004200, 0x00000004
  1437. .word 0x00004204, 0x01004004, 0x00000200, 0x01000000
  1438. .word 0x00004204, 0x01004000, 0x01004004, 0x00000204
  1439. .word 0x00004000, 0x00000200, 0x01000000, 0x01004004
  1440. .word 0x01000204, 0x00004204, 0x00004200, 0x00000000
  1441. .word 0x00000200, 0x01000004, 0x00000004, 0x01000200
  1442. .word 0x00000000, 0x01000204, 0x01000200, 0x00004200
  1443. .word 0x00000204, 0x00004000, 0x01004204, 0x01000000
  1444. .word 0x01004200, 0x00000004, 0x00004004, 0x01004204
  1445. .word 0x01000004, 0x01004200, 0x01004000, 0x00004004
  1446. ! nibble 7 (byte offset 1792)
  1447. .word 0x20800080, 0x20820000, 0x00020080, 0x00000000
  1448. .word 0x20020000, 0x00800080, 0x20800000, 0x20820080
  1449. .word 0x00000080, 0x20000000, 0x00820000, 0x00020080
  1450. .word 0x00820080, 0x20020080, 0x20000080, 0x20800000
  1451. .word 0x00020000, 0x00820080, 0x00800080, 0x20020000
  1452. .word 0x20820080, 0x20000080, 0x00000000, 0x00820000
  1453. .word 0x20000000, 0x00800000, 0x20020080, 0x20800080
  1454. .word 0x00800000, 0x00020000, 0x20820000, 0x00000080
  1455. .word 0x00800000, 0x00020000, 0x20000080, 0x20820080
  1456. .word 0x00020080, 0x20000000, 0x00000000, 0x00820000
  1457. .word 0x20800080, 0x20020080, 0x20020000, 0x00800080
  1458. .word 0x20820000, 0x00000080, 0x00800080, 0x20020000
  1459. .word 0x20820080, 0x00800000, 0x20800000, 0x20000080
  1460. .word 0x00820000, 0x00020080, 0x20020080, 0x20800000
  1461. .word 0x00000080, 0x20820000, 0x00820080, 0x00000000
  1462. .word 0x20000000, 0x20800080, 0x00020000, 0x00820080