123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309 |
- /*
- * Copyright (c) 1995 Colin Plumb. All rights reserved.
- * For licensing and other legal details, see the file legal.c.
- *
- * lbn68020.c - 32-bit bignum primitives for the 68020+ (or 683xx) processors.
- *
- * This was written for Metrowerks C, and while it should be reasonably
- * portable, NOTE that Metrowerks lets a callee trash a0, a1, d0, d1, and d2.
- * Some 680x0 compilers make d2 callee-save, so instructions to save it
- * will have to be added.
- *
- * This code supports 16 or 32-bit ints, based on UINT_MAX.
- * Regardless of UINT_MAX, only bignums up to 64K words (2 million bits)
- * are supported. (68k hackers will recognize this as a consequence of
- * using dbra.)
- *
- * These primitives use little-endian word order.
- * (The order of bytes within words is irrelevant to this issue.)
- *
- * TODO: Schedule this for the 68040's pipeline. (When I get a 68040 manual.)
- */
- #include <limits.h>
- #include "lbn.h" /* Should include lbn68020.h */
- /*
- * The Metrowerks C compiler (1.2.2) produces bad 68k code for the
- * following input, which happens to be the inner loop of lbnSub1,
- * so a few less than critical routines have been recoded in assembly
- * to avoid the bug. (Optimizer on or off does not matter.)
- *
- * unsigned
- * decrement(unsigned *num, unsigned len)
- * {
- * do {
- * if ((*num++)-- != 0)
- * return 0;
- * } while (--len);
- * return 1;
- * }
- */
- /*
-  * lbnSub1_32: subtract the single word "borrow" from the len-word number
-  * at num (least significant word first), in place, and return the borrow
-  * out of the top word (0 or 1).
-  *
-  * Arguments are read from the stack: num at 4(sp), then len (2 or 4 bytes,
-  * depending on the size of int), then borrow.  Only the low 16 bits of
-  * len are used.  Trashes a0 and d0.
-  */
- asm BNWORD32
- lbnSub1_32(BNWORD32 *num, unsigned len, BNWORD32 borrow)
- {
- #if UINT_MAX == 0xffff
-     move.l 10(sp),d0            /* d0 = borrow */
- #else
-     move.l 12(sp),d0            /* d0 = borrow */
- #endif
-     movea.l 4(sp),a0            /* a0 = num */
-     sub.l d0,(a0)+              /* num[0] -= borrow */
-     bcc sub1_exit               /* no borrow out: X is clear, result is 0 */
-     /* A borrow must ripple into the words above num[0] */
- #if UINT_MAX == 0xffff
-     move.w 8(sp),d0             /* d0 = len */
- #else
-     move.w 10(sp),d0            /* d0 = low word of len */
- #endif
-     subq.w #2,d0                /* dbcc count for the len-1 words left */
-     bcs sub1_exit               /* no words left; subq's own borrow left X=1 */
- sub1_ripple:
-     subq.l #1,(a0)+             /* borrows again only if the word was 0 */
-     dbcc d0,sub1_ripple         /* stop on no borrow or when count runs out */
- sub1_exit:
-     moveq.l #0,d0               /* moveq does not touch X */
-     addx.w d0,d0                /* d0 = 0 + 0 + X = borrow out */
-     rts
- }
- /*
-  * lbnAdd1_32: add the single word "carry" to the len-word number at num
-  * (least significant word first), in place, and return the carry out of
-  * the top word (0 or 1).
-  *
-  * Arguments are read from the stack: num at 4(sp), then len (2 or 4 bytes,
-  * depending on the size of int), then carry.  Only the low 16 bits of
-  * len are used.  Trashes a0 and d0.
-  */
- asm BNWORD32
- lbnAdd1_32(BNWORD32 *num, unsigned len, BNWORD32 carry)
- {
- #if UINT_MAX == 0xffff
-     move.l 10(sp),d0            /* d0 = carry */
- #else
-     move.l 12(sp),d0            /* d0 = carry */
- #endif
-     movea.l 4(sp),a0            /* a0 = num */
-     add.l d0,(a0)+              /* num[0] += carry */
-     bcc add1_exit               /* no carry out: X is clear, result is 0 */
-     /* A carry must ripple into the words above num[0] */
- #if UINT_MAX == 0xffff
-     move.w 8(sp),d0             /* d0 = len */
- #else
-     move.w 10(sp),d0            /* d0 = low word of len */
- #endif
-     subq.w #2,d0                /* dbcc count for the len-1 words left */
-     bcs add1_exit               /* no words left; subq's borrow left X=1 */
- add1_ripple:
-     addq.l #1,(a0)+             /* carries again only if the word wrapped to 0 */
-     dbcc d0,add1_ripple         /* stop on no carry or when count runs out */
- add1_exit:
-     moveq.l #0,d0               /* moveq does not touch X */
-     addx.w d0,d0                /* d0 = 0 + 0 + X = carry out */
-     rts
- }
- /*
-  * lbnMulN1_32: multiply the len-word number at in by the single word k
-  * and store the len+1 word product at out.  Words are least significant
-  * first.  len is used as a 16-bit count and is not checked for zero.
-  *
-  * The loop handles two words per pass; when the number of words left
-  * after the first is odd, the loop is entered at its halfway point.
-  *
-  * Registers: a0 = in, a1 = out, d2 = k, d3 = current word / low product,
-  * d0 and d1 = alternating high product (carry) words, d4 = constant 0,
-  * d5 = dbra counter.  d3-d5 are saved and restored; a0, a1 and d0-d2
-  * are trashed.
-  */
- asm void
- lbnMulN1_32(BNWORD32 *out, BNWORD32 const *in, unsigned len, BNWORD32 k)
- {
-     machine 68020
-     movem.l d3-d5,-(sp)         /* pushes 12 bytes: args now start at 16(sp) */
-     move.l 20(sp),a0            /* a0 = in */
-     move.l 16(sp),a1            /* a1 = out */
- #if UINT_MAX == 0xffff
-     move.l 26(sp),d2            /* d2 = k */
-     move.w 24(sp),d5            /* d5 = len */
- #else
-     move.l 28(sp),d2            /* d2 = k */
-     move.w 26(sp),d5            /* d5 = low word of len */
- #endif
-     moveq.l #0,d4               /* d4 = 0 so addx adds only the X flag */
-     /* Word 0 has no incoming carry */
-     move.l (a0)+,d3
-     mulu.l d2,d1:d3             /* d1:d3 = k * in[0]  (dc.w 0x4c02, 0x3401) */
-     move.l d3,(a1)+
-     /* Split the remaining len-1 words into pairs */
-     subq.w #1,d5
-     lsr.w #1,d5                 /* d5 = pair count, C = one odd word left */
-     bcs.s mn1_second            /* odd: start halfway, carry already in d1 */
-     beq.s mn1_single            /* nothing left: len was 1 */
-     subq.w #1,d5                /* dbra makes count+1 passes */
-     move.l d1,d0                /* first half expects its carry in d0 */
- mn1_first:
-     move.l (a0)+,d3
-     mulu.l d2,d1:d3             /* new high word to d1  (dc.w 0x4c02, 0x3401) */
-     add.l d0,d3                 /* add carry from the previous word */
-     addx.l d4,d1                /* fold the overflow into the high word */
-     move.l d3,(a1)+
- mn1_second:
-     move.l (a0)+,d3
-     mulu.l d2,d0:d3             /* new high word to d0  (dc.w 0x4c02, 0x3400) */
-     add.l d1,d3
-     addx.l d4,d0
-     move.l d3,(a1)+
-     dbra d5,mn1_first
-     move.l d0,(a1)              /* final carry is the top word of the product */
- mn1_return:
-     movem.l (sp)+,d3-d5
-     rts
- mn1_single:
-     move.l d1,(a1)              /* one-word input: high product is the top word */
-     bra.s mn1_return
- }
- /*
-  * lbnMulAdd1_32: multiply the len-word number at in by the single word k,
-  * add the product to the len words at out in place, and return the carry
-  * word that did not fit in those len words.  Words are least significant
-  * first.  len is used as a 16-bit count and is not checked for zero.
-  *
-  * The loop handles two words per pass; when the number of words left
-  * after the first is odd, the loop is entered at its halfway point.
-  *
-  * Registers: a0 = in, a1 = out, d2 = k, d3 = current word / low product,
-  * d0 and d1 = alternating carry words, d4 = constant 0, d5 = dbra
-  * counter.  d3-d5 are saved and restored; a0, a1, d1 and d2 are trashed.
-  */
- asm BNWORD32
- lbnMulAdd1_32(BNWORD32 *out, BNWORD32 const *in, unsigned len, BNWORD32 k)
- {
-     machine 68020
-     movem.l d3-d5,-(sp)         /* pushes 12 bytes: args now start at 16(sp) */
-     move.l 20(sp),a0            /* a0 = in */
-     move.l 16(sp),a1            /* a1 = out */
- #if UINT_MAX == 0xffff
-     move.l 26(sp),d2            /* d2 = k */
-     move.w 24(sp),d5            /* d5 = len */
- #else
-     move.l 28(sp),d2            /* d2 = k */
-     move.w 26(sp),d5            /* d5 = low word of len */
- #endif
-     moveq.l #0,d4               /* d4 = 0 so addx adds only the X flag */
-     /* Word 0 has no incoming carry */
-     move.l (a0)+,d3
-     mulu.l d2,d1:d3             /* d1:d3 = k * in[0]  (dc.w 0x4c02, 0x3401) */
-     add.l d3,(a1)+              /* out[0] += low product */
-     addx.l d4,d1                /* carry from that add joins the high word */
-     /* Split the remaining len-1 words into pairs */
-     subq.w #1,d5
-     lsr.w #1,d5                 /* d5 = pair count, C = one odd word left */
-     bcs.s mad1_second           /* odd: start halfway, carry already in d1 */
-     beq.s mad1_single           /* nothing left: len was 1 */
-     subq.w #1,d5                /* dbra makes count+1 passes */
-     move.l d1,d0                /* first half expects its carry in d0 */
- mad1_first:
-     move.l (a0)+,d3
-     mulu.l d2,d1:d3             /* new high word to d1  (dc.w 0x4c02, 0x3401) */
-     add.l d0,d3                 /* add carry from the previous word */
-     addx.l d4,d1
-     add.l d3,(a1)+              /* accumulate into out */
-     addx.l d4,d1
- mad1_second:
-     move.l (a0)+,d3
-     mulu.l d2,d0:d3             /* new high word to d0  (dc.w 0x4c02, 0x3400) */
-     add.l d1,d3
-     addx.l d4,d0
-     add.l d3,(a1)+
-     addx.l d4,d0
-     dbra d5,mad1_first
- mad1_return:
-     movem.l (sp)+,d3-d5         /* d0 is not in the list: carry is returned */
-     rts
- mad1_single:
-     move.l d1,d0                /* one-word input: carry is still in d1 */
-     bra.s mad1_return
- }
- /*
-  * lbnMulSub1_32: multiply the len-word number at in by the single word k,
-  * subtract the product from the len words at out in place, and return
-  * the borrow word (high product plus accumulated borrows) that was not
-  * subtracted from those len words.  Words are least significant first.
-  * len is used as a 16-bit count and is not checked for zero.
-  *
-  * The loop handles two words per pass; when the number of words left
-  * after the first is odd, the loop is entered at its halfway point.
-  *
-  * Registers: a0 = in, a1 = out, d2 = k, d3 = current word / low product,
-  * d0 and d1 = alternating borrow words, d4 = constant 0, d5 = dbra
-  * counter.  d3-d5 are saved and restored; a0, a1, d1 and d2 are trashed.
-  */
- asm BNWORD32
- lbnMulSub1_32(BNWORD32 *out, BNWORD32 const *in, unsigned len, BNWORD32 k)
- {
-     machine 68020
-     movem.l d3-d5,-(sp)         /* pushes 12 bytes: args now start at 16(sp) */
-     move.l 20(sp),a0            /* a0 = in */
-     move.l 16(sp),a1            /* a1 = out */
- #if UINT_MAX == 0xffff
-     move.l 26(sp),d2            /* d2 = k */
-     move.w 24(sp),d5            /* d5 = len */
- #else
-     move.l 28(sp),d2            /* d2 = k */
-     move.w 26(sp),d5            /* d5 = low word of len */
- #endif
-     moveq.l #0,d4               /* d4 = 0 so addx adds only the X flag */
-     /* Word 0 has no incoming borrow */
-     move.l (a0)+,d3
-     mulu.l d2,d1:d3             /* d1:d3 = k * in[0]  (dc.w 0x4c02, 0x3401) */
-     sub.l d3,(a1)+              /* out[0] -= low product */
-     addx.l d4,d1                /* borrow from that sub joins the high word */
-     /* Split the remaining len-1 words into pairs */
-     subq.w #1,d5
-     lsr.w #1,d5                 /* d5 = pair count, C = one odd word left */
-     bcs.s msb1_second           /* odd: start halfway, borrow already in d1 */
-     beq.s msb1_single           /* nothing left: len was 1 */
-     subq.w #1,d5                /* dbra makes count+1 passes */
-     move.l d1,d0                /* first half expects its borrow in d0 */
- msb1_first:
-     move.l (a0)+,d3
-     mulu.l d2,d1:d3             /* new high word to d1  (dc.w 0x4c02, 0x3401) */
-     add.l d0,d3                 /* add borrow word from the previous step */
-     addx.l d4,d1
-     sub.l d3,(a1)+              /* subtract from out */
-     addx.l d4,d1
- msb1_second:
-     move.l (a0)+,d3
-     mulu.l d2,d0:d3             /* new high word to d0  (dc.w 0x4c02, 0x3400) */
-     add.l d1,d3
-     addx.l d4,d0
-     sub.l d3,(a1)+
-     addx.l d4,d0
-     dbra d5,msb1_first
- msb1_return:
-     movem.l (sp)+,d3-d5         /* d0 is not in the list: borrow is returned */
-     rts
- msb1_single:
-     move.l d1,d0                /* one-word input: borrow is still in d1 */
-     bra.s msb1_return
- }
- /*
-  * lbnDiv21_32: divide the two-word value nh:nl by the single word d,
-  * store the one-word quotient at *q and return the remainder.
-  *
-  * Nothing here checks that the quotient fits in one word; presumably
-  * callers guarantee nh < d (confirm against callers).
-  * Trashes a0, d0 and d1.
-  */
- asm BNWORD32
- lbnDiv21_32(BNWORD32 *q, BNWORD32 nh, BNWORD32 nl, BNWORD32 d)
- {
-     machine 68020
-     movea.l 4(sp),a0            /* a0 = q */
-     move.l 12(sp),d1            /* d1 = nl, low half of the dividend */
-     move.l 8(sp),d0             /* d0 = nh, high half of the dividend */
-     divu.l 16(sp),d0:d1         /* d0:d1 / d -> remainder d0, quotient d1 */
-                                 /* (dc.w 0x4c6f, 0x1400, 16) */
-     move.l d1,(a0)              /* *q = quotient */
-     rts                         /* remainder is returned in d0 */
- }
- /*
-  * lbnModQ_32: return the remainder of the len-word number at n (least
-  * significant word first) divided by the small divisor d.
-  *
-  * Division runs from the most significant word down: the top word is
-  * divided 32/32, then each lower word is divided 64/32 with the running
-  * remainder as the high half of the dividend.
-  *
-  * len is used as a 16-bit count (1..65535) and is not checked for zero.
-  * d3 is preserved by parking it in a1; a0, a1, d0, d1 and d2 are trashed.
-  */
- asm unsigned
- lbnModQ_32(BNWORD32 const *n, unsigned len, unsigned d)
- {
-     machine 68020
-     move.l 4(sp),a0             /* a0 = n */
-     move.l d3,a1                /* save d3 in a1 rather than on the stack */
-     moveq.l #0,d1               /* clear the high word: the lea below uses */
-                                 /* d1 as a LONG index but len is loaded */
-                                 /* as a word */
- #if UINT_MAX == 0xffff
-     moveq.l #0,d2               /* zero-extend the 16-bit divisor */
-     move.w 8(sp),d1             /* d1 = len */
-     move.w 10(sp),d2            /* d2 = d */
- #else
-     move.w 10(sp),d1            /* d1 = low word of len */
-     move.l 12(sp),d2            /* d2 = d */
- #endif
-     dc.w 0x41f0, 0x1cfc         /* lea -4(a0,d1.L*4),a0 : a0 = &n[len-1] */
-     /* First time, divide 32/32 - may be faster than 64/32 */
-     move.l (a0),d3
-     divul.l d2,d0:d3            /* d0 = n[len-1] % d  (dc.w 0x4c42, 0x3000) */
-     subq.w #2,d1                /* dbra count for the len-1 words left */
-     bcs mq32_done               /* borrow means len < 2; a sign test (bmi) */
-                                 /* would also stop for len >= 0x8002 */
- mq32_loop:
-     move.l -(a0),d3             /* next lower word */
-     divu.l d2,d0:d3             /* d0 = (d0:d3) % d  (dc.w 0x4c42, 0x3400) */
-     dbra d1,mq32_loop
- mq32_done:
-     move.l a1,d3                /* restore d3 */
-     rts                         /* remainder is returned in d0 */
- }
- /* 45678901234567890123456789012345678901234567890123456789012345678901234567 */
|