📄 vms.mar
字号:
.title	vax_bn_mul_add_words  unsigned multiply & add, 32*32+32+32=>64
;
; w.j.m. 15-jan-1999
;
; it's magic ...
;
; ULONG bn_mul_add_words(ULONG r[],ULONG a[],int n,ULONG w) {
;	ULONG c = 0;
;	int i;
;	for(i = 0; i < n; i++) <c,r[i]> := r[i] + c + a[i] * w ;
;	return c;
; }
;
; Register use (r2-r6 are preserved through the entry mask):
;	r2 -> r[]	r3 -> a[]	r4 = remaining count
;	r5 = w		r6 = c		<r1,r0> = 64-bit work value
; Returns the final carry word c in r0.
;
; EMUL multiplies and adds *signed* longwords.  The unsigned result is
; recovered afterwards by adding, into the high longword, the other
; operand for every multiplier operand whose top bit was set, and 1
; for a "negative" addend.

r=4 ;(AP)
a=8 ;(AP)
n=12 ;(AP)	n	by value (input)
w=16 ;(AP)	w	by value (input)


	.psect	code,nowrt

.entry	bn_mul_add_words,^m<r2,r3,r4,r5,r6>

	moval	@r(ap),r2
	moval	@a(ap),r3
	movl	n(ap),r4	; assumed >0 by C code
	movl	w(ap),r5
	clrl	r6		; c

0$:
	emul	r5,(r3),(r2),r0		; w, a[], r[] considered signed

	; fixup for "negative" r[]
	tstl	(r2)
	bgeq	10$
	incl	r1
10$:

	; add in c
	addl2	r6,r0
	adwc	#0,r1

	; combined fixup for "negative" w, a[]
	tstl	r5
	bgeq	20$
	addl2	(r3),r1
20$:
	tstl	(r3)
	bgeq	30$
	addl2	r5,r1
30$:

	movl	r0,(r2)+		; store lo result in r[] & advance
	addl	#4,r3			; advance a[]
	movl	r1,r6			; store hi result => c

	sobgtr	r4,0$

	movl	r6,r0			; return c
	ret

.title	vax_bn_mul_words  unsigned multiply & add, 32*32+32=>64
;
; w.j.m. 15-jan-1999
;
; it's magic ...
;
; ULONG bn_mul_words(ULONG r[],ULONG a[],int n,ULONG w) {
;	ULONG c = 0;
;	int i;
;	for(i = 0; i < n; i++) <c,r[i]> := a[i] * w + c ;
;	return(c);
; }
;
; Same scheme as bn_mul_add_words, except that the EMUL addend is the
; running carry c instead of r[i].  Returns the final c in r0.

r=4 ;(AP)
a=8 ;(AP)
n=12 ;(AP)	n	by value (input)
w=16 ;(AP)	w	by value (input)


	.psect	code,nowrt

.entry	bn_mul_words,^m<r2,r3,r4,r5,r6>

	moval	@r(ap),r2	; r2 -> r[]
	moval	@a(ap),r3	; r3 -> a[]
	movl	n(ap),r4	; r4 = loop count (assumed >0 by C code)
	movl	w(ap),r5	; r5 = w
	clrl	r6		; r6 = c

0$:
	; <r1,r0> := w * a[] + c
	emul	r5,(r3),r6,r0		; w, a[], c considered signed

	; fixup for "negative" c
	tstl	r6			; c
	bgeq	10$
	incl	r1
10$:

	; combined fixup for "negative" w, a[]
	tstl	r5			; w
	bgeq	20$
	addl2	(r3),r1			; a[]
20$:
	tstl	(r3)			; a[]
	bgeq	30$
	addl2	r5,r1			; w
30$:

	movl	r0,(r2)+		; store lo result in r[] & advance
	addl	#4,r3			; advance a[]
	movl	r1,r6			; store hi result => c

	sobgtr	r4,0$

	movl	r6,r0			; return c
	ret

.title	vax_bn_sqr_words  unsigned square, 32*32=>64
;
; w.j.m. 15-jan-1999
;
; it's magic ...
;
; void bn_sqr_words(ULONG r[],ULONG a[],int n) {
;	int i;
;	for(i = 0; i < n; i++) <r[2*i+1],r[2*i]> := a[i] * a[i] ;
; }
;
; Writes two longwords of r[] (low, then high) for every element of a[].
; The C prototype is void; r0 is nevertheless set to 1 on exit.

r=4 ;(AP)
a=8 ;(AP)
n=12 ;(AP)	n	by value (input)


	.psect	code,nowrt

.entry	bn_sqr_words,^m<r2,r3,r4,r5>

	moval	@r(ap),r2	; r2 -> r[]
	moval	@a(ap),r3	; r3 -> a[]
	movl	n(ap),r4	; r4 = n (assumed >0 by C code)

0$:
	movl	(r3)+,r5		; r5 = a[] & advance

	; <r1,r0> := a[] * a[]
	emul	r5,r5,#0,r0		; a[] considered signed

	; fixup for "negative" a[]
	; (both multiplier operands are a[], so it is added in twice)
	tstl	r5			; a[]
	bgeq	30$
	addl2	r5,r1			; a[]
	addl2	r5,r1			; a[]
30$:

	movl	r0,(r2)+		; store lo result in r[] & advance
	movl	r1,(r2)+		; store hi result in r[] & advance

	sobgtr	r4,0$

	movl	#1,r0			; return SS$_NORMAL
	ret

.title	vax_bn_div_words  unsigned divide
;
; Richard Levitte 20-Nov-2000
;
; ULONG bn_div_words(ULONG h, ULONG l, ULONG d)
; {
;	return ((ULONG)((((ULLONG)h)<<32)|l) / (ULLONG)d);
; }
;
; Using EDIV would be very easy, if it didn't do signed calculations.
; Any time any of the input numbers are signed, there are problems,
; usually with integer overflow, at which point it returns useless
; data (the quotient gets the value of l, and the remainder becomes 0).
;
; If it was just for the dividend, it would be very easy, just divide
; it by 2 (unsigned), do the division, multiply the resulting quotient
; and remainder by 2, add the bit that was dropped when dividing by 2
; to the remainder, and do some adjustment so the remainder doesn't
; end up larger than the divisor.  For some cases when the divisor is
; negative (from EDIV's point of view, i.e. when the highest bit is set),
; dividing the dividend by 2 isn't enough, and since some operations
; might generate integer overflows even when the dividend is divided by
; 4 (when the high part of the shifted down dividend ends up being exactly
; half of the divisor, the result is the quotient 0x80000000, which is
; negative...) it needs to be divided by 8.  Furthermore, the divisor needs
; to be divided by 2 (unsigned) as well, to avoid more problems with the sign.
; In this case, a little extra fiddling with the remainder is required.
;
; So, the simplest way to handle this is always to divide the dividend
; by 8, and to divide the divisor by 2 if its highest bit is set.
; After EDIV has been used, the quotient gets multiplied by 8 if the
; original divisor was positive, otherwise 4.  The remainder, oddly
; enough, is *always* multiplied by 8.
; NOTE: in the case mentioned above, where the high part of the shifted
; down dividend ends up being exactly half the shifted down divisor, we
; end up with a 33 bit quotient.  That's no problem however, it usually
; means we have ended up with a too large remainder as well, and the
; problem is fixed by the last part of the algorithm (next paragraph).
;
; The routine ends with comparing the resulting remainder with the
; original divisor and if the remainder is larger, subtract the
; original divisor from it, and increase the quotient by 1.  This is
; done until the remainder is smaller than the divisor.
;
; The complete algorithm looks like this:
;
; d'  = d
; l'  = l & 7
; [h,l] = [h,l] >> 3
; [q,r] = floor([h,l] / d)	# This is the EDIV operation
; if (q < 0) q = -q		# I doubt this is necessary any more
;
; r'  = r >> 29
; if (d' >= 0)
;   q'  = q >> 29
;   q   = q << 3
; else
;   q'  = q >> 30
;   q   = q << 2
; r   = (r << 3) + l'
;
; if (d' < 0 && (d' & 1))	# the code below tests both conditions
;   {
;     [r',r] = [r',r] - [q',q]
;     while ([r',r] < 0)
;       {
;         [r',r] = [r',r] + d'
;         [q',q] = [q',q] - 1
;       }
;   }
;
; while ([r',r] >= d')
;   {
;     [r',r] = [r',r] - d'
;     [q',q] = [q',q] + 1
;   }
;
; return q
;
; A zero divisor is not divided by: the routine returns ^XFFFFFFFF.

h=4 ;(AP)	h	by value (input)
l=8 ;(AP)	l	by value (input)
d=12 ;(AP)	d	by value (input)

;r2 = l, q
;r3 = h, r
;r4 = d
;r5 = l'
;r6 = r'
;r7 = d'
;r8 = q'

	.psect	code,nowrt

.entry	bn_div_words,^m<r2,r3,r4,r5,r6,r7,r8>
	movl	l(ap),r2
	movl	h(ap),r3
	movl	d(ap),r4

	bicl3	#^XFFFFFFF8,r2,r5 ; l' = l & 7
	bicl3	#^X00000007,r2,r2

	bicl3	#^XFFFFFFF8,r3,r6	; r6 = h & 7 (temporary use of r6)
	bicl3	#^X00000007,r3,r3

	addl	r6,r2		; low 3 bits of h become the top 3 bits
				; of l once l is rotated right by 3

	rotl	#-3,r2,r2	; l = l >> 3
	rotl	#-3,r3,r3	; h = h >> 3

	movl	r4,r7		; d' = d

	movl	#0,r6		; r' = 0
	movl	#0,r8		; q' = 0

	tstl	r4
	beql	666$		; Uh-oh, the divisor is 0...
	bgtr	1$
	rotl	#-1,r4,r4	; If d is negative, shift it right.
	bicl2	#^X80000000,r4	; Since d is then a large number, the
				; lowest bit is insignificant
				; (contradict that, and I'll fix the problem!)
1$:
	ediv	r4,r2,r2,r3	; Do the actual division

	tstl	r2
	bgeq	3$
	mnegl	r2,r2		; if q < 0, negate it
3$:
	tstl	r7
	blss	4$
	rotl	#3,r2,r2	;   q = q << 3
	bicl3	#^XFFFFFFF8,r2,r8 ;    q' gets the high bits from q
	bicl3	#^X00000007,r2,r2
	brb	41$		; plain jump over the else arm (a subroutine
				; branch here would push a return PC that
				; nothing ever pops)
4$:				; else
	rotl	#2,r2,r2	;   q = q << 2
	bicl3	#^XFFFFFFFC,r2,r8 ;   q' gets the high bits from q
	bicl3	#^X00000003,r2,r2
41$:
	rotl	#3,r3,r3	; r = r << 3
	bicl3	#^XFFFFFFF8,r3,r6 ; r' gets the high bits from r
	bicl3	#^X00000007,r3,r3
	addl	r5,r3		; r = r + l'

	tstl	r7
	bgeq	5$
	bitl	#1,r7
	beql	5$		; if d' < 0 && d' & 1
	subl	r2,r3		;   [r',r] = [r',r] - [q',q]
	sbwc	r8,r6
45$:
	bgeq	5$		;   while r < 0
	decl	r2		;     [q',q] = [q',q] - 1
	sbwc	#0,r8
	addl	r7,r3		;     [r',r] = [r',r] + d'
	adwc	#0,r6
	brb	45$

; The return points are placed in the middle to keep a short distance from
; all the branch points
42$:
;	movl	r3,r1
	movl	r2,r0
	ret
666$:
	movl	#^XFFFFFFFF,r0
	ret

5$:
	tstl	r6
	bneq	6$
	cmpl	r3,r7
	blssu	42$		; while [r',r] >= d'
6$:
	subl	r7,r3		;   [r',r] = [r',r] - d'
	sbwc	#0,r6
	incl	r2		;   [q',q] = [q',q] + 1
	adwc	#0,r8
	brb	5$

.title	vax_bn_add_words  unsigned add of two arrays
;
; Richard Levitte 20-Nov-2000
;
; ULONG bn_add_words(ULONG r[], ULONG a[], ULONG b[], int n) {
;	ULONG c = 0;
;	int i;
;	for (i = 0; i < n; i++) <c,r[i]> = a[i] + b[i] + c;
;	return(c);
; }
;
; The carry is kept in the condition-code C bit for the whole loop, so
; the loop body may only use instructions that leave C alone apart from
; the ADWC itself.  Returns the final carry (0 or 1) in r0; returns 0
; without touching r[] when n <= 0.

r=4 ;(AP)	r	by reference (output)
a=8 ;(AP)	a	by reference (input)
b=12 ;(AP)	b	by reference (input)
n=16 ;(AP)	n	by value (input)


	.psect	code,nowrt

.entry	bn_add_words,^m<r2,r3,r4,r5,r6>

	moval	@r(ap),r2
	moval	@a(ap),r3
	moval	@b(ap),r4
	movl	n(ap),r5	; assumed >0 by C code
	clrl	r0		; c

	tstl	r5		; carry = 0
	bleq	666$

0$:
	movl	(r3)+,r6	; carry untouched
	adwc	(r4)+,r6	; carry used and touched
	movl	r6,(r2)+	; carry untouched
	sobgtr	r5,0$		; carry untouched

	adwc	#0,r0		; r0 = carry out of the last word
666$:
	ret

.title	vax_bn_sub_words  unsigned subtract of two arrays
;
; Richard Levitte 20-Nov-2000
;
; ULONG bn_sub_words(ULONG r[], ULONG a[], ULONG b[], int n) {
;	ULONG c = 0;
;	int i;
;	for (i = 0; i < n; i++) <c,r[i]> = a[i] - b[i] - c;
;	return(c);
; }
;
; Mirror image of bn_add_words: the borrow lives in the C bit across the
; loop and is collected into r0 at the end.  Returns 0 without touching
; r[] when n <= 0.

r=4 ;(AP)	r	by reference (output)
a=8 ;(AP)	a	by reference (input)
b=12 ;(AP)	b	by reference (input)
n=16 ;(AP)	n	by value (input)


	.psect	code,nowrt

.entry	bn_sub_words,^m<r2,r3,r4,r5,r6>

	moval	@r(ap),r2
	moval	@a(ap),r3
	moval	@b(ap),r4
	movl	n(ap),r5	; assumed >0 by C code
	clrl	r0		; c

	tstl	r5		; carry = 0
	bleq	666$

0$:
	movl	(r3)+,r6	; carry untouched
	sbwc	(r4)+,r6	; carry used and touched
	movl	r6,(r2)+	; carry untouched
	sobgtr	r5,0$		; carry untouched

	adwc	#0,r0		; r0 = borrow out of the last word
666$:
	ret

; NOTE(review): BN_MUL_COMBA8 starts here but continues beyond this span
; and is cut off before its end in the text available for review.  Its
; opening part is therefore carried over exactly as found (still collapsed
; onto one line) and needs its line breaks restored with the whole routine
; in view.
;r=4 ;(AP);a=8 ;(AP);b=12 ;(AP);n=16 ;(AP) n by value (input) .psect code,nowrt.entry BN_MUL_COMBA8,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10,r11> movab -924(sp),sp clrq r8 clrl r10 movl 8(ap),r6 movzwl 2(r6),r3 movl 12(ap),r7 bicl3 #-65536,(r7),r2 movzwl 2(r7),r0 bicl2 #-65536,r0 bicl3 #-65536,(r6),-12(fp) bicl3 #-65536,r3,-16(fp) mull3 r0,-12(fp),-4(fp) mull2 r2,-12(fp) mull3 r2,-16(fp),-8(fp) mull2 r0,-16(fp) addl3 -4(fp),-8(fp),r0 bicl3 #0,r0,-4(fp) cmpl -4(fp),-8(fp) bgequ noname.45 addl2 #65536,-16(fp)noname.45: movzwl -2(fp),r0 bicl2 #-65536,r0 addl2 r0,-16(fp) bicl3 #-65536,-4(fp),r0 ashl #16,r0,-8(fp) addl3 -8(fp),-12(fp),r0 bicl3 #0,r0,-12(fp) cmpl -12(fp),-8(fp) bgequ noname.46 incl -16(fp)noname.46: movl -12(fp),r1 movl -16(fp),r2 addl2 r1,r9 bicl2 #0,r9 cmpl r9,r1 bgequ noname.47 incl r2noname.47: addl2 r2,r8 bicl2 #0,r8 cmpl r8,r2 bgequ noname.48 incl r10noname.48: movl 4(ap),r11 movl r9,(r11) clrl r9 movzwl 2(r6),r2 bicl3 #-65536,4(r7),r3 movzwl 6(r7),r0 bicl2 #-65536,r0 bicl3 #-65536,(r6),-28(fp) bicl3 #-65536,r2,-32(fp) mull3 r0,-28(fp),-20(fp) mull2 r3,-28(fp) mull3 r3,-32(fp),-24(fp) mull2 r0,-32(fp) addl3 -20(fp),-24(fp),r0 bicl3 #0,r0,-20(fp) cmpl -20(fp),-24(fp) bgequ noname.49 addl2 #65536,-32(fp)noname.49: movzwl -18(fp),r0 bicl2 #-65536,r0 addl2 r0,-32(fp) bicl3 #-65536,-20(fp),r0 ashl #16,r0,-24(fp) addl3 -24(fp),-28(fp),r0 bicl3 #0,r0,-28(fp) cmpl -28(fp),-24(fp) bgequ noname.50 incl -32(fp)noname.50: 
movl -28(fp),r1 movl -32(fp),r2 addl2 r1,r8 bicl2 #0,r8 cmpl r8,r1 bgequ noname.51 incl r2noname.51: addl2 r2,r10 bicl2 #0,r10 cmpl r10,r2 bgequ noname.52 incl r9noname.52: movzwl 6(r6),r2 bicl3 #-65536,(r7),r3 movzwl 2(r7),r0 bicl2 #-65536,r0 bicl3 #-65536,4(r6),-44(fp) bicl3 #-65536,r2,-48(fp) mull3 r0,-44(fp),-36(fp) mull2 r3,-44(fp) mull3 r3,-48(fp),-40(fp) mull2 r0,-48(fp) addl3 -36(fp),-40(fp),r0 bicl3 #0,r0,-36(fp) cmpl -36(fp),-40(fp) bgequ noname.53 addl2 #65536,-48(fp)noname.53: movzwl -34(fp),r0 bicl2 #-65536,r0 addl2 r0,-48(fp) bicl3 #-65536,-36(fp),r0 ashl #16,r0,-40(fp) addl3 -40(fp),-44(fp),r0 bicl3 #0,r0,-44(fp) cmpl -44(fp),-40(fp) bgequ noname.54 incl -48(fp)noname.54: movl -44(fp),r1 movl -48(fp),r2 addl2 r1,r8 bicl2 #0,r8 cmpl r8,r1 bgequ noname.55 incl r2noname.55: addl2 r2,r10 bicl2 #0,r10 cmpl r10,r2 bgequ noname.56 incl r9noname.56: movl r8,4(r11) clrl r8 movzwl 10(r6),r2 bicl3 #-65536,(r7),r3 movzwl 2(r7),r0 bicl2 #-65536,r0 bicl3 #-65536,8(r6),-60(fp) bicl3 #-65536,r2,-64(fp) mull3 r0,-60(fp),-52(fp) mull2 r3,-60(fp) mull3 r3,-64(fp),-56(fp) mull2 r0,-64(fp) addl3 -52(fp),-56(fp),r0 bicl3 #0,r0,-52(fp) cmpl -52(fp),-56(fp) bgequ noname.57 addl2 #65536,-64(fp)noname.57: movzwl -50(fp),r0 bicl2 #-65536,r0 addl2 r0,-64(fp) bicl3 #-65536,-52(fp),r0 ashl #16,r0,-56(fp) addl3 -56(fp),-60(fp),r0 bicl3 #0,r0,-60(fp) cmpl -60(fp),-56(fp) bgequ noname.58 incl -64(fp)noname.58: movl -60(fp),r1 movl -64(fp),r2 addl2 r1,r10 bicl2 #0,r10 cmpl r10,r1 bgequ noname.59 incl r2noname.59: addl2 r2,r9 bicl2 #0,r9 cmpl r9,r2 bgequ noname.60 incl r8noname.60: movzwl 6(r6),r2 bicl3 #-65536,4(r7),r3 movzwl 6(r7),r0 bicl2 #-65536,r0 bicl3 #-65536,4(r6),-76(fp) bicl3 #-65536,r2,-80(fp) mull3 r0,-76(fp),-68(fp) mull2 r3,-76(fp) mull3 r3,-80(fp),-72(fp) mull2 r0,-80(fp) addl3 -68(fp),-72(fp),r0 bicl3 #0,r0,-68(fp) cmpl -68(fp),-72(fp) bgequ noname.61 addl2 #65536,-80(fp)noname.61: movzwl -66(fp),r0 bicl2 #-65536,r0 addl2 r0,-80(fp) bicl3 #-65536,-68(fp),r0 ashl 
#16,r0,-72(fp) addl3 -72(fp),-76(fp),r0 bicl3 #0,r0,-76(fp) cmpl -76(fp),-72(fp) bgequ noname.62 incl -80(fp)noname.62: movl -76(fp),r1 movl -80(fp),r2 addl2 r1,r10 bicl2 #0,r10 cmpl r10,r1 bgequ noname.63 incl r2noname.63: addl2 r2,r9 bicl2 #0,r9 cmpl r9,r2 bgequ noname.64 incl r8noname.64: movzwl 2(r6),r2 bicl3 #-65536,8(r7),r3 movzwl 10(r7),r0 bicl2 #-65536,r0 bicl3 #-65536,(r6),-92(fp) bicl3 #-65536,r2,-96(fp) mull3 r0,-92(fp),-84(fp) mull2 r3,-92(fp) mull3 r3,-96(fp),-88(fp) mull2 r0,-96(fp) addl3 -84(fp),-88(fp),r0 bicl3 #0,r0,-84(fp) cmpl -84(fp),-88(fp) bgequ noname.65 addl2 #65536,-96(fp)noname.65: movzwl -82(fp),r0 bicl2 #-65536,r0 addl2 r0,-96(fp) bicl3 #-65536,-84(fp),r0 ashl #16,r0,-88(fp) addl3 -88(fp),-92(fp),r0 bicl3 #0,r0,-92(fp) cmpl -92(fp),-88(fp) bgequ noname.66 incl -96(fp)noname.66: movl -92(fp),r1 movl -96(fp),r2 addl2 r1,r10 bicl2 #0,r10 cmpl r10,r1 bgequ noname.67 incl r2noname.67: addl2 r2,r9 bicl2 #0,r9 cmpl r9,r2 bgequ noname.68 incl r8noname.68: movl r10,8(r11) clrl r10 movzwl 2(r6),r2 bicl3 #-65536,12(r7),r3 movzwl 14(r7),r0 bicl2 #-65536,r0 bicl3 #-65536,(r6),-108(fp) bicl3 #-65536,r2,-112(fp) mull3 r0,-108(fp),-100(fp) mull2 r3,-108(fp) mull3 r3,-112(fp),-104(fp) mull2 r0,-112(fp) addl3 -100(fp),-104(fp),r0 bicl3 #0,r0,-100(fp) cmpl -100(fp),-104(fp) bgequ noname.69 addl2 #65536,-112(fp)noname.69: movzwl -98(fp),r0 bicl2 #-65536,r0 addl2 r0,-112(fp) bicl3 #-65536,-100(fp),r0 ashl #16,r0,-104(fp) addl3 -104(fp),-108(fp),r0 bicl3 #0,r0,-108(fp) cmpl -108(fp),-104(fp) bgequ noname.70 incl -112(fp)noname.70: movl -108(fp),r1 movl -112(fp),r2 addl2 r1,r9 bicl2 #0,r9 cmpl r9,r1
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -