; File: pa-risc2w.s
; (code-viewer page label, not part of the assembly source: "Font size:")
COPY %r6,%r24 .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL) B,L $$div2U,%r2 EXTRD,U %r6,31,32,%r23 DEPD %r28,31,32,%r29 $D2 STD %r29,-272(%r30) ; q AND %r5,%r19,%r24 ; t & 0xffffffff00000000; EXTRD,U %r24,31,32,%r24 ; ??? FLDD -272(%r30),%fr7 ; q FLDD -280(%r30),%fr8 ; d XMPYU %fr8L,%fr7L,%fr10 FSTD %fr10,-256(%r30) XMPYU %fr8L,%fr7R,%fr22 FSTD %fr22,-264(%r30) XMPYU %fr8R,%fr7L,%fr11 XMPYU %fr8R,%fr7R,%fr23 FSTD %fr11,-232(%r30) FSTD %fr23,-240(%r30) LDD -256(%r30),%r28 DEPD,Z %r28,31,32,%r2 LDD -264(%r30),%r20 ADD,L %r20,%r2,%r31 LDD -232(%r30),%r22 DEPD,Z %r22,31,32,%r22 LDD -240(%r30),%r21 B $00000024 ; enter loop ADD,L %r21,%r22,%r23 $0000002A LDO -1(%r29),%r29 SUB %r23,%r8,%r23 $00000024 SUB %r4,%r31,%r25 AND %r25,%r19,%r26 CMPB,*<>,N %r0,%r26,$00000046 ; (forward) DEPD,Z %r25,31,32,%r20 OR %r20,%r24,%r21 CMPB,*<<,N %r21,%r23,$0000002A ;(backward) SUB %r31,%r6,%r31 ;-------------Break path---------------------$00000046 DEPD,Z %r23,31,32,%r25 ;tl EXTRD,U %r23,31,32,%r26 ;t AND %r25,%r19,%r24 ;tl = (tl<<32)&0xfffffff0000000L ADD,L %r31,%r26,%r31 ;th += t; CMPCLR,*>>= %r5,%r24,%r0 ;if (l<tl) LDO 1(%r31),%r31 ; th++; CMPB,*<<=,N %r31,%r4,$00000036 ;if (n < th) (forward) LDO -1(%r29),%r29 ;q--; ADD,L %r4,%r3,%r4 ;h += d;$00000036 ADDIB,=,N -1,%r9,$D1 ;if (--count == 0) break (forward) SUB %r5,%r24,%r28 ; l -= tl; SUB %r4,%r31,%r24 ; h -= th; SHRPD %r24,%r28,32,%r4 ; h = ((h<<32)|(l>>32)); DEPD,Z %r29,31,32,%r10 ; ret = q<<32 b $0000001C DEPD,Z %r28,31,32,%r5 ; l = l << 32 $D1 OR %r10,%r29,%r28 ; ret |= q$D3 LDD -368(%r30),%r2 $D0 LDD -296(%r30),%r10 LDD -304(%r30),%r9 LDD -312(%r30),%r8 LDD -320(%r30),%r7 LDD -328(%r30),%r6 LDD -336(%r30),%r5 LDD -344(%r30),%r4 BVE (%r2) .EXIT LDD,MB -352(%r30),%r3 bn_div_err_case MFIA %r6 ADDIL L'bn_div_words-bn_div_err_case,%r6,%r1 LDO R'bn_div_words-bn_div_err_case(%r1),%r6 ADDIL LT'__iob,%r27,%r1 LDD RT'__iob(%r1),%r26 ADDIL L'C$4-bn_div_words,%r6,%r1 LDO R'C$4-bn_div_words(%r1),%r25 LDO 64(%r26),%r26 .CALL 
;in=24,25,26,29;out=28;
;
; NOTE: the line above and the instructions down to the first .PROCEND are
; the tail of a procedure whose beginning lies before this section.  They
; are reproduced unchanged, one statement per line.
;
        B,L     fprintf,%r2
        LDO     -48(%r30),%r29          ; issued in the branch delay slot
        LDD     -288(%r30),%r27         ; reload %r27 from the frame after the call
        .CALL   ;in=29;
        B,L     abort,%r2
        LDO     -48(%r30),%r29          ; issued in the branch delay slot
        LDD     -288(%r30),%r27         ; reload %r27 from the frame after the call
        B       $D0
        LDD     -368(%r30),%r2          ; issued in the branch delay slot
        .PROCEND        ;in=24,25,26,29;out=28;

;----------------------------------------------------------------------------
;
; Registers to hold 64-bit values to manipulate.  The "L" part
; of the register corresponds to the upper 32-bits, while the "R"
; part corresponds to the lower 32-bits
;
; Note, that when using b6 and b7, the code must save these before
; using them because they are callee save registers
;
;
; Floating point registers to use to save values that
; are manipulated.  These don't collide with ftemp1-6 and
; are all caller save registers
;
a0        .reg %fr22
a0L       .reg %fr22L
a0R       .reg %fr22R

a1        .reg %fr23
a1L       .reg %fr23L
a1R       .reg %fr23R

a2        .reg %fr24
a2L       .reg %fr24L
a2R       .reg %fr24R

a3        .reg %fr25
a3L       .reg %fr25L
a3R       .reg %fr25R

a4        .reg %fr26
a4L       .reg %fr26L
a4R       .reg %fr26R

a5        .reg %fr27
a5L       .reg %fr27L
a5R       .reg %fr27R

a6        .reg %fr28
a6L       .reg %fr28L
a6R       .reg %fr28R

a7        .reg %fr29
a7L       .reg %fr29L
a7R       .reg %fr29R

b0        .reg %fr30
b0L       .reg %fr30L
b0R       .reg %fr30R

b1        .reg %fr31
b1L       .reg %fr31L
b1R       .reg %fr31R

;
; Temporary floating point variables, these are all caller save
; registers
;
ftemp1    .reg %fr4
ftemp2    .reg %fr5
ftemp3    .reg %fr6
ftemp4    .reg %fr7

;
; The B set of registers when used.
;
b2        .reg %fr8
b2L       .reg %fr8L
b2R       .reg %fr8R

b3        .reg %fr9
b3L       .reg %fr9L
b3R       .reg %fr9R

b4        .reg %fr10
b4L       .reg %fr10L
b4R       .reg %fr10R

b5        .reg %fr11
b5L       .reg %fr11L
b5R       .reg %fr11R

b6        .reg %fr12
b6L       .reg %fr12L
b6R       .reg %fr12R

b7        .reg %fr13
b7L       .reg %fr13L
b7R       .reg %fr13R

;
; General registers used by the squaring macros below.  c1/c2/c3 form the
; rotating three-word column accumulator.  %r3-%r6 (c3, m, lt, ht) are
; saved and restored by bn_sqr_comba8 itself.
;
c1        .reg %r21               ; only reg
temp1     .reg %r20               ; only reg
temp2     .reg %r19               ; only reg
temp3     .reg %r31               ; only reg

m1        .reg %r28
c2        .reg %r23
high_one  .reg %r1
ht        .reg %r6
lt        .reg %r5
m         .reg %r4
c3        .reg %r3

;----------------------------------------------------------------------------
; SQR_ADD_C A0L,A0R,C1,C2,C3
;
; Adds the square of one 64-bit word to the three-word accumulator
; (C3:C2:C1).  The word is supplied as its two 32-bit halves A0L (upper)
; and A0R (lower), living in a floating point register so that XMPYU can
; form the 32x32->64 partial products.
;
;   m  = A0L * A0R      (cross product, needed twice)
;   lt = A0R * A0R
;   ht = A0L * A0L
;   (ht:lt) += 2*m << 32, then C1 += lt, C2 += ht + carry, C3 += carry
;
; Requires: high_mask as set up by the caller (bn_sqr_comba8 loads
;           0xffffffff80000000 into it).
; Scratch:  ftemp1-ftemp3, m, lt, ht, temp1, temp2, temp3, and the three
;           doublewords at -24(%sp), -16(%sp) and -8(%sp).
;----------------------------------------------------------------------------
SQR_ADD_C .macro  A0L,A0R,C1,C2,C3
        XMPYU   A0L,A0R,ftemp1          ; m
        FSTD    ftemp1,-24(%sp)         ; store m

        XMPYU   A0R,A0R,ftemp2          ; lt
        FSTD    ftemp2,-16(%sp)         ; store lt

        XMPYU   A0L,A0L,ftemp3          ; ht
        FSTD    ftemp3,-8(%sp)          ; store ht

        LDD     -24(%sp),m              ; load m
        AND     m,high_mask,temp2       ; m & Mask (upper 33 bits of m)
        DEPD,Z  m,30,31,temp3           ; temp3 = m << 33   (low part of 2*m << 32)
        LDD     -16(%sp),lt             ; lt

        LDD     -8(%sp),ht              ; ht
        EXTRD,U temp2,32,33,temp1       ; temp1 = (m & Mask) >> 31 (high part of 2*m << 32)
        ADD     temp3,lt,lt             ; lt = lt + (m << 33), sets carry
        ADD,L   ht,temp1,ht             ; ht += temp1 (logical add, carry left intact)
        ADD,DC  ht,%r0,ht               ; ht += carry from the lt addition

        ADD     C1,lt,C1                ; c1 = c1 + lt
        ADD,DC  ht,%r0,ht               ; ht += carry

        ADD     C2,ht,C2                ; c2 = c2 + ht
        ADD,DC  C3,%r0,C3               ; c3 += carry
.endm

;----------------------------------------------------------------------------
; SQR_ADD_C2 A0L,A0R,A1L,A1R,C1,C2,C3
;
; Adds twice the product of two 64-bit words to the three-word accumulator
; (C3:C2:C1).  Each word is supplied as its 32-bit halves (xL = upper,
; xR = lower) in floating point registers.
;
;   m1 = A0L * A1R,  m = A0R * A1L,  lt = A0R * A1R,  ht = A0L * A1L
;   (ht:lt) = full 128-bit product, doubled (the bit shifted out of ht goes
;   to C3), then C1 += lt, C2 += ht, C3 += carries
;
; Requires: high_one as set up by the caller (bn_sqr_comba8 loads 1 << 32).
; Scratch:  ftemp1-ftemp4, m, m1, lt, ht, temp1, temp3, and the four
;           doublewords at -32(%sp) .. -8(%sp).
;----------------------------------------------------------------------------
SQR_ADD_C2 .macro  A0L,A0R,A1L,A1R,C1,C2,C3
        XMPYU   A0L,A1R,ftemp1          ; m1 = A0L * A1R
        FSTD    ftemp1,-16(%sp)         ;
        XMPYU   A0R,A1L,ftemp2          ; m  = A0R * A1L
        FSTD    ftemp2,-8(%sp)          ;
        XMPYU   A0R,A1R,ftemp3          ; lt = A0R * A1R
        FSTD    ftemp3,-32(%sp)
        XMPYU   A0L,A1L,ftemp4          ; ht = A0L * A1L
        FSTD    ftemp4,-24(%sp)         ;

        LDD     -8(%sp),m               ; m  (%r4)
        LDD     -16(%sp),m1             ; m1 (%r28)
        ADD,L   m,m1,m                  ; m = m + m1
        DEPD,Z  m,31,32,temp3           ; temp3 = (m + m1) << 32
        LDD     -24(%sp),ht             ; ht (%r6)

        CMPCLR,*>>= m,m1,%r0            ; if (m < m1), i.e. the sum wrapped
        ADD,L   ht,high_one,ht          ;     ht += high_one
        EXTRD,U m,31,32,temp1           ; temp1 = m >> 32
        LDD     -32(%sp),lt             ; lt
        ADD,L   ht,temp1,ht             ; ht += m >> 32
        ADD     lt,temp3,lt             ; lt = lt + (m << 32), sets carry
        ADD,DC  ht,%r0,ht               ; ht += carry

        ADD     ht,ht,ht                ; ht = ht + ht
        ADD,DC  C3,%r0,C3               ; add in carry (c3++)

        ADD     lt,lt,lt                ; lt = lt + lt
        ADD,DC  ht,%r0,ht               ; add in carry (ht++)

        ADD     C1,lt,C1                ; c1 = c1 + lt
        ADD,DC,*NUV ht,%r0,ht           ; add in carry (ht++)
        LDO     1(C3),C3                ; bump c3 if overflow, nullify otherwise

        ADD     C2,ht,C2                ; c2 = c2 + ht
        ADD,DC  C3,%r0,C3               ; add in carry (c3++)
.endm

;----------------------------------------------------------------------------
;
; void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
;   arg0 = r_ptr
;   arg1 = a_ptr
;
; Squares the eight 64-bit words a[0..7] and stores the sixteen-word result
; in r[0..15].  The product is built one result word (column) at a time:
; for column k every pair a[i]*a[j] with i + j = k is accumulated into the
; rotating accumulator (c1,c2,c3) - SQR_ADD_C for the diagonal term i == j,
; SQR_ADD_C2 for each off-diagonal pair (counted twice) - after which the
; low accumulator word is stored and cleared and the roles rotate.
;
; Frame: 128 bytes.  %r3-%r6 are saved at 0..24(%sp) on entry, before the
; stack pointer is bumped, and restored on exit.  The macros use the
; doublewords just below the bumped %sp as scratch.
;
bn_sqr_comba8
        .PROC
        .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
        .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
        .ENTRY
        .align 64

        STD     %r3,0(%sp)              ; save r3
        STD     %r4,8(%sp)              ; save r4
        STD     %r5,16(%sp)             ; save r5
        STD     %r6,24(%sp)             ; save r6

        ;
        ; Zero out carries
        ;
        COPY    %r0,c1
        COPY    %r0,c2
        COPY    %r0,c3

        LDO     128(%sp),%sp            ; bump stack
        DEPDI,Z -1,32,33,high_mask      ; Create Mask 0xffffffff80000000L
        DEPDI,Z 1,31,1,high_one         ; Create Value  1 << 32

        ;
        ; Load up all of the values we are going to use
        ;
        FLDD    0(a_ptr),a0
        FLDD    8(a_ptr),a1
        FLDD    16(a_ptr),a2
        FLDD    24(a_ptr),a3
        FLDD    32(a_ptr),a4
        FLDD    40(a_ptr),a5
        FLDD    48(a_ptr),a6
        FLDD    56(a_ptr),a7

        ; column 0
        SQR_ADD_C  a0L,a0R,c1,c2,c3
        STD     c1,0(r_ptr)             ; r[0] = c1;
        COPY    %r0,c1

        ; column 1
        SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
        STD     c2,8(r_ptr)             ; r[1] = c2;
        COPY    %r0,c2

        ; column 2
        SQR_ADD_C  a1L,a1R,c3,c1,c2
        SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
        STD     c3,16(r_ptr)            ; r[2] = c3;
        COPY    %r0,c3

        ; column 3
        SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
        SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
        STD     c1,24(r_ptr)            ; r[3] = c1;
        COPY    %r0,c1

        ; column 4
        SQR_ADD_C  a2L,a2R,c2,c3,c1
        SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
        SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1
        STD     c2,32(r_ptr)            ; r[4] = c2;
        COPY    %r0,c2

        ; column 5
        SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2
        SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2
        SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
        STD     c3,40(r_ptr)            ; r[5] = c3;
        COPY    %r0,c3

        ; column 6
        SQR_ADD_C  a3L,a3R,c1,c2,c3
        SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3
        SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3
        SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3
        STD     c1,48(r_ptr)            ; r[6] = c1;
        COPY    %r0,c1

        ; column 7
        SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1
        SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1
        SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1
        SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1
        STD     c2,56(r_ptr)            ; r[7] = c2;
        COPY    %r0,c2

        ; column 8
        SQR_ADD_C  a4L,a4R,c3,c1,c2
        SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2
        SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2
        SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2
        STD     c3,64(r_ptr)            ; r[8] = c3;
        COPY    %r0,c3

        ; column 9
        SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3
        SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3
        SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3
        STD     c1,72(r_ptr)            ; r[9] = c1;
        COPY    %r0,c1

        ; column 10
        SQR_ADD_C  a5L,a5R,c2,c3,c1
        SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1
        SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1
        STD     c2,80(r_ptr)            ; r[10] = c2;
        COPY    %r0,c2

        ; column 11
        SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2
        SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2
        STD     c3,88(r_ptr)            ; r[11] = c3;
        COPY    %r0,c3

        ; column 12
        SQR_ADD_C  a6L,a6R,c1,c2,c3
        SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3
        STD     c1,96(r_ptr)            ; r[12] = c1;
        COPY    %r0,c1

        ; column 13
        SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1
        STD     c2,104(r_ptr)           ; r[13] = c2;
        COPY    %r0,c2

        ; column 14, plus the final carry word
        SQR_ADD_C  a7L,a7R,c3,c1,c2
        STD     c3, 112(r_ptr)          ; r[14] = c3
        STD     c1, 120(r_ptr)          ; r[15] = c1

        .EXIT
        LDD     -104(%sp),%r6           ; restore r6
        LDD     -112(%sp),%r5           ; restore r5
        LDD     -120(%sp),%r4           ; restore r4
        BVE     (%rp)
        LDD,MB  -128(%sp),%r3           ; delay slot: restore r3 and pop the 128-byte frame

        .PROCEND
;-----------------------------------------------------------------------------
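; ---------------------------------------------------------------------------
; Code-viewer page residue (not part of the assembly source), translated:
;
; Keyboard shortcuts
;   Copy code               Ctrl + C
;   Search code             Ctrl + F
;   Full-screen mode        F11
;   Toggle theme            Ctrl + Shift + D
;   Show shortcuts          ?
;   Increase font size      Ctrl + =
;   Decrease font size      Ctrl + -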