📄 pa-risc2.s
字号:
EXTRD,U %r3,31,32,%r9 ;offset 0x9a4 EXTRD,U %r4,31,32,%r8 ;offset 0x9a8 .CALL ARGW0=GR,ARGW1=GR,RTNVAL=GR ;in=25,26;out=28; B,L BN_num_bits_word,%r2 ;offset 0x9ac EXTRD,U %r5,31,32,%r7 ;offset 0x9b0 LDI 64,%r20 ;offset 0x9b4 DEPD %r7,31,32,%r5 ;offset 0x9b8 DEPD %r8,31,32,%r4 ;offset 0x9bc DEPD %r9,31,32,%r3 ;offset 0x9c0 CMPB,= %r28,%r20,$00060012 ;offset 0x9c4 COPY %r28,%r24 ;offset 0x9c8 MTSARCM %r24 ;offset 0x9cc DEPDI,Z -1,%sar,1,%r19 ;offset 0x9d0 CMPB,*>>,N %r4,%r19,$D2 ;offset 0x9d4$00060012 SUBI 64,%r24,%r31 ;offset 0x9d8 CMPCLR,*<< %r4,%r3,%r0 ;offset 0x9dc SUB %r4,%r3,%r4 ;offset 0x9e0$00060016 CMPB,= %r31,%r0,$0006001A ;offset 0x9e4 COPY %r0,%r9 ;offset 0x9e8 MTSARCM %r31 ;offset 0x9ec DEPD,Z %r3,%sar,64,%r3 ;offset 0x9f0 SUBI 64,%r31,%r26 ;offset 0x9f4 MTSAR %r26 ;offset 0x9f8 SHRPD %r4,%r5,%sar,%r4 ;offset 0x9fc MTSARCM %r31 ;offset 0xa00 DEPD,Z %r5,%sar,64,%r5 ;offset 0xa04$0006001A DEPDI,Z -1,31,32,%r19 ;offset 0xa08 AND %r3,%r19,%r29 ;offset 0xa0c EXTRD,U %r29,31,32,%r2 ;offset 0xa10 DEPDI,Z -1,63,32,%r6 ;offset 0xa14 MOVIB,TR 2,%r8,$0006001C ;offset 0xa18 EXTRD,U %r3,63,32,%r7 ;offset 0xa1c$D2 ;--- not PIC ADDIL LR'__iob-$global$,%r27,%r1 ;offset 0xa20 ;--- not PIC LDIL LR'C$7,%r21 ;offset 0xa24 ;--- not PIC LDO RR'__iob-$global$+32(%r1),%r26 ;offset 0xa28 ;--- not PIC .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR ;in=24,25,26;out=28; ;--- not PIC B,L fprintf,%r2 ;offset 0xa2c ;--- not PIC LDO RR'C$7(%r21),%r25 ;offset 0xa30 .CALL ; B,L abort,%r2 ;offset 0xa34 NOP ;offset 0xa38 B $D3 ;offset 0xa3c LDW -212(%r30),%r2 ;offset 0xa40$00060020 COPY %r4,%r26 ;offset 0xa44 EXTRD,U %r4,31,32,%r25 ;offset 0xa48 COPY %r2,%r24 ;offset 0xa4c .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL) B,L $$div2U,%r31 ;offset 0xa50 EXTRD,U %r2,31,32,%r23 ;offset 0xa54 DEPD %r28,31,32,%r29 ;offset 0xa58$00060022 STD %r29,-152(%r30) ;offset 0xa5c$D1 AND %r5,%r19,%r24 ;offset 0xa60 EXTRD,U %r24,31,32,%r24 ;offset 0xa64 STW %r2,-160(%r30) ;offset 0xa68 STW %r7,-128(%r30) 
;offset 0xa6c FLDD -152(%r30),%fr4 ;offset 0xa70 FLDD -152(%r30),%fr7 ;offset 0xa74 FLDW -160(%r30),%fr8L ;offset 0xa78 FLDW -128(%r30),%fr5L ;offset 0xa7c XMPYU %fr8L,%fr7L,%fr10 ;offset 0xa80 FSTD %fr10,-136(%r30) ;offset 0xa84 XMPYU %fr8L,%fr7R,%fr22 ;offset 0xa88 FSTD %fr22,-144(%r30) ;offset 0xa8c XMPYU %fr5L,%fr4L,%fr11 ;offset 0xa90 XMPYU %fr5L,%fr4R,%fr23 ;offset 0xa94 FSTD %fr11,-112(%r30) ;offset 0xa98 FSTD %fr23,-120(%r30) ;offset 0xa9c LDD -136(%r30),%r28 ;offset 0xaa0 DEPD,Z %r28,31,32,%r31 ;offset 0xaa4 LDD -144(%r30),%r20 ;offset 0xaa8 ADD,L %r20,%r31,%r31 ;offset 0xaac LDD -112(%r30),%r22 ;offset 0xab0 DEPD,Z %r22,31,32,%r22 ;offset 0xab4 LDD -120(%r30),%r21 ;offset 0xab8 B $00060024 ;offset 0xabc ADD,L %r21,%r22,%r23 ;offset 0xac0$D0 OR %r9,%r29,%r29 ;offset 0xac4$00060040 EXTRD,U %r29,31,32,%r28 ;offset 0xac8$00060002$L2 LDW -212(%r30),%r2 ;offset 0xacc$D3 LDW -168(%r30),%r9 ;offset 0xad0 LDD -176(%r30),%r8 ;offset 0xad4 EXTRD,U %r8,31,32,%r7 ;offset 0xad8 LDD -184(%r30),%r6 ;offset 0xadc EXTRD,U %r6,31,32,%r5 ;offset 0xae0 LDW -188(%r30),%r4 ;offset 0xae4 BVE (%r2) ;offset 0xae8 .EXIT LDW,MB -192(%r30),%r3 ;offset 0xaec .PROCEND ;in=23,25;out=28,29;fpin=105,107;;----------------------------------------------------------------------------;; Registers to hold 64-bit values to manipulate. The "L" part; of the register corresponds to the upper 32-bits, while the "R"; part corresponds to the lower 32-bits; ; Note, that when using b6 and b7, the code must save these before; using them because they are callee save registers ; ;; Floating point registers to use to save values that; are manipulated. 
; These don't collide with ftemp1-6 and
; are all caller save registers
;
a0          .reg %fr22
a0L         .reg %fr22L
a0R         .reg %fr22R

a1          .reg %fr23
a1L         .reg %fr23L
a1R         .reg %fr23R

a2          .reg %fr24
a2L         .reg %fr24L
a2R         .reg %fr24R

a3          .reg %fr25
a3L         .reg %fr25L
a3R         .reg %fr25R

a4          .reg %fr26
a4L         .reg %fr26L
a4R         .reg %fr26R

a5          .reg %fr27
a5L         .reg %fr27L
a5R         .reg %fr27R

a6          .reg %fr28
a6L         .reg %fr28L
a6R         .reg %fr28R

a7          .reg %fr29
a7L         .reg %fr29L
a7R         .reg %fr29R

b0          .reg %fr30
b0L         .reg %fr30L
b0R         .reg %fr30R

b1          .reg %fr31
b1L         .reg %fr31L
b1R         .reg %fr31R

;
; Temporary floating point variables, these are all caller save
; registers
;
ftemp1      .reg %fr4
ftemp2      .reg %fr5
ftemp3      .reg %fr6
ftemp4      .reg %fr7

;
; The B set of registers when used.
;
b2          .reg %fr8
b2L         .reg %fr8L
b2R         .reg %fr8R

b3          .reg %fr9
b3L         .reg %fr9L
b3R         .reg %fr9R

b4          .reg %fr10
b4L         .reg %fr10L
b4R         .reg %fr10R

b5          .reg %fr11
b5L         .reg %fr11L
b5R         .reg %fr11R

b6          .reg %fr12
b6L         .reg %fr12L
b6R         .reg %fr12R

b7          .reg %fr13
b7L         .reg %fr13L
b7R         .reg %fr13R

;
; General register aliases used by the comba macros below.
; c1/c2/c3 form the rotating three-word column accumulator.
; r3-r6 (c3, m, lt, ht) are saved and restored by bn_sqr_comba8.
;
c1          .reg %r21               ; only reg
temp1       .reg %r20               ; only reg
temp2       .reg %r19               ; only reg
temp3       .reg %r31               ; only reg

m1          .reg %r28
c2          .reg %r23
high_one    .reg %r1
ht          .reg %r6
lt          .reg %r5
m           .reg %r4
c3          .reg %r3

;----------------------------------------------------------------------------
; SQR_ADD_C A0L,A0R,C1,C2,C3
;
;   (C3:C2:C1) += a * a      where a = A0L:A0R (high:low 32-bit halves)
;
; With  m = A0L*A0R,  lt = A0R*A0R,  ht = A0L*A0L :
;   a*a = ht*2^64 + (2*m)*2^32 + lt
;
; Requires: high_mask preloaded (bn_sqr_comba8 sets it to
;           0xffffffff80000000); the alias itself is defined outside
;           this section of the file.
; Clobbers: ftemp1-ftemp3, m, lt, ht, temp1, temp2, temp3,
;           scratch doublewords at -24(%sp), -16(%sp), -8(%sp)
;----------------------------------------------------------------------------
SQR_ADD_C   .macro  A0L,A0R,C1,C2,C3
        XMPYU   A0L,A0R,ftemp1          ; m  = high * low
        FSTD    ftemp1,-24(%sp)         ; store m

        XMPYU   A0R,A0R,ftemp2          ; lt = low * low
        FSTD    ftemp2,-16(%sp)         ; store lt

        XMPYU   A0L,A0L,ftemp3          ; ht = high * high
        FSTD    ftemp3,-8(%sp)          ; store ht

        LDD     -24(%sp),m              ; load m
        AND     m,high_mask,temp2       ; keep the top 33 bits of m
        DEPD,Z  m,30,31,temp3           ; temp3 = m << 33  (low word of 2*m << 32)
        LDD     -16(%sp),lt             ; load lt

        LDD     -8(%sp),ht              ; load ht
        EXTRD,U temp2,32,33,temp1       ; temp1 = m >> 31  (high word of 2*m << 32)
        ADD     temp3,lt,lt             ; lt += temp3, sets carry
        ADD,L   ht,temp1,ht             ; ht += temp1 (ADD,L leaves the carry alone)
        ADD,DC  ht,%r0,ht               ; ht += carry out of the lt addition

        ADD     C1,lt,C1                ; c1 += lt
        ADD,DC  ht,%r0,ht               ; ht += carry

        ADD     C2,ht,C2                ; c2 += ht
        ADD,DC  C3,%r0,C3               ; c3 += carry
.endm

;----------------------------------------------------------------------------
; SQR_ADD_C2 A0L,A0R,A1L,A1R,C1,C2,C3
;
;   (C3:C2:C1) += 2 * (a * b)    where a = A0L:A0R, b = A1L:A1R
;
; The 128-bit product ht:lt is built from four 32x32 partial products,
; doubled, and then added into the accumulator.
;
; Requires: high_one preloaded with 1 << 32 (done by bn_sqr_comba8).
; Clobbers: ftemp1-ftemp4, m, m1, lt, ht, temp1, temp3,
;           scratch doublewords at -32(%sp) .. -8(%sp)
;----------------------------------------------------------------------------
SQR_ADD_C2  .macro  A0L,A0R,A1L,A1R,C1,C2,C3
        XMPYU   A0L,A1R,ftemp1          ; m1 = a.high * b.low
        FSTD    ftemp1,-16(%sp)         ;
        XMPYU   A0R,A1L,ftemp2          ; m  = a.low  * b.high
        FSTD    ftemp2,-8(%sp)          ;
        XMPYU   A0R,A1R,ftemp3          ; lt = a.low  * b.low
        FSTD    ftemp3,-32(%sp)
        XMPYU   A0L,A1L,ftemp4          ; ht = a.high * b.high
        FSTD    ftemp4,-24(%sp)         ;

        LDD     -8(%sp),m               ; load m
        LDD     -16(%sp),m1             ; load m1
        ADD,L   m,m1,m                  ; m = m + m1 (may wrap)

        DEPD,Z  m,31,32,temp3           ; temp3 = (m + m1) << 32
        LDD     -24(%sp),ht             ; load ht

        CMPCLR,*>>= m,m1,%r0            ; if (m < m1) the sum wrapped ...
        ADD,L   ht,high_one,ht          ; ... so ht += 1 << 32

        EXTRD,U m,31,32,temp1           ; temp1 = m >> 32
        LDD     -32(%sp),lt             ; load lt
        ADD,L   ht,temp1,ht             ; ht += m >> 32
        ADD     lt,temp3,lt             ; lt += temp3, sets carry
        ADD,DC  ht,%r0,ht               ; ht += carry

        ADD     ht,ht,ht                ; ht = 2 * ht
        ADD,DC  C3,%r0,C3               ; c3 += carry out of the doubling

        ADD     lt,lt,lt                ; lt = 2 * lt
        ADD,DC  ht,%r0,ht               ; ht += carry out of the doubling

        ADD     C1,lt,C1                ; c1 += lt
        ADD,DC,*NUV ht,%r0,ht           ; ht += carry; next insn nullified unless ht overflowed
        LDO     1(C3),C3                ; bump c3 if overflow, nullify otherwise

        ADD     C2,ht,C2                ; c2 += ht
        ADD,DC  C3,%r0,C3               ; c3 += carry
.endm

;----------------------------------------------------------------------------
; void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
;   arg0 = r_ptr
;   arg1 = a_ptr
;
; Comba squaring: reads the 8 doublewords a[0..7] and stores the
; 16 doublewords r[0..15].  Output word r[k] collects every product
; a[i]*a[j] with i + j = k; (c1,c2,c3) rotate roles after each store.
;
; r_ptr, a_ptr and high_mask are register aliases defined outside this
; section of the file.
;
; Frame: r3-r6 are saved at 0..24(%sp) and the stack is then bumped by
; 128 bytes; the macros use -32(%sp)..-8(%sp) as scratch after the bump.
;----------------------------------------------------------------------------
bn_sqr_comba8
        .PROC
        .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
        .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
        .ENTRY
        .align 64

        STD     %r3,0(%sp)              ; save r3
        STD     %r4,8(%sp)              ; save r4
        STD     %r5,16(%sp)             ; save r5
        STD     %r6,24(%sp)             ; save r6

        ;
        ; Zero out carries
        ;
        COPY    %r0,c1
        COPY    %r0,c2
        COPY    %r0,c3

        LDO     128(%sp),%sp            ; bump stack
        DEPDI,Z -1,32,33,high_mask      ; Create Mask 0xffffffff80000000L
        DEPDI,Z 1,31,1,high_one         ; Create Value  1 << 32

        ;
        ; Load up all of the values we are going to use
        ;
        FLDD    0(a_ptr),a0
        FLDD    8(a_ptr),a1
        FLDD    16(a_ptr),a2
        FLDD    24(a_ptr),a3
        FLDD    32(a_ptr),a4
        FLDD    40(a_ptr),a5
        FLDD    48(a_ptr),a6
        FLDD    56(a_ptr),a7

        ; column 0
        SQR_ADD_C   a0L,a0R,c1,c2,c3
        STD     c1,0(r_ptr)             ; r[0] = c1;
        COPY    %r0,c1

        ; column 1
        SQR_ADD_C2  a1L,a1R,a0L,a0R,c2,c3,c1
        STD     c2,8(r_ptr)             ; r[1] = c2;
        COPY    %r0,c2

        ; column 2
        SQR_ADD_C   a1L,a1R,c3,c1,c2
        SQR_ADD_C2  a2L,a2R,a0L,a0R,c3,c1,c2
        STD     c3,16(r_ptr)            ; r[2] = c3;
        COPY    %r0,c3

        ; column 3
        SQR_ADD_C2  a3L,a3R,a0L,a0R,c1,c2,c3
        SQR_ADD_C2  a2L,a2R,a1L,a1R,c1,c2,c3
        STD     c1,24(r_ptr)            ; r[3] = c1;
        COPY    %r0,c1

        ; column 4
        SQR_ADD_C   a2L,a2R,c2,c3,c1
        SQR_ADD_C2  a3L,a3R,a1L,a1R,c2,c3,c1
        SQR_ADD_C2  a4L,a4R,a0L,a0R,c2,c3,c1
        STD     c2,32(r_ptr)            ; r[4] = c2;
        COPY    %r0,c2

        ; column 5
        SQR_ADD_C2  a5L,a5R,a0L,a0R,c3,c1,c2
        SQR_ADD_C2  a4L,a4R,a1L,a1R,c3,c1,c2
        SQR_ADD_C2  a3L,a3R,a2L,a2R,c3,c1,c2
        STD     c3,40(r_ptr)            ; r[5] = c3;
        COPY    %r0,c3

        ; column 6
        SQR_ADD_C   a3L,a3R,c1,c2,c3
        SQR_ADD_C2  a4L,a4R,a2L,a2R,c1,c2,c3
        SQR_ADD_C2  a5L,a5R,a1L,a1R,c1,c2,c3
        SQR_ADD_C2  a6L,a6R,a0L,a0R,c1,c2,c3
        STD     c1,48(r_ptr)            ; r[6] = c1;
        COPY    %r0,c1

        ; column 7
        SQR_ADD_C2  a7L,a7R,a0L,a0R,c2,c3,c1
        SQR_ADD_C2  a6L,a6R,a1L,a1R,c2,c3,c1
        SQR_ADD_C2  a5L,a5R,a2L,a2R,c2,c3,c1
        SQR_ADD_C2  a4L,a4R,a3L,a3R,c2,c3,c1
        STD     c2,56(r_ptr)            ; r[7] = c2;
        COPY    %r0,c2

        ; column 8
        SQR_ADD_C   a4L,a4R,c3,c1,c2
        SQR_ADD_C2  a5L,a5R,a3L,a3R,c3,c1,c2
        SQR_ADD_C2  a6L,a6R,a2L,a2R,c3,c1,c2
        SQR_ADD_C2  a7L,a7R,a1L,a1R,c3,c1,c2
        STD     c3,64(r_ptr)            ; r[8] = c3;
        COPY    %r0,c3

        ; column 9
        SQR_ADD_C2  a7L,a7R,a2L,a2R,c1,c2,c3
        SQR_ADD_C2  a6L,a6R,a3L,a3R,c1,c2,c3
        SQR_ADD_C2  a5L,a5R,a4L,a4R,c1,c2,c3
        STD     c1,72(r_ptr)            ; r[9] = c1;
        COPY    %r0,c1

        ; column 10
        SQR_ADD_C   a5L,a5R,c2,c3,c1
        SQR_ADD_C2  a6L,a6R,a4L,a4R,c2,c3,c1
        SQR_ADD_C2  a7L,a7R,a3L,a3R,c2,c3,c1
        STD     c2,80(r_ptr)            ; r[10] = c2;
        COPY    %r0,c2

        ; column 11
        SQR_ADD_C2  a7L,a7R,a4L,a4R,c3,c1,c2
        SQR_ADD_C2  a6L,a6R,a5L,a5R,c3,c1,c2
        STD     c3,88(r_ptr)            ; r[11] = c3;
        COPY    %r0,c3

        ; column 12
        SQR_ADD_C   a6L,a6R,c1,c2,c3
        SQR_ADD_C2  a7L,a7R,a5L,a5R,c1,c2,c3
        STD     c1,96(r_ptr)            ; r[12] = c1;
        COPY    %r0,c1

        ; column 13
        SQR_ADD_C2  a7L,a7R,a6L,a6R,c2,c3,c1
        STD     c2,104(r_ptr)           ; r[13] = c2;
        COPY    %r0,c2

        ; column 14, and the final carry word
        SQR_ADD_C   a7L,a7R,c3,c1,c2
        STD     c3,112(r_ptr)           ; r[14] = c3
        STD     c1,120(r_ptr)           ; r[15] = c1

        .EXIT
        LDD     -104(%sp),%r6           ; restore r6
        LDD     -112(%sp),%r5           ; restore r5
        LDD     -120(%sp),%r4           ; restore r4
        BVE     (%rp)                   ; return to caller
        LDD,MB  -128(%sp),%r3           ; delay slot: restore r3 and pop the 128-byte frame

        .PROCEND                        ; closes the .PROC opened above
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -