📄 aix_ppc32.s
字号:
mulhwu r8,r5,r6 addc r11,r7,r11 adde r9,r8,r9 addze r10,r0 addc r11,r7,r11 adde r9,r8,r9 addze r10,r10 #sqr_add_c2(a,6,5,c3,c1,c2); lwz r5,20(r4) lwz r6,24(r4) mullw r7,r5,r6 mulhwu r8,r5,r6 addc r11,r7,r11 adde r9,r8,r9 addze r10,r10 addc r11,r7,r11 adde r9,r8,r9 addze r10,r10 stw r11,44(r3) #r[11]=c3; #sqr_add_c(a,6,c1,c2,c3); mullw r7,r6,r6 mulhwu r8,r6,r6 addc r9,r7,r9 adde r10,r8,r10 addze r11,r0 #sqr_add_c2(a,7,5,c1,c2,c3) lwz r6,28(r4) mullw r7,r5,r6 mulhwu r8,r5,r6 addc r9,r7,r9 adde r10,r8,r10 addze r11,r11 addc r9,r7,r9 adde r10,r8,r10 addze r11,r11 stw r9,48(r3) #r[12]=c1; #sqr_add_c2(a,7,6,c2,c3,c1) lwz r5,24(r4) mullw r7,r5,r6 mulhwu r8,r5,r6 addc r10,r7,r10 adde r11,r8,r11 addze r9,r0 addc r10,r7,r10 adde r11,r8,r11 addze r9,r9 stw r10,52(r3) #r[13]=c2; #sqr_add_c(a,7,c3,c1,c2); mullw r7,r6,r6 mulhwu r8,r6,r6 addc r11,r7,r11 adde r9,r8,r9 stw r11,56(r3) #r[14]=c3; stw r9, 60(r3) #r[15]=c1; bclr BO_ALWAYS,CR0_LT .long 0x00000000## NOTE: The following label name should be changed to# "bn_mul_comba4" i.e. remove the first dot# for the gcc compiler. This should be automatically# done in the build#.align 4.bn_mul_comba4:## This is an optimized version of the bn_mul_comba4 routine.## void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)# r3 contains r# r4 contains a# r5 contains b# r6, r7 are the 2 BN_ULONGs being multiplied.# r8, r9 are the results of the 32x32 giving 64 multiply.# r10, r11, r12 are the equivalents of c1, c2, and c3.# xor r0,r0,r0 #r0=0. Used in addze below. 
#
# bn_mul_comba4 body: column-by-column 4x4 word multiply, storing the
# eight result words r[0]..r[7] through r3.
#
# Register roles (as stated in the routine header): r3 = r, r4 = a,
# r5 = b; r6/r7 hold the two words being multiplied; r8/r9 receive the
# low (mullw) and high (mulhwu) halves of the product; r10/r11/r12
# rotate through the c1/c2/c3 accumulator roles.  r0 was zeroed on
# entry, so "addze rX,r0" sets rX to just the carry bit and thereby
# initialises the top accumulator word of a new column.
#
# Each mul_add_c(x,y,lo,mid,hi) step adds the 64-bit product x*y into
# the three-word accumulator lo:mid:hi.  Operand loads are omitted when
# the register still holds the needed word from the previous step.
#
	#mul_add_c(a[0],b[0],c1,c2,c3);
	lwz	r6,0(r4)
	lwz	r7,0(r5)
	mullw	r10,r6,r7
	mulhwu	r11,r6,r7
	stw	r10,0(r3)		#r[0]=c1
	#mul_add_c(a[0],b[1],c2,c3,c1);
	lwz	r7,4(r5)
	mullw	r8,r6,r7
	mulhwu	r9,r6,r7
	addc	r11,r8,r11
	adde	r12,r9,r0		# first write of r12: high word + carry
	addze	r10,r0
	#mul_add_c(a[1],b[0],c2,c3,c1);
	lwz	r6,4(r4)
	lwz	r7,0(r5)
	mullw	r8,r6,r7
	mulhwu	r9,r6,r7
	addc	r11,r8,r11
	adde	r12,r9,r12
	addze	r10,r10
	stw	r11,4(r3)		#r[1]=c2
	#mul_add_c(a[2],b[0],c3,c1,c2);
	lwz	r6,8(r4)		# r7 still holds b[0]
	mullw	r8,r6,r7
	mulhwu	r9,r6,r7
	addc	r12,r8,r12
	adde	r10,r9,r10
	addze	r11,r0
	#mul_add_c(a[1],b[1],c3,c1,c2);
	lwz	r6,4(r4)
	lwz	r7,4(r5)
	mullw	r8,r6,r7
	mulhwu	r9,r6,r7
	addc	r12,r8,r12
	adde	r10,r9,r10
	addze	r11,r11
	#mul_add_c(a[0],b[2],c3,c1,c2);
	lwz	r6,0(r4)
	lwz	r7,8(r5)
	mullw	r8,r6,r7
	mulhwu	r9,r6,r7
	addc	r12,r8,r12
	adde	r10,r9,r10
	addze	r11,r11
	stw	r12,8(r3)		#r[2]=c3
	#mul_add_c(a[0],b[3],c1,c2,c3);
	lwz	r7,12(r5)		# r6 still holds a[0]
	mullw	r8,r6,r7
	mulhwu	r9,r6,r7
	addc	r10,r8,r10
	adde	r11,r9,r11
	addze	r12,r0
	#mul_add_c(a[1],b[2],c1,c2,c3);
	lwz	r6,4(r4)
	lwz	r7,8(r5)
	mullw	r8,r6,r7
	mulhwu	r9,r6,r7
	addc	r10,r8,r10
	adde	r11,r9,r11
	addze	r12,r12
	#mul_add_c(a[2],b[1],c1,c2,c3);
	lwz	r6,8(r4)
	lwz	r7,4(r5)
	mullw	r8,r6,r7
	mulhwu	r9,r6,r7
	addc	r10,r8,r10
	adde	r11,r9,r11
	addze	r12,r12
	#mul_add_c(a[3],b[0],c1,c2,c3);
	lwz	r6,12(r4)
	lwz	r7,0(r5)
	mullw	r8,r6,r7
	mulhwu	r9,r6,r7
	addc	r10,r8,r10
	adde	r11,r9,r11
	addze	r12,r12
	stw	r10,12(r3)		#r[3]=c1
	#mul_add_c(a[3],b[1],c2,c3,c1);
	lwz	r7,4(r5)		# r6 still holds a[3]
	mullw	r8,r6,r7
	mulhwu	r9,r6,r7
	addc	r11,r8,r11
	adde	r12,r9,r12
	addze	r10,r0
	#mul_add_c(a[2],b[2],c2,c3,c1);
	lwz	r6,8(r4)
	lwz	r7,8(r5)
	mullw	r8,r6,r7
	mulhwu	r9,r6,r7
	addc	r11,r8,r11
	adde	r12,r9,r12
	addze	r10,r10
	#mul_add_c(a[1],b[3],c2,c3,c1);
	lwz	r6,4(r4)
	lwz	r7,12(r5)
	mullw	r8,r6,r7
	mulhwu	r9,r6,r7
	addc	r11,r8,r11
	adde	r12,r9,r12
	addze	r10,r10
	stw	r11,16(r3)		#r[4]=c2
	#mul_add_c(a[2],b[3],c3,c1,c2);
	lwz	r6,8(r4)		# r7 still holds b[3]
	mullw	r8,r6,r7
	mulhwu	r9,r6,r7
	addc	r12,r8,r12
	adde	r10,r9,r10
	addze	r11,r0
	#mul_add_c(a[3],b[2],c3,c1,c2);
	lwz	r6,12(r4)
	lwz	r7,8(r5)		# b[2] is read through r5 (b), not r4 (a)
	mullw	r8,r6,r7
	mulhwu	r9,r6,r7
	addc	r12,r8,r12
	adde	r10,r9,r10
	addze	r11,r11
	stw	r12,20(r3)		#r[5]=c3
	#mul_add_c(a[3],b[3],c1,c2,c3);
	lwz	r7,12(r5)		# r6 still holds a[3]
	mullw	r8,r6,r7
	mulhwu	r9,r6,r7
	addc	r10,r8,r10
	adde	r11,r9,r11		# last column: no third accumulator word is kept
	stw	r10,24(r3)		#r[6]=c1
	stw	r11,28(r3)		#r[7]=c2
	bclr	BO_ALWAYS,CR0_LT
	.long	0x00000000

#
#	NOTE:	The following label name should be changed to
#		"bn_mul_comba8" i.e. remove the first dot
#		for the gcc compiler. This should be automatically
#		done in the build
#

	.align	4
.bn_mul_comba8:
#
# Optimized version of the bn_mul_comba8 routine.
#
# void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
# r3 contains r
# r4 contains a
# r5 contains b
# r6, r7 are the 2 BN_ULONGs being multiplied.
# r8, r9 are the results of the 32x32 giving 64 multiply.
# r10, r11, r12 are the equivalents of c1, c2, and c3.
#
	xor	r0,r0,r0		#r0=0. Used in addze below.

	#mul_add_c(a[0],b[0],c1,c2,c3);
	lwz	r6,0(r4)		#a[0]
	lwz	r7,0(r5)		#b[0]
	mullw	r10,r6,r7
	mulhwu	r11,r6,r7
	stw	r10,0(r3)		#r[0]=c1;
	#mul_add_c(a[0],b[1],c2,c3,c1);
	lwz	r7,4(r5)
	mullw	r8,r6,r7
	mulhwu	r9,r6,r7
	addc	r11,r11,r8
	addze	r12,r9			# since we didn't set r12 to zero before.
addze r10,r0 #mul_add_c(a[1],b[0],c2,c3,c1); lwz r6,4(r4) lwz r7,0(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r11,r11,r8 adde r12,r12,r9 addze r10,r10 stw r11,4(r3) #r[1]=c2; #mul_add_c(a[2],b[0],c3,c1,c2); lwz r6,8(r4) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r12,r12,r8 adde r10,r10,r9 addze r11,r0 #mul_add_c(a[1],b[1],c3,c1,c2); lwz r6,4(r4) lwz r7,4(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r12,r12,r8 adde r10,r10,r9 addze r11,r11 #mul_add_c(a[0],b[2],c3,c1,c2); lwz r6,0(r4) lwz r7,8(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r12,r12,r8 adde r10,r10,r9 addze r11,r11 stw r12,8(r3) #r[2]=c3; #mul_add_c(a[0],b[3],c1,c2,c3); lwz r7,12(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r10,r10,r8 adde r11,r11,r9 addze r12,r0 #mul_add_c(a[1],b[2],c1,c2,c3); lwz r6,4(r4) lwz r7,8(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r10,r10,r8 adde r11,r11,r9 addze r12,r12 #mul_add_c(a[2],b[1],c1,c2,c3); lwz r6,8(r4) lwz r7,4(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r10,r10,r8 adde r11,r11,r9 addze r12,r12 #mul_add_c(a[3],b[0],c1,c2,c3); lwz r6,12(r4) lwz r7,0(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r10,r10,r8 adde r11,r11,r9 addze r12,r12 stw r10,12(r3) #r[3]=c1; #mul_add_c(a[4],b[0],c2,c3,c1); lwz r6,16(r4) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r11,r11,r8 adde r12,r12,r9 addze r10,r0 #mul_add_c(a[3],b[1],c2,c3,c1); lwz r6,12(r4) lwz r7,4(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r11,r11,r8 adde r12,r12,r9 addze r10,r10 #mul_add_c(a[2],b[2],c2,c3,c1); lwz r6,8(r4) lwz r7,8(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r11,r11,r8 adde r12,r12,r9 addze r10,r10 #mul_add_c(a[1],b[3],c2,c3,c1); lwz r6,4(r4) lwz r7,12(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r11,r11,r8 adde r12,r12,r9 addze r10,r10 #mul_add_c(a[0],b[4],c2,c3,c1); lwz r6,0(r4) lwz r7,16(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r11,r11,r8 adde r12,r12,r9 addze r10,r10 stw r11,16(r3) #r[4]=c2; #mul_add_c(a[0],b[5],c3,c1,c2); lwz r7,20(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r12,r12,r8 adde r10,r10,r9 addze r11,r0 #mul_add_c(a[1],b[4],c3,c1,c2); lwz r6,4(r4) 
lwz r7,16(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r12,r12,r8 adde r10,r10,r9 addze r11,r11 #mul_add_c(a[2],b[3],c3,c1,c2); lwz r6,8(r4) lwz r7,12(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r12,r12,r8 adde r10,r10,r9 addze r11,r11 #mul_add_c(a[3],b[2],c3,c1,c2); lwz r6,12(r4) lwz r7,8(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r12,r12,r8 adde r10,r10,r9 addze r11,r11 #mul_add_c(a[4],b[1],c3,c1,c2); lwz r6,16(r4) lwz r7,4(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r12,r12,r8 adde r10,r10,r9 addze r11,r11 #mul_add_c(a[5],b[0],c3,c1,c2); lwz r6,20(r4) lwz r7,0(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r12,r12,r8 adde r10,r10,r9 addze r11,r11 stw r12,20(r3) #r[5]=c3; #mul_add_c(a[6],b[0],c1,c2,c3); lwz r6,24(r4) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r10,r10,r8 adde r11,r11,r9 addze r12,r0 #mul_add_c(a[5],b[1],c1,c2,c3); lwz r6,20(r4) lwz r7,4(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r10,r10,r8 adde r11,r11,r9 addze r12,r12 #mul_add_c(a[4],b[2],c1,c2,c3); lwz r6,16(r4) lwz r7,8(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r10,r10,r8 adde r11,r11,r9 addze r12,r12 #mul_add_c(a[3],b[3],c1,c2,c3); lwz r6,12(r4) lwz r7,12(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r10,r10,r8 adde r11,r11,r9 addze r12,r12 #mul_add_c(a[2],b[4],c1,c2,c3); lwz r6,8(r4) lwz r7,16(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r10,r10,r8 adde r11,r11,r9 addze r12,r12 #mul_add_c(a[1],b[5],c1,c2,c3); lwz r6,4(r4) lwz r7,20(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r10,r10,r8 adde r11,r11,r9 addze r12,r12 #mul_add_c(a[0],b[6],c1,c2,c3); lwz r6,0(r4) lwz r7,24(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r10,r10,r8 adde r11,r11,r9 addze r12,r12 stw r10,24(r3) #r[6]=c1; #mul_add_c(a[0],b[7],c2,c3,c1); lwz r7,28(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r11,r11,r8 adde r12,r12,r9 addze r10,r0 #mul_add_c(a[1],b[6],c2,c3,c1); lwz r6,4(r4) lwz r7,24(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r11,r11,r8 adde r12,r12,r9 addze r10,r10 #mul_add_c(a[2],b[5],c2,c3,c1); lwz r6,8(r4) lwz r7,20(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r11,r11,r8 adde 
r12,r12,r9 addze r10,r10 #mul_add_c(a[3],b[4],c2,c3,c1); lwz r6,12(r4) lwz r7,16(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r11,r11,r8 adde r12,r12,r9 addze r10,r10 #mul_add_c(a[4],b[3],c2,c3,c1); lwz r6,16(r4) lwz r7,12(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r11,r11,r8 adde r12,r12,r9 addze r10,r10 #mul_add_c(a[5],b[2],c2,c3,c1); lwz r6,20(r4) lwz r7,8(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r11,r11,r8 adde r12,r12,r9 addze r10,r10 #mul_add_c(a[6],b[1],c2,c3,c1); lwz r6,24(r4) lwz r7,4(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r11,r11,r8 adde r12,r12,r9 addze r10,r10 #mul_add_c(a[7],b[0],c2,c3,c1); lwz r6,28(r4) lwz r7,0(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r11,r11,r8 adde r12,r12,r9 addze r10,r10 stw r11,28(r3) #r[7]=c2; #mul_add_c(a[7],b[1],c3,c1,c2); lwz r7,4(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r12,r12,r8 adde r10,r10,r9 addze r11,r0 #mul_add_c(a[6],b[2],c3,c1,c2); lwz r6,24(r4) lwz r7,8(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r12,r12,r8 adde r10,r10,r9 addze r11,r11 #mul_add_c(a[5],b[3],c3,c1,c2); lwz r6,20(r4) lwz r7,12(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r12,r12,r8 adde r10,r10,r9 addze r11,r11 #mul_add_c(a[4],b[4],c3,c1,c2); lwz r6,16(r4) lwz r7,16(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r12,r12,r8 adde r10,r10,r9 addze r11,r11 #mul_add_c(a[3],b[5],c3,c1,c2); lwz r6,12(r4) lwz r7,20(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r12,r12,r8 adde r10,r10,r9 addze r11,r11 #mul_add_c(a[2],b[6],c3,c1,c2); lwz r6,8(r4) lwz r7,24(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r12,r12,r8 adde r10,r10,r9 addze r11,r11 #mul_add_c(a[1],b[7],c3,c1,c2); lwz r6,4(r4) lwz r7,28(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r12,r12,r8 adde r10,r10,r9 addze r11,r11 stw r12,32(r3) #r[8]=c3; #mul_add_c(a[2],b[7],c1,c2,c3); lwz r6,8(r4) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r10,r10,r8 adde r11,r11,r9 addze r12,r0 #mul_add_c(a[3],b[6],c1,c2,c3); lwz r6,12(r4) lwz r7,24(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r10,r10,r8 adde r11,r11,r9 addze r12,r12 #mul_add_c(a[4],b[5],c1,c2,c3); lwz 
r6,16(r4) lwz r7,20(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r10,r10,r8 adde r11,r11,r9 addze r12,r12 #mul_add_c(a[5],b[4],c1,c2,c3); lwz r6,20(r4) lwz r7,16(r5) mullw r8,r6,r7 mulhwu r9,r6,r7 addc r10,r10,r8 adde r11,r11,r9 addze r12,r12 #mul_add_c(a[6],b[3],c1,c2,c3); lwz r6,24(r4)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -