📄 mips3.s
字号:
/*
 * Register aliases shared by the comba multiplication routines below.
 *   a_0..a_7 / b_0..b_7  hold the input words a[0..7] / b[0..7]
 *   t_1, t_2             receive LO / HI of the current product
 *   c_1, c_2, c_3        form the rotating three-word column accumulator
 */
#define a_0 t0
#define a_1 t1
#define a_2 t2
#define a_3 t3
#define b_0 ta0
#define b_1 ta1
#define b_2 ta2
#define b_3 ta3

#define a_4 s0
#define a_5 s2
#define a_6 s4
#define a_7 a1 /* once we load a[7] we don't need a anymore */
#define b_4 s1
#define b_5 s3
#define b_6 s5
#define b_7 a2 /* once we load b[7] we don't need b anymore */

#define t_1 t8
#define t_2 t9

#define c_1 v0
#define c_2 v1
#define c_3 a3

#define FRAME_SIZE 48	/* six 8-byte slots: s0..s5 */

/*
 * bn_mul_comba8
 *
 * In:	a0 = r, sixteen 64-bit words are stored at 0(a0)..120(a0)
 *	a1 = a, eight 64-bit words are loaded from 0(a1)..56(a1)
 *	a2 = b, eight 64-bit words are loaded from 0(a2)..56(a2)
 *
 * The product is built one result column at a time.  Every step
 * "mul_add_c(a[i],b[j],x,y,z)" multiplies with dmultu, fetches the
 * 128-bit result through mflo/mfhi and adds it into the accumulator
 * x (low), y (middle), z (high), using sltu to recover the carry into
 * AT.  The first step of a column writes z with sltu instead of adding
 * to it, which is how the recycled register gets re-initialised.
 * After the last step of a column x is stored to r[] and the three
 * accumulator roles rotate.
 *
 * s0..s5 are used for a[4..6]/b[4..6]; they are saved in a FRAME_SIZE
 * byte stack frame and restored before returning.  a1 and a2 are
 * overwritten with a[7] and b[7], so the loads from 56(a1) and 56(a2)
 * have to stay the last accesses through those pointers.
 * Writes: v0, v1, a1, a2, a3, t0-t3, ta0-ta3, t8, t9, AT, HI/LO.
 */
.align	5
LEAF(bn_mul_comba8)
	.set	noreorder
	PTR_SUB	sp,FRAME_SIZE
	.frame	sp,FRAME_SIZE,ra	/* must describe the frame allocated by PTR_SUB above */
	.set	reorder
	ld	a_0,0(a1)	/* If compiled with -mips3 option on
				 * R5000 box assembler barks on this
				 * line with "shouldn't have mult/div
				 * as last instruction in bb (R10K
				 * bug)" warning. If anybody out there
				 * has a clue about how to circumvent
				 * this do send me a note.
				 *		<appro@fy.chalmers.se>
				 */
	ld	b_0,0(a2)
	ld	a_1,8(a1)
	ld	a_2,16(a1)
	ld	a_3,24(a1)
	ld	b_1,8(a2)
	ld	b_2,16(a2)
	ld	b_3,24(a2)

	/* column 0; the callee-saved registers are spilled while the multiply runs */
	dmultu	a_0,b_0		/* mul_add_c(a[0],b[0],c1,c2,c3); */
	sd	s0,0(sp)
	sd	s1,8(sp)
	sd	s2,16(sp)
	sd	s3,24(sp)
	sd	s4,32(sp)
	sd	s5,40(sp)
	mflo	c_1
	mfhi	c_2

	/* column 1; the remaining input words are loaded while the multiplies run */
	dmultu	a_0,b_1		/* mul_add_c(a[0],b[1],c2,c3,c1); */
	ld	a_4,32(a1)
	ld	a_5,40(a1)
	ld	a_6,48(a1)
	ld	a_7,56(a1)	/* overwrites a1: last load through the a pointer */
	ld	b_4,32(a2)
	ld	b_5,40(a2)
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	c_3,t_2,AT	/* three-operand form: c_3 is initialised here */
	dmultu	a_1,b_0		/* mul_add_c(a[1],b[0],c2,c3,c1); */
	ld	b_6,48(a2)
	ld	b_7,56(a2)	/* overwrites a2: last load through the b pointer */
	sd	c_1,0(a0)	/* r[0]=c1; */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	c_1,c_3,t_2
	sd	c_2,8(a0)	/* r[1]=c2; */

	/* column 2 */
	dmultu	a_2,b_0		/* mul_add_c(a[2],b[0],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	dmultu	a_1,b_1		/* mul_add_c(a[1],b[1],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	c_2,c_1,t_2
	dmultu	a_0,b_2		/* mul_add_c(a[0],b[2],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	sd	c_3,16(a0)	/* r[2]=c3; */

	/* column 3 */
	dmultu	a_0,b_3		/* mul_add_c(a[0],b[3],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	c_3,c_2,t_2
	dmultu	a_1,b_2		/* mul_add_c(a[1],b[2],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_2,b_1		/* mul_add_c(a[2],b[1],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_3,b_0		/* mul_add_c(a[3],b[0],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	sd	c_1,24(a0)	/* r[3]=c1; */

	/* column 4 */
	dmultu	a_4,b_0		/* mul_add_c(a[4],b[0],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	c_1,c_3,t_2
	dmultu	a_3,b_1		/* mul_add_c(a[3],b[1],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_2,b_2		/* mul_add_c(a[2],b[2],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_1,b_3		/* mul_add_c(a[1],b[3],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_0,b_4		/* mul_add_c(a[0],b[4],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	sd	c_2,32(a0)	/* r[4]=c2; */

	/* column 5 */
	dmultu	a_0,b_5		/* mul_add_c(a[0],b[5],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	c_2,c_1,t_2
	dmultu	a_1,b_4		/* mul_add_c(a[1],b[4],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_2,b_3		/* mul_add_c(a[2],b[3],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_3,b_2		/* mul_add_c(a[3],b[2],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_4,b_1		/* mul_add_c(a[4],b[1],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_5,b_0		/* mul_add_c(a[5],b[0],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	sd	c_3,40(a0)	/* r[5]=c3; */

	/* column 6 */
	dmultu	a_6,b_0		/* mul_add_c(a[6],b[0],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	c_3,c_2,t_2
	dmultu	a_5,b_1		/* mul_add_c(a[5],b[1],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_4,b_2		/* mul_add_c(a[4],b[2],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_3,b_3		/* mul_add_c(a[3],b[3],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_2,b_4		/* mul_add_c(a[2],b[4],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_1,b_5		/* mul_add_c(a[1],b[5],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_0,b_6		/* mul_add_c(a[0],b[6],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	sd	c_1,48(a0)	/* r[6]=c1; */

	/* column 7 */
	dmultu	a_0,b_7		/* mul_add_c(a[0],b[7],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	c_1,c_3,t_2
	dmultu	a_1,b_6		/* mul_add_c(a[1],b[6],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_2,b_5		/* mul_add_c(a[2],b[5],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_3,b_4		/* mul_add_c(a[3],b[4],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_4,b_3		/* mul_add_c(a[4],b[3],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_5,b_2		/* mul_add_c(a[5],b[2],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_6,b_1		/* mul_add_c(a[6],b[1],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_7,b_0		/* mul_add_c(a[7],b[0],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	sd	c_2,56(a0)	/* r[7]=c2; */

	/* column 8 */
	dmultu	a_7,b_1		/* mul_add_c(a[7],b[1],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	c_2,c_1,t_2
	dmultu	a_6,b_2		/* mul_add_c(a[6],b[2],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_5,b_3		/* mul_add_c(a[5],b[3],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_4,b_4		/* mul_add_c(a[4],b[4],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_3,b_5		/* mul_add_c(a[3],b[5],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_2,b_6		/* mul_add_c(a[2],b[6],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_1,b_7		/* mul_add_c(a[1],b[7],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	sd	c_3,64(a0)	/* r[8]=c3; */

	/* column 9 */
	dmultu	a_2,b_7		/* mul_add_c(a[2],b[7],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	c_3,c_2,t_2
	dmultu	a_3,b_6		/* mul_add_c(a[3],b[6],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_4,b_5		/* mul_add_c(a[4],b[5],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_5,b_4		/* mul_add_c(a[5],b[4],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_6,b_3		/* mul_add_c(a[6],b[3],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_7,b_2		/* mul_add_c(a[7],b[2],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	sd	c_1,72(a0)	/* r[9]=c1; */

	/* column 10 */
	dmultu	a_7,b_3		/* mul_add_c(a[7],b[3],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	c_1,c_3,t_2
	dmultu	a_6,b_4		/* mul_add_c(a[6],b[4],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_5,b_5		/* mul_add_c(a[5],b[5],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_4,b_6		/* mul_add_c(a[4],b[6],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	dmultu	a_3,b_7		/* mul_add_c(a[3],b[7],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	sd	c_2,80(a0)	/* r[10]=c2; */

	/* column 11 */
	dmultu	a_4,b_7		/* mul_add_c(a[4],b[7],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	c_2,c_1,t_2
	dmultu	a_5,b_6		/* mul_add_c(a[5],b[6],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_6,b_5		/* mul_add_c(a[6],b[5],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	dmultu	a_7,b_4		/* mul_add_c(a[7],b[4],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	sd	c_3,88(a0)	/* r[11]=c3; */

	/* column 12 */
	dmultu	a_7,b_5		/* mul_add_c(a[7],b[5],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	c_3,c_2,t_2
	dmultu	a_6,b_6		/* mul_add_c(a[6],b[6],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_5,b_7		/* mul_add_c(a[5],b[7],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	sd	c_1,96(a0)	/* r[12]=c1; */

	/* column 13 */
	dmultu	a_6,b_7		/* mul_add_c(a[6],b[7],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	c_1,c_3,t_2
	dmultu	a_7,b_6		/* mul_add_c(a[7],b[6],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	AT,c_3,t_2
	daddu	c_1,AT
	sd	c_2,104(a0)	/* r[13]=c2; */

	/* column 14; s0..s5 are dead once this multiply has been issued, so they are reloaded here */
	dmultu	a_7,b_7		/* mul_add_c(a[7],b[7],c3,c1,c2); */
	ld	s0,0(sp)
	ld	s1,8(sp)
	ld	s2,16(sp)
	ld	s3,24(sp)
	ld	s4,32(sp)
	ld	s5,40(sp)
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sd	c_3,112(a0)	/* r[14]=c3; */
	sd	c_1,120(a0)	/* r[15]=c1; */
	PTR_ADD	sp,FRAME_SIZE
	jr	ra
END(bn_mul_comba8)

/*
 * bn_mul_comba4
 *
 * Same column-wise scheme as bn_mul_comba8, for four-word inputs:
 * a0 = r, a1 = a (words loaded from 0..24(a1)), a2 = b (words loaded
 * from 0..24(a2)).  Only the t/ta/v/a3 aliases appear in the visible
 * part, and no stack frame is set up.
 *
 * NOTE(review): this listing is cut off in the middle of the r[3]
 * column; everything after the last instruction below is missing and
 * must be restored from the upstream file.
 */
.align	5
LEAF(bn_mul_comba4)
	.set	reorder
	ld	a_0,0(a1)
	ld	b_0,0(a2)
	ld	a_1,8(a1)
	ld	a_2,16(a1)

	/* column 0 */
	dmultu	a_0,b_0		/* mul_add_c(a[0],b[0],c1,c2,c3); */
	ld	a_3,24(a1)
	ld	b_1,8(a2)
	ld	b_2,16(a2)
	ld	b_3,24(a2)
	mflo	c_1
	mfhi	c_2
	sd	c_1,0(a0)

	/* column 1 */
	dmultu	a_0,b_1		/* mul_add_c(a[0],b[1],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	c_3,t_2,AT
	dmultu	a_1,b_0		/* mul_add_c(a[1],b[0],c2,c3,c1); */
	mflo	t_1
	mfhi	t_2
	daddu	c_2,t_1
	sltu	AT,c_2,t_1
	daddu	t_2,AT
	daddu	c_3,t_2
	sltu	c_1,c_3,t_2
	sd	c_2,8(a0)

	/* column 2 */
	dmultu	a_2,b_0		/* mul_add_c(a[2],b[0],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	dmultu	a_1,b_1		/* mul_add_c(a[1],b[1],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	c_2,c_1,t_2
	dmultu	a_0,b_2		/* mul_add_c(a[0],b[2],c3,c1,c2); */
	mflo	t_1
	mfhi	t_2
	daddu	c_3,t_1
	sltu	AT,c_3,t_1
	daddu	t_2,AT
	daddu	c_1,t_2
	sltu	AT,c_1,t_2
	daddu	c_2,AT
	sd	c_3,16(a0)

	/* column 3 */
	dmultu	a_0,b_3		/* mul_add_c(a[0],b[3],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	c_3,c_2,t_2
	dmultu	a_1,b_2		/* mul_add_c(a[1],b[2],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	sltu	AT,c_1,t_1
	daddu	t_2,AT
	daddu	c_2,t_2
	sltu	AT,c_2,t_2
	daddu	c_3,AT
	dmultu	a_2,b_1		/* mul_add_c(a[2],b[1],c1,c2,c3); */
	mflo	t_1
	mfhi	t_2
	daddu	c_1,t_1
	/* NOTE(review): source listing ends here -- remainder of bn_mul_comba4 is not present */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -