📄 sobel_asm.asm
字号:
|| MPYU4 .M2 B_in2_h2, B_f2, B_b14_h:B_b14_l ;[ 8,1]
|| MPYU4 .M1 A_in2_l, A_f2, A_b12_h:A_b12_l ;[ 8,1]
* ------------------------------------------------------------------------- *
*  Prolog tail: the cycle 9, 10 and 11 operations of the first iteration.
*  The bracketed [c,s] tags appear to give the schedule cycle c and the
*  pipeline-stage column s.  These three packets carry the same operations
*  as the [9..11,3] entries of kernel packets instr9, instr10 and instr11.
*
*  Two branches are issued here, one packet apart.  C6000 branches take
*  effect after five delay slots, so "B cont" lands five execute packets
*  after its own packet and "B instr6" one packet after that.
*  NOTE(review): for the intervening packets to be kernel packets
*  instr1..instr4, a branch into the kernel must already have been issued
*  before this excerpt -- confirm against the earlier part of the prolog.
* ------------------------------------------------------------------------- *
        MPYU4   .M1     A_in2_l2, A_f2, A_b10_h:A_b10_l ;[ 9,1]
||      DOTPSU4 .M2     B_mult2_b, B_tmp3, B_t4         ;[ 9,1]
||      MVC     .S2     B_no_gie, CSR                   ; Disable ints
; ===== Interrupts masked here =====
        MV      .L1X    B_f1, A_f1                      ; copy B_f1 to the A side via the cross path
||      UNPKHU4 .L2     B_tmp3, B_u6                    ;[10,1]
||      DOTPSU4 .M1     A_mult2_b, A_in3_l, A_t4        ;[10,1]
||      DOTPSU4 .M2     B_mult2_b, B_tmp4, B_t10        ;[10,1]
||      UNPKLU4 .S1     A_in3_l, A_u3                   ;[10,1]
||      B       .S2     cont                            ; reaches "cont" after 5 delay slots
        UNPKLU4 .L2     B_tmp3, B_u3                    ;[11,1]
||      DOTPSU4 .M2     B_mult1_b, B_tmp2, B_t7         ;[11,1]
||      UNPKLU4 .S1     A_in3_h, A_u8                   ;[11,1]
||      DOTPSU4 .M1     A_mult2, A_in3_l, A_t5          ;[11,1]
||      UNPKHU4 .L1     A_in3_l, A_u6                   ;[11,1]
||      B       .S2     instr6                          ; reaches instr6 one packet after "cont"
* ========================================================================= *
*  At this point kernel execute packets instr1, instr2, instr3 and instr4   *
*  run, in the delay slots of the branches issued above.                    *
* ========================================================================= *
cont:
* ========================================================================= *
*  Prolog copy of kernel packet instr5, issued in parallel with one more
*  branch.  Compared with instr5 in the kernel:
*    - the [27,1] "ADD2 B_H7, B_V8, B_r12" is left out;
*    - UNPKHU4 moves from .S2 to .L2, which frees .S2 for the branch;
*    - the stage tags read [16,1] / [5,2] instead of [16,2] / [5,3].
*
*  The branch target "instr11+1*4" is one 4-byte opcode past the instr11
*  label, so kernel packet instr11 is entered just after its leading STNDW
*  and the store is skipped on this first pass.  The STNDW must therefore
*  remain the first instruction of instr11.
* ========================================================================= *
        UNPKHU4 .L2     B_tmp4, B_u10                   ;[16,1]
||      SUB2    .L1     A_b2, A_b1, A_u11               ;[16,1]
||      ADD2    .S1     A_b11_l, A_b10_l, A_u7          ;[16,1]
||      ADD2    .D2     B_u5, B_u6, B_b4                ;[16,1]
||      DOTPSU4 .M2     B_mult1_b, B_tmp1, B_t1         ;[16,1]
||      DOTPSU4 .M1     A_mult1, A_in1_h, A_t8          ;[16,1]
||      LDNDW   .D1T1   *+A_in[A_wD4], A_in3_h:A_in3_l  ;[ 5,2]
||      B       .S2     instr11+1*4                     ; skip the STNDW at the head of instr11
* ========================================================================= *
*  Kernel execute packets instr6, instr7, instr8, instr9 and instr10 run    *
*  next, followed by instr11 with its store operation skipped; after that   *
*  the loop proper begins.                                                  *
* ========================================================================= *
* =========================== PIPE LOOP KERNEL ============================ *
*  Software-pipelined kernel: eleven execute packets, instr1 .. instr11.
*  The [c,s] tags appear to give the schedule cycle c (1..33) and the
*  pipeline-stage column s (1..3), i.e. three iterations overlap and each
*  pass through the kernel advances all of them by eleven cycles.
*
*  Per pass the kernel
*    - issues six non-aligned double-word loads (LDNDW), three through A_in
*      and three through B_in, post-incrementing both pointers once;
*    - issues one non-aligned double-word store (STNDW) of B_r14:B_r13
*      through A_out, post-incrementing it;
*    - decrements A_cnt and branches back to "loop" with BDEC (instr6).
*      The branch's five delay slots are instr7 .. instr11.
*
*  Ordering constraints that must be preserved when editing:
*    - The prolog enters this kernel at instr1, at instr6 and at
*      "instr11+1*4"; the last one relies on STNDW being the first
*      instruction of instr11.
*    - Results are consumed at fixed cycle distances (multi-cycle MPYU4 /
*      DOTPSU4 / LDNDW latencies), so instructions must not be moved
*      between packets.
* ========================================================================= *
loop:
instr1:
        ADD2    .D2     B_t9, B_t12, B_H7               ;[23,1]
||      ABS2    .L2     B_u12, B_V8                     ;[23,1]
||      PACK2   .S1     A_t10, A_t11, A_t12             ;[23,1]
||      UNPKLU4 .S2     B_tmp1, B_u1                    ;[12,2]
||      DOTPSU4 .M2     B_mult2, B_tmp4, B_t11          ;[12,2]
||      UNPKHU4 .L1     A_in1_l, A_u4                   ;[12,2]
||      MPYU4   .M1     A_in1_h, A_f1, A_b11_h:A_b11_l  ;[12,2]
||      LDNDW   .D1T1   *+A_in[A_wD8], A_in2_l2:A_in2_l ;[ 1,3]
instr2:
        ADD2    .S1     A_t9, A_t12, A_H5               ;[24,1]
||      ADD2    .L2     B_u1, B_b13_l, B_u2             ;[13,2]
||      UNPKHU4 .S2     B_tmp1, B_u4                    ;[13,2]
||      DOTPSU4 .M2     B_mult2, B_tmp3, B_t5           ;[13,2]
||      ADD2    .D1     A_u4, A_b12_h, A_u5             ;[13,2]
||      UNPKLU4 .L1     A_in1_l, A_u1                   ;[13,2]
||      DOTPSU4 .M1     A_mult1_b, A_in1_l, A_t1        ;[13,2]
||      LDNDW   .D      *+B_in[B_wD8], B_in2_h2:B_in2_h ;[ 2,3]
instr3:
        ABS2    .L2     B_H3, B_H3                      ;[25,1]
||      ABS2    .L1     A_u12, A_V6                     ;[25,1]
||      ADD2    .D2     B_u2, B_u3, B_b3                ;[14,2]
||      ADD2    .S2     B_u4, B_b13_h, B_u5             ;[14,2]
||      MPYU4   .M2     B_tmp2, B_f1, B_b15_h:B_b15_l   ;[14,2]
||      ADD2    .D1     A_u1, A_b12_l, A_u2             ;[14,2]
||      ADD2    .S1     A_u5, A_u6, A_b2                ;[14,2]
||      DOTPSU4 .M1     A_mult1, A_in1_l, A_t2          ;[14,2]
instr4:
        ABS2    .L1     A_H5, A_H5                      ;[26,1]
||      ADD2    .S2     B_H3, B_V4, B_r10               ;[26,1]
||      ABS2    .L2     B_H7, B_H7                      ;[26,1]
||      DOTPSU4 .M2     B_mult1, B_tmp1, B_t2           ;[15,2]
||      ADD2    .D1     A_u2, A_u3, A_b1                ;[15,2]
||      DOTPSU4 .M1     A_mult1_b, A_in1_h, A_t7        ;[15,2]
||      PACK2   .S1     A_t4, A_t5, A_t6                ;[15,2]
||      LDNDW   .D      *+B_in[B_wD4], B_tmp4:B_tmp3    ;[ 4,3]
instr5:
        ADD2    .L2     B_H7, B_V8, B_r12               ;[27,1]
||      UNPKHU4 .S2     B_tmp4, B_u10                   ;[16,2]
||      SUB2    .L1     A_b2, A_b1, A_u11               ;[16,2]
||      ADD2    .S1     A_b11_l, A_b10_l, A_u7          ;[16,2]
||      ADD2    .D2     B_u5, B_u6, B_b4                ;[16,2]
||      DOTPSU4 .M2     B_mult1_b, B_tmp1, B_t1         ;[16,2]
||      DOTPSU4 .M1     A_mult1, A_in1_h, A_t8          ;[16,2]
||      LDNDW   .D1T1   *+A_in[A_wD4], A_in3_h:A_in3_l  ;[ 5,3]
instr6:
* Loop-closing branch: its delay slots are instr7 .. instr11.
        BDEC    .S1     loop, A_cnt                     ;[28,1]
||      SPACKU4 .S2X    B_r10, A_r9, B_r13              ;[28,1]
||      ADD2    .D1     A_u7, A_u8, A_b5                ;[17,2]
||      ADD2    .L1     A_b11_h, A_b10_h, A_u9          ;[17,2]
||      PACK2   .L2     B_t4, B_t5, B_t6                ;[17,2]
||      DOTPSU4 .M2     B_mult1, B_tmp2, B_t8           ;[17,2]
||      DOTPSU4 .M1     A_mult2_b, A_in3_h, A_t10       ;[17,2]
||      LDNDW   .D      *B_in++, B_tmp2:B_tmp1          ;[ 6,3]
instr7:
        ADD2    .D2     B_b15_l, B_b14_l, B_u7          ;[18,2]
||      ADD2    .S2     B_b15_h, B_b14_h, B_u9          ;[18,2]
||      PACK2   .L2     B_t10, B_t11, B_t12             ;[18,2]
||      DOTPSU4 .M1     A_mult2, A_in3_h, A_t11         ;[18,2]
||      PACK2   .S1     A_t1, A_t2, A_t3                ;[18,2]
||      ABS2    .L1     A_u11, A_V2                     ;[18,2]
||      MPYU4   .M2     B_in2_h, B_f2, B_b13_h:B_b13_l  ;[ 7,3]
||      LDNDW   .D1T1   *A_in++, A_in1_h:A_in1_l        ;[ 7,3]
instr8:
        ADD2    .S1     A_H5, A_V6, A_r11               ;[30,1]
||      SUB2    .L2     B_b4, B_b3, B_u11               ;[19,2]
||      ADD2    .D2     B_u9, B_u10, B_b8               ;[19,2]
||      UNPKLU4 .S2     B_tmp4, B_u8                    ;[19,2]
||      UNPKHU4 .L1     A_in3_h, A_u10                  ;[19,2]
||      ADD2    .D1     A_t3, A_t6, A_H                 ;[19,2]
||      MPYU4   .M2     B_in2_h2, B_f2, B_b14_h:B_b14_l ;[ 8,3]
||      MPYU4   .M1     A_in2_l, A_f2, A_b12_h:A_b12_l  ;[ 8,3]
instr9:
        ADD2    .D2     B_u7, B_u8, B_b7                ;[20,2]
||      PACK2   .S2     B_t1, B_t2, B_t3                ;[20,2]
||      ABS2    .L2     B_u11, B_V4                     ;[20,2]
||      ADD2    .D1     A_u9, A_u10, A_b6               ;[20,2]
||      ABS2    .L1     A_H, A_H                        ;[20,2]
||      PACK2   .S1     A_t7, A_t8, A_t9                ;[20,2]
||      MPYU4   .M1     A_in2_l2, A_f2, A_b10_h:A_b10_l ;[ 9,3]
||      DOTPSU4 .M2     B_mult2_b, B_tmp3, B_t4         ;[ 9,3]
instr10:
        SPACKU4 .S2X    B_r12, A_r11, B_r14             ;[32,1]
||      ADD2    .D2     B_t3, B_t6, B_H3                ;[21,2]
||      SUB2    .L1     A_b6, A_b5, A_u12               ;[21,2]
||      ADD2    .D1     A_H, A_V2, A_r9                 ;[21,2]
||      UNPKHU4 .L2     B_tmp3, B_u6                    ;[10,3]
||      DOTPSU4 .M1     A_mult2_b, A_in3_l, A_t4        ;[10,3]
||      DOTPSU4 .M2     B_mult2_b, B_tmp4, B_t10        ;[10,3]
||      UNPKLU4 .S1     A_in3_l, A_u3                   ;[10,3]
instr11:
* STNDW must stay first: the prolog enters at instr11+1*4 to skip it.
        STNDW   .D      B_r14:B_r13, *A_out++           ;[33,1]
||      SUB2    .D2     B_b8, B_b7, B_u12               ;[22,2]
||      PACK2   .S2     B_t7, B_t8, B_t9                ;[22,2]
||      UNPKLU4 .L2     B_tmp3, B_u3                    ;[11,3]
||      DOTPSU4 .M2     B_mult1_b, B_tmp2, B_t7         ;[11,3]
||      UNPKLU4 .S1     A_in3_h, A_u8                   ;[11,3]
||      DOTPSU4 .M1     A_mult2, A_in3_l, A_t5          ;[11,3]
||      UNPKHU4 .L1     A_in3_l, A_u6                   ;[11,3]
* =========================== PIPE LOOP EPILOG ============================ *
*  Reached by falling out of the kernel once BDEC no longer branches.
*  Repeats the cycle 23..32 operations of the kernel (tags [23..32,3])
*  without the loads and earlier-stage work, packed into five execute
*  packets, then writes the final results and returns.
*
*  Final store: instead of the kernel's 8-byte STNDW of B_r14:B_r13, only
*  six bytes are written -- all four bytes of B_r13 (STNW) followed by the
*  two least-significant bytes of B_r14 (two STB), at offsets 0..5 from
*  A_out.
*
*  Return sequence: "B B_ret" is issued in the third packet; its five delay
*  slots are the remaining five packets, so the branch takes effect right
*  after the CSR restore.
*  NOTE(review): LDW has four delay slots on C6000, and the MVC that reads
*  B_csr is issued four packets after the LDW that loads it.  Confirm that
*  B_csr still holds the saved CSR value at that point (or that this timing
*  is intended) against the function entry code, which is not visible here.
* ========================================================================= *
        ADD2    .D2     B_t9, B_t12, B_H7               ;[23,3]
||      ABS2    .L2     B_u12, B_V8                     ;[23,3]
||      PACK2   .S1     A_t10, A_t11, A_t12             ;[23,3]
        ADD2    .S1     A_t9, A_t12, A_H5               ;[24,3]
||      ABS2    .L2     B_H3, B_H3                      ;[25,3]
||      ABS2    .L1     A_u12, A_V6                     ;[25,3]
        ABS2    .L1     A_H5, A_H5                      ;[26,3]
||      ADD2    .D2     B_H3, B_V4, B_r10               ;[26,3]
||      ABS2    .L2     B_H7, B_H7                      ;[26,3]
||      B       .S2     B_ret                           ; branch via B_ret; taken after the last packet below
        ADD2    .L2     B_H7, B_V8, B_r12               ;[27,3]
||      SPACKU4 .S2X    B_r10, A_r9, B_r13              ;[28,3]
||      ADD2    .S1     A_H5, A_V6, A_r11               ;[30,3]
||      LDW     .D2T2   *++B_SP[2], B_csr               ; Get rtn, CSR
        SPACKU4 .S2X    B_r12, A_r11, B_r14             ;[32,3]
* ========================================================================= *
        STNW    .D1T2   B_r13, *A_out                   ; bytes 0..3 of the last result group
||      EXTU    .S2     B_r14, 24, 24, B_r15            ; B_r15 = bits 7..0 of B_r14
        STB     .D1T2   B_r15, *++A_out[4]              ; byte 4 (A_out advanced by 4)
||      EXTU    .S2     B_r14, 16, 24, B_r16            ; B_r16 = bits 15..8 of B_r14
        STB     .D1T2   B_r16, *++A_out                 ; byte 5 (A_out advanced by 1)
||      MVC     .S2     B_csr, CSR                      ; Restore CSR
; ===== Branch Occurs =====
; ===== Interruptibility state restored here =====
* ========================================================================= *
* End of file: img_sobel.asm *
* ------------------------------------------------------------------------- *
* Copyright (c) 2002 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ========================================================================= *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -