📄 meanfilter.asm
字号:
;----------------------------------------------------------------------------
; _IMG_median_3x3
;
; Hand-scheduled, software-pipelined loop. Each pass loads one 32-bit word
; (4 packed unsigned bytes) from each of three input lines, located at
; A_ptr_in, A_ptr_in + B_n and A_ptr_in + 2*B_n, and (once the prolog
; counters have reached zero) stores one 32-bit word through A_ptr_out.
;
; Data flow per byte lane, as performed by the packed MINU4/MAXU4 network:
;   1. sort the 3 bytes of the new column into max / median / min;
;   2. combine the current word with byte-shifted copies that merge in the
;      previous pass's word, giving min-of-maxima, median-of-medians and
;      max-of-minima over three neighbouring columns;
;   3. the stored byte is the median of those three values.
;
; Registers read on entry:
;   A4 (A_ptr_in)   address of line 0
;   B4 (B_n)        line width in bytes; also seeds the loop count
;   A6 (A_ptr_out)  address the result words are stored to
;   B3 (B_ret)      return address
;
; Notes for maintainers:
;   - Interrupts are masked (GIE cleared in CSR) from the prolog until the
;     saved CSR is written back in the epilog.
;   - Many symbolic names share one physical register (for example B4, B16,
;     B20, B21, A4, A9). Correctness depends on the exact cycle in which
;     each value is written and read, so instructions must not be reordered
;     or moved between execute packets.
;   - A_pro / B_pro are doubled every pass with a 16-bit MPYU; starting from
;     2 shifted left by 13 / 14 they become zero after the third / second
;     doubling, which switches on the [!A_pro] / [!B_pro] stages.
;   - NOTE(review): A_i starts at B_n + 8 and is only ever decremented by 4
;     and tested against zero, so the loop terminates only when B_n is a
;     multiple of 4 -- presumably a documented caller requirement; confirm.
;   - NOTE(review): 0x7f7f seeds the upper halves of the carried words used
;     by the first passes; presumably boundary filler -- confirm intent.
;----------------------------------------------------------------------------
.text
.global _IMG_median_3x3
_IMG_median_3x3:
*====================== SYMBOLIC REGISTER ASSIGNMENTS ======================*
.asg B15, B_SP ;Stack pointer, B datapath (not referenced in this routine)
.asg B24, B_csr ;CSR value saved on entry
.asg B25, B_no_gie ;CSR with GIE bit cleared
.asg B3, B_ret ;Return address
.asg A0, A_pro ;A-side prolog collapse counter
.asg B0, B_pro ;B-side prolog collapse counter
.asg B4, B_n ;image width in bytes (shares B4 with B_x1_3210)
.asg A27, A_two ;const = 2
.asg B27, B_two ;const = 2
.asg A4, A_ptr_in ;input data pointer
.asg A6, A_ptr_out ;output data pointer
.asg A3, A_line0 ;read pointer, line i
.asg B6, B_line1 ;read pointer, line i+1
.asg B5, B_line2 ;read pointer, line i+2
.asg A1, A_i ;loop count
.asg A5, A_x01x00 ;previous pass's column maxima (upper 2 bytes used)
.asg B21, B_x11x10 ;previous pass's column medians (upper 2 bytes used)
.asg B20, B_x21x20 ;previous pass's column minima (upper 2 bytes used)
.asg A18, A_x0_3210 ;4 new points row 0
.asg B4, B_x1_3210 ;4 new points row 1
.asg B9, B_x2_3210 ;4 new points row 2
.asg B22, B_d1_3210_a ;CMPGTU4 flags: row 2 greater than row 1
.asg B16, B_m1_3210_a ;byte masks expanded from B_d1_3210_a
.asg B16, B_x1_3210_a0 ;row 2 bytes selected by the mask
.asg B8, B_x1_3210_a1 ;row 1 bytes selected by the inverted mask
.asg B8, B_x1_3210_a ;max of x1,x2
.asg B7, B_x2_3210_a ;min of x1_3210 and x2_3210
.asg A4, A_x0_3210_b ;column maximum: max of x0_3210,x1_3210_a
.asg A17, A_x1_3210_b ;intermediate: min of x0_3210,x1_3210_a
.asg B16, B_x1_3210_c ;final med of new data
.asg B17, B_x2_3210_c ;final min of new data
.asg A8, A_max0_3210 ;column maxima shifted by 2 bytes
.asg A17, A_max1_3210 ;column maxima shifted by 1 byte
.asg A8, A_minmax_3210 ;minimum of maximums
.asg B19, B_min0_3210 ;column minima shifted by 2 bytes
.asg B9, B_min1_3210 ;column minima shifted by 1 byte
.asg A16, A_min2_3210 ;column minima, A-side copy
.asg B18, B_maxmin_3210 ;partial maximum of mins
.asg A7, A_maxmin_3210 ;maximum of mins
.asg B21, B_med0_3210 ;column medians shifted by 2 bytes
.asg B20, B_med1_3210 ;column medians shifted by 1 byte
.asg B19, B_maxmed_3210_0 ;max of med1 and current medians
.asg B17, B_minmed_3210_0 ;min of med1 and current medians
.asg B18, B_minmed_3210_1 ;min of med0 and B_maxmed_3210_0
.asg B18, B_medmed_3210 ;median of medians
.asg A9, A_maxa_3210 ;max of A_maxmin_3210 and B_medmed_3210
.asg A9, A_mina_3210 ;min of A_maxmin_3210 and B_medmed_3210
.asg A5, A_minb_3210 ;min of A_minmax_3210 and A_maxa_3210
.asg A7, A_d_med_3210 ;CMPGTU4 flags: minb greater than mina
.asg A16, A_m_med_3210 ;byte masks expanded from A_d_med_3210
.asg A9, A_med_3210a ;minb bytes selected by the mask
.asg A4, A_med_3210b ;mina bytes selected by the inverted mask
.asg A9, A_med_3210 ;final medians of the 3 3x3 blocks
*============================= PIPE LOOP PROLOG ===========================*
; Saves CSR and masks interrupts, derives the line 1 / line 2 pointers,
; issues the first loads for lines 1 and 2, and seeds the loop counter,
; the prolog counters and the carried-over words.
MVC .S2 CSR, B_csr ;Get CSR state
AND .D2 B_csr, -2, B_no_gie ;Clear GIE
ADD .L2X B_n, A_ptr_in, B_line1 ;line 1 = input + width
|| MVC .S2 B_no_gie, CSR ;Disable intrpt
;INTERRUPTS MASKED HERE
LDW .D2T2 *B_line1++, B_x1_3210 ;line 1 4 pts (destination B4 is also B_n)
|| ADD .L2 B_line1, B_n, B_line2 ;line 2 = line 1 + width
LDW .D2T2 *B_line2++, B_x2_3210 ;4 pts line 2
MVK .S2 2, B_two ;const = 2
;-
MVKLH .S2 0x7f7f, B_x11x10 ;seed upper half of carried medians
|| MVK .S1 2, A_two ;const = 2
MVKLH .S1 0x7f7f, A_x01x00 ;seed upper half of carried maxima
|| SHL .S2 B_two, 14, B_pro ;B_pro = 2 shifted left by 14
|| ADD .L1X B_n, 8, A_i ;A_i = width + 8 (last read of B_n)
MV .L2 B_x11x10, B_x21x20 ;carried minima start equal to carried medians
|| SHL .S1 A_two, 13, A_pro ;A_pro = 2 shifted left by 13
|| MV .D1 A_ptr_in, A_line0 ;line 0
*============================= PIPE LOOP KERNEL ===========================*
; Eight execute packets per pass. Stages of different passes overlap, so an
; operand read here may have been produced by a textually later instruction
; during the previous pass; read the comments as data flow, not as program
; order.
LOOP:
[!A_pro]MINU4.L1X A_maxmin_3210, B_medmed_3210, A_mina_3210 ;mina = min(max of mins, med of meds)
||[!B_pro]SHLMB.S2 B_x11x10, B_x1_3210_c, B_med1_3210 ;medians shifted 1 byte, previous top byte merged in
||[!B_pro]MAXU4.L2 B_min0_3210, B_min1_3210, B_maxmin_3210 ;partial max of the shifted minima
[!A_pro]CMPGTU4.S1 A_minb_3210, A_mina_3210, A_d_med_3210 ;per-byte flags: minb greater than mina
||[!B_pro]MVD .M1 A_x0_3210_b, A_x01x00 ;carry column maxima to next pass (delayed move)
||[!B_pro]MAXU4.L2 B_med1_3210, B_x1_3210_c, B_maxmed_3210_0;max(med1, med)
||[!B_pro]PACKLH2.S2 B_x1_3210_c, B_x11x10, B_med0_3210 ;medians shifted 2 bytes, previous top half merged in
||[!B_pro]MINU4.L1 A_max0_3210, A_max1_3210, A_minmax_3210 ;partial min of the shifted maxima
[ A_i]B .S1 LOOP ;loop back while A_i is non-zero
||[!A_pro]XPND4.M1 A_d_med_3210, A_m_med_3210 ;expand flags to byte masks
||[!B_pro]MVD .M2 B_x2_3210_c, B_x21x20 ;carry column minima to next pass (delayed move)
||[!B_pro]MINU4.L2 B_med1_3210, B_x1_3210_c, B_minmed_3210_0;min(med1, med)
|| MINU4 .L1X A_x0_3210, B_x1_3210_a, A_x1_3210_b ;min(row 0, max(row 1, row 2))
|| CMPGTU4.S2 B_x2_3210, B_x1_3210, B_d1_3210_a ;per-byte flags: row 2 greater than row 1
[!B_pro]MINU4.L2 B_med0_3210, B_maxmed_3210_0, B_minmed_3210_1;min(med0, max(med1, med))
||[!B_pro]MAXU4.L1X B_maxmin_3210, A_min2_3210, A_maxmin_3210 ;max of mins over the 3 columns
|| XPND4 .M2 B_d1_3210_a, B_m1_3210_a ;expand flags to byte masks
||[A_i] LDW .D2T2 *B_line1++, B_x1_3210 ;next 4 pts of line 1 (skipped once A_i is 0)
||[!A_i]B .S2 B_ret ;return to caller once A_i is 0
|| [A_i]SUB .D1 A_i, 4, A_i ;A_i -= 4 (one word consumed)
[!A_pro]ANDN .S1 A_mina_3210, A_m_med_3210, A_med_3210b ;keep mina where minb is not greater
||[!A_pro]AND .D1 A_minb_3210, A_m_med_3210, A_med_3210a ;keep minb where minb is greater
||[!B_pro]MVD .M2 B_x1_3210_c, B_x11x10 ;carry column medians to next pass (delayed move)
||[!B_pro]MAXU4.L2 B_minmed_3210_1,B_minmed_3210_0,B_medmed_3210;median of the 3 column medians
||[!B_pro]MINU4.L1 A_minmax_3210, A_x0_3210_b, A_minmax_3210 ;min of maxes over the 3 columns
||[A_i] LDW .D2T2 *B_line2++, B_x2_3210 ;next 4 pts of line 2 (skipped once A_i is 0)
MPYU .M1 A_two, A_pro, A_pro ;double A-side prolog counter
||[!A_pro]OR .S1 A_med_3210a, A_med_3210b, A_med_3210 ;max(mina, minb) = 4 output medians
|| MINU4 .L2X B_x2_3210_a, A_x1_3210_b, B_x2_3210_c ;column minimum
|| MAXU4 .L1X A_x0_3210, B_x1_3210_a, A_x0_3210_b ;column maximum
|| ANDN .D2 B_x1_3210, B_m1_3210_a, B_x1_3210_a1 ;keep row 1 where row 2 is not greater
|| AND .S2 B_x2_3210, B_m1_3210_a, B_x1_3210_a0 ;keep row 2 where row 2 is greater
||[A_i] LDW .D1T1 *A_line0++, A_x0_3210 ;next 4 pts of line 0 (skipped once A_i is 0)
[!A_pro]STW .D1T1 A_med_3210, *A_ptr_out++ ;store 4 new medians
||[!B_pro]MAXU4.L1X A_maxmin_3210, B_medmed_3210, A_maxa_3210 ;maxa = max(max of mins, med of meds)
|| PACKLH2.S2 B_x2_3210_c, B_x21x20, B_min0_3210 ;minima shifted 2 bytes, previous top half merged in
|| SHLMB .S1 A_x01x00, A_x0_3210_b, A_max1_3210 ;maxima shifted 1 byte, previous top byte merged in
|| MAXU4 .L2X B_x2_3210_a, A_x1_3210_b, B_x1_3210_c ;column median
|| OR .D2 B_x1_3210_a0, B_x1_3210_a1, B_x1_3210_a ;max(row 1, row 2) built from the masked halves
|| MPYU .M2 B_two, B_pro, B_pro ;double B-side prolog counter
[!B_pro]MINU4.L1 A_minmax_3210, A_maxa_3210, A_minb_3210 ;minb = min(min of maxes, maxa)
|| MV .D1X B_x2_3210_c, A_min2_3210 ;copy column minima to the A side
|| SHLMB .S2 B_x21x20, B_x2_3210_c, B_min1_3210 ;minima shifted 1 byte, previous top byte merged in
|| PACKLH2.S1 A_x0_3210_b, A_x01x00, A_max0_3210 ;maxima shifted 2 bytes, previous top half merged in
|| MINU4 .L2 B_x2_3210, B_x1_3210, B_x2_3210_a ;min(row 1, row 2)
* =========================== PIPE LOOP EPILOG ============================ *
; Reached only by falling out of the final pass; per the BRANCH OCCURS marker
; below, the return branch issued inside the kernel completes after this
; instruction.
MVC .S2 B_csr, CSR ;Restore CSR saved on entry (re-enables interrupts if they were enabled)
* ===== Interruptibility state restored here ============================== *
;BRANCH OCCURS
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -