* File: ycbcr422pl_to_rgb565_h.asm
* (web viewer label) Font size:
; ------------------------------------------------------------------------- ;
; Tail of the setup code and the software-pipeline prolog.
;
; What this section does, as far as the visible instructions show:
;   * loads two unsigned halfword coefficients (rcr, bcb) from the coef array
;   * saves A10, B10, the CSR value, the return address and the IRP value
;     into the stack frame (word slots 4, 3, 2, 1 and 13)
;   * builds packed constants: B_k32_k128 = 32 in the high half, 128 in the
;     low half; gcr:rcr and gcb:bcb packed as high:low halves
;   * issues the first iteration's four byte loads (cr, y0, cb, y1)
;
; A lone instruction starts a new execute packet; lines beginning with "||"
; run in parallel with the line above.  Instructions in one packet read
; their source registers before any write from the same packet lands.
; The [c,i] tags are the original author's schedule markers.
; NOTE(review): the function entry, the register .asg map and the loads of
; A_gcr_, B_gcb_ and the low half of A_one_lum are outside this view.
; ------------------------------------------------------------------------- ;
LDHU .D2T1 *B_coef[1], A_rcr ; rcr = coeff[1] (unsigned halfword)
|| LDHU .D1T2 *A_coef[4], B_bcb ; bcb = coeff[4] (unsigned halfword)
STW .D1T1 A10, *+A_SP[ 4] ; Save A10
|| STW .D2T2 B10, *+B_SP[ 3] ; Save B10
|| MV .L1X B_y_data, A_y_ptr ; Partitioning MV: y pointer to A side
STW .D1T1 A_csr, *+A_SP[ 2] ; Save CSR
|| STW .D2T2 B_ret, *+B_SP[ 1] ; Save return address
|| MVK .S2 128, B_k32_k128 ; Constant: 128 (low half)
;-
; =========================== PIPE LOOP PROLOG ============================ ;
LDBU .D2T1 *B_cr_ptr++, A_cr_ ;[ 1,1] cr = *cr_ptr++
|| AND .L1X B_num_pix, -2, A_i ; A_i = num_pix with bit 0 cleared (even)
|| MV .L2X A_cb_data, B_cb_ptr ; Partitioning MV: cb pointer to B side
|| MVKLH .S1 1, A_one_lum ; Constant: 1 in the high half
|| MVKLH .S2 32, B_k32_k128 ; Constant: 32 in the high half
|| MPY .M2 B_k32_k128, 1, B_p ; Prolog collapse count: seeds B_p, which gates the kernel's [!B_p] STW
|| MPYH .M1 A_one_lum, A_one_lum, A_lneg ; lneg = coeff[0] < 0 (presumably uses the high half as it was before the MVKLH above - confirm)
;-
LDBU .D1T1 *A_y_ptr++[2], A_y0 ;[ 2,1] y0 = *y_ptr, then y_ptr += 2
|| SHL .S2X A_one_lum, 4, B_y_bias; ((128<<13)+16*luma): (1<<16 | luma) << 4
|| MVKH .S1 0x84108410, A_sflip ; Sign-flip cst, high half only
LDBU .D2T2 *B_cb_ptr++, B_cb_ ;[ 3,1] cb = *cb_ptr++
|| ADD .D1 A_i, 2, A_i ; Adjust for para iter
|| SHL .S1 A_lneg, 20, A_lneg ; Handle luma < 0: lneg <<= 20
|| MV .L1X B_k32_k128, A_k32_k128 ; Twin constant reg. on the A side
|| MV .L2X A_rgb_data, B_rgb_ptr ; Partitioning MV: output pointer to B side
;-
LDBU .D1T2 *-A_y_ptr[1], B_y1 ;[ 4,1] y1 = y_ptr[-1] (odd pixel; pointer is not modified here)
|| SHL .S1 A_gcr_, 16, A_gcr ; Put gcr in high half
|| SHL .S2 B_gcb_, 16, B_gcb ; Put gcb in high half
|| SUB .L2X B_y_bias, A_lneg, B_y_bias; Sign bit, coeff[0]<0: y_bias -= lneg
STW .D1T2 B_irp, *+A_SP[13] ; Save IRP
|| ADD .L1 A_gcr, A_rcr, A_gcr_rcr ; Merge gcr (high), rcr (low)
|| ADD .L2 B_gcb, B_bcb, B_gcb_bcb ; Merge gcb (high), bcb (low)
|| MVKLH .S2 0xF800, B_ms5 ; Mask 5, high half = 0xF800
;-
; =========================== PIPE LOOP KERNEL ============================ ;
; ------------------------------------------------------------------------- ;
; conv_loop: software-pipelined kernel, six execute packets per pass.
;
; Each pass consumes one cr byte, one cb byte and two y bytes (y0, y1) and,
; once the store is enabled, writes one 32-bit word through B_rgb_ptr.  The
; chroma products rt, gt and bt are shared by both pixels.  Pixel 0 is moved
; into the low half (MPYHU by the 1 in the high half of A_one_lum) and added
; to pixel 1 before the store.
;
; The [c,i] tags are the original author's schedule markers; operations of
; several iterations are in flight at once.  The store is predicated on
; B_p == 0, and B_p is rewritten by the MPYUS in the first packet on every
; pass.
; NOTE(review): confirm how many passes it takes B_p to reach zero.
;
; The branch sits in the first packet and the loop body extends five more
; packets past it, which matches the C6x branch delay.  Do not reorder or
; regroup: every result is consumed on an exact later cycle.
; NOTE(review): A_ms6 and the low halves of B_ms5 and A_sflip are set
; outside this view.
; ------------------------------------------------------------------------- ;
conv_loop:
[ A_i]B .S1 conv_loop ;[24,1] while (i)
|| ADD .L2X B_rgb1, A_rgb0, B_rgb_ ;[24,1] merge pix 0, 1
|| MPYHUS .M1X A_g0t, B_ms5, A_g0f ;[18,2] >> 5 and negate
|| SSHL .S2 B_g1, 11, B_g1s ;[18,2] g1s = sat(g1)
|| ADD .D1 A_y0t, A_rt, A_r0 ;[12,3] r0 = y0t + rt
|| SUB .D2 B_y1t_, B_y_bias, B_y1t ;[12,3] y1t-= y_bias
|| SUB .L1 A_cr_, A_k32_k128, A_cr ;[ 6,4] cr -= 128 (full reg subtracted; the multiplies read only the low half)
|| MPYUS .M2 B_p, 8, B_p ; prolog collapse count: update store gate B_p
;-
ADD .D1 A_r0t, A_b0f, A_r_b0 ;[19,2] Merge r0, b0
|| MPYHU .M2 B_b1t, B_k32_k128, B_b1f ;[19,2] >> 11 (high half * 32)
|| AND .S2X B_g1s, A_ms6, B_g1t ;[19,2] g1t = g1s & ms6
|| SSHL .S1 A_r0, 11, A_r0s ;[13,3] r0s = sat(r0)
|| ADD .L2 B_y1t, B_bt, B_b1 ;[13,3] b1 = y1t + bt
|| ADD .L1X B_gt_, A_gt_, A_gt ;[13,3]gt=gcr*cr+gcb*cb
|| MPYLH .M1 A_cr, A_gcr_rcr, A_gt_ ;[ 7,4] gcr * cr (low half of cr, high half of gcr_rcr)
|| LDBU .D2T1 *B_cr_ptr++, A_cr_ ;[ 1,5] cr = *cr_ptr++
;-
XOR .L2X B_rgb_, A_sflip, B_rgb ;[26,1] Fix sign bits
|| MPYHUS .M2 B_g1t, B_ms5, B_g1f ;[20,2] >> 5 and negate
|| SSHL .S2 B_b1, 11, B_b1s ;[14,3] b1s = sat(b1)
|| ADD .L1X A_y0t, B_bt, A_b0 ;[14,3] b0 = y0t + bt
|| ADD .S1 A_y0t, A_gt, A_g0 ;[14,3] g0 = y0t + gt
|| MPY .M1 A_y0, A_one_lum, A_y0t_ ;[ 8,4] y0t = y0 * luma
|| SUB .D2 B_cb_, B_k32_k128, B_cb ;[ 8,4] cb -= 128 (full reg subtracted; the multiplies read only the low half)
|| LDBU .D1T1 *A_y_ptr++[2], A_y0 ;[ 2,5] y0 = *y_ptr, then y_ptr += 2
;-
SUB .D1 A_r_b0, A_g0f, A_rgb0_ ;[21,2] merge r0,g0,b0
|| ADD .L2 B_r1t, B_b1f, B_r_b1 ;[21,2] merge r1, b1
|| AND .L1X A_r0s, B_ms5, A_r0t ;[15,3] r0t = r0s & ms5
|| SSHL .S1 A_b0, 11, A_b0s ;[15,3] b0s = sat(b0)
|| ADD .S2X B_y1t, A_rt, B_r1 ;[15,3] r1 = y1t + rt
|| MPY .M1 A_cr, A_gcr_rcr, A_rt ;[ 9,4] rt = rcr * cr
|| MPYLH .M2 B_cb, B_gcb_bcb, B_gt_ ;[ 9,4] gcb * cb
|| LDBU .D2T2 *B_cb_ptr++, B_cb_ ;[ 3,5] cb = *cb_ptr++
;-
MPYHU .M1 A_rgb0_, A_one_lum, A_rgb0 ;[22,2] rgb0 in lo half (high half * 1)
|| SUB .D2 B_r_b1, B_g1f, B_rgb1 ;[22,2] merge r1,g1,b1
|| AND .L1X A_b0s, B_ms5, A_b0t ;[16,3] b0t = b0s & ms5
|| AND .L2 B_b1s, B_ms5, B_b1t ;[16,3] b1t = b1s & ms5
|| SSHL .S1 A_g0, 11, A_g0s ;[16,3] g0s = sat(g0)
|| SSHL .S2 B_r1, 11, B_r1s ;[16,3] r1s = sat(r1)
|| MPY .M2X B_y1, A_one_lum, B_y1t_ ;[10,4] y1t = y1 * luma
|| LDBU .D1T2 *-A_y_ptr[1], B_y1 ;[ 4,5] y1 = y_ptr[-1] (odd pixel; pointer is not modified here)
;-
[!B_p]STW .D2T2 B_rgb, *B_rgb_ptr++ ;[29,1] *rgb_ptr++=rgb (only when B_p == 0)
|| SUB .D1 A_i, 2, A_i ;[23,2] i -= 2
|| MPYHU .M1 A_b0t, A_k32_k128, A_b0f ;[17,3] >> 11 (high half * 32)
|| AND .L1 A_g0s, A_ms6, A_g0t ;[17,3] g0t = g0s & ms6
|| AND .L2 B_r1s, B_ms5, B_r1t ;[17,3] r1t = r1s & ms5
|| ADD .S2X B_y1t, A_gt, B_g1 ;[17,3] g1 = y1t + gt
|| MPY .M2 B_cb, B_gcb_bcb, B_bt ;[11,4] bt = bcb * cb
|| SUB .S1X A_y0t_, B_y_bias, A_y0t ;[11,4] y0t-= y_bias
; =========================== PIPE LOOP EPILOG ============================ ;
; ================ SYMBOLIC REGISTER ASSIGNMENTS: CLEANUP ================= ;
; Symbolic names are re-bound here for the cleanup code only.
.asg B15, B_SP ; Stack ptr, B side
.asg A3, A_SP ; Stack ptr, A side
.asg A0, A_csr ; CSR value
.asg B0, B_irp ; IRP value
.asg B3, B_ret ; Return address
; ========================================================================= ;
; Epilog: finishes the last in-flight iteration (the merge ADD, the
; sign-fix XOR and the final STW) while restoring saved state and returning.
;
; The stack pointer is read back from IRP; the original IRP value is then
; reloaded from frame slot 13 and written back.  Saved registers are
; reloaded from word slots 1..12, and the last load pre-increments B_SP by
; 14 words while fetching A15.
; NOTE(review): the code that parks SP in IRP and stores slots 5..12 and 14
; is outside this view.
;
; The return branch is followed by the CSR restore packet, the final STW and
; NOP 3, which together fill its delay slots.  Keep this order unchanged.
; ------------------------------------------------------------------------- ;
;-
MVC .S2 IRP, B_SP ; Restore stack ptr (read back from IRP)
|| ADD .L2X B_rgb1, A_rgb0, B_rgb_ ;[24,5] merge pix 0, 1
MV .L1X B_SP, A_SP ; Twin Stack Pointer
|| LDW .D2T2 *+B_SP[13], B_irp ; Get IRP's value
LDW .D1T2 *+A_SP[ 1], B_ret ; Get return address
|| LDW .D2T1 *+B_SP[ 2], A_csr ; Get CSR's value
LDW .D1T2 *+A_SP[ 3], B10 ; Restore B10
|| LDW .D2T1 *+B_SP[ 4], A10 ; Restore A10
;-
LDW .D1T2 *+A_SP[11], B14 ; Restore B14
|| LDW .D2T1 *+B_SP[12], A14 ; Restore A14
|| XOR .L2X B_rgb_, A_sflip, B_rgb ;[26,5] fix sign bits
LDW .D1T2 *+A_SP[ 7], B12 ; Restore B12
|| LDW .D2T1 *+B_SP[ 8], A12 ; Restore A12
LDW .D1T2 *+A_SP[ 9], B13 ; Restore B13
|| LDW .D2T1 *+B_SP[10], A13 ; Restore A13
|| MVC .S2 B_irp, IRP ; Restore IRP
;-
LDW .D1T2 *+A_SP[ 5], B11 ; Restore B11
|| LDW .D2T1 *+B_SP[ 6], A11 ; Restore A11
|| B .S2 B_ret ; Return to caller (delay slots follow)
MVC .S2X A_csr, CSR ; Restore CSR
|| LDW .D2T1 *++B_SP[14],A15 ; Restore A15; B_SP pre-incremented by 14 words
; ===== Interruptibility state (GIE) restored here =====
STW .D2T2 B_rgb, *B_rgb_ptr ;[29,5] *rgb_ptr = rgb (final store, no post-increment)
NOP 3
; ===== Branch occurs =====
; ===== Interrupts may occur here =====
* ========================================================================= *
* End of file: ycbcr422pl_to_rgb565_h.asm *
* ------------------------------------------------------------------------- *
* Copyright (c) 1999 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ========================================================================= *
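* (web viewer residue, not part of the original source) Keyboard shortcuts:
*   Copy code:           Ctrl + C
*   Search code:         Ctrl + F
*   Full-screen mode:    F11
*   Toggle theme:        Ctrl + Shift + D
*   Show shortcuts:      ?
*   Increase font size:  Ctrl + =
*   Decrease font size:  Ctrl + -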