📄 ycbcr422pl_to_rgb565_p.sa
字号:
* -------------------------------------------------------------------------
*  Visible tail of the planar YCbCr 4:2:2 -> RGB565 procedure (C64x linear
*  assembly).  The procedure header and the declarations/initialisation of
*  symbols such as A_coef, B_c0, A_c0, B_c1, A_g_cr, B_g_cb, A_y_data,
*  B_cr_data, A_cb_data, A_rgb_data and B_num_pix are outside this excerpt.
*
*  Setup : builds packed constants, a second output pointer and the BDEC
*          loop counter.
*  Loop  : each iteration reads 8 luma bytes, 4 Cr bytes and 4 Cb bytes and
*          stores 8 RGB565 halfwords (two STDW of 4 pixels each).
*
*  Constant idiom: MVKL supplies the low halfword of the literal and PACK2
*  then copies that halfword into both halves of the register.
*
*  NOTE(review): B_num_pix is divided by 8 with SHRU, so a remainder of
*  1..7 pixels is not processed, and the loop body sits ahead of the first
*  BDEC test, so it executes at least once.  Confirm callers guarantee
*  num_pix is a multiple of 8 and >= 8.
*  NOTE(review): assuming the A_/B_ prefixes denote the register file, a few
*  instructions name only a side (.1/.2) while reading an operand from the
*  other file (e.g. SUB2 .1 ... B_n16, AND .2 ... A_msk6).  Confirm the
*  assembly optimizer accepts this without an explicit "x".
* -------------------------------------------------------------------------

* ---- Packed green coefficients and blue coefficient ----------------------
        PACK2           A_g_cr, B_g_cb, B_c3c2          ; B_c3c2 = g_cr : g_cb (pairs with cr:cb)
        PACK2           B_g_cb, A_g_cr, A_c2c3          ; A_c2c3 = g_cb : g_cr (pairs with cb:cr)
        LDH             *A_coef[4], A_c4                ; b_cb
        PACK2           A_c4, A_c4, A_c4                ; b_cb in both halfwords

* ---- Byte/halfword constants ---------------------------------------------
        MVKL.1          0x80808080, A_k80
        PACK2.1         A_k80, A_k80, A_k80             ; 0x80808080: chroma bias / x128 multiplier
        MVKL            0x08080808, A_k08
        PACK2           A_k08, A_k08, A_k08             ; 0x08080808: x8 multiplier (A side)
        MV              A_k08, B_k08                    ; same constant for the B side
        MVKL            0x00800080, B_n16
        PACK2           B_n16, B_n16, B_n16             ; 0x00800080: luma bias 16, pre-scaled by 8
        MVKL.2          0x01010101, B_k01
        PACK2.2         B_k01, B_k01, B_k01             ; 0x01010101: x1 multiplier (byte unpack)
        MV.1x           B_k01, A_k01                    ; same constant for the A side
        MVKL            0xFCFCFCFC, A_msk6              ; Mask to extract upper 6 bits of G
        PACK2           A_msk6, A_msk6, A_msk6          ; Value = 0xFCFCFCFC
        MVKL            0xF8F8F8F8, B_msk5              ; Mask to extract upper 5 bits of R & B
        PACK2           B_msk5, B_msk5, B_msk5          ; Value = 0xF8F8F8F8

* ---- Pointers and loop counter -------------------------------------------
        ADD             A_rgb_data, 8, B_rgb_ptr        ; B-side output pointer: 8 bytes past A_rgb_data
        SHRU            B_num_pix, 3, B_loopcnt         ; 8 pixels per iteration
        SUB             B_loopcnt, 2, B_loopcnt         ; BDEC branches while the count is >= 0 and
                                                        ; the body runs once before the first test,
                                                        ; so N iterations need a start value of N-2

loop:
* ---- Load one group: 8 Y, 4 Cr, 4 Cb -------------------------------------
        LDDW    .D1T2   *A_y_data++, B_y_7654:B_y_3210  ; Load the 8 Luma pixels
        LDW     .D2T2   *B_cr_data++, B_cr6420          ; Load the 4 Cr pixels
        LDW     .D1T1   *A_cb_data++, A_cb6420          ; Load the 4 Cb pixels

* ---- Remove biases and widen bytes to halfwords, scaled by 8 -------------
        XOR     .2x     B_cr6420, A_k80, B_cr6420_      ; Subtract the Chroma bias (flip bit 7 of each byte)
        XOR     .1      A_cb6420, A_k80, A_cb6420_      ; Subtract the Chroma bias (flip bit 7 of each byte)

        MPYU4   .2      B_y_7654, B_k08, B_y_76_:B_y_54_ ; Unpack luma to halfwords and
        MPYU4   .1x     B_y_3210, A_k08, A_y_32_:A_y_10_ ; multiply by 8 (shift left by 3)

        SUB2    .2      B_y_76_, B_n16, B_y_76          ; Subtract Luma bias (16 * 8)
        SUB2    .2      B_y_54_, B_n16, B_y_54          ; Subtract Luma bias (16 * 8)
        SUB2    .1      A_y_32_, B_n16, A_y_32          ; Subtract Luma bias (16 * 8)
        SUB2    .1      A_y_10_, B_n16, A_y_10          ; Subtract Luma bias (16 * 8)

        MPYSU4  .2      B_cr6420_, B_k08, B_cr64:B_cr20 ; Signed Cr unpacked, shifted left by 3
        MPYSU4  .1      A_cb6420_, A_k08, A_cb64:A_cb20 ; Signed Cb unpacked, shifted left by 3

* ---- Matrix multiplication: 16x16 -> 32-bit products ---------------------
        MPY2    .2      B_y_76, B_c0, B_y_7_c0:B_y_6_c0 ; luma * c0
        MPY2    .2      B_y_54, B_c0, B_y_5_c0:B_y_4_c0
        MPY2    .1      A_y_32, A_c0, A_y_3_c0:A_y_2_c0
        MPY2    .1      A_y_10, A_c0, A_y_1_c0:A_y_0_c0

        MPY2    .2      B_cr64, B_c1, B_cr6_c1:B_cr4_c1 ; Cr * c1 (red term)
        MPY2    .2      B_cr20, B_c1, B_cr2_c1:B_cr0_c1

        MPY2    .1      A_cb64, A_c4, A_cb6_c4:A_cb4_c4 ; Cb * c4 (blue term)
        MPY2    .1      A_cb20, A_c4, A_cb2_c4:A_cb0_c4

        PACKH2  .2x     B_cr64, A_cb64, B_cr6cb6        ; cr6 : cb6
        PACK2   .2x     B_cr64, A_cb64, B_cr4cb4        ; cr4 : cb4
        PACKH2  .1x     A_cb20, B_cr20, A_cb2cr2        ; cb2 : cr2
        PACK2   .1x     A_cb20, B_cr20, A_cb0cr0        ; cb0 : cr0

        DOTP2   .2      B_cr6cb6, B_c3c2, B_cg6         ; Add cbX_c2 to crX_c3 (green chroma term)
        DOTP2   .2      B_cr4cb4, B_c3c2, B_cg4
        DOTP2   .1      A_cb2cr2, A_c2c3, A_cg2
        DOTP2   .1      A_cb0cr0, A_c2c3, A_cg0

* ---- Generate all R, G & B pixels ----------------------------------------
*      Each chroma term is shared by two adjacent luma samples.  Per the
*      original note the sums are in 16Q16 format because the inputs were
*      scaled by 8 (shift left by 3) above; the coefficient format itself
*      is set outside this excerpt.
        ADD     .2      B_y_7_c0, B_cr6_c1, B_r_7
        ADD     .2      B_y_6_c0, B_cr6_c1, B_r_6
        ADD     .2      B_y_5_c0, B_cr4_c1, B_r_5
        ADD     .2      B_y_4_c0, B_cr4_c1, B_r_4
        ADD     .1x     A_y_3_c0, B_cr2_c1, A_r_3
        ADD     .1x     A_y_2_c0, B_cr2_c1, A_r_2
        ADD     .1x     A_y_1_c0, B_cr0_c1, A_r_1
        ADD     .1x     A_y_0_c0, B_cr0_c1, A_r_0

        ADD     .2      B_y_7_c0, B_cg6, B_g_7
        ADD     .2      B_y_6_c0, B_cg6, B_g_6
        ADD     .2      B_y_5_c0, B_cg4, B_g_5
        ADD     .2      B_y_4_c0, B_cg4, B_g_4
        ADD     .1      A_y_3_c0, A_cg2, A_g_3
        ADD     .1      A_y_2_c0, A_cg2, A_g_2
        ADD     .1      A_y_1_c0, A_cg0, A_g_1
        ADD     .1      A_y_0_c0, A_cg0, A_g_0

        ADD     .2x     B_y_7_c0, A_cb6_c4, B_b_7
        ADD     .2x     B_y_6_c0, A_cb6_c4, B_b_6
        ADD     .2x     B_y_5_c0, A_cb4_c4, B_b_5
        ADD     .2x     B_y_4_c0, A_cb4_c4, B_b_4
        ADD     .1      A_y_3_c0, A_cb2_c4, A_b_3
        ADD     .1      A_y_2_c0, A_cb2_c4, A_b_2
        ADD     .1      A_y_1_c0, A_cb0_c4, A_b_1
        ADD     .1      A_y_0_c0, A_cb0_c4, A_b_0

* ---- Keep the upper halfword of each 32-bit sum --------------------------
        PACKH2  .2      B_r_7, B_r_6, B_r_76            ; Pack results to 16 bits
        PACKH2  .2      B_r_5, B_r_4, B_r_54
        PACKH2  .1      A_r_3, A_r_2, A_r_32
        PACKH2  .1      A_r_1, A_r_0, A_r_10

        PACKH2  .2      B_g_7, B_g_6, B_g_76
        PACKH2  .2      B_g_5, B_g_4, B_g_54
        PACKH2  .1      A_g_3, A_g_2, A_g_32
        PACKH2  .1      A_g_1, A_g_0, A_g_10

        PACKH2  .2      B_b_7, B_b_6, B_b_76
        PACKH2  .2      B_b_5, B_b_4, B_b_54
        PACKH2  .1      A_b_3, A_b_2, A_b_32
        PACKH2  .1      A_b_1, A_b_0, A_b_10

* ---- Saturate and pack results to 8 bit values ---------------------------
        SPACKU4 .2      B_r_76, B_r_54, B_r_7654
        SPACKU4 .1      A_r_32, A_r_10, A_r_3210
        SPACKU4 .2      B_g_76, B_g_54, B_g_7654
        SPACKU4 .1      A_g_32, A_g_10, A_g_3210
        SPACKU4 .2      B_b_76, B_b_54, B_b_7654
        SPACKU4 .1      A_b_32, A_b_10, A_b_3210

* ---- Reduce to 5/6/5 bits per channel ------------------------------------
        AND     .2      B_r_7654, B_msk5, B_r_7654_     ; Apply mask to get upper 5 bits
        AND     .2      B_g_7654, A_msk6, B_g_7654_     ; Apply mask to get upper 6 bits
        AND     .2      B_b_7654, B_msk5, B_b_7654_     ; Apply mask to get upper 5 bits
        AND     .1      A_r_3210, B_msk5, A_r_3210_     ; Apply mask to get upper 5 bits
        AND     .1      A_g_3210, A_msk6, A_g_3210_     ; Apply mask to get upper 6 bits
        AND     .1      A_b_3210, B_msk5, A_b_3210_     ; Apply mask to get upper 5 bits

* ---- Position each channel inside a 16-bit pixel -------------------------
        MPYU4   .2      B_r_7654_, A_k80, B_r7_r6:B_r5_r4 ; r << 7 (ADDAH below adds the last << 1)
        MPYU4   .1      A_r_3210_, A_k80, A_r3_r2:A_r1_r0 ; r << 7
        MPYU4   .2      B_g_7654_, B_k08, B_g7_g6:B_g5_g4 ; g << 3
        MPYU4   .1      A_g_3210_, A_k08, A_g3_g2:A_g1_g0 ; g << 3

        ROTL    .2      B_b_7654_, 29, B_b_7654__       ; rotate left 29 == right 3; the low 3 bits
        ROTL    .1      A_b_3210_, 29, A_b_3210__       ; of every byte were cleared by the mask
        MPYU4   .2      B_b_7654__, B_k01, B_b7_b6:B_b5_b4 ; b >> 3, unpacked to halfwords
        MPYU4   .1      A_b_3210__, A_k01, A_b3_b2:A_b1_b0 ; b >> 3, unpacked to halfwords

        ADDAH   .2      B_b7_b6, B_r7_r6, B_r_b76       ; (r<<8)|(b>>3)
        ADDAH   .2      B_b5_b4, B_r5_r4, B_r_b54       ; (r<<8)|(b>>3)
        ADDAH   .1      A_b3_b2, A_r3_r2, A_r_b32       ; (r<<8)|(b>>3)
        ADDAH   .1      A_b1_b0, A_r1_r0, A_r_b10       ; (r<<8)|(b>>3)

        ADD     .2      B_r_b76, B_g7_g6, B_rgb76       ; merge green: bit fields do not overlap
        ADD     .2      B_r_b54, B_g5_g4, B_rgb54
        ADD     .1      A_r_b32, A_g3_g2, A_rgb32
        ADD     .1      A_r_b10, A_g1_g0, A_rgb10

* ---- Store 8 pixels; each pointer advances 2 double words (16 bytes) -----
        STDW    .D2T2   B_rgb76:B_rgb54, *B_rgb_ptr++[2]  ; pixels 4..7
        STDW    .D1T1   A_rgb32:A_rgb10, *A_rgb_data++[2] ; pixels 0..3

        BDEC    .2      loop, B_loopcnt                 ; branch while count >= 0, then decrement

        .return
        .endproc

* ========================================================================= *
*  End of file: ycbcr422pl_to_rgb565_p.sa                                   *
* ------------------------------------------------------------------------- *
*  Copyright (c) 2001 Texas Instruments, Incorporated.                      *
*  All Rights Reserved.                                                     *
* ========================================================================= *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -