* File: fdct_8x8.asm
* (The "Font size:" label that followed here is code-viewer page text, not part of the source.)
DPACK2 .L2 B_F7F1, B_F7F1x, B_F7F7:B_F1F1 ;[31,1]
; stage 4
STW .D2T1 A_F6F6, *+B_o_ptr[12] ;[32,1]
||[!A_f] SWAP2 .S2 B_F7F7, B_F7F7 ;[32,1]
||[!A_f] SWAP2 .S1 A_F2F2, A_F2F2 ;[32,1]
|| DPACK2 .L1 A_P0P1, A_P0P1x, A_F0F0:A_F4F4 ;[32,1]
|| DPACK2 .L2 B_F3F5, B_F3F5x, B_F3F3:B_F5F5 ;[32,1]
||[!A_f] ROTL .M2 B_F1F1, 16, B_F1F1 ;[32,1]
STW .D2T1 A_F2F2, *-B_o_ptr[4] ;[33,1]
||[!A_f] SWAP2 .S2 B_F3F3, B_F3F3 ;[33,1]
||[!A_f] ROTL .M1 A_F4F4, 16, A_F4F4 ;[33,1]
STW .D2T2 B_F1F1, *-B_o_ptr[8] ;[34,1]
||[!A_f] ROTL .M2 B_F5F5, 16, B_F5F5 ;[34,1]
STW .D2T2 B_F7F7, *+B_o_ptr[16] ;[35,1]
||[!A_f] ROTL .M1 A_F0F0, 16, A_F0F0 ;[35,1]
STW .D2T2 B_F5F5, *+B_o_ptr[8] ;[36,1]
|| SUB .S1 1, A_f, A_f ;[36,1]
STW .D2T1 A_F4F4, *+B_o_ptr[4] ;[37,1]
|| SHR .S2 B_fix, 24, B_fix_ ;[37,1]
STW .D2T1 A_F0F0, *-B_o_ptr[12] ;[38,1]
|| ROTL .M2 B_fix, 8, B_fix ;[38,1]
SPKERNEL 3, 6
|| STW .D2T2 B_F3F3, *B_o_ptr++[B_fix_] ;[39,1]
* ========================================================================= *
* ===================== SYMBOLIC REGISTER ASSIGNMENTS ===================== *
; ---------------------------------------------------------------------------
; Symbolic names for the physical registers used by the loop that follows.
; Each .asg binds one register to one substitution symbol.  The list is
; grouped by register file; the leading A_/B_ of every symbol matches the
; register file of the register it names.
;
; Several symbols deliberately name the same physical register (for example
; A21 is both A_f32 and A_g0g1, and B5 is both B_g3h3 and B_F7F1).
; Names that differ only in letter case (A_f32 / A_F32, B_f76 / B_F76, ...)
; are distinct symbols, several of them bound to different registers.
;
; A_dct_data and B_count, which the loop also reads, are not assigned here.
; ---------------------------------------------------------------------------

; ---- A-side register file -------------------------------------------------
; Input / output pointers.
        .asg            A25,        A_i_ptr
        .asg            A24,        A_o_ptr
; Packed constants (filled with MVKL/MVKLH from cst_* symbols in the loop).
        .asg            A5,         A_D4D4
        .asg            A3,         A_D6D2
; LDDW destinations: the pairs A_f76:A_f45 and A_f32:A_f01.
        .asg            A29,        A_f76
        .asg            A28,        A_f45
        .asg            A21,        A_f32
        .asg            A20,        A_f01
; ADDSUB2 result pairs (A_g0g1:A_h2h3, A_h0h1:A_g2g3, A_p0p1:A_r0r1).
        .asg            A21,        A_g0g1
        .asg            A20,        A_h2h3
        .asg            A23,        A_h0h1
        .asg            A22,        A_g2g3
        .asg            A27,        A_p0p1
        .asg            A26,        A_r0r1
; PACKH2 / PACK2 results and the CMPYR1 product.
        .asg            A19,        A_h2g2
        .asg            A7,         A_g3h3
        .asg            A6,         A_s0q0
; CMPYR products and the ADDSUB2 pair A_S1Q1:A_S0Q0.
        .asg            A18,        A_F4F0
        .asg            A8,         A_F6F2
        .asg            A7,         A_S1Q1
        .asg            A6,         A_S0Q0
        .asg            A9,         A_F7F1
        .asg            A17,        A_F3F5
; STDW source pairs: A_F76:A_F54 and A_F32:A_F10.
        .asg            A29,        A_F76
        .asg            A28,        A_F54
        .asg            A17,        A_F32
        .asg            A16,        A_F10

; ---- B-side register file -------------------------------------------------
; Input / output pointers.
        .asg            B6,         B_i_ptr
        .asg            B7,         B_o_ptr
; Packed constants (filled with MVKL/MVKLH from cst_* symbols in the loop).
        .asg            B17,        B_C4nC4
        .asg            B18,        B_D7D1
        .asg            B19,        B_D3D5
; LDDW destinations: the pairs B_f76:B_f45 and B_f32:B_f01.
        .asg            B21,        B_f76
        .asg            B20,        B_f45
        .asg            B23,        B_f32
        .asg            B22,        B_f01
; ADDSUB2 result pairs (B_g0g1:B_h2h3, B_h0h1:B_g2g3, B_p0p1:B_r0r1).
        .asg            B23,        B_g0g1
        .asg            B22,        B_h2h3
        .asg            B29,        B_h0h1
        .asg            B28,        B_g2g3
        .asg            B31,        B_p0p1
        .asg            B30,        B_r0r1
; PACKH2 / PACK2 results and the CMPYR1 product.
        .asg            B8,         B_h2g2
        .asg            B5,         B_g3h3
        .asg            B20,        B_s0q0
; CMPYR products and the ADDSUB2 pair B_S1Q1:B_S0Q0.
        .asg            B9,         B_F4F0
        .asg            B16,        B_F6F2
        .asg            B27,        B_S1Q1
        .asg            B26,        B_S0Q0
        .asg            B5,         B_F7F1
        .asg            B24,        B_F3F5
; STDW source pairs: B_F76:B_F54 and B_F32:B_F10.
        .asg            B25,        B_F76
        .asg            B24,        B_F54
        .asg            B27,        B_F32
        .asg            B26,        B_F10
* ========================================================================= *
; ---------------------------------------------------------------------------
; Software-pipelined loop executed from the SPLOOP buffer
; (SPLOOPD with an iteration interval of 5 cycles), followed by the return.
;
; What the visible code sets up:
;   * ILC is loaded from B_count, so B_count supplies the loop trip count.
;   * A_i_ptr and A_o_ptr both start at A_dct_data.  B_i_ptr and B_o_ptr
;     both start at A_dct_data plus 8 halfwords (ADDAH ..., 8).  Input and
;     output pointers therefore walk the same buffer, and the stores write
;     back over the region the loads read from.
;   * Every pointer is post-incremented by 4 doublewords per iteration.
;     One iteration loads two doublewords through A_i_ptr and the two
;     doublewords that follow them through B_i_ptr, and stores the same
;     amount through A_o_ptr / B_o_ptr.
;   * Packets that begin with SPMASK carry one-time setup instructions,
;     flagged with '^'.  They run while the first iteration is issued and
;     are not part of the repeating loop body.
;   * Packed constants are assembled from symbols defined elsewhere in the
;     file: MVKL supplies the low half, MVKLH the high half, e.g.
;     B_D7D1 = cst_D7 (high) : cst_D1 (low).
;
; The ";[c,1]" tags give the cycle of the single-iteration schedule on which
; each instruction issues; the "; stage N" markers split that schedule into
; groups of 5 cycles, matching the iteration interval.
; ---------------------------------------------------------------------------
        ADD     .L2X    A_dct_data, 0,    B_i_ptr

        SPLOOPD 5
||      MVC     .S2     B_count,          ILC
||      ADD             A_dct_data, 0,    A_i_ptr

; stage 0 -- loads of one iteration; one-time constant and pointer setup
        SPMASK
||      LDDW    .D1T1   *+A_i_ptr[1],     A_f76:A_f45       ;[ 1,1]
||^     MVKL    .S2     cst_D1,           B_D7D1
||^     MVKL    .S1     cst_D2,           A_D6D2
||^     ADDAH   .D2     B_i_ptr, 8,       B_i_ptr

        SPMASK
||      LDDW    .D1T1   *A_i_ptr++[4],    A_f32:A_f01       ;[ 2,1]
||      LDDW    .D2T2   *+B_i_ptr[1],     B_f76:B_f45       ;[ 2,1]
||^     MVKLH   .S2     cst_D7,           B_D7D1
||^     MVKLH   .S1     cst_D6,           A_D6D2

; The ADD into B_o_ptr reads A_dct_data on a B-side unit, so it names the
; cross path (.L2X), exactly like the ADD that seeds B_i_ptr before SPLOOPD.
        SPMASK
||      LDDW    .D2T2   *B_i_ptr++[4],    B_f32:B_f01       ;[ 3,1]
||^     MVKL    .S1     cst_D4,           A_D4D4
||^     MVKL    .S2     cst_nC4,          B_C4nC4
||^     ADD     .L2X    A_dct_data, 0,    B_o_ptr
||^     ADD     .L1     A_dct_data, 0,    A_o_ptr

        SPMASK
||^     MVKLH   .S2     cst_C4,           B_C4nC4
||^     MVKLH   .S1     cst_D4,           A_D4D4
||^     ADDAH   .D2     B_o_ptr, 8,       B_o_ptr

        SPMASK
||^     MVKL    .S2     cst_D5,           B_D3D5

; stage 1 -- packed add/subtract of the loaded pairs, repacking, and the
;            first rounded complex multiply by B_C4nC4 (A side)
        SPMASK
||^     MVKLH   .S2     cst_D3,           B_D3D5

        ADDSUB2 .L1     A_f32, A_f45,     A_h0h1:A_g2g3     ;[ 7,1]

        ADDSUB2 .L2     B_f32, B_f45,     B_h0h1:B_g2g3     ;[ 8,1]
||      ADDSUB2 .L1     A_f01, A_f76,     A_g0g1:A_h2h3     ;[ 8,1]

        PACK2   .L1     A_g2g3, A_h2h3,   A_g3h3            ;[ 9,1]
||      ADDSUB2 .L2     B_f01, B_f76,     B_g0g1:B_h2h3     ;[ 9,1]

        PACK2   .S2     B_g2g3, B_h2h3,   B_g3h3            ;[10,1]
||      PACKH2  .S1     A_h2h3, A_g2g3,   A_h2g2            ;[10,1]
||      CMPYR1  .M1X    A_g3h3, B_C4nC4,  A_s0q0            ;[10,1]

; stage 2 -- second level of packed add/subtract, B-side multiply by
;            B_C4nC4, and the first CMPYR products
        PACKH2  .S2     B_h2h3, B_g2g3,   B_h2g2            ;[11,1]
||      ADDSUB2 .L1     A_g0g1, A_h0h1,   A_p0p1:A_r0r1     ;[11,1]
||      CMPYR1  .M2     B_g3h3, B_C4nC4,  B_s0q0            ;[11,1]

        ADDSUB2 .L2     B_g0g1, B_h0h1,   B_p0p1:B_r0r1     ;[12,1]

        NOP             1

        CMPYR   .M1     A_p0p1, A_D4D4,   A_F4F0            ;[14,1]

        ADDSUB2 .L1     A_h2g2, A_s0q0,   A_S1Q1:A_S0Q0     ;[15,1]
||      CMPYR   .M2X    B_r0r1, A_D6D2,   B_F6F2            ;[15,1]

; stage 3 -- remaining rounded complex multiplies by the packed constants
        ADDSUB2 .L2     B_h2g2, B_s0q0,   B_S1Q1:B_S0Q0     ;[16,1]
||      CMPYR   .M1     A_r0r1, A_D6D2,   A_F6F2            ;[16,1]

        CMPYR   .M1X    A_S1Q1, B_D7D1,   A_F7F1            ;[17,1]
||      CMPYR   .M2X    B_p0p1, A_D4D4,   B_F4F0            ;[17,1]

        CMPYR   .M2     B_S1Q1, B_D7D1,   B_F7F1            ;[18,1]
||      CMPYR   .M1X    A_S0Q0, B_D3D5,   A_F3F5            ;[18,1]

        CMPYR   .M2     B_S0Q0, B_D3D5,   B_F3F5            ;[19,1]

        NOP             1

; stage 4 -- regroup the product halves into the F76:F54 / F32:F10 register
;            pairs and store them as doublewords
        PACK2   .S1     A_F7F1, A_F4F0,   A_F10             ;[21,1]

        PACKH2  .S2     B_F7F1, B_F6F2,   B_F76             ;[22,1]
||      PACKLH2 .S1     A_F3F5, A_F4F0,   A_F54             ;[22,1]

        PACKHL2 .S2     B_F3F5, B_F6F2,   B_F32             ;[23,1]
||      PACKH2  .S1     A_F7F1, A_F6F2,   A_F76             ;[23,1]

        STDW    .D1T1   A_F76:A_F54,      *+A_o_ptr[1]      ;[24,1]
||      PACKLH2 .S2     B_F3F5, B_F4F0,   B_F54             ;[24,1]
||      PACKHL2 .S1     A_F3F5, A_F6F2,   A_F32             ;[24,1]

        STDW    .D1T1   A_F32:A_F10,      *A_o_ptr++[4]     ;[25,1]
||      STDW    .D2T2   B_F76:B_F54,      *+B_o_ptr[1]      ;[25,1]
||      PACK2   .L2     B_F7F1, B_F4F0,   B_F10             ;[25,1]

; stage 5 -- last store of the iteration; SPKERNEL closes the loop body
        SPKERNEL 3, 0
||      STDW    .D2T2   B_F32:B_F10,      *B_o_ptr++[4]     ;[26,1]

;; below is overlapped with epilog.
; Return through B3; the trailing 5 is the NOP count folded into RETNOP.
        RETNOP  .S2     B3, 5
* ========================================================================= *
.end
* ======================================================================== *
* End of file: fdct_8x8.asm *
* ------------------------------------------------------------------------ *
* Copyright (C) 2005 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ======================================================================== *
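* ------------------------------------------------------------------------ *
* Code-viewer page text (keyboard shortcut legend); not part of the source. *
*   Copy code ............ Ctrl + C                                         *
*   Search code .......... Ctrl + F                                         *
*   Full-screen mode ..... F11                                              *
*   Toggle theme ......... Ctrl + Shift + D                                 *
*   Show shortcuts ....... ?                                                *
*   Increase font size ... Ctrl + =                                         *
*   Decrease font size ... Ctrl + -                                         *
* ------------------------------------------------------------------------ *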