* File: pix_expand_h.asm
* (code-viewer control label: font size)
* ========================================================================= *
* TEXAS INSTRUMENTS, INC. *
* *
* NAME *
* pix_expand *
* *
* USAGE *
* This routine is C-callable and can be called as: *
* *
* void pix_expand_asm *
* ( *
* int n, /* # of elements */ *
* const unsigned char *restrict in_data, /* Input data */ *
* short *restrict out_data /* Output data */ *
* ) *
* *
* *
* DESCRIPTION *
* The code takes an array of bytes and promotes them to half-words *
* by zero-extension. *
* *
* This is the C equivalent of the assembly code, without *
* restrictions. The assembly code has restrictions, as noted below. *
* *
* void pix_expand *
* ( *
* int n, *
* const unsigned char *restrict in_data, *
* short *restrict out_data *
* ) *
* { *
* int i; *
* *
* for (i = 0; i < n; i++) *
* out_data[i] = in_data[i]; *
* } *
* *
* ASSUMPTIONS *
* Input and output arrays must be double-word (8-byte) aligned. *
* *
* The input must be at least 16 elements long and contain a *
* multiple of 16 elements. *
* *
* NOTE *
* Interrupts are masked during the entire duration of this *
* function, as the entire function occurs within branch delay slots. *
* *
* MEMORY NOTE *
* No bank conflicts occur. This is a LITTLE ENDIAN implementation. *
* *
* TECHNIQUES *
* The loop is unrolled 16 times, loading bytes with LDDW. It uses *
* UNPKHU4 and UNPKLU4 to unpack the data and store the results with *
* STDW. *
* *
* To shave a few extra cycles from the function, the return branch *
* is issued from within the kernel. *
* *
* CYCLES *
* cycles = 3 * (n / 16) + 15. *
* For n = 1072, cycles = 216. *
* *
* CODESIZE *
* 100 bytes. *
* *
* ------------------------------------------------------------------------- *
* Copyright (c) 2000 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ========================================================================= *
.include "pix_expand_h.h62"
_pix_expand_asm:
; -------------------------------------------------------------------------
; Zero-extends unsigned bytes to 16-bit half-words, 16 elements per kernel
; pass: two LDDW fetch 16 input bytes, eight UNPKxU4 widen them, and four
; STDW write the resulting 32 output bytes.
;
; Inputs, following the C prototype in the file header and the aliases
; below: A_n (A4) = element count, B_i0 (B4) = input pointer,
; A_o0 (A6) = output pointer, B_ret (B3) = return address.
; NOTE(review): the argument-to-register mapping is assumed from the TI
; C6000 calling convention -- confirm against the compiler manual.
;
; The three kernel packets are order- and unit-sensitive: several symbolic
; names share one physical register (see the aliases), so a value is only
; valid between the packet that produces it and the packet that reuses the
; register.
; -------------------------------------------------------------------------
* ===================== SYMBOLIC REGISTER ASSIGNMENTS ===================== *
; A_i      : remaining-group counter (n / 16), decremented every pass.
; B_i      : copy of A_i + 1 taken each pass; predicate for the kernel loads.
; B_p      : store hold-off counter; stores run only once it reaches zero.
; A_i1     : second input pointer  (B_i0 + 8 bytes).
; B_o1     : second output pointer (A_o0 + 8 bytes).
; A_p_xxxx / B_p_xxxx : packed input words; the hex digits name the element
;            indices (0..F) held in that register.
; A_p_xx / B_p_xx     : unpacked half-word pairs for those element indices.
; Aliased registers: A8 = A_p_3210 / A_p_98, A9 = A_p_7654 / A_p_BA,
;                    B6 = B_p_BA98 / B_p_DC, B7 = B_p_FEDC / B_p_FE.
.asg A0, A_i
.asg B1, B_i
.asg A2, A_p_10
.asg A3, A_p_32
.asg A4, A_n
.asg A6, A_o0
.asg A7, A_i1
.asg A8, A_p_3210
.asg A8, A_p_98
.asg A9, A_p_7654
.asg A9, A_p_BA
.asg B0, B_p
.asg B3, B_ret
.asg B4, B_i0
.asg B5, B_o1
.asg B6, B_p_BA98
.asg B6, B_p_DC
.asg B7, B_p_FE
.asg B7, B_p_FEDC
.asg B8, B_p_54
.asg B9, B_p_76
* ========================================================================= *
* =========================== PIPE LOOP PROLOG ============================ *
; Issue the branch for a repeat pass of the kernel up front; the rest of
; the prolog and the first kernel pass execute in its delay slots (the
; file header notes the whole function runs inside branch delay slots).
B loop ;[ 4,1]
; Derive the second input/output pointers (each 8 bytes past the first) and
; the group count A_i = n >> 4 (16 elements per group).
ADD B_i0, 8, A_i1
|| ADD A_o0, 8, B_o1
|| SHR A_n, 4, A_i
; First pair of loads: bytes 0-7 into A_p_7654:A_p_3210 through B_i0 and
; bytes 8-15 into B_p_FEDC:B_p_BA98 through A_i1. Both pointers advance by
; [2] double-words (16 bytes). B_p is seeded with 0xFFFF8000.
LDDW .D1T2 *A_i1++[2], B_p_FEDC:B_p_BA98 ;[ 1,1]
|| LDDW .D2T1 *B_i0++[2], A_p_7654:A_p_3210 ;[ 1,1]
|| MVK 0xFFFF8000, B_p
* =========================== PIPE LOOP KERNEL ============================ *
loop:
; Kernel packet 1 of 3.
;  - Stores (only when B_p == 0): elements 8-B to A_o0 + 2 double-words and
;    elements C-F to B_o1 + 2 double-words; no pointer update here.
;  - Loop control: branch back to loop while A_i is non-zero and positive;
;    when A_i == 0 branch to the return address instead. A_i is decremented
;    every pass, and B_i captures the pre-decrement A_i plus one.
;  - While B_p is non-zero it is multiplied by 2. NOTE(review): MPY works
;    on the signed low half-words, so the expected sequence is
;    0xFFFF8000 -> 0xFFFF0000 -> 0, holding the stores off for the first
;    two passes while the pipeline fills -- confirm against the ISA guide.
[!B_p]STDW .D1T1 A_p_BA:A_p_98, *A_o0[2] ;[ 8,1]
||[!B_p]STDW .D2T2 B_p_FE:B_p_DC, *B_o1[2] ;[ 8,1]
||[ A_i]BPOS .S1 loop, A_i ;[ 5,2]
||[!A_i]B .S2 B_ret
|| SUB .L1 A_i, 1, A_i
|| ADD .L2X 1, A_i, B_i ;final load
||[ B_p]MPY .M2 B_p, 2, B_p
; Kernel packet 2 of 3.
;  - Stores (only when B_p == 0): elements 0-3 at A_o0 and elements 4-7 at
;    B_o1; both pointers then advance by [4] double-words (32 bytes, i.e.
;    16 half-words).
;  - Unpack the A-side input words: A_p_3210 -> A_p_10 / A_p_32 and
;    A_p_7654 -> B_p_54 / B_p_76. These write the same registers the stores
;    in this packet name as sources, so the stores must see the previous
;    pass's values (depends on same-cycle read-before-write).
[!B_p]STDW .D1T1 A_p_32:A_p_10, *A_o0++[4] ;[ 9,1]
||[!B_p]STDW .D2T2 B_p_76:B_p_54, *B_o1++[4] ;[ 9,1]
|| UNPKLU4 .S1 A_p_3210, A_p_10 ;[ 6,2]
|| UNPKHU4 .L1 A_p_3210, A_p_32 ;[ 6,2]
|| UNPKLU4 .L2X A_p_7654, B_p_54 ;[ 6,2]
|| UNPKHU4 .S2X A_p_7654, B_p_76 ;[ 6,2]
; Kernel packet 3 of 3.
;  - Unpack the B-side input words: B_p_BA98 -> A_p_98 / A_p_BA and
;    B_p_FEDC -> B_p_DC / B_p_FE. The results land in A8/A9/B6/B7, which
;    are also the load destinations, overwriting the packed inputs.
;  - Fetch the next 16 input bytes while B_i is non-zero.
; NOTE(review): tracing the A_i / B_i predicates suggests loads are issued
; for two 16-byte groups beyond the last group that is stored (only the
; load in the very last pass is suppressed). If so, the routine reads up
; to 32 bytes past the end of the input -- confirm, and check whether
; callers must keep that memory readable.
UNPKLU4 .S1X B_p_BA98, A_p_98 ;[ 7,2]
|| UNPKHU4 .L1X B_p_BA98, A_p_BA ;[ 7,2]
|| UNPKLU4 .L2 B_p_FEDC, B_p_DC ;[ 7,2]
|| UNPKHU4 .S2 B_p_FEDC, B_p_FE ;[ 7,2]
||[ B_i]LDDW .D1T2 *A_i1++[2], B_p_FEDC:B_p_BA98 ;[ 1,4]
||[ B_i]LDDW .D2T1 *B_i0++[2], A_p_7654:A_p_3210 ;[ 1,4]
* ========================================================================= *
* ========================================================================= *
* End of file: pix_expand_h.asm *
* ------------------------------------------------------------------------- *
* Copyright (c) 2000 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ========================================================================= *
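* Keyboard shortcuts (code-viewer help text, not part of the source)
* Copy code
* Ctrl + C
* Search code
* Ctrl + F
* Full-screen mode
* F11
* Toggle theme
* Ctrl + Shift + D
* Show shortcuts
* ?
* Increase font size
* Ctrl + =
* Decrease font size
* Ctrl + -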