* File: scale_vert_h.asm
* (The start of the original header banner appears to be missing from this listing.)
* For cols = 800 and l_hh = 4, cycles = 1056. *
* *
* CODESIZE *
* 612 bytes *
* ------------------------------------------------------------------------- *
* Copyright (c) 2000 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ========================================================================= *
; Copyright notice emitted as data: the two .string directives below are placed
; in the section named ".data:copyright_h", with _Copyright labelling the start.
.sect ".data:copyright_h"
_Copyright: .string "Copyright (C) 2000 Texas Instruments Incorporated. "
.string "All Rights Reserved."
; NOTE(review): no section directive for the code appears after this point in
; the visible listing; presumably the included header selects the code section
; and exports _scale_vert_asm -- confirm against scale_vert_h.h62.
.include "scale_vert_h.h62"
*==============================================================================*
_scale_vert_asm:
*================= SYMBOLIC REGISTER ASSIGNMENTS: =============================*
* Each .asg below gives a physical register a symbolic name for use in the
* code that follows. Many registers carry several names (for example A4 is
* A_p01, A_p20, A_p202, A_ptr_plane_x, A_x11x01 and A_x32x22); those names all
* refer to the same register, so writing one of them overwrites the others.
* NOTE(review): A_SP is assigned A0 (also A_filter) even though it is listed
* between the A14 and A16 entries -- confirm this is intentional.
.asg A0, A_filter ; copy of filter pointer inc.
.asg A1, A_taps ; number of taps used
.asg A2, A_taps_ ; copy of number of taps used
.asg A3, A_n_x_ ; copy of length of line
.asg A4, A_p01 ; temp sum of products
.asg A4, A_p20 ; temp sum of products
.asg A4, A_p202 ; temp sum of products
.asg A4, A_ptr_plane_x ; pointer to input data buffer
.asg A4, A_x11x01 ; line 0:1 data 1
.asg A4, A_x32x22 ; line 2:3 data 2
.asg A5, A_ka ; index along buffer
.asg A5, A_p02 ; temp sum of products
.asg A6, A_n_x ; width of line in shorts
.asg A6, A_p00 ; temp sum of products
.asg A6, A_p200 ; temp sum of products
.asg A6, A_p203 ; temp sum of products
.asg A6, A_x33x23 ; line 2:3 data 3
.asg A7, A_p201 ; temp sum of products
.asg A7, A_p23 ; temp sum of products
.asg A8, A_mod_hh ; pointer to rotated filter
.asg A8, A_p22 ; temp sum of products
.asg A8, A_x31x21 ; line 2:3 data 1
.asg A9, A_ptr_ln_y ; pointer to even outputs
.asg A10, A_start_ln ; start line into input buffer
.asg A10, A_x13x03 ; line 0:1 data 3
.asg A11, A_ptr_ln0_x ; line 0 pointer, data 0 to 3
.asg A12, A_ptr_ln1_x ; line 1 pointer, data 0 to 3
.asg A13, A_ptr_ln2_x ; line 2 pointer, data 0 to 3
.asg A14, A_ptr_ln3_x ; line 3 pointer, data 0 to 3
.asg A0, A_SP ; stack pointer copy, A datapath (shares A0 with A_filter)
.asg A16, A_t_y10 ; packed outputs 1:0
.asg A16, A_x11x10 ; line 1 data 1:0
.asg A17, A_p03 ; temp sum of products
.asg A17, A_t_y32 ; packed outputs 3:2
.asg A17, A_x10x00 ; line 0:1 data 0
.asg A17, A_x12x02 ; line 0:1 data 2
.asg A17, A_x13x12 ; line 1 data 3:2
.asg A18, A_x21x20 ; line 2 data 1:0
.asg A18, A_x30x20 ; line 2:3 data 0
.asg A19, A_x23x22 ; line 2 data 3:2
.asg A20, A_p21 ; temp sum of products
.asg A20, A_x31x30 ; line 3 data 1:0
.asg A21, A_x33x32 ; line 3 data 3:2
.asg A22, A_h1h0 ; filter taps i:i+1
.asg A23, A_h3h2 ; filter taps i+2:i+3
.asg A24, A_x01x00 ; line 0 data 1:0
.asg A25, A_x03x02 ; line 0 data 3:2
.asg A26, A_y0 ; accumulator 0
.asg A27, A_y1 ; accumulator 1
.asg A28, A_y2 ; accumulator 2
.asg A29, A_y3 ; accumulator 3
.asg A30, A_hh ; base address of rotated filter
.asg A31, A_l_hh ; copy of length of filter
.asg B0, B_j ; count in filter rotate loop
.asg B0, B_p07 ; temp sum of products
.asg B0, B_p207 ; temp sum of products
.asg B0, B_start_ln ; copy of start_line
.asg B0, B_x14x04 ; line 0:1 data 4
.asg B1, B_no_gie ; CSR value with the GIE bit cleared
.asg B1, B_p24 ; temp sum of products
.asg B1, B_p27 ; temp sum of products
.asg B2, B_csr ; CSR's value
.asg B2, B_k ; count to rotate filter
.asg B2, B_pro ; prologue predication
.asg B3, B_ret ; return address
.asg B3, B_x36x26 ; line 2:3 data 6
.asg B4, B_p06 ; temp sum of products
.asg B4, B_p206 ; temp sum of products
.asg B4, B_ptr_plane_y ; pointer to output line
.asg B4, B_x34x24 ; line 2:3 data 4
.asg B5, B_filter ; copy of pointer to filter
.asg B5, B_p05 ; temp sum of products
.asg B5, B_p205 ; temp sum of products
.asg B5, B_p26 ; temp sum of products
.asg B6, B_ptr_hh ; pointer to filter
.asg B6, B_x17x07 ; line 0:1 data 7
.asg B7, B_i ; main scale_vert loop count % 32
.asg B8, B_l_hh ; length of filter in shorts
.asg B8, B_ptr_ln_y ; pointer to odd outputs
.asg B9, B_mod_hh ; copy of filter pointer
.asg B9, B_x15x05 ; line 0:1 data 5
.asg B10, B_x35x25 ; line 2:3 data 5
.asg B11, B_ka ; index along buffer
.asg B12, B_ptr_ln0_x ; line 0 pointer, data 4 to 7
.asg B13, B_ptr_ln1_x ; line 1 pointer, data 4 to 7
.asg B14, B_ptr_ln2_x ; line 2 pointer, data 4 to 7
.asg B15, B_SP ; stack pointer, B datapath
.asg B16, B_ptr_ln3_x ; line 3 pointer, data 4 to 7
.asg B17, B_n_x ; copy of length of line
.asg B18, B_block ; amount of data in buffer - 8
.asg B19, B_zero ; constant of 0x00000000
.asg B20, B_p25 ; temp sum of products
.asg B20, B_x15x14 ; line 1 data 5:4
.asg B21, B_p04 ; temp sum of products
.asg B21, B_p204 ; temp sum of products
.asg B21, B_x17x16 ; line 1 data 7:6
.asg B22, B_t_y54 ; packed outputs 5:4
.asg B22, B_x35x34 ; line 3 data 5:4
.asg B22, B_x37x27 ; line 2:3 data 7
.asg B23, B_t_y76 ; packed outputs 7:6
.asg B23, B_x37x36 ; line 3 data 7:6
.asg B24, B_x05x04 ; line 0 data 5:4
.asg B25, B_hh_i ; values in rotated filter
.asg B25, B_x07x06 ; line 0 data 7:6
.asg B25, B_x16x06 ; line 0:1 data 6
.asg B26, B_x25x24 ; line 2 data 5:4
.asg B27, B_x27x26 ; line 2 data 7:6
.asg B28, B_y6 ; accumulator 6
.asg B29, B_y7 ; accumulator 7
.asg B30, B_y4 ; accumulator 4
.asg B31, B_y5 ; accumulator 5
*==============================================================================*
;-
; Prologue and setup for the filter-rotate loop (LOOP_Y):
;   * saves B14, A15:A14, B13:B12, A13:A12, B11:B10, A11:A10 and the
;     return-address/CSR pair on the stack,
;   * clears the GIE bit in CSR to disable interrupts,
;   * copies l_hh, start_ln, n_x and mod_hh to the opposite datapath,
;   * computes the starting filter pointer and the line 0 / line 1 pointers.
; Lines beginning with || execute in parallel with the instruction above them.
;
; The branch targets BLOCK, the label immediately after the next five execute
; packets; the original comment says it is issued to block interrupts while
; those packets run.
B .S2 BLOCK ; block interrupts
; Store B14 at the current stack pointer, then post-decrement B_SP by 14 to
; allocate the save area. A_SP receives a copy of the stack pointer so both
; datapaths can issue stores in the same cycle.
; NOTE(review): the MV is in the same execute packet as the post-decrement, so
; A_SP presumably holds the pre-decrement value, consistent with the
; negative A_SP offsets / positive B_SP offsets used below -- confirm.
STW .D2T2 B14, *B_SP--[14] ; Save B14, allocate stack frame
|| MV B_SP, A_SP ; Twin stack pointer on the A side
STDW .D1T1 A15:A14, *-A_SP[1] ; Save A15, A14
|| STDW .D2T2 B13:B12, *+B_SP[5] ; Save B13, B12
|| MVC .S2 CSR, B_csr ; Remember CSR
STDW .D1T1 A13:A12, *-A_SP[3] ; Save A13, A12
|| STDW .D2T2 B11:B10, *+B_SP[3] ; Save B11, B10
|| AND B_csr, -2, B_no_gie ; Clear bit 0 (GIE) in the CSR copy
;-
STDW .D1T1 A11:A10, *-A_SP[5] ; Save A11, A10
|| STDW .D2T2 B_ret:B_csr,*+B_SP[1] ; Save return address and original CSR
|| MVC .S2 B_no_gie, CSR ; Disable interrupts
|| MV .L1X B_l_hh, A_l_hh ; A_l_hh = l_hh (A-side copy)
|| MV .L2X A_start_ln, B_start_ln ; B_start_ln = start_ln (B-side copy)
; B_start_ln shares register B0 with B_j, so it also serves as the initial
; value of the j counter decremented in LOOP_Y.
ADDAH .D2 B_ptr_hh, B_l_hh, B_filter ; B_filter = ptr_hh + l_hh halfwords
|| MV .L2X A_n_x, B_n_x ; B_n_x = n_x (B-side copy of line width)
||[!B_start_ln]MV.S2 B_l_hh, B_start_ln ; if (start_ln == 0) start_ln = l_hh
BLOCK:
SUBAH .D2 B_filter, B_start_ln, B_filter ; B_filter = ptr_hh + l_hh - start_ln (halfwords)
|| ADD .S2X A_ptr_plane_x, 8, B_ptr_ln0_x ; B_ptr_ln0_x = plane_x + 8 bytes (B-side line 0 pointer)
|| MV .S1 A_ptr_plane_x, A_ptr_ln0_x ; A_ptr_ln0_x = plane_x (A-side line 0 pointer)
SUB .L2 B_l_hh, 1, B_k ; B_k = l_hh - 1, branch counter for LOOP_Y
|| MV .S2X A_mod_hh, B_mod_hh ; B_mod_hh = mod_hh (B-side copy of rotated filter pointer)
|| ADDAH .D2 B_ptr_ln0_x, B_n_x, B_ptr_ln1_x ; B_ptr_ln1_x = B_ptr_ln0_x + n_x halfwords (next line)
*==============================================================================*
LOOP_Y:
LDH .D2T2 *B_filter++,B_hh_i ;load filter tap
||[ B_k]B .S2 LOOP_Y ;for(i=0;i<l_hh;i++){
SUB .D2 B_j, 1, B_j ;j--
[!B_j]MV .L2 B_ptr_hh, B_filter ;if(!j)filter= ptr_hh
|| MPY .M2X B_n_x, A_l_hh, B_block ;block=n_x*l_hh shorts
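* NOTE: the listing is cut off at this point. The text that followed here was
* keyboard-shortcut help from the web code viewer (copy, search, full screen,
* theme toggle, show shortcuts, font size up/down), not assembly source.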