⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 scale_vert_h.asm

📁 基于DM642平台的视频缩小放大功能 程序源代码
💻 ASM
📖 第 1 页 / 共 3 页
字号:

  ; Last three execute packets before the loop prolog. They derive the input
  ; line pointers ln1..ln3 on the A side and ln2..ln3 on the B side, store
  ; B_hh_i through B_mod_hh, and point both A_hh and A_filter at A_mod_hh.
  ; ADDAH scales its offset operand by 2 bytes, so each derived pointer is
  ; n_x halfwords beyond the previous one.
  ; NOTE(review): the BRANCH OCCURS marker below refers to a branch issued
  ; before this excerpt - confirm against the preceding page of the listing.
  [!B_j]MV      .S2   A_l_hh,     B_j                     ;if(!j) j = l_hh
||      ADDAH   .D2   B_ptr_ln1_x, B_n_x,     B_ptr_ln2_x ;B: ln2 = ln1 + n_x halfwords
||      ADDAH   .D1   A_ptr_ln0_x, A_n_x,     A_ptr_ln1_x ;A: ln1 = ln0 + n_x halfwords
;=
  [ B_k]SUB     .L2   B_k,        1,          B_k         ;if(k) k--
||      ADDAH   .D2   B_ptr_ln2_x, B_n_x,     B_ptr_ln3_x ;B: ln3 = ln2 + n_x halfwords
||      ADDAH   .D1   A_ptr_ln1_x, A_n_x,     A_ptr_ln2_x ;A: ln2 = ln1 + n_x halfwords
||      MV      .S1   A_mod_hh,   A_hh                    ;hh = mod_hh (new filter start)

        STH     .D2T2 B_hh_i,     *B_mod_hh++             ;store rotated value, advance ptr
||      ADDAH   .D1   A_ptr_ln2_x, A_n_x,     A_ptr_ln3_x ;A: ln3 = ln2 + n_x halfwords
||      MPY     .M2   0,          B_zero,     B_zero      ;zero = 0 (multiply by 0)
||      MV      .L1   A_mod_hh,   A_filter                ;filter = mod_hh
        ; BRANCH OCCURS
*==============================================================================*
; Collapsed prolog for LOOPX. It issues the first doubleword load (index 0)
; from each of the four input lines on both sides, clears the accumulator
; pairs, sets up the loop counter B_i and the output pointers, and branches
; into the kernel on four consecutive cycles.
; A_taps_ and B_pro are set to 1 here, so kernel instructions predicated on
; [!A_taps_] and [!B_pro] are disabled on entry; the kernel itself later
; clears B_pro and rewrites A_taps_.
; NOTE(review): the "LABEL+offset" branch targets are byte offsets into kernel
; execute packets, presumably to skip instructions whose inputs are not yet
; loaded - confirm against the assembled packet layout.
; NOTE(review): the original comment on the MPY read "n_y*l_hh", but the
; instruction multiplies B_l_hh by B_n_x - confirm which is intended.
        LDDW    .D2T2 *B_ptr_ln0_x[0], B_x07x06:B_x05x04  ;x7654=*(ptr_ln0_x+0)
||      LDDW    .D1T1 *A_ptr_ln0_x[0], A_x03x02:A_x01x00  ;x3210=*(ptr_ln0_x+0)
||      SHRU    .S2   B_block,    2,          B_block     ;block >>= 2 (double words)

        LDDW    .D2T2 *B_ptr_ln2_x[0], B_x27x26:B_x25x24  ;x7654=*(ptr_ln2_x+0)
||      LDDW    .D1T1 *A_ptr_ln3_x[0], A_x33x32:A_x31x30  ;x3210=*(ptr_ln3_x+0)
||      B       .S1   LOOPX7+20                           ;collapse prolog
||      SUB     .S2   B_block,    2,          B_block     ;block -= 2
||      MPY     .M2   B_l_hh,     B_n_x,      B_i         ;i = l_hh * n_x (loop-count seed)
;=
        LDDW    .D2T2 *B_ptr_ln1_x[0], B_x17x16:B_x15x14  ;x7654=*(ptr_ln1_x+0)
||      LDDW    .D1T1 *A_ptr_ln2_x[0], A_x23x22:A_x21x20  ;x3210=*(ptr_ln2_x+0)
||      B       .S2   LOOPX8+16                           ;collapse prolog

        LDDW    .D2T2 *B_ptr_ln3_x[0], B_x37x36:B_x35x34  ;x7654=*(ptr_ln3_x+0)
||      B       .S1   LOOPX9+16                           ;collapse prolog
||      ADD     .S2   B_ptr_plane_y, 8,       B_ptr_ln_y  ;B: ptr_ln_y = plane_y + 8
||      ZERO    .L2   B_y7:B_y6                           ;y7 = y6 = 0x0
||      ZERO    .L1   A_y3:A_y2                           ;y3 = y2 = 0x0
;=
        LDDW    .D1T1 *A_ptr_ln1_x[0], A_x13x12:A_x11x10  ;x3210=*(ptr_ln1_x+0)
||      B       .S1   LOOPX                               ;collapse prolog
||      MV      .L1X  B_ptr_plane_y,          A_ptr_ln_y  ;A: ptr_ln_y = plane_y

        MVK     .S1   1,          A_taps_                 ;taps_ = 1 (prolog flag)
||      MPY     .M1   0,          A_ka,       A_ka        ;A: ka = 0 (multiply by 0)
||      ZERO    .L1   A_y1:A_y0                           ;y0 = y1 = 0x0
||      ZERO    .L2   B_y5:B_y4                           ;y4 = y5 = 0x0
||      SHRU    .S2   B_i,        5,          B_i         ;i = i/32
;=
        LDDW    .D1T1 *A_filter++[1],        A_h3h2:A_h1h0;h3h2h1h0=*filter++
||      SUB     .S1   A_l_hh,     4,          A_taps      ;taps = l_hh - 4
||      MV      .L1   A_n_x,      A_n_x_                  ;n_x_ = n_x (copy cols)
||      ZERO    .D2   B_ka                                ;B: ka = 0
||      MVK     .S2   1,          B_pro                   ;pro = 1 (prolog flag)
||      SUB     .L2   B_i,        1,          B_i         ;i-- (BDEC loop counter)
*============================== PIPE LOOP KERNEL ==============================*
; One pass through LOOPX is ten execute packets. Each pass:
;   - loads one doubleword from each of four input lines on both the A and B
;     sides, indexed by ka (columns 0-3 on A, columns 4-7 on B);
;   - pairs halfwords from two lines with PACK2/PACKH2 so that a single DOTP2
;     multiplies them by the packed coefficients h1h0 or h3h2;
;   - adds the two partial products per column (p2xx) and accumulates them
;     into y0..y7 (the [!B_pro] accumulates are disabled until B_pro is
;     cleared inside the loop).
; When A_taps_ is zero, the packed high halves of the accumulators are stored
; with STDW on both sides and the accumulators are cleared.
; When A_taps reaches zero, taps/filter are reloaded from l_hh/hh and ka is
; reduced by block.
; BDEC is in the fifth packet; the five packets after it are its delay slots.
LOOPX:
 [!B_pro]ADD    .L2   B_y7,       B_p207,   B_y7          ;col 7 accumulator
||      PACK2   .S2   B_x17x16,   B_x07x06, B_x16x06      ;transpose 2x2 data
||      PACK2   .S1   A_x13x12,   A_x03x02, A_x12x02      ;transpose 2x2 data
||      PACK2   .L1   A_x31x30,   A_x21x20, A_x30x20      ;transpose 2x2 data
||      LDDW    .D2T2 *B_ptr_ln0_x[B_ka],B_x07x06:B_x05x04;x7654=*(ptr_ln0_x+ka)
||      LDDW    .D1T1 *A_ptr_ln0_x[A_ka],A_x03x02:A_x01x00;x3210=*(ptr_ln0_x+ka)

  [!A_taps_]MPY2.M2   B_zero,     B_zero,   B_y7:B_y6     ;if(!taps_) y7:y6 = 0:0
||      ADD     .S2   B_p26,      B_p06,    B_p206        ;4 pt filter 6 sum
||      ADD     .L2   B_p24,      B_p04,    B_p204        ;4 pt filter 4 sum
||      ADD     .L1   A_p20,      A_p00,    A_p200        ;4 pt filter 0 sum
||      DOTP2   .M1   A_x12x02,   A_h1h0,   A_p02         ;x[1,2]x[0,2]'h[1]h[0]
||      PACK2   .S1   A_x11x10,   A_x01x00, A_x10x00      ;transpose 2x2 data
||      LDDW    .D2T2 *B_ptr_ln2_x[B_ka],B_x27x26:B_x25x24;x7654=*(ptr_ln2_x+ka)
||      LDDW    .D1T1 *A_ptr_ln3_x[A_ka],A_x33x32:A_x31x30;x3210=*(ptr_ln3_x+ka)

  [!B_pro]ADD   .L2   B_y6,       B_p206,   B_y6          ;col 6 accumulator
||[!B_pro]ADD   .L1   A_y0,       A_p200,   A_y0          ;col 0 accumulator
||      DOTP2   .M2X  B_x36x26,   A_h3h2,   B_p26         ;x[3,6]x[2,6]'h[3]h[2]
||      PACK2   .S2   B_x15x14,   B_x05x04, B_x14x04      ;transpose 2x2 data
||      PACKH2  .S1   A_x33x32,   A_x23x22, A_x33x23      ;transpose 2x2 data
||      DOTP2   .M1   A_x10x00,   A_h1h0,   A_p00         ;x[1,0]x[0,0]'h[1]h[0]
||      LDDW    .D2T2 *B_ptr_ln1_x[B_ka],B_x17x16:B_x15x14;x7654=*(ptr_ln1_x+ka)
||      LDDW    .D1T1 *A_ptr_ln2_x[A_ka],A_x23x22:A_x21x20;x3210=*(ptr_ln2_x+ka)

        ADD     .S1   A_p23,      A_p03,    A_p203        ;4 pt filter 3 sum
||      ADD     .D1   A_p21,      A_p01,    A_p201        ;4 pt filter 1 sum
||      PACKH2  .S2   B_x37x36,   B_x27x26, B_x37x27      ;transpose 2x2 block
||      DOTP2   .M2X  B_x16x06,   A_h1h0,   B_p06         ;x[1,6]x[0,6]'h[1]h[0]
||      PACK2   .L2   B_x35x34,   B_x25x24, B_x34x24      ;transpose 2x2 block
||      DOTP2   .M1   A_x33x23,   A_h3h2,   A_p23         ;x[3,3]x[2,3]'h[3]h[2]
||      PACK2   .L1   A_x33x32,   A_x23x22, A_x32x22      ;transpose 2x2 block
||      LDDW    .D2T2 *B_ptr_ln3_x[B_ka],B_x37x36:B_x35x34;x7654=*(ptr_ln3_x+ka)

        BDEC    .S2   LOOPX,      B_i                     ;loop back, i--
||      PACKH2  .L2   B_y7,       B_y6,     B_t_y76       ;data ready for store
||      ADD     .D2   B_p25,      B_p05,    B_p205        ;4 pt filter 5 sum
||[!B_pro]ADD   .L1   A_y1,       A_p201,   A_y1          ;col 1 accumulator
||      DOTP2   .M2X  B_x37x27,   A_h3h2,   B_p27         ;x[3,7]x[2,7]'h[3]h[2]
||      DOTP2   .M1   A_x32x22,   A_h3h2,   A_p22         ;x[3,2]x[2,2]'h[3]h[2]
||      PACKH2  .S1   A_x11x10,   A_x01x00, A_x11x01      ;transpose 2x2 block
||      LDDW    .D1T1 *A_ptr_ln1_x[A_ka],A_x13x12:A_x11x10;x3210=*(ptr_ln1_x+ka)

        PACKH2  .S1   A_y1,       A_y0,     A_t_y10       ;data ready for store
||[!B_pro]ADD   .L2   B_y5,       B_p205,   B_y5          ;col 5 accumulator
||[!B_pro]ADD   .D2   B_y4,       B_p204,   B_y4          ;col 4 accumulator
||[!B_pro]ADD   .D1   A_y3,       A_p203,   A_y3          ;col 3 accumulator
||      DOTP2   .M2X  B_x17x07,   A_h1h0,   B_p07         ;x[1,7]x[0,7]'h[1]h[0]
||[!A_taps]MV   .L1   A_l_hh:A_hh,         A_taps:A_filter;if(!taps){taps=l_hh;filter=hh}
||      MVD     .M1   A_taps,     A_taps_                 ;taps_ = taps (delayed move)

  [!A_taps_]ZERO.L2   B_y5:B_y4                           ;if(!taps_) y5:y4 = 0:0
||[!A_taps_]ZERO.L1   A_y1:A_y0                           ;if(!taps_) y1:y0 = 0:0
||      ZERO    .D2   B_pro                               ;pro = 0 (prolog collapsed)
||      PACKH2  .S2   B_y5,       B_y4,     B_t_y54       ;data ready for store
||      DOTP2   .M2X  B_x34x24,   A_h3h2,   B_p24         ;x[3,4]x[2,4]'h[3]h[2]
||      DOTP2   .M1   A_x30x20,   A_h3h2,   A_p20         ;x[3,0]x[2,0]'h[3]h[2]
||      ADD     .S1   A_taps,     -4,       A_taps        ;taps -= 4
||      LDDW    .D1T1 *A_filter++[1],       A_h3h2:A_h1h0 ;h3h2h1h0=*filter++
LOOPX7:
  [!A_taps_]STDW.D2T2 B_t_y76:B_t_y54, *B_ptr_ln_y++[2]   ;if(!taps_) *ln_y++ = y7..y4
||[!A_taps_]ZERO.L1   A_y3:A_y2                           ;if(!taps_) y3:y2 = 0:0
||      PACKH2  .S1   A_y3,       A_y2,     A_t_y32       ;data ready for store
||      DOTP2   .M2X  B_x14x04,   A_h1h0,   B_p04         ;x[1,4]x[0,4]'h[1]h[0]
||      DOTP2   .M1   A_x31x21,   A_h3h2,   A_p21         ;x[3,1]x[2,1]'h[3]h[2]
||      PACKH2  .L2   B_x17x16,   B_x07x06, B_x17x07      ;transpose 2x2 block
||[!A_taps]SUB  .S2   B_ka,       B_block,  B_ka          ;if(!taps) ka -= block
LOOPX8:
  [!A_taps_]STDW.D1T1 A_t_y32:A_t_y10, *A_ptr_ln_y++[2]   ;if(!taps_) *ln_y++ = y3..y0
||      DOTP2   .M2X  B_x15x05,   A_h1h0,   B_p05         ;x[1,5]x[0,5]'h[1]h[0]
||      ADD     .L1   A_p22,      A_p02,    A_p202        ;4 pt filter 2 sum
||      DOTP2   .M1   A_x11x01,   A_h1h0,   A_p01         ;x[1,1]x[0,1]'h[1]h[0]
||      PACK2   .L2   B_x37x36,   B_x27x26, B_x36x26      ;transpose 2x2 block
||      PACKH2  .S2   B_x15x14,   B_x05x04, B_x15x05      ;transpose 2x2 block
||      PACKH2  .S1   A_x31x30,   A_x21x20, A_x31x21      ;transpose 2x2 block
LOOPX9:
        ADD     .L2   B_p27,      B_p07,    B_p207        ;4 pt filter 7 sum
||      DOTP2   .M2X  B_x35x25,   A_h3h2,   B_p25         ;x[3,5]x[2,5]'h[3]h[2]
||      ADD     .D1   A_y2,       A_p202,   A_y2          ;col 2 accumulator
||      DOTP2   .M1   A_x13x03,   A_h1h0,   A_p03         ;x[1,3]x[0,3]'h[1]h[0]
||      PACKH2  .S2   B_x35x34,   B_x25x24, B_x35x25      ;transpose 2x2 block
||      PACKH2  .S1   A_x13x12,   A_x03x02, A_x13x03      ;transpose 2x2 block
||      ADD     .D2   B_ka,       B_n_x,    B_ka          ;B: ka += B_n_x
||      ADD     .L1X  A_n_x_,     B_ka,     A_ka          ;A: ka = A_n_x_ + B_ka
*============================= PIPE LOOP EPILOG ===============================*
; Function exit: reload the return address, the saved CSR and the saved
; registers A10-A15 / B10-B14 from the stack frame, release the frame, and
; return. A_SP is a copy of B_SP so loads can issue on both data paths.
; The final LDW pre-increments B_SP by 14 words, which releases the frame.
; BNOP supplies four of the branch delay slots; the MVC that restores CSR
; executes in the remaining one, just before the branch takes effect.
        LDDW    .D2T2 *+B_SP[1],  B_ret:B_csr             ; Get return addr, CSR
||      MV            B_SP,       A_SP                    ; Twin Stack Ptr

        LDDW    .D1T1 *+A_SP[2],  A11:A10                 ; Restore A11, A10
||      LDDW    .D2T2 *+B_SP[3],  B11:B10                 ; Restore B11, B10

        LDDW    .D1T1 *+A_SP[4],  A13:A12                 ; Restore A13, A12
||      LDDW    .D2T2 *+B_SP[5],  B13:B12                 ; Restore B13, B12

        LDDW    .D1T1 *+A_SP[6],  A15:A14                 ; Restore A15, A14

        LDW     .D2T2 *++B_SP[14],B14                     ; Restore B14, ...
                                                          ; ...release stack
        BNOP    .S2   B_ret,      4                       ; Return to caller

        MVC     .S2   B_csr,      CSR                     ; Restore CSR

; ===== Branch Occurs =====
; ===== Interruptibility state restored here =====

* ========================================================================= *
*   End of file:  scale_vert_h.asm                                          *
* ------------------------------------------------------------------------- *
*             Copyright (c) 2000 Texas Instruments, Incorporated.           *
*                            All Rights Reserved.                           *
* ========================================================================= *

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -