📄 mpegimda-60.a
字号:
;------------------------------------------------------------------------------
;
; File   : MPEGIMDA.a
;
; Author : Stéphane TAVENARD
;
; $VER: MPEGIMDA.a 0.1 (10/05/1997)
;
; (C) Copyright 1997-1997 Stéphane TAVENARD
; All Rights Reserved
;
; #Rev| Date      | Comment
; ----|----------|--------------------------------------------------------
; 0   |04/03/1997| Initial revision                                     ST
; 1   |10/05/1997| use of link instead of static vars                   ST
;
; ------------------------------------------------------------------------
;
; MPEG IMDCT optimized !
;
; Motorola 680x0 assembly (uses a 68020+ scaled-index addressing mode in
; the long-block window lookup below).
; NOTE(review): this chunk is truncated — the body of imdct_l continues,
; and imdct_s plus the imdct_win* window tables are defined, beyond the
; visible end of this view.
;
;------------------------------------------------------------------------------

        XDEF    @MPEGIMDA_hybrid
        XDEF    _MPEGIMDA_hybrid

        section ASMCODE,code

; Fixed-point shift applied to IMDCT/window products (see IMDCT_FIX, W, WP).
IMDCT_BITS equ 14

; Perform an IMDCT
;
; a0: in array (16-bit)
; a1: out array (16-bit)
; a2: prev block (16-bit)     overlap-add state carried between granules
; d0.w: block type
; d1.w: mixed (0 or 1)
; d2.w: sb_max                number of subbands to transform
;
; Runs one IMDCT per subband: long blocks through imdct_l, short blocks
; (block type 2) through imdct_s, alternating even/odd window tables per
; subband.  For a "mixed" granule the first two subbands are forced to the
; long window 0 tables.  Bands d5..31 past sb_max are flushed by
; overlap-adding the previous block with zero (MPEGIMDA_h5/h6).
; d5.w is the current subband index throughout.
@MPEGIMDA_hybrid
_MPEGIMDA_hybrid
        movem.l d2-d7/a2-a6,-(sp)
        move.l  a2,a3                   ; a3 = prev block
        clr.w   d5                      ; d5 = subband index = 0
        tst.w   d1
        beq.b   MPEGIMDA_h1
        ; mixed -> sb 0 & 1 to win 0
        lea     imdct_win0,a2
        movem.w d0/d2,-(sp)             ; block type / sb_max survive the call
        bsr.w   imdct_l
        movem.w (sp)+,d0/d2
        add.l   #2*18,a0                ; in += 18
        addq.l  #2,a1                   ; out++
        add.l   #2*18,a3                ; prev += 18
        addq.w  #1,d5
        cmp.w   d2,d5
        bge.w   MPEGIMDA_h5             ; end of imdct
        lea     imdct_win0_odd,a2
        movem.w d0/d2,-(sp)
        bsr.w   imdct_l
        movem.w (sp)+,d0/d2
        add.l   #2*18,a0                ; in += 18
        addq.l  #2,a1                   ; out++
        add.l   #2*18,a3                ; prev += 18
        addq.w  #1,d5
        cmp.w   d2,d5
        bge.w   MPEGIMDA_h5             ; end of imdct
MPEGIMDA_h1
        cmp.w   #2,d0
        beq.b   MPEGIMDA_h3             ; short blocks
        ; Long blocks — window pointer fetched from imdct_win[block_type]
MPEGIMDA_h2
        lea     imdct_win,a2
        move.l  (a2,d0.w*4),a2          ; a2 = imdct_win[d0]  (68020 scaled index)
        movem.w d0/d2,-(sp)
        bsr.w   imdct_l
        movem.w (sp)+,d0/d2
        add.l   #2*18,a0                ; in += 18
        addq.l  #2,a1                   ; out++
        add.l   #2*18,a3                ; prev += 18
        addq.w  #1,d5
        cmp.w   d2,d5
        bge.b   MPEGIMDA_h5             ; end of imdct
        lea     imdct_win_odd,a2
        move.l  (a2,d0.w*4),a2          ; a2 = imdct_win_odd[d0]
        movem.w d0/d2,-(sp)
        bsr.w   imdct_l
        movem.w (sp)+,d0/d2
        add.l   #2*18,a0                ; in += 18
        addq.l  #2,a1                   ; out++
        add.l   #2*18,a3                ; prev += 18
        addq.w  #1,d5
        cmp.w   d2,d5
        bge.b   MPEGIMDA_h5             ; end of imdct
        bra.b   MPEGIMDA_h2
        ; Short blocks
MPEGIMDA_h3
        lea     imdct_win2,a2
        movem.w d0/d2,-(sp)
        bsr.w   imdct_s
        movem.w (sp)+,d0/d2
        add.l   #2*18,a0                ; in += 18
        addq.l  #2,a1                   ; out++
        add.l   #2*18,a3                ; prev += 18
        addq.w  #1,d5
        cmp.w   d2,d5
        bge.b   MPEGIMDA_h5             ; end of imdct
        lea     imdct_win2_odd,a2
        movem.w d0/d2,-(sp)
        bsr.w   imdct_s
        movem.w (sp)+,d0/d2
        add.l   #2*18,a0                ; in += 18
        addq.l  #2,a1                   ; out++
        add.l   #2*18,a3                ; prev += 18
        addq.w  #1,d5
        cmp.w   d2,d5
        bge.b   MPEGIMDA_h5             ; end of imdct
        bra.b   MPEGIMDA_h3
        ; End of imdct -> overlap with 0 rest of bands
        ; (for bands d5..31: out gets the pending prev samples, prev is zeroed)
MPEGIMDA_h5
        cmp.w   #32,d5
        bge.b   MPEGIMDA_h7
        clr.l   d1                      ; d1 = 0, stored into prev below
MPEGIMDA_h6
        ; out is interleaved 32 subbands wide: sample i lands at out[i*32].
        ; Unrolled over the 18 samples of one subband.
        move.w  (a3),0*2*32(a1)
        move.w  d1,(a3)+
        move.w  (a3),1*2*32(a1)
        move.w  d1,(a3)+
        move.w  (a3),2*2*32(a1)
        move.w  d1,(a3)+
        move.w  (a3),3*2*32(a1)
        move.w  d1,(a3)+
        move.w  (a3),4*2*32(a1)
        move.w  d1,(a3)+
        move.w  (a3),5*2*32(a1)
        move.w  d1,(a3)+
        move.w  (a3),6*2*32(a1)
        move.w  d1,(a3)+
        move.w  (a3),7*2*32(a1)
        move.w  d1,(a3)+
        move.w  (a3),8*2*32(a1)
        move.w  d1,(a3)+
        move.w  (a3),9*2*32(a1)
        move.w  d1,(a3)+
        move.w  (a3),10*2*32(a1)
        move.w  d1,(a3)+
        move.w  (a3),11*2*32(a1)
        move.w  d1,(a3)+
        move.w  (a3),12*2*32(a1)
        move.w  d1,(a3)+
        move.w  (a3),13*2*32(a1)
        move.w  d1,(a3)+
        move.w  (a3),14*2*32(a1)
        move.w  d1,(a3)+
        move.w  (a3),15*2*32(a1)
        move.w  d1,(a3)+
        move.w  (a3),16*2*32(a1)
        move.w  d1,(a3)+
        move.w  (a3),17*2*32(a1)
        move.w  d1,(a3)+
        addq.l  #2,a1                   ; out++
        addq.w  #1,d5
        cmp.w   #32,d5
        blt.b   MPEGIMDA_h6
MPEGIMDA_h7
        movem.l (sp)+,d2-d7/a2-a6
        rts

; IMDCT cosine constants.  The values match
;   K(i) = round( 2^IMDCT_BITS * cos( (2*i+1) * pi/72 ) )
; e.g. K0 = 16384*cos(pi/72) = 16368, K17 = 16384*cos(35*pi/72) = 715.
K0      set     16368
K1      set     16244
K2      set     15996
K3      set     15626
K4      set     15137
K5      set     14533
K6      set     13818
K7      set     12998
K8      set     12080
K9      set     11069
K10     set     9974
K11     set     8803
K12     set     7565
K13     set     6270
K14     set     4927
K15     set     3546
K16     set     2139
K17     set     715

; MUL32 <src>,<reg>: 32-bit fixed-point multiply, result scaled down by
; d6 (= IMDCT_BITS) bits.  Not used in the code visible in this chunk.
MUL32   MACRO
        muls.l  \1,\2                   ; ##7
        asr.l   d6,\2                   ; ##7
        ENDM

; S a, <dummy reg>, <dest reg>
; performs: (INT32)x[ a ] - (INT32)x[ 11-a ] - (INT32)x[ 12+a ]
; (offsets: a*2, 22-a*2, 24+a*2 bytes into the 16-bit x array at a0)
;
S       MACRO
        move.w  \1*2(a0),\3
        ext.l   \3
        move.w  22-\1*2(a0),\2
        ext.l   \2
        sub.l   \2,\3
        move.w  24+\1*2(a0),\2
        ext.l   \2
        sub.l   \2,\3
        ENDM

; M xi, Kx, <dest reg>
; performs: ((INT32)x[ xi ] * (Kx))
;
M       MACRO
        move.w  \1*2(a0),\3
        muls.w  #\2,\3
        ENDM

;
; M_ADD xi, Kx
; performs: M xi, Kx, d0
;           add.l d0,d3          (accumulates into d3)
;
M_ADD   MACRO
        M       \1,\2,d0
        add.l   d0,d3
        ENDM

;
; M_SUB xi, Kx
; performs: M xi, Kx, d0
;           sub.l d0,d3          (subtracts from accumulator d3)
;
M_SUB   MACRO
        M       \1,\2,d0
        sub.l   d0,d3
        ENDM

; MT ti, Kx, <dest reg>
; performs: (t[ ti ] * (Kx))     (t is the 32-bit temp array at a3)
;
MT      MACRO
        move.l  \1*4(a3),\3
        muls.w  #\2,\3
        ENDM

;
; MT_ADD ti, Kx
; performs: MT ti, Kx, d0
;           add.l d0,d3
;
MT_ADD  MACRO
        MT      \1,\2,d0
        add.l   d0,d3
        ENDM

;
; MT_SUB ti, Kx
; performs: MT ti, Kx, d0
;           sub.l d0,d3
;
MT_SUB  MACRO
        MT      \1,\2,d0
        sub.l   d0,d3
        ENDM

;
; IMDCT_FIX <reg>
; performs <reg> = <reg> >> IMDCT_BITS   (d6 holds IMDCT_BITS)
;
IMDCT_FIX MACRO
        asr.l   d6,\1
        ENDM

; W <reg>, wi -> <reg> -> out[ wi ]
; performs: (<reg> * win[ wi ]) >> WIN_BITS + prev[ wi ] -> out[ wi ]
; (out is 32 subbands wide, hence the *2*32 byte stride on a1)
;
W       MACRO
        muls.w  \2*2(a2),\1
        asr.l   d6,\1
        add.w   \2*2(a5),\1
        move.w  \1,\2*2*32(a1)
        ENDM

; WP <reg>, wi -> <reg> -> prev[ wi ]
; performs: (<reg> * win[ wi ]) >> WIN_BITS -> prev[ wi-18 ]
;
WP      MACRO
        muls.w  \2*2(a2),\1
        asr.l   d6,\1
        move.w  \1,\2*2-36(a5)
        ENDM

;
; IMDCT for Long blocks
;
; a0: input x array (16-bit)
; a1: output out (16-bit)
; a2: window array (16-bit)
; a3: prev array (32-bit)
;     NOTE(review): W/WP above access prev through a5 as 16-bit words;
;     the "(32-bit)" in the original header looks doubtful — confirm
;     against the full source.
imdct_l
        link    a6,#-10*4               ; need 4+6 longs of frame temps
        move.l  a3,a5                   ; a5 = prev (a3 is reused below)
        lea     -4*4(a6),a3             ; t needs 4 longs
        lea     -6*4(a3),a4             ; s needs 6 longs
;       lea     imdct_sum_t,a3          ; (rev 1: static buffers replaced by frame)
;       lea     imdct_sum_s,a4
        moveq.l #IMDCT_BITS,d6          ; d6 = shift count used by macros
        ; k1/k2 are common subexpressions reused across the s[] terms
        M       4,K13,d1
        M       13,K4,d0
        sub.l   d0,d1                   ; k1 = M( 4, K13 ) - M( 13, K4 )
        M       4,K4,d2
        M       13,K13,d0
        add.l   d0,d2                   ; k2 = M( 4, K4 ) + M( 13, K13 )
; s[ 0 ] = -M( 1, K7 ) + k1 + M( 7, K1 ) + M( 10, K16 ) - M( 16, K10 )
        M       7,K1,d3
        M_SUB   1,K7
        M_ADD   10,K16
        M_SUB   16,K10
        add.l   d1,d3
        move.l  d3,0*4(a4)
; s[ 1 ] = -M( 1, K4 ) - k1 + M( 7, K13 ) + M( 10, K4 ) + M( 16, K13 )
        M       7,K13,d3
        M_SUB   1,K4
        M_ADD   10,K4
        M_ADD   16,K13
        sub.l   d1,d3
        move.l  d3,1*4(a4)
; s[ 2 ] = -M( 1, K1 ) - k2 - M( 7, K7 ) - M( 10, K10 ) - M( 16, K16 )
        M       7,K7,d3
        neg.l   d3
        M_SUB   1,K1
        M_SUB   10,K10
        M_SUB   16,K16
        sub.l   d2,d3
        move.l  d3,2*4(a4)
; s[ 3 ] = -M( 1, K10 ) + k2 + M( 7, K16 ) - M( 10, K1 ) + M( 16, K7 )
        M       7,K16,d3
        M_SUB   1,K10
        M_SUB   10,K1
        M_ADD   16,K7
        add.l   d2,d3
        move.l  d3,3*4(a4)
; s[ 4 ] = -M( 1, K13 ) + k2 - M( 7, K4 ) + M( 10, K13 ) - M( 16, K4 )
        M       10,K13,d3
        M_SUB   7,K4
        M_SUB   1,K13
        M_SUB   16,K4
        add.l   d2,d3
        move.l  d3,4*4(a4)
; s[ 5 ] = -M( 1, K16 ) + k1 - M( 7, K10 ) + M( 10, K7 ) + M( 16, K1 )
        M       10,K7,d3
        M_SUB   7,K10
        M_SUB   1,K16
        M_ADD   16,K1
        add.l   d1,d3
        move.l  d3,5*4(a4)
; S a, <dummy reg>, <dest reg>
        S       0,d0,d3
        move.l  d3,0*4(a3)
; NOTE(review): source is truncated here in this view — the remainder of
; imdct_l (and everything after it) is not visible.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -