📄 coldfire_mpegimda.s
字号:
//
// Windowing/store helper macros used by the IMDCT routines.
//
// Register conventions shared by all W3x macros (taken from their bodies):
//   a1 = out  (16-bit words; the W31/W32/W33 forms use a stride of 32 words)
//   a2 = win  (16-bit window coefficients)
//   a5 = prev (16-bit words)
//   d6 = right-shift count applied to the 32-bit product
//   d7 = scratch, clobbered by W31, W33 and W35
//
// Each macro multiplies the low word of <reg> by win[ wi ] (muls.w) and
// arithmetically shifts the 32-bit product right by d6. <reg> is destroyed.
//
// Comments live outside the #define bodies on purpose: a "//" comment on a
// continued line would swallow the line continuation.
//

// W3 <reg>, wi -> <reg> * win[ wi ] + out[ wi ] -> out[ wi ]
// performs: (<reg> * win[ wi ]) >> WIN_BITS + out[ wi ] -> out[ wi ]
//
// NOTE(review): this still uses a word-sized add to memory; ColdFire's ADD
// is longword-only, so this macro probably cannot be expanded for that
// target. No use of W3 is visible here, so it is left unchanged - confirm.
//
#define W3(p1,p2) \
    muls.w  p2*2(a2),p1 ;\
    asr.l   d6,p1 ;\
    add.w   p1,p2*2(a1)

// W31 <reg>, oi, wi -> <reg> * win[ wi ] + prev[ oi ] -> out[ oi*32 ]
// performs: (<reg> * win[ wi ]) >> WIN_BITS + prev[ oi ] -> out[ oi*32 ]
//
// prev[ oi ] is fetched as a word into d7 before the long add. A long add
// straight from memory would place prev[ oi ] in the upper half and
// prev[ oi+1 ] in the lower half of the operand (big-endian), and only the
// lower half is stored by the final move.w. The upper half of d7 is stale
// after move.w, which is harmless because only the low word of the sum is
// written out.
//
#define W31(p1,p2,p3) \
    muls.w  p3*2(a2),p1 ;\
    asr.l   d6,p1 ;\
    move.w  p2*2(a5),d7 ;\
    add.l   d7,p1 ;\
    move.w  p1,p2*2*32(a1)

// W32 <reg>, oi, wi -> <reg> * win[ wi ] -> out[ oi*32 ]
// performs: (<reg> * win[ wi ]) >> WIN_BITS -> out[ oi*32 ]
//
#define W32(p1,p2,p3) \
    muls.w  p3*2(a2),p1 ;\
    asr.l   d6,p1 ;\
    move.w  p1,p2*2*32(a1)

// W33 <reg>, oi, wi -> <reg> * win[ wi ] + out[ oi*32 ] -> out[ oi*32 ]
// performs: (<reg> * win[ wi ]) >> WIN_BITS + out[ oi*32 ] -> out[ oi*32 ]
//
// Stands in for the single instruction "add.w p1,p2*2*32(a1)": load the
// word into d7, add, store the low word back. The accumulator is out (a1),
// matching the contract above.
//
#define W33(p1,p2,p3) \
    muls.w  p3*2(a2),p1 ;\
    asr.l   d6,p1 ;\
    move.w  p2*2*32(a1),d7 ;\
    add.l   p1,d7 ;\
    move.w  d7,p2*2*32(a1)

// W34 <reg>, oi, wi -> <reg> * win[ wi ] -> prev[ oi ]
// performs: (<reg> * win[ wi ]) >> WIN_BITS -> prev[ oi ]
//
#define W34(p1,p2,p3) \
    muls.w  p3*2(a2),p1 ;\
    asr.l   d6,p1 ;\
    move.w  p1,p2*2(a5)

// W35 <reg>, oi, wi -> <reg> * win[ wi ] + prev[ oi ] -> prev[ oi ]
// performs: (<reg> * win[ wi ]) >> WIN_BITS + prev[ oi ] -> prev[ oi ]
//
// Stands in for the single instruction "add.w p1,p2*2(a5)" (see W33).
//
#define W35(p1,p2,p3) \
    muls.w  p3*2(a2),p1 ;\
    asr.l   d6,p1 ;\
    move.w  p2*2(a5),d7 ;\
    add.l   p1,d7 ;\
    move.w  d7,p2*2(a5)

//
// IMDCT for Short blocks
//
// In:
//   a0: input x array (16-bit)
//   a1: output out (16-bit, written at a stride of 32 words)
//   a2: window array (16-bit)
//   a3: prev array (16-bit words: every access below is a move.w at a
//       2-byte stride)
//
// Out:
//   out[ 0*32 .. 17*32 ] and prev[ 0 .. 11 ] are written.
//   a0 and a3 are restored to their entry values before returning.
//
// Clobbers (visible in this routine): d3, d4, d6, d7, a4, a5.
// The M3 / M3_ADD / M3_SUB / MT / MT_ADD / IMDCT_FIX macros and the K0..K5
// constants are defined outside this section; whatever they clobber is in
// addition to the list above.
//
// Stack frame (link a6,#-16):
//   -8(a6)  : t[0..1]  (2 longs, addressed through a3)
//   -16(a6) : s[0..1]  (2 longs, addressed through a4)
//
// The routine runs the same 6-point computation three times, once per
// interleaved sub-block (a0 advances by one word between steps). Only the
// destination of the windowed results differs between steps:
//   STEP 1: out[6..11]  = result*win + prev[6..11]   (W31)
//           out[12..17] = result*win                 (W32)
//   STEP 2: out[12..17] += result*win                (W33)
//           prev[0..5]  = result*win                 (W34)
//   STEP 3: prev[0..5]  += result*win                (W35)
//           prev[6..11] = result*win                 (W34)
//
imdct_s:
//  move.w  #$F00,$DFF180
    link    a6,#-4*4                // need 2+2 longs
    move.l  a3,a5                   // a5 = prev; a3 is reused for t below
    lea     -2*4(a6),a3             // t needs 2 longs
    lea     -2*4(a3),a4             // s needs 2 longs
//  lea     imdct_sum_t,a3
//  lea     imdct_sum_s,a4

    moveq.l #IMDCT_BITS,d6          // shift count consumed by asr.l d6 in the W3x macros

    // ---------------------------------------------------------------
    // STEP 1
    // ---------------------------------------------------------------

    // O( 0..5 ) = prev[ 0..5 ]
    move.w  0*2(a5),0*2*32(a1)
    move.w  1*2(a5),1*2*32(a1)
    move.w  2*2(a5),2*2*32(a1)
    move.w  3*2(a5),3*2*32(a1)
    move.w  4*2(a5),4*2*32(a1)
    move.w  5*2(a5),5*2*32(a1)

    // Calc s[0..1], t[0..1]
    //
    M3      (1,K1,d3)
    M3_ADD  (4,K4)
    move.l  d3,0*4(a4)              // s[ 0 ] = M3( 1, K1 ) + M3( 4, K4 )
    M3      (1,K4,d3)
    M3_SUB  (4,K1)
    move.l  d3,1*4(a4)              // s[ 1 ] = M3( 1, K4 ) - M3( 4, K1 )

    // x[] holds 16-bit samples, so both operands are loaded as words and
    // sign-extended before the long arithmetic (a long read from x[] would
    // combine two neighbouring samples).
    move.w  0*6(a0),d3
    ext.l   d3
    move.w  3*6(a0),d7
    ext.l   d7
    sub.l   d7,d3
    move.l  d3,0*4(a3)              // t[ 0 ] = x[0*3] - x[3*3]
    move.w  2*6(a0),d3
    ext.l   d3
    move.w  5*6(a0),d7
    ext.l   d7
    add.l   d7,d3
    move.l  d3,1*4(a3)              // t[ 1 ] = x[2*3] + x[5*3]

    // 0
    M3      (0,K3,d3)
    M3_SUB  (2,K5)
    M3_ADD  (3,K0)
    M3_SUB  (5,K2)
    sub.l   0*4(a4),d3
    IMDCT_FIX( d3)
    move.l  d3,d4
    neg.l   d4                      // mirrored output takes the negated value
    W31     (d3,6,0)
    W31     (d4,11,5)

    // 1
    MT      (0,K4,d3)
    MT_ADD  (1,K1)
    sub.l   0*4(a4),d3
    IMDCT_FIX( d3)
    move.l  d3,d4
    neg.l   d4
    W31     (d3,7,1)
    W31     (d4,10,4)

    // 2
    M3      (0,K5,d3)
    M3_ADD  (2,K3)
    M3_SUB  (3,K2)
    M3_SUB  (5,K0)
    sub.l   1*4(a4),d3
    IMDCT_FIX( d3)
    move.l  d3,d4
    neg.l   d4
    W31     (d3,8,2)
    W31     (d4,9,3)

    // 3+3
    M3      (0,K2,d3)
    neg.l   d3
    M3_ADD  (2,K0)
    M3_ADD  (3,K5)
    M3_SUB  (5,K3)
    add.l   1*4(a4),d3
    IMDCT_FIX( d3)
    move.l  d3,d4                   // mirrored output takes the same value
    W32     (d3,12,6)
    W32     (d4,17,11)

    // 4+3
    MT      (0,K1,d3)
    neg.l   d3
    MT_ADD  (1,K4)
    sub.l   1*4(a4),d3
    IMDCT_FIX( d3)
    move.l  d3,d4
    W32     (d3,13,7)
    W32     (d4,16,10)

    // 5+3
    M3      (0,K0,d3)
    neg.l   d3
    M3_SUB  (2,K2)
    M3_SUB  (3,K3)
    M3_SUB  (5,K5)
    sub.l   0*4(a4),d3
    IMDCT_FIX( d3)
    move.l  d3,d4
    W32     (d3,14,8)
    W32     (d4,15,9)

    // ---------------------------------------------------------------
    // STEP 2
    // ---------------------------------------------------------------

    addq.l  #2,a0                   // in++;

    // Calc s[0..1], t[0..1]
    //
    M3      (1,K1,d3)
    M3_ADD  (4,K4)
    move.l  d3,0*4(a4)              // s[ 0 ] = M3( 1, K1 ) + M3( 4, K4 )
    M3      (1,K4,d3)
    M3_SUB  (4,K1)
    move.l  d3,1*4(a4)              // s[ 1 ] = M3( 1, K4 ) - M3( 4, K1 )

    // Word loads + sign extension, as in STEP 1.
    move.w  0*6(a0),d3
    ext.l   d3
    move.w  3*6(a0),d7
    ext.l   d7
    sub.l   d7,d3
    move.l  d3,0*4(a3)              // t[ 0 ] = x[0*3] - x[3*3]
    move.w  2*6(a0),d3
    ext.l   d3
    move.w  5*6(a0),d7
    ext.l   d7
    add.l   d7,d3
    move.l  d3,1*4(a3)              // t[ 1 ] = x[2*3] + x[5*3]

    // 0
    M3      (0,K3,d3)
    M3_SUB  (2,K5)
    M3_ADD  (3,K0)
    M3_SUB  (5,K2)
    sub.l   0*4(a4),d3
    IMDCT_FIX( d3)
    move.l  d3,d4
    neg.l   d4
    W33     (d3,12,0)
    W33     (d4,17,5)

    // 1
    MT      (0,K4,d3)
    MT_ADD  (1,K1)
    sub.l   0*4(a4),d3
    IMDCT_FIX( d3)
    move.l  d3,d4
    neg.l   d4
    W33     (d3,13,1)
    W33     (d4,16,4)

    // 2
    M3      (0,K5,d3)
    M3_ADD  (2,K3)
    M3_SUB  (3,K2)
    M3_SUB  (5,K0)
    sub.l   1*4(a4),d3
    IMDCT_FIX( d3)
    move.l  d3,d4
    neg.l   d4
    W33     (d3,14,2)
    W33     (d4,15,3)

    // 3+3
    M3      (0,K2,d3)
    neg.l   d3
    M3_ADD  (2,K0)
    M3_ADD  (3,K5)
    M3_SUB  (5,K3)
    add.l   1*4(a4),d3
    IMDCT_FIX( d3)
    move.l  d3,d4
    W34     (d3,0,6)
    W34     (d4,5,11)

    // 4+3
    MT      (0,K1,d3)
    neg.l   d3
    MT_ADD  (1,K4)
    sub.l   1*4(a4),d3
    IMDCT_FIX( d3)
    move.l  d3,d4
    W34     (d3,1,7)
    W34     (d4,4,10)

    // 5+3
    M3      (0,K0,d3)
    neg.l   d3
    M3_SUB  (2,K2)
    M3_SUB  (3,K3)
    M3_SUB  (5,K5)
    sub.l   0*4(a4),d3
    IMDCT_FIX( d3)
    move.l  d3,d4
    W34     (d3,2,8)
    W34     (d4,3,9)

    // ---------------------------------------------------------------
    // STEP 3
    // ---------------------------------------------------------------

    addq.l  #2,a0                   // in++;

    // Calc s[0..1], t[0..1]
    //
    M3      (1,K1,d3)
    M3_ADD  (4,K4)
    move.l  d3,0*4(a4)              // s[ 0 ] = M3( 1, K1 ) + M3( 4, K4 )
    M3      (1,K4,d3)
    M3_SUB  (4,K1)
    move.l  d3,1*4(a4)              // s[ 1 ] = M3( 1, K4 ) - M3( 4, K1 )

    // Word loads + sign extension, as in STEP 1.
    move.w  0*6(a0),d3
    ext.l   d3
    move.w  3*6(a0),d7
    ext.l   d7
    sub.l   d7,d3
    move.l  d3,0*4(a3)              // t[ 0 ] = x[0*3] - x[3*3]
    move.w  2*6(a0),d3
    ext.l   d3
    move.w  5*6(a0),d7
    ext.l   d7
    add.l   d7,d3
    move.l  d3,1*4(a3)              // t[ 1 ] = x[2*3] + x[5*3]

    // 0
    M3      (0,K3,d3)
    M3_SUB  (2,K5)
    M3_ADD  (3,K0)
    M3_SUB  (5,K2)
    sub.l   0*4(a4),d3
    IMDCT_FIX( d3)
    move.l  d3,d4
    neg.l   d4
    W35     (d3,0,0)
    W35     (d4,5,5)

    // 1
    MT      (0,K4,d3)
    MT_ADD  (1,K1)
    sub.l   0*4(a4),d3
    IMDCT_FIX( d3)
    move.l  d3,d4
    neg.l   d4
    W35     (d3,1,1)
    W35     (d4,4,4)

    // 2
    M3      (0,K5,d3)
    M3_ADD  (2,K3)
    M3_SUB  (3,K2)
    M3_SUB  (5,K0)
    sub.l   1*4(a4),d3
    IMDCT_FIX( d3)
    move.l  d3,d4
    neg.l   d4
    W35     (d3,2,2)
    W35     (d4,3,3)

    // 3+3
    M3      (0,K2,d3)
    neg.l   d3
    M3_ADD  (2,K0)
    M3_ADD  (3,K5)
    M3_SUB  (5,K3)
    add.l   1*4(a4),d3
    IMDCT_FIX( d3)
    move.l  d3,d4
    W34     (d3,6,6)
    W34     (d4,11,11)

    // 4+3
    MT      (0,K1,d3)
    neg.l   d3
    MT_ADD  (1,K4)
    sub.l   1*4(a4),d3
    IMDCT_FIX( d3)
    move.l  d3,d4
    W34     (d3,7,7)
    W34     (d4,10,10)

    // 5+3
    M3      (0,K0,d3)
    neg.l   d3
    M3_SUB  (2,K2)
    M3_SUB  (3,K3)
    M3_SUB  (5,K5)
    sub.l   0*4(a4),d3
    IMDCT_FIX( d3)
    move.l  d3,d4
    W34     (d3,8,8)
    W34     (d4,9,9)

    subq.l  #4,a0                   // in -=2 (restore in)
    move.l  a5,a3                   // restore the caller's prev pointer
    unlk    a6
    rts

//
// Window coefficient tables (16-bit signed words).
//
// imdct_win0, imdct_win1 and imdct_win3 hold 36 entries each; imdct_win2
// holds 12. Each "_odd" table is its base table with every second entry
// (indices 1, 3, 5, ...) negated.
// NOTE(review): the flat runs equal 16384, so the values look like a window
// scaled by 2^14 - confirm against the shift constant used by the callers.
//
imdct_win0:
    dc.w      715,   2139,   3546,   4927,   6270,   7565
    dc.w     8803,   9974,  11069,  12080,  12998,  13818
    dc.w    14533,  15137,  15626,  15996,  16244,  16368
    dc.w    16368,  16244,  15996,  15626,  15137,  14533
    dc.w    13818,  12998,  12080,  11069,   9974,   8803
    dc.w     7565,   6270,   4927,   3546,   2139,    715

imdct_win0_odd:
    dc.w      715,  -2139,   3546,  -4927,   6270,  -7565
    dc.w     8803,  -9974,  11069, -12080,  12998, -13818
    dc.w    14533, -15137,  15626, -15996,  16244, -16368
    dc.w    16368, -16244,  15996, -15626,  15137, -14533
    dc.w    13818, -12998,  12080, -11069,   9974,  -8803
    dc.w     7565,  -6270,   4927,  -3546,   2139,   -715

imdct_win1:
    dc.w      715,   2139,   3546,   4927,   6270,   7565
    dc.w     8803,   9974,  11069,  12080,  12998,  13818
    dc.w    14533,  15137,  15626,  15996,  16244,  16368
    dc.w    16384,  16384,  16384,  16384,  16384,  16384
    dc.w    16244,  15137,  12998,   9974,   6270,   2139
    dc.w        0,      0,      0,      0,      0,      0

imdct_win1_odd:
    dc.w      715,  -2139,   3546,  -4927,   6270,  -7565
    dc.w     8803,  -9974,  11069, -12080,  12998, -13818
    dc.w    14533, -15137,  15626, -15996,  16244, -16368
    dc.w    16384, -16384,  16384, -16384,  16384, -16384
    dc.w    16244, -15137,  12998,  -9974,   6270,  -2139
    dc.w        0,      0,      0,      0,      0,      0

imdct_win3:
    dc.w        0,      0,      0,      0,      0,      0
    dc.w     2139,   6270,   9974,  12998,  15137,  16244
    dc.w    16384,  16384,  16384,  16384,  16384,  16384
    dc.w    16368,  16244,  15996,  15626,  15137,  14533
    dc.w    13818,  12998,  12080,  11069,   9974,   8803
    dc.w     7565,   6270,   4927,   3546,   2139,    715

imdct_win3_odd:
    dc.w        0,      0,      0,      0,      0,      0
    dc.w     2139,  -6270,   9974, -12998,  15137, -16244
    dc.w    16384, -16384,  16384, -16384,  16384, -16384
    dc.w    16368, -16244,  15996, -15626,  15137, -14533
    dc.w    13818, -12998,  12080, -11069,   9974,  -8803
    dc.w     7565,  -6270,   4927,  -3546,   2139,   -715

imdct_win2:
    dc.w     2139,   6270,   9974,  12998,  15137,  16244
    dc.w    16244,  15137,  12998,   9974,   6270,   2139

imdct_win2_odd:
    dc.w     2139,  -6270,   9974, -12998,  15137, -16244
    dc.w    16244, -15137,  12998,  -9974,   6270,  -2139

.section .rodata

// Pointer tables: entry n is the address of imdct_win<n> / imdct_win<n>_odd.
imdct_win:
    dc.l    imdct_win0
    dc.l    imdct_win1
    dc.l    imdct_win2
    dc.l    imdct_win3

imdct_win_odd:
    dc.l    imdct_win0_odd
    dc.l    imdct_win1_odd
    dc.l    imdct_win2_odd
    dc.l    imdct_win3_odd
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -