📄 coldfire_mpegimda.s
字号:
//------------------------------------------------------------------------------
//
// File : MPEGIMDA.a
//
// Author : Stéphane TAVENARD
//
// $VER: MPEGIMDA.a 0.1 (10/05/1997)
//
// (C) Copyright 1997-1997 Stéphane TAVENARD
// All Rights Reserved
//
// #Rev| Date | Comment
// ----|----------|--------------------------------------------------------
// 0 |04/03/1997| Initial revision ST
// 1 |10/05/1997| use of link instead of static vars ST
//
// ------------------------------------------------------------------------
//
// MPEG IMDCT optimized !
//
// NOTE(review): file mixes MIT-style (sp@, pc@(...)) and Motorola-style
// operand syntax; the GNU assembler for m68k accepts both.
//
//------------------------------------------------------------------------------
 .globl MPEGIMDA_hybrid
 .text

// IMMED expands to the immediate-operand prefix '#' inside macros below.
#define IMMED #
// Fixed-point scale: products are shifted right by IMDCT_BITS (Q14).
#define IMDCT_BITS 14

// Perform an IMDCT (hybrid filterbank stage, one granule/channel).
//
// C-style arguments are taken from the stack (see loads below):
// a0: in array (16-bit)       -- frequency-domain input, 18 samples/subband
// a1: out array (16-bit)      -- output, written with stride 32 words
// a2: prev block (16-bit)     -- overlap buffer from the previous granule
// d0.w: block type            -- 2 selects short blocks, else long
// d1.w: mixed (0 or 1)        -- nonzero: first 2 subbands use window 0
// d2.w: sb_max                -- number of subbands actually coded
//
// Clobbers d0-d7/a0-a4 internally; d2-d7/a2-a6 are saved/restored here.
MPEGIMDA_hybrid:
 sub.l #44, sp                          // room for 11 registers (ColdFire
 moveml d2-d7/a2-a6,sp@                 //  movem has no predecrement mode)
 move.l sp@(48), a0                     // args start past 44-byte save
 move.l sp@(52), a1                     //  area + 4-byte return address
 move.l sp@(56), a2
 move.l sp@(60), d0
 move.l sp@(64), d1
 move.l sp@(68), d2
 move.l a2,a3 // a3 = prev block (a2 is reused for the window pointer)
 ext.l d0 // Get these as longs for sanity reasons
 ext.l d1
 ext.l d2
 clr.l d5                               // d5 = current subband index
 tst.l d1
 jbeq MPEGIMDA_h1
// mixed -> sb 0 & 1 to win 0 (long-block window regardless of block type)
 lea pc@(imdct_win0),a2
 move.l a5,-(sp)                        // imdct_l clobbers a5/d0/d2
 move.l d0,-(sp)
 move.l d2,-(sp)
 jbsr imdct_l
 move.l (sp)+,d2
 move.l (sp)+,d0
 move.l (sp)+,a5
 add.l #2*18,a0 // in += 18
 addq.l #2,a1 // out++;
 add.l #2*18,a3 // prev += 18
 addq.l #1,d5 // addw
 cmp.l d2,d5 // cmpw
 jbge MPEGIMDA_h5 // end of imdct
 lea pc@(imdct_win0_odd),a2             // odd subband: sign-alternated window
 move.l a5,-(sp)
 move.l d0,-(sp)
 move.l d2,-(sp)
 jbsr imdct_l
 move.l (sp)+,d2
 move.l (sp)+,d0
 move.l (sp)+,a5
 add.l #2*18,a0 // in += 18
 addq.l #2,a1 // out++;
 add.l #2*18,a3 // prev += 18
 addq.l #1,d5 // addw
 cmp.l d2,d5 // cmpw
 jbge MPEGIMDA_h5 // end of imdct
MPEGIMDA_h1:
 cmp.l #2,d0
 jbeq MPEGIMDA_h3 // short blocks
// Long blocks: process subbands in even/odd pairs until sb_max reached.
MPEGIMDA_h2:
 move.l imdct_win@GOT(%a5),a2           // a5 presumably GOT base (PIC) --
 move.l (a2,d0.l*4),a2                  //  window table indexed by block type
 move.l d0,-(sp)
 move.l d2,-(sp)
 move.l a5,-(sp)
 jbsr imdct_l
 move.l (sp)+,a5
 move.l (sp)+,d2
 move.l (sp)+,d0
 add.l #2*18,a0 // in += 18
 addq.l #2,a1 // out++;
 add.l #2*18,a3 // prev += 18
 addq.l #1,d5 // addw
 cmp.l d2,d5 // cmpw
 jbge MPEGIMDA_h5 // end of imdct
 move.l imdct_win_odd@GOT(%a5),a2       // odd-subband window variant
 move.l (a2,d0.l*4), a2
 move.l d0,-(sp)
 move.l d2,-(sp)
 move.l a5,-(sp)
 jbsr imdct_l
 move.l (sp)+,a5
 move.l (sp)+,d2
 move.l (sp)+,d0
 add.l #2*18,a0 // in += 18
 addq.l #2,a1 // out++;
 add.l #2*18,a3 // prev += 18
 addq.l #1,d5 // addw
 cmp.l d2,d5 // cmpw
 jbge MPEGIMDA_h5 // end of imdct
 jbra MPEGIMDA_h2
// Short blocks: same pairwise loop but through imdct_s with window 2.
MPEGIMDA_h3:
 lea pc@(imdct_win2),a2
 move.l d0,-(sp)
 move.l d2,-(sp)
 move.l a5,-(sp)
 jbsr imdct_s
 move.l (sp)+,a5
 move.l (sp)+,d2
 move.l (sp)+,d0
 add.l #2*18,a0 // in += 18
 addq.l #2,a1 // out++;
 add.l #2*18,a3 // prev += 18
 addq.l #1,d5 // addw
 cmp.l d2,d5 // cmpw
 jbge MPEGIMDA_h5 // end of imdct
 lea pc@(imdct_win2_odd),a2
 move.l d0,-(sp)
 move.l d2,-(sp)
 move.l a5,-(sp)
 jbsr imdct_s
 move.l (sp)+,a5
 move.l (sp)+,d2
 move.l (sp)+,d0
 add.l #2*18,a0 // in += 18
 addq.l #2,a1 // out++;
 add.l #2*18,a3 // prev += 18
 addq.l #1,d5 // addw
 cmp.l d2,d5 // cmpw
 jbge MPEGIMDA_h5 // end of imdct
 jbra MPEGIMDA_h3
// End of imdct -> overlap with 0 rest of bands:
// for subbands >= sb_max, out gets the previous overlap and prev is cleared.
MPEGIMDA_h5:
 cmp.l #32,d5 // cmpw
 jbge MPEGIMDA_h7
 clr.l d1                               // d1 = 0, stored into prev below
MPEGIMDA_h6:
 // Unrolled x18: out[ i*32 ] = prev[ i ]; prev[ i ] = 0
 move.w (a3),0*2*32(a1)
 move.w d1,(a3)+
 move.w (a3),1*2*32(a1)
 move.w d1,(a3)+
 move.w (a3),2*2*32(a1)
 move.w d1,(a3)+
 move.w (a3),3*2*32(a1)
 move.w d1,(a3)+
 move.w (a3),4*2*32(a1)
 move.w d1,(a3)+
 move.w (a3),5*2*32(a1)
 move.w d1,(a3)+
 move.w (a3),6*2*32(a1)
 move.w d1,(a3)+
 move.w (a3),7*2*32(a1)
 move.w d1,(a3)+
 move.w (a3),8*2*32(a1)
 move.w d1,(a3)+
 move.w (a3),9*2*32(a1)
 move.w d1,(a3)+
 move.w (a3),10*2*32(a1)
 move.w d1,(a3)+
 move.w (a3),11*2*32(a1)
 move.w d1,(a3)+
 move.w (a3),12*2*32(a1)
 move.w d1,(a3)+
 move.w (a3),13*2*32(a1)
 move.w d1,(a3)+
 move.w (a3),14*2*32(a1)
 move.w d1,(a3)+
 move.w (a3),15*2*32(a1)
 move.w d1,(a3)+
 move.w (a3),16*2*32(a1)
 move.w d1,(a3)+
 move.w (a3),17*2*32(a1)
 move.w d1,(a3)+
 addq.l #2,a1 // out++
 addq.l #1,d5 // addw
 cmp.l #32,d5 // cmpw
 jblt MPEGIMDA_h6
MPEGIMDA_h7:
 movem.l (sp),d2-d7/a2-a6
 add.l #44, sp
 rts

// K0..K17: fixed-point IMDCT coefficients (all < 2^14; presumably Q14
// cosine values matching IMDCT_BITS -- TODO confirm against generator).
#define K0 16368
#define K1 16244
#define K2 15996
#define K3 15626
#define K4 15137
#define K5 14533
#define K6 13818
#define K7 12998
#define K8 12080
#define K9 11069
#define K10 9974
#define K11 8803
#define K12 7565
#define K13 6270
#define K14 4927
#define K15 3546
#define K16 2139
#define K17 715

// MUL32 a, b: b = (a * b) >> d6   (d6 holds IMDCT_BITS at run time)
#define MUL32(p1, p2) \
 muls.l p1,p2 ; \
 asr.l d6,p2

// S a, <dummy reg>, <dest reg>
// performs: (INT32)x[ a ] - (INT32)x[ 11-a ] - (INT32)x[ 12+a ]
//
#define S(p1,p2,p3) \
 move.w p1*2(a0),p3 ;\
 ext.l p3 ;\
 move.w 22-p1*2(a0),p2 ;\
 ext.l p2 ;\
 sub.l p2,p3 ;\
 move.w 24+p1*2(a0),p2 ;\
 ext.l p2 ;\
 sub.l p2,p3

// M xi, Kx, <dest reg>
// performs: ((INT32)x[ xi ] * (Kx))
//
#define M(p1,p2,p3) \
 move.w p1*2(a0),p3 ;\
 muls.w IMMED p2,p3

//
// M_ADD xi, Kx
// performs: M xi, Kx, d0
//           add.l d0,d3
//
#define M_ADD(p1, p2) \
 M (p1,p2,d0) ;\
 add.l d0,d3

//
// M_SUB xi, Kx
// performs: M xi, Kx, d0
//           sub.l d0,d3
//
#define M_SUB(p1, p2) \
 M (p1,p2,d0) ;\
 sub.l d0,d3

/* These macros are similar to the above, but they use the mac
 * unit to do the computation. This disadvantage is that the break
 * even is at 3 M_ADD/M_SUB operations and no access to the
 * intermediate sum is possible.
 */
#ifdef COLDFIRE_MAC

// ColdFire MAC-unit variants, emitted as raw opcode words (dc.w) because
// the assembler in use lacks the mnemonics.
// NOTE(review): 0xa13c/0xa003/0xa183 are presumed move-to-ACC / mac.w /
// move-from-ACC encodings -- verify against the ColdFire MAC opcode map.

// MC xi, Kx: clear accumulator, then ACC = x[ xi ] * Kx
#define MC(p1, p2) \
 dc.w 0xa13c; \
 dc.l 0; \
 move.w IMMED p2, d0; \
 move.w p1*2(a0), d3; \
 dc.w 0xa003; \
 dc.w 0x0000

// MC_ADD xi, Kx: ACC += x[ xi ] * Kx
#define MC_ADD(p1, p2) \
 move.w IMMED p2, d0; \
 move.w p1*2(a0), d3; \
 dc.w 0xa003; \
 dc.w 0x0000

// MC_SUB xi, Kx: ACC -= x[ xi ] * Kx  (extension word 0x0100 selects msac)
#define MC_SUB(p1, p2) \
 move.w IMMED p2, d0; \
 move.w p1*2(a0), d3; \
 dc.w 0xa003; \
 dc.w 0x0100

// MC_FIN: fetch the accumulated sum into d3
#define MC_FIN dc.w 0xa183

#else

// Non-MAC fallback: plain multiply/accumulate in d3 via the M macros.
#define MC(p1, p2) M(p1, p2, d3)
#define MC_ADD(p1, p2) M_ADD(p1, p2)
#define MC_SUB(p1, p2) M_SUB(p1, p2)
#define MC_FIN

#endif

// MT ti, Kx, <dest reg>
// performs: (t[ ti ] * (Kx))   -- t[] is a 32-bit temp array at (a3)
//
#define MT(p1, p2, p3) \
 move.l p1*4(a3),p3 ;\
 muls.w IMMED p2,p3

//
// MT_ADD ti, Kx
// performs: MT ti, Kx, d0
//           add.l d0,d3
//
#define MT_ADD(p1, p2) \
 MT (p1,p2,d0) ;\
 add.l d0,d3

//
// MT_SUB ti, Kx
// performs: MT ti, Kx, d0
//           sub.l d0,d3
//
#define MT_SUB(p1, p2) \
 MT (p1,p2,d0) ;\
 sub.l d0,d3

//
// IMDCT_FIX <reg>
// performs <reg> = <reg> >> IMDCT_BITS   (d6 holds IMDCT_BITS)
//
#define IMDCT_FIX(p1) asr.l d6,p1

// W <reg>, wi -> <reg> -> out[ wi ]
// performs: (<reg> * win[ wi ]) >> WIN_BITS + prev[ wi ] -> out[ wi ]
// (out has stride 32 words; prev is read via a5)
//
#define W(p1, p2) \
 muls.w p2*2(a2),p1 ; \
 asr.l d6,p1 ; \
 /* add.w p2*2(a5),p1 */ \
 move.w p2*2(a5),d7; \
 add.l d7, p1; \
 move.w p1,p2*2*32(a1)

// WP <reg>, wi -> <reg> -> prev[ wi ]
// performs: (<reg> * win[ wi ]) >> WIN_BITS -> prev[ wi-18 ]
//
#define WP(p1, p2) \
 muls.w p2*2(a2),p1 ;\
 asr.l d6,p1 ;\
 move.w p1,p2*2-36(a5)

//
// IMDCT for Long blocks
//
// a0: input x array (16-bit)
// a1: output out (16-bit)
// a2: window array (16-bit)
// a3: prev array (32-bit)
imdct_l:
 link a6,#-10*4 // need 4+6 longs of locals on the frame
 move.l a3,a5                   // a5 = prev (a3 is repurposed below)
 lea -4*4(a6),a3 // t needs 4 longs
 lea -6*4(a3),a4 // s needs 6 longs
// lea imdct_sum_t,a3           // (old static-buffer variant, see rev 1)
// lea imdct_sum_s,a4
 moveq.l #IMDCT_BITS,d6         // d6 = shift count used by the macros
 M (4,K13,d1)
 M (13,K4,d0)
 sub.l d0,d1 // k1 = M( 4, K13 ) - M( 13, K4 )
 M (4,K4,d2)
 M (13,K13,d0)
 add.l d0,d2 // k2 = M( 4, K4 ) + M( 13, K13 )
// s[ 0 ] = -M( 1, K7 ) + k1 + M( 7, K1 ) + M( 10, K16 ) - M( 16, K10 )
 M (7,K1,d3)
 M_SUB (1,K7)
 M_ADD (10,K16)
 M_SUB (16,K10)
 add.l d1,d3
 move.l d3,0*4(a4)
// s[ 1 ] = -M( 1, K4 ) - k1 + M( 7, K13 ) + M( 10, K4 ) + M( 16, K13 )
 M (7,K13,d3)
 M_SUB (1,K4)
 M_ADD (10,K4)
 M_ADD (16,K13)
 sub.l d1,d3
 move.l d3,1*4(a4)
// NOTE(review): imdct_l continues past this chunk (s[2..5], t[],
// windowing via W/WP, and unlk/rts) -- source truncated here.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -