📄 coldfire_mpegsubb.s
字号:
//------------------------------------------------------------------------------
//
// File    : MPEGSUBB.a
//
// Author  : Stéphane TAVENARD
//
// (C) Copyright 1997-1998 Stéphane TAVENARD
//     All Rights Reserved
//
// #Rev|   Date   |                      Comment
// ----|----------|--------------------------------------------------------
// 0   |10/04/1997| Initial revision                                     ST
// 1   |18/05/1997| Optimized windowing for '060                         ST
// 2   |01/11/1997| Use link instead of static vars                      ST
// 3   |08/04/1998| Use overflow optimization from ... (IRC man)         ST
// 4   |21/06/1998| Use external dewindow                                ST
//
// ------------------------------------------------------------------------
//
// MPEG SUBroutines optimized !
//
// Conventions used in this file:
//   - The source is run through the C preprocessor (all the #define
//     macros below), then assembled. Operands use Motorola syntax
//     (op src,dst) with unprefixed register names.
//   - Several instructions are emitted from inside function-like cpp
//     macros, where a literal hash sign would be taken as the stringify
//     operator. IMM / IMMED expand to the hash sign and _IMM expands to
//     nothing, so immediates can be written inside macro bodies.
//   - Exported routines read their arguments from the stack (see
//     MPEGSUBB_antialias) and preserve every register they modify
//     except d0, d1, a0 and a1.
//
//------------------------------------------------------------------------------

        .globl  MPEGSUBB_antialias
        .globl  MPEGSUBB_filter_band
        .globl  MPEGSUBB_window_band

        .text

#define INT_FACTOR      14
#define ALIAS_BITS      15

#define IMMED #
#define IMM   #
#define _IMM

//
// DBRA counter,label
// Stand-in for the dbra instruction: subtract 1 from the long counter
// and branch back to <label> while the result is still >= 0 (signed).
// A counter that starts at n-1 therefore gives n passes through the loop.
//
#define DBRA(x,y) \
        subq.l  IMM 1, x; \
        jbge    y

/* I tried using the coldfire MAC unit to improve these and
 * it slowed things down.  Cycle counting indicated 10 without
 * the MAC and 9 with so there must be some fun stuff going on
 */

//
// ALIAS_U bu,csi,bd,cai,<tmp reg>,<reg>
// performs: (bu * csi) - (bd * cai) -> <reg>
// <tmp reg> is used for calculation
// d6 must be set to ALIAS_BITS
//
#define ALIAS_U(p1,p2,p3,p4,p5,p6) \
        move.w  p1,p6; \
        muls.w  IMM p2,p6; \
        move.w  p3,p5; \
        muls.w  IMM p4,p5; \
        sub.l   p5,p6; \
        asr.l   d6,p6

//
// ALIAS_D bd,csi,bu,cai,<tmp reg>,<reg>
// performs: (bd * csi) + (bu * cai) -> <reg>
// <tmp reg> is used for calculation
// d6 must be set to ALIAS_BITS
//
#define ALIAS_D(p1,p2,p3,p4,p5,p6) \
        move.w  p1,p6; \
        muls.w  IMM p2,p6; \
        move.w  p3,p5; \
        muls.w  IMM p4,p5; \
        add.l   p5,p6; \
        asr.l   d6,p6

#define bd a1
#define bu a2

//
// ALIAS_B csi,cai
// perform 1 alias butterfly
// uses d0..d3
//
// bu is pre-decremented (walks down through the lower sub band) and
// bd is post-incremented (walks up through the upper sub band).
// Both inputs are loaded into d2/d3 before either result is stored,
// so the two outputs are computed from the original sample pair.
//
#define ALIAS_B(p1,p2) \
        move.w  -(bu),d2 ; \
        move.w  (bd),d3 ; \
        ALIAS_U (d2,p1,d3,p2,d0,d1) ; \
        move.w  d1,(bu) ; \
        ALIAS_D (d3,p1,d2,p2,d0,d1) ; \
        move.w  d1,(bd)+

// Bytes reserved on the stack for the registers saved by
// MPEGSUBB_antialias: d2-d7/a2 = 7 registers * 4 bytes.
#define ANTIALIAS_SAVE_BYTES    28

// Distance in samples between two consecutive butterfly groups
// (called SSLIMIT in the original comments).
#define ANTIALIAS_SSLIMIT       18

//------------------------------------------------------------------------------
// Layer III antialiasing filter
//
// Arguments (read from the stack, first argument nearest the return
// address):
//    4(sp) = sample buffer xr (INT16)
//    8(sp) = sblimit; only the low 16 bits are used, zero-extended
//
// C view (assumption, to be confirmed against the caller):
//    void MPEGSUBB_antialias( INT16 *xr, int sblimit )
//
// Behaviour:
//    Runs <sblimit> passes. Pass k (k = 1..sblimit) applies 8 butterflies
//    around the boundary at xr[ 18*k ], reading and writing
//    xr[ 18*k-8 .. 18*k+7 ]. Nothing is done when the low word of
//    sblimit is 0. The caller must make sure xr holds at least
//    18*sblimit + 8 samples.
//
//    The immediate pairs handed to ALIAS_B are fixed-point coefficients
//    scaled by 2^ALIAS_BITS (they match the usual Layer III cs[i]/ca[i]
//    table, e.g. 28098 = 0.8575 * 32768).
//
// Registers:
//    a0 = xr, a1 = bd, a2 = bu
//    d5 = i (sample index), d6 = ALIAS_BITS, d7 = loop counter
//    d0-d3 = butterfly scratch
//    d2-d7/a2 are saved and restored; d0, d1, a0 and a1 are clobbered.
//
//    The save area is carved out with sub.l and filled with a plain
//    (sp) movem rather than a pre-decrement movem; presumably because
//    ColdFire movem only offers the (An) and d16(An) modes - see the
//    ColdFire Programmers Reference Manual.
//------------------------------------------------------------------------------
MPEGSUBB_antialias:
        move.l  4(sp),a0                        // a0 = xr
        move.l  8(sp),d0                        // d0 = sblimit
        sub.l   #ANTIALIAS_SAVE_BYTES,sp
        movem.l d2-d7/a2,(sp)

        clr.l   d7
        move.w  d0,d7                           // d7 = sblimit (low word, zero-extended)
        subq.l  #1,d7                           // bias the counter for DBRA
        jbmi    antialias2                      // sblimit was 0: nothing to do

        clr.l   d5                              // d5 = i = 0
        moveq.l #ALIAS_BITS,d6                  // for alias mult

antialias1:
        add.l   #ANTIALIAS_SSLIMIT,d5           // i += SSLIMIT
        lea     (a0,d5.l*2),bu                  // bu = xr[ i ]
        move.l  bu,bd                           // bd = bu

        ALIAS_B (_IMM 28098,_IMM -16858)
        ALIAS_B (_IMM 28892,_IMM -15457)
        ALIAS_B (_IMM 31117,_IMM -10268)
        ALIAS_B (_IMM 32221,_IMM -5960)
        ALIAS_B (_IMM 32621,_IMM -3099)
        ALIAS_B (_IMM 32740,_IMM -1342)
        ALIAS_B (_IMM 32764,_IMM -465)
        ALIAS_B (_IMM 32767,_IMM -121)

        DBRA    (d7, antialias1)

antialias2:
        movem.l (sp),d2-d7/a2
        add.l   #ANTIALIAS_SAVE_BYTES,sp
        rts

#define ps1 a3
#define ps2 a4
#define pd1 a5
//#define pd2 a6 // #2

//
// Filter COS values for fast cosine transform
//
#define SH 1 // In order to preserve bits in multiply
#define SHIFT(x) ((x)/2)
#define MUL_SHIFT 16-SH

#define COS1_64  0x4014  // 0x8028>>SH
#define COS3_64  0x40b3  // 0x8167>>SH
#define COS5_64  0x41fa  // 0x83f4>>SH
#define COS7_64  0x43f9  // 0x87f2>>SH
#define COS9_64  0x46cc  // 0x8d98>>SH
#define COS11_64 0x4a9d  // 0x953b>>SH
#define COS13_64 0x4fae  // 0x9f5c>>SH
#define COS15_64 0x5660  // 0xacc0>>SH
#define COS17_64 0x5f4d  // 0xbe9a>>SH
#define COS19_64 0x6b70  // 0xd6e0>>SH
#define COS21_64 0x7c7d  // 0xf8fa>>SH
#define COS23_64 0x95b0  // 0x12b60>>SH
#define COS25_64 0xbdf9  // 0x17bf2>>SH
#define COS27_64 0x10765 // 0x20ecb>>SH
#define COS29_64 0x1b42c // 0x36859>>SH
#define COS31_64 0x51852 // 0xa30a4>>SH
#define COS1_32  0x404f  // 0x809f>>SH
#define COS3_32  0x42e1  // 0x85c2>>SH
#define COS5_32  0x4891  // 0x9123>>SH
#define COS7_32  0x52cb  // 0xa596>>SH
#define COS9_32  0x64e2  // 0xc9c5>>SH
#define COS11_32 0x87c4  // 0x10f89>>SH
#define COS13_32 0xdc79  // 0x1b8f2>>SH
#define COS15_32 0x28cf2 // 0x519e5>>SH
#define COS1_16  0x4141  // 0x8282>>SH
#define COS3_16  0x4cf9  // 0x99f2>>SH
#define COS5_16  0x7332  // 0xe665>>SH
#define COS7_16  0x1480d // 0x2901b>>SH
#define COS1_8   0x4546  // 0x8a8c>>SH
#define COS3_8   0xa73d  // 0x14e7b>>SH
#define COS1_4   0x5a82  // 0xb505>>SH

// Multiply an immediate constant with a register appropriately scaled
//    p2 = (p2 * p1) >> d6
// d5 is used as a temporary and is destroyed.
// d6 must hold the shift count.
#define MUL32I(p1,p2) \
        move.l  p1, d5; \
        muls.l  d5, p2; \
        asr.l   d6, p2

// Multiply against a register with appropriate scaling
//    p2 = (p2 * p1) >> d6
// We can do almost every addressing mode here except for immediate :-(
#define MUL32AM(p1,p2) \
        muls.l  p1, p2; \
        asr.l   d6, p2

//
// FFF_MUL COSx,n
// One step of a fold stage. With a = (ps1)+ and b = -(ps2):
//    a + b           -> (pd1)+
//    (a - b) * COSx  -> n longs after the slot just written
// d0, d1, d2 and d5 (through MUL32I) are modified
//
#define FFF_MUL(p1,p2) \
        move.l  (ps1)+,d0 ; \
        move.l  d0,d1 ; \
        move.l  -(ps2),d2 ; \
        add.l   d2,d1 ; \
        move.l  d1,(pd1)+ ; \
        sub.l   d2,d0 ; \
        MUL32I  (IMMED p1,d0) ; \
        move.l  d0,p2*4-4(pd1)

//
// FF4_MUL s1,s2,s3,s4
// s1..s4 must be registers
// d0, s1, s2 modified (and d5, through MUL32I)
//
// s1 + s4 -> (pd1)+
// s2 + s3 -> (pd1)+
// (s1 - s4) * COS1_8 -> (pd1)+
// (s2 - s3) * COS3_8 -> (pd1)+
//
#define FF4_MUL(p1,p2,p3,p4) \
        move.l  p1,d0 ; \
        add.l   p4,d0 ; \
        move.l  d0,(pd1)+ ; \
        move.l  p2,d0 ; \
        add.l   p3,d0 ; \
        move.l  d0,(pd1)+ ; \
        sub.l   p4,p1 ; \
        MUL32I  (IMMED COS1_8,p1) ; \
        move.l  p1,(pd1)+ ; \
        sub.l   p3,p2 ; \
        MUL32I  (IMMED COS3_8,p2) ; \
        move.l  p2,(pd1)+

//
// FF2_MUL s1,s2,COSx
// s1..s2, COSx must be registers
// d0, s1 modified
//
// s1 + s2 -> (pd1)+
// (s1 - s2) * COSx -> (pd1)+
//
#define FF2_MUL(p1,p2,p3) \
        move.l  p1,d0 ; \
        add.l   p2,d0 ; \
        move.l  d0,(pd1)+ ; \
        sub.l   p2,p1 ; \
        MUL32AM (p3,p1) ; \
        move.l  p1,(pd1)+

//------------------------------------------------------------------------------
// fast_filter_sub
//
// File-local helper (not exported). Runs four successive fold/multiply
// stages over 16 long words, ping-ponging between two buffers that live
// in the frame of the caller:
//    p  = -32*4(a6) .. -16*4(a6)     (16 longs, input and final output)
//    pp = -16*4(a6) ..    0(a6)      (16 longs, intermediate)
//
// In:    a6 = frame pointer set up by the caller, p[] already filled
//        d6 = shift count used by MUL32I / MUL32AM (the header of the
//             synthesis filter further down says d6 = MUL_SHIFT)
// Out:   p[ 0..15 ] holds the result of the last stage
// Clobb: d0-d5, a3 (ps1), a4 (ps2), a5 (pd1); nothing is saved here
//------------------------------------------------------------------------------
fast_filter_sub:

        // Stage 1: p -> pp, 16 inputs folded into 8 sums + 8 products
//      lea     filter_p,ps1            // ps1=@p(0)
        lea     -32*4(a6),ps1           // ps1=@p(0)
        lea     16*4(ps1),ps2           // ps2=@p(16)
//      lea     filter_pp,pd1           // pd1=@pp(0)
        lea     -16*4(a6),pd1           // pd1=@pp(0)

        FFF_MUL (COS1_32,8)             // pp(i=0..7) = p(i) + p(15-i)
        FFF_MUL (COS3_32,8)             // pp(i=8..15) = COSx*[p(i) - p(15-i)]
        FFF_MUL (COS5_32,8)
        FFF_MUL (COS7_32,8)
        FFF_MUL (COS9_32,8)
        FFF_MUL (COS11_32,8)
        FFF_MUL (COS13_32,8)
        FFF_MUL (COS15_32,8)

        // Stage 2: pp -> p, two independent halves of 8 inputs each
//      lea     filter_pp,ps1           // ps1=@pp(0)
        lea     -16*4(a6),ps1           // ps1=@pp(0)
        lea     8*4(ps1),ps2            // ps2=@pp(8)
//      lea     filter_p,pd1            // pd1=@p(0)
        lea     -32*4(a6),pd1           // pd1=@p(0)

        FFF_MUL (COS1_16,4)             // p(i=0..3) = pp(i) + pp(7-i)
        FFF_MUL (COS3_16,4)             // p(i=4..7) = COSx*[pp(i) - pp(7-i)]
        FFF_MUL (COS5_16,4)
        FFF_MUL (COS7_16,4)

        // ps1 and pd1 were each advanced by 4 longs above; skip 4 more
        lea     4*4(ps1),ps1            // ps1=@pp(8)
        lea     8*4(ps1),ps2            // ps2=@pp(16)
        lea     4*4(pd1),pd1            // pd1=@p(8)

        FFF_MUL (COS1_16,4)
        FFF_MUL (COS3_16,4)
        FFF_MUL (COS5_16,4)
        FFF_MUL (COS7_16,4)

        // Stage 3: p -> pp, four groups of 4 inputs
//      lea     filter_p,ps1            // ps1=@p(0)
        lea     -32*4(a6),ps1           // ps1=@p(0)
//      lea     filter_pp,pd1           // pd1=@pp(0)
        lea     -16*4(a6),pd1           // pd1=@pp(0)

        move.l  (ps1)+, d1              // get p0..p3
        move.l  (ps1)+, d2
        move.l  (ps1)+, d3
        move.l  (ps1)+, d4
        FF4_MUL (d1,d2,d3,d4)

        move.l  (ps1)+, d1              // get p4..p7
        move.l  (ps1)+, d2
        move.l  (ps1)+, d3
        move.l  (ps1)+, d4
        FF4_MUL (d1,d2,d3,d4)

        move.l  (ps1)+, d1              // get p8..p11
        move.l  (ps1)+, d2
        move.l  (ps1)+, d3
        move.l  (ps1)+, d4
        FF4_MUL (d1,d2,d3,d4)

        move.l  (ps1)+, d1              // get p12..p15
        move.l  (ps1)+, d2
        move.l  (ps1)+, d3
        move.l  (ps1)+, d4
        FF4_MUL (d1,d2,d3,d4)

        // Stage 4: pp -> p, eight pairs, all scaled by COS1_4
//      lea     filter_pp,ps1           // ps1=@pp(0)
        lea     -16*4(a6),ps1           // ps1=@pp(0)
//      lea     filter_p,pd1            // pd1=@p(0)
        lea     -32*4(a6),pd1           // pd1=@p(0)

        move.l  #COS1_4,d3              // kept in a register for MUL32AM
        move.l  #8-1,d4                 // 8 pairs, counter biased for DBRA
fast_filter_s2:
        move.l  (ps1)+, d1
        move.l  (ps1)+, d2
        FF2_MUL (d1,d2,d3)
        DBRA    (d4,fast_filter_s2)

        rts

//
// GET_P <index> <dest ea>
// copy p[ index ] into a destination <ea>
// p is ps1
//
#define GET_P(p1,p2) move.l p1*4(ps1),p2

//
// ADD_P <index> <dest ea>
// add p[ index ] to a destination <ea>
// p is ps1
//
#define ADD_P(p1,p2) add.l p1*4(ps1),p2

//
// SET_S0 <index> <src ea>
// copy a source <ea> into s0[ index ]
// s0 is a1
//
// The value is staged through d0, which is saved on the stack and
// restored afterwards. sp is 4 bytes lower while <src ea> is read.
//
#define SET_S0(p1,p2) \
        move.l  d0, -(sp); \
        move.w  p2, d0; \
        move.w  d0,p1*2*16(a1); \
        move.l  (sp)+, d0

//
// SET_S1 <index> <src ea>
// copy a source <ea> into s1[ index ]
// s1 is a2
//
#define SET_S1(p1,p2) move.w p2,p1*2*16(a2)

//
// SET_S0_P <index> <src reg>
// copy <reg> into s0[ index ]
// copy -<reg> into s0[ 32-index ]
// s0 is a1
// <reg> is modified
//
#define SET_S0_P(p1,p2) \
        move.w  p2,p1*2*16(a1) ; \
        neg.l   p2 ; \
        move.w  p2,-p1*2*16+1024(a1)

//
// SET_S1_P <index> <src reg>
// copy -<reg> into s1[ index ]
// copy -<reg> into s1[ 32-index ]
// s1 is a2
// <reg> is modified
//
#define SET_S1_P(p1,p2) \
        neg.l   p2 ; \
        move.w  p2,p1*2*16(a2) ; \
        move.w  p2,-p1*2*16+1024(a2)

// Apply the FAST synthesis filter to a sub band
// Generate full frequency sample
//
// a0: bandPtr (=fraction)
// a1: out_filter_buffer 0
// a2: out_filter_buffer 1
// d0.w: freq_div  // #2
//
// registers allocation: d6=MUL_SHIFT
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -