📄 liba52_changes.diff
+    case CONVERT (A52_3F, A52_STEREO):
+    case CONVERT (A52_3F, A52_DOLBY):
+    mix_3to2_3dnow:
+        mix3to2_3dnow (samples, bias);
+        break;
+
+    case CONVERT (A52_2F1R, A52_STEREO):
+        if (slev == 0)
+            break;
+        mix21to2_3dnow (samples, samples + 256, bias);
+        break;
+
+    case CONVERT (A52_2F1R, A52_DOLBY):
+        mix21toS_3dnow (samples, bias);
+        break;
+
+    case CONVERT (A52_3F1R, A52_STEREO):
+        if (slev == 0)
+            goto mix_3to2_3dnow;
+        mix31to2_3dnow (samples, bias);
+        break;
+
+    case CONVERT (A52_3F1R, A52_DOLBY):
+        mix31toS_3dnow (samples, bias);
+        break;
+
+    case CONVERT (A52_2F2R, A52_STEREO):
+        if (slev == 0)
+            break;
+        mix2to1_3dnow (samples, samples + 512, bias);
+        mix2to1_3dnow (samples + 256, samples + 768, bias);
+        break;
+
+    case CONVERT (A52_2F2R, A52_DOLBY):
+        mix22toS_3dnow (samples, bias);
+        break;
+
+    case CONVERT (A52_3F2R, A52_STEREO):
+        if (slev == 0)
+            goto mix_3to2_3dnow;
+        mix32to2_3dnow (samples, bias);
+        break;
+
+    case CONVERT (A52_3F2R, A52_DOLBY):
+        mix32toS_3dnow (samples, bias);
+        break;
+
+    case CONVERT (A52_3F1R, A52_3F):
+        if (slev == 0)
+            break;
+        mix21to2_3dnow (samples, samples + 512, bias);
+        break;
+
+    case CONVERT (A52_3F2R, A52_3F):
+        if (slev == 0)
+            break;
+        mix2to1_3dnow (samples, samples + 768, bias);
+        mix2to1_3dnow (samples + 512, samples + 1024, bias);
+        break;
+
+    case CONVERT (A52_3F1R, A52_2F1R):
+        mix3to2_3dnow (samples, bias);
+        memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
+        break;
+
+    case CONVERT (A52_2F2R, A52_2F1R):
+        mix2to1_3dnow (samples + 512, samples + 768, bias);
+        break;
+
+    case CONVERT (A52_3F2R, A52_2F1R):
+        mix3to2_3dnow (samples, bias); //FIXME possible bug? (output doesn't seem to be used)
+        move2to1_3dnow (samples + 768, samples + 512, bias);
+        break;
+
+    case CONVERT (A52_3F2R, A52_3F1R):
+        mix2to1_3dnow (samples + 768, samples + 1024, bias);
+        break;
+
+    case CONVERT (A52_2F1R, A52_2F2R):
+        memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
+        break;
+
+    case CONVERT (A52_3F1R, A52_2F2R):
+        mix3to2_3dnow (samples, bias);
+        memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
+        break;
+
+    case CONVERT (A52_3F2R, A52_2F2R):
+        mix3to2_3dnow (samples, bias);
+        memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
+        memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));
+        break;
+
+    case CONVERT (A52_3F1R, A52_3F2R):
+        memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
+        break;
+    }
+    __asm __volatile("femms":::"memory");
+}
+
+#endif // ARCH_X86
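For readers following the dispatch above: CONVERT packs the bitstream's channel coding mode (acmod) and the requested output mode into a single switch key, and the closing femms instruction resets the shared MMX/x87 register state so ordinary floating-point code is safe to run after the 3DNow! routines. A minimal standalone sketch of the key-packing idea; the macro body and the A52_* values mirror liba52's downmix.c and a52.h but should be treated as illustrative, not as part of this patch:

#include <stdio.h>

/* Assumed shape of liba52's dispatch key: the 3-bit input coding mode
 * in the low bits, the target output mode shifted above it, so every
 * (input, output) pair maps to a distinct case label. */
#define CONVERT(acmod, output) (((output) << 3) + (acmod))

/* Illustrative mode values following the usual liba52 ordering. */
enum { A52_STEREO = 2, A52_3F2R = 7 };

int main(void)
{
    /* 3 front + 2 rear downmixed to stereo selects one unique case. */
    printf("CONVERT(A52_3F2R, A52_STEREO) = %d\n",
           CONVERT(A52_3F2R, A52_STEREO));   /* prints 23 */
    return 0;
}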
--- liba52/imdct.c	2005-03-22 19:59:35.000000000 +0100
+++ imdct.c	2004-04-26 22:00:57.000000000 +0200
@@ -17,17 +23,32 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * SSE optimizations from Michael Niedermayer (michaelni@gmx.at)
+ * 3DNOW optimizations from Nick Kurshev <nickols_k@mail.ru>
+ *   michael did port them from libac3 (untested, perhaps totally broken)
+ * AltiVec optimizations from Romain Dolbeau (romain@dolbeau.org)
  */
 
 #include "config.h"
 
-#include <inttypes.h>
 #include <math.h>
 #include <stdio.h>
+#ifndef M_PI
+#define M_PI 3.1415926535897932384626433832795029
+#endif
+#include <inttypes.h>
 
 #include "a52.h"
 #include "a52_internal.h"
 #include "mm_accel.h"
+#include "mangle.h"
+
+#ifdef RUNTIME_CPUDETECT
+#undef HAVE_3DNOWEX
+#endif
+
+#define USE_AC3_C
 
 void (* imdct_256) (sample_t data[], sample_t delay[], sample_t bias);
 void (* imdct_512) (sample_t data[], sample_t delay[], sample_t bias);
@@ -37,9 +58,22 @@
     sample_t imag;
 } complex_t;
 
+static void fft_128p(complex_t *a);
+
+static const int pm128[128] attribute_used __attribute__((aligned(16))) =
+{
+    0, 16, 32, 48, 64, 80,  96, 112,  8, 40, 72, 104, 24, 56,  88, 120,
+    4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44,  60, 76, 92, 108, 124,
+    2, 18, 34, 50, 66, 82,  98, 114, 10, 42, 74, 106, 26, 58,  90, 122,
+    6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62,  94, 126,
+    1, 17, 33, 49, 65, 81,  97, 113,  9, 41, 73, 105, 25, 57,  89, 121,
+    5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45,  61, 77, 93, 109, 125,
+    3, 19, 35, 51, 67, 83,  99, 115, 11, 43, 75, 107, 27, 59,  91, 123,
+    7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47,  63, 79, 95, 111, 127
+};
 
 /* 128 point bit-reverse LUT */
-static uint8_t bit_reverse_512[] = {
+static uint8_t attribute_used bit_reverse_512[] = {
 	0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70,
 	0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78,
 	0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74,
@@ -67,23 +101,42 @@
 	0x03, 0x23, 0x13, 0x33, 0x0b, 0x2b, 0x1b, 0x3b,
 	0x07, 0x27, 0x17, 0x37, 0x0f, 0x2f, 0x1f, 0x3f};
 
-static complex_t buf[128];
+#ifdef ARCH_X86
+// NOTE: SSE needs 16byte alignment or it will segfault
+//
+static complex_t __attribute__((aligned(16))) buf[128];
+static float __attribute__((aligned(16))) sseSinCos1c[256];
+static float __attribute__((aligned(16))) sseSinCos1d[256];
+static float attribute_used __attribute__((aligned(16))) ps111_1[4]={1,1,1,-1};
+//static float __attribute__((aligned(16))) sseW0[4];
+static float __attribute__((aligned(16))) sseW1[8];
+static float __attribute__((aligned(16))) sseW2[16];
+static float __attribute__((aligned(16))) sseW3[32];
+static float __attribute__((aligned(16))) sseW4[64];
+static float __attribute__((aligned(16))) sseW5[128];
+static float __attribute__((aligned(16))) sseW6[256];
+static float __attribute__((aligned(16))) *sseW[7]=
+    {NULL /*sseW0*/,sseW1,sseW2,sseW3,sseW4,sseW5,sseW6};
+static float __attribute__((aligned(16))) sseWindow[512];
+#else
+static complex_t __attribute__((aligned(16))) buf[128];
+#endif
 
 /* Twiddle factor LUT */
-static complex_t w_1[1];
-static complex_t w_2[2];
-static complex_t w_4[4];
-static complex_t w_8[8];
-static complex_t w_16[16];
-static complex_t w_32[32];
-static complex_t w_64[64];
-static complex_t * w[7] = {w_1, w_2, w_4, w_8, w_16, w_32, w_64};
+static complex_t __attribute__((aligned(16))) w_1[1];
+static complex_t __attribute__((aligned(16))) w_2[2];
+static complex_t __attribute__((aligned(16))) w_4[4];
+static complex_t __attribute__((aligned(16))) w_8[8];
+static complex_t __attribute__((aligned(16))) w_16[16];
+static complex_t __attribute__((aligned(16))) w_32[32];
+static complex_t __attribute__((aligned(16))) w_64[64];
+static complex_t __attribute__((aligned(16))) * w[7] = {w_1, w_2, w_4, w_8, w_16, w_32, w_64};
 
 /* Twiddle factors for IMDCT */
-static sample_t xcos1[128];
-static sample_t xsin1[128];
-static sample_t xcos2[64];
-static sample_t xsin2[64];
+static sample_t __attribute__((aligned(16))) xcos1[128];
+static sample_t __attribute__((aligned(16))) xsin1[128];
+static sample_t __attribute__((aligned(16))) xcos2[64];
+static sample_t __attribute__((aligned(16))) xsin2[64];
 
 /* Windowing function for Modified DCT - Thank you acroread */
 sample_t imdct_window[] = {
@@ -145,16 +198,19 @@
 void imdct_do_512(sample_t data[],sample_t delay[], sample_t bias)
 {
-    int i,k;
+    int i;
+#ifndef USE_AC3_C
+    int k;
     int p,q;
     int m;
     int two_m;
     int two_m_plus_one;
 
-    sample_t tmp_a_i;
-    sample_t tmp_a_r;
     sample_t tmp_b_i;
     sample_t tmp_b_r;
+#endif
+    sample_t tmp_a_i;
+    sample_t tmp_a_r;
 
     sample_t *data_ptr;
     sample_t *delay_ptr;
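The w_* arrays above only reserve aligned storage for the FFT twiddle factors; they are filled once at initialization. Below is a standalone sketch of the kind of initialization the merge loop relies on, where stage m consumes the 2^m roots of unity exp(-j*pi*k/2^m). The loop is modeled on liba52's imdct_init and is illustrative, not part of this patch:

#include <math.h>

#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

typedef struct { float real, imag; } complex_t;

static complex_t w_1[1], w_2[2], w_4[4], w_8[8], w_16[16], w_32[32], w_64[64];
static complex_t *w[7] = { w_1, w_2, w_4, w_8, w_16, w_32, w_64 };

/* Fill stage m with the 2^m twiddle factors exp(-j*pi*k/2^m), which is
 * what the complex multiply in the FFT merge below consumes. */
static void init_twiddles(void)
{
    int m, k;
    for (m = 0; m < 7; m++) {
        int n = 1 << m;
        for (k = 0; k < n; k++) {
            w[m][k].real = (float) cos(-M_PI * k / n);
            w[m][k].imag = (float) sin(-M_PI * k / n);
        }
    }
}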
@@ -162,22 +218,21 @@
     /* 512 IMDCT with source and dest data in 'data' */
 
-    /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
+    /* Pre IFFT complex multiply plus IFFT cmplx conjugate & reordering */
     for( i=0; i < 128; i++) {
         /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */
-        buf[i].real = (data[256-2*i-1] * xcos1[i]) - (data[2*i] * xsin1[i]);
-        buf[i].imag = -1.0 * ((data[2*i] * xcos1[i]) + (data[256-2*i-1] * xsin1[i]));
-    }
-
-    /* Bit reversed shuffling */
-    for(i=0; i<128; i++) {
-        k = bit_reverse_512[i];
-        if (k < i)
-            swap_cmplx(&buf[i],&buf[k]);
+#ifdef USE_AC3_C
+        int j= pm128[i];
+#else
+        int j= bit_reverse_512[i];
+#endif
+        buf[i].real = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]);
+        buf[i].imag = -1.0 * ((data[2*j] * xcos1[j]) + (data[256-2*j-1] * xsin1[j]));
     }
 
     /* FFT Merge */
-    for (m=0; m < 7; m++) {
+/* unoptimized variant
+    for (m=1; m < 7; m++) {
         if(m)
             two_m = (1 << m);
         else
@@ -185,8 +240,8 @@
 
         two_m_plus_one = (1 << (m+1));
 
-        for(k = 0; k < two_m; k++) {
-            for(i = 0; i < 128; i += two_m_plus_one) {
+        for(i = 0; i < 128; i += two_m_plus_one) {
+            for(k = 0; k < two_m; k++) {
                 p = k + i;
                 q = p + two_m;
                 tmp_a_r = buf[p].real;
@@ -200,7 +255,102 @@
             }
         }
     }
+*/
+#ifdef USE_AC3_C
+    fft_128p (&buf[0]);
+#else
+
+    /* 1. iteration */
+    for(i = 0; i < 128; i += 2) {
+        tmp_a_r = buf[i].real;
+        tmp_a_i = buf[i].imag;
+        tmp_b_r = buf[i+1].real;
+        tmp_b_i = buf[i+1].imag;
+        buf[i].real = tmp_a_r + tmp_b_r;
+        buf[i].imag = tmp_a_i + tmp_b_i;
+        buf[i+1].real = tmp_a_r - tmp_b_r;
+        buf[i+1].imag = tmp_a_i - tmp_b_i;
+    }
+
+    /* 2. iteration */
+    // Note w[1]={{1,0}, {0,-1}}
+    for(i = 0; i < 128; i += 4) {
+        tmp_a_r = buf[i].real;
+        tmp_a_i = buf[i].imag;
+        tmp_b_r = buf[i+2].real;
+        tmp_b_i = buf[i+2].imag;
+        buf[i].real = tmp_a_r + tmp_b_r;
+        buf[i].imag = tmp_a_i + tmp_b_i;
+        buf[i+2].real = tmp_a_r - tmp_b_r;
+        buf[i+2].imag = tmp_a_i - tmp_b_i;
+        tmp_a_r = buf[i+1].real;
+        tmp_a_i = buf[i+1].imag;
+        tmp_b_r = buf[i+3].imag;
+        tmp_b_i = buf[i+3].real;
+        buf[i+1].real = tmp_a_r + tmp_b_r;
+        buf[i+1].imag = tmp_a_i - tmp_b_i;
+        buf[i+3].real = tmp_a_r - tmp_b_r;
+        buf[i+3].imag = tmp_a_i + tmp_b_i;
+    }
+
+    /* 3. iteration */
+    for(i = 0; i < 128; i += 8) {
+        tmp_a_r = buf[i].real;
+        tmp_a_i = buf[i].imag;
+        tmp_b_r = buf[i+4].real;
+        tmp_b_i = buf[i+4].imag;
+        buf[i].real = tmp_a_r + tmp_b_r;
+        buf[i].imag = tmp_a_i + tmp_b_i;
+        buf[i+4].real = tmp_a_r - tmp_b_r;
+        buf[i+4].imag = tmp_a_i - tmp_b_i;
+        tmp_a_r = buf[1+i].real;
+        tmp_a_i = buf[1+i].imag;
+        tmp_b_r = (buf[i+5].real + buf[i+5].imag) * w[2][1].real;
+        tmp_b_i = (buf[i+5].imag - buf[i+5].real) * w[2][1].real;
+        buf[1+i].real = tmp_a_r + tmp_b_r;
+        buf[1+i].imag = tmp_a_i + tmp_b_i;
+        buf[i+5].real = tmp_a_r - tmp_b_r;
+        buf[i+5].imag = tmp_a_i - tmp_b_i;
+        tmp_a_r = buf[i+2].real;
+        tmp_a_i = buf[i+2].imag;
+        tmp_b_r = buf[i+6].imag;
+        tmp_b_i = - buf[i+6].real;
+        buf[i+2].real = tmp_a_r + tmp_b_r;
+        buf[i+2].imag = tmp_a_i + tmp_b_i;
+        buf[i+6].real = tmp_a_r - tmp_b_r;
+        buf[i+6].imag = tmp_a_i - tmp_b_i;
+        tmp_a_r = buf[i+3].real;
+        tmp_a_i = buf[i+3].imag;
+        tmp_b_r = (buf[i+7].real - buf[i+7].imag) * w[2][3].imag;
+        tmp_b_i = (buf[i+7].imag + buf[i+7].real) * w[2][3].imag;
+        buf[i+3].real = tmp_a_r + tmp_b_r;
+        buf[i+3].imag = tmp_a_i + tmp_b_i;
+        buf[i+7].real = tmp_a_r - tmp_b_r;
+        buf[i+7].imag = tmp_a_i - tmp_b_i;
+    }
+
+    /* 4-7. iterations */
+    for (m=3; m < 7; m++) {
+        two_m = (1 << m);
+
+        two_m_plus_one = two_m<<1;
+
+        for(i = 0; i < 128; i += two_m_plus_one) {
+            for(k = 0; k < two_m; k++) {
+                int p = k + i;
+                int q = p + two_m;
+                tmp_a_r = buf[p].real;
+                tmp_a_i = buf[p].imag;
+                tmp_b_r = buf[q].real * w[m][k].real - buf[q].imag * w[m][k].imag;
+                tmp_b_i = buf[q].imag * w[m][k].real + buf[q].real * w[m][k].imag;
+                buf[p].real = tmp_a_r + tmp_b_r;
+                buf[p].imag = tmp_a_i + tmp_b_i;
+                buf[q].real = tmp_a_r - tmp_b_r;
+                buf[q].imag = tmp_a_i - tmp_b_i;
+            }
+        }
+    }
+#endif
 
     /* Post IFFT complex multiply plus IFFT complex conjugate*/
     for( i=0; i < 128; i++) {
         /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
@@ -219,12 +369,12 @@
         *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias;
         *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias;
     }
-
+    
     for(i=0; i< 64; i++) {
         *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias;
         *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias;
     }
-
+    
     /* The trailing edge of the window goes into the delay line */
     delay_ptr = delay;
 
@@ -232,13 +382,717 @@
         *delay_ptr++ = -buf[64+i].real * *--window_ptr;
         *delay_ptr++ = buf[64-i-1].imag * *--window_ptr;
     }
-
+    
    for(i=0; i<64; i++) {
         *delay_ptr++ = buf[i].imag * *--window_ptr;
         *delay_ptr++ = -buf[128-i-1].real * *--window_ptr;
     }
 }
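The hand-unrolled iterations 1-3 above exist because their twiddle factors are trivial (1, -j, and (1-j)*sqrt(2)/2 patterns), so the generic complex multiply of the merge loop degenerates into adds, swaps, and sign flips. A small self-contained check of that equivalence for the w = -j case used in the 2nd iteration; all names here are local to the example, not liba52 APIs:

#include <stdio.h>

typedef struct { float real, imag; } complex_t;

/* Generic radix-2 butterfly: b is pre-multiplied by the twiddle w. */
static void butterfly(complex_t *a, complex_t *b, complex_t w)
{
    float br = b->real * w.real - b->imag * w.imag;
    float bi = b->imag * w.real + b->real * w.imag;
    complex_t t = *a;
    a->real = t.real + br;  a->imag = t.imag + bi;
    b->real = t.real - br;  b->imag = t.imag - bi;
}

int main(void)
{
    /* With w = (0,-1), i.e. -j, the product (br,bi) collapses to
     * (b.imag, -b.real): exactly the swapped loads and sign flips the
     * "2. iteration" uses instead of a full complex multiply. */
    complex_t a = { 1.0f, 2.0f }, b = { 3.0f, 4.0f };
    complex_t w_minus_j = { 0.0f, -1.0f };
    butterfly(&a, &b, w_minus_j);
    printf("a = (%g, %g), b = (%g, %g)\n", a.real, a.imag, b.real, b.imag);
    /* a = (5, -1), b = (-3, 5) */
    return 0;
}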
 
+#ifdef HAVE_ALTIVEC
+
+#ifndef SYS_DARWIN
+#include <altivec.h>
+#endif
+
+// used to build registers permutation vectors (vcprm)
+// the 's' are for words in the _s_econd vector
+#define WORD_0 0x00,0x01,0x02,0x03
+#define WORD_1 0x04,0x05,0x06,0x07
+#define WORD_2 0x08,0x09,0x0a,0x0b
+#define WORD_3 0x0c,0x0d,0x0e,0x0f
+#define WORD_s0 0x10,0x11,0x12,0x13
+#define WORD_s1 0x14,0x15,0x16,0x17
+#define WORD_s2 0x18,0x19,0x1a,0x1b
+#define WORD_s3 0x1c,0x1d,0x1e,0x1f
+
+#ifdef SYS_DARWIN
+#define vcprm(a,b,c,d) (const vector unsigned char)(WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d)
+#else
+#define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d}
+#endif
+
+// vcprmle is used to keep the same index as in the SSE version.
+// it's the same as vcprm, with the index inversed
+// ('le' is Little Endian)
+#define vcprmle(a,b,c,d) vcprm(d,c,b,a)
+
+// used to build inverse/identity vectors (vcii)
+// n is _n_egative, p is _p_ositive
+#define FLOAT_n -1.
+#define FLOAT_p 1.
+
+#ifdef SYS_DARWIN
+#define vcii(a,b,c,d) (const vector float)(FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d)
+#else
+#define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d}
+#endif
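The vcprm and vcii macros above build, at compile time, the selector constants for vec_perm and the +/-1.0 sign masks used with vec_madd. A usage sketch assuming GCC's AltiVec vector-literal syntax (the non-Darwin branch); it is standalone, repeats the needed macros, and is not part of the patch:

#include <altivec.h>
#include <stdio.h>

/* Non-Darwin forms of the patch's macros, repeated so this compiles alone. */
#define WORD_0  0x00,0x01,0x02,0x03
#define WORD_1  0x04,0x05,0x06,0x07
#define WORD_s0 0x10,0x11,0x12,0x13
#define WORD_s1 0x14,0x15,0x16,0x17
#define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d}
#define FLOAT_n -1.
#define FLOAT_p 1.
#define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d}

int main(void)
{
    vector float a = { 0.0f, 1.0f, 2.0f, 3.0f };
    vector float b = { 4.0f, 5.0f, 6.0f, 7.0f };

    /* vec_perm picks words 0,1 of a and words 0,1 of b: {0,1,4,5}. */
    vector float lo = vec_perm(a, b, vcprm(0, 1, s0, s1));

    /* vec_madd with a vcii mask negates selected lanes: {0,-1,4,-5}. */
    vector float alt = vec_madd(lo, vcii(p, n, p, n),
                                (vector float) vec_splat_u32(0));

    float out[4] __attribute__((aligned(16)));
    vec_st(alt, 0, out);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);
    return 0;
}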