📄 liba52_changes.diff

📁 自己移植的linux下的流媒体播放器原代码,支持mms协议,支持ftp和http协议.
💻 DIFF
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 45
+#endif++#ifdef SYS_DARWIN+#define FOUROF(a) (a)+#else+#define FOUROF(a) {a,a,a,a}+#endif+++void+imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias)+{+  int i;+  int k;+  int p,q;+  int m;+  int two_m;+  int two_m_plus_one;++  sample_t tmp_b_i;+  sample_t tmp_b_r;+  sample_t tmp_a_i;+  sample_t tmp_a_r;++  sample_t *data_ptr;+  sample_t *delay_ptr;+  sample_t *window_ptr;+	+  /* 512 IMDCT with source and dest data in 'data' */+	+  /* Pre IFFT complex multiply plus IFFT cmplx conjugate & reordering*/+  for( i=0; i < 128; i++) {+    /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */ +    int j= bit_reverse_512[i];+    buf[i].real =         (data[256-2*j-1] * xcos1[j])  -  (data[2*j]       * xsin1[j]);+    buf[i].imag = -1.0 * ((data[2*j]       * xcos1[j])  +  (data[256-2*j-1] * xsin1[j]));+  }+  +  /* 1. iteration */+  for(i = 0; i < 128; i += 2) {+#if 0+    tmp_a_r = buf[i].real;+    tmp_a_i = buf[i].imag;+    tmp_b_r = buf[i+1].real;+    tmp_b_i = buf[i+1].imag;+    buf[i].real = tmp_a_r + tmp_b_r;+    buf[i].imag =  tmp_a_i + tmp_b_i;+    buf[i+1].real = tmp_a_r - tmp_b_r;+    buf[i+1].imag =  tmp_a_i - tmp_b_i;+#else+    vector float temp, bufv; ++    bufv = vec_ld(i << 3, (float*)buf);+    temp = vec_perm(bufv, bufv, vcprm(2,3,0,1));+    bufv = vec_madd(bufv, vcii(p,p,n,n), temp);+    vec_st(bufv, i << 3, (float*)buf);+#endif+  }+        +  /* 2. iteration */+  // Note w[1]={{1,0}, {0,-1}}+  for(i = 0; i < 128; i += 4) {+#if 0+    tmp_a_r = buf[i].real;+    tmp_a_i = buf[i].imag;+    tmp_b_r = buf[i+2].real;+    tmp_b_i = buf[i+2].imag;+    buf[i].real = tmp_a_r + tmp_b_r;+    buf[i].imag =  tmp_a_i + tmp_b_i;+    buf[i+2].real = tmp_a_r - tmp_b_r;+    buf[i+2].imag =  tmp_a_i - tmp_b_i;+    tmp_a_r = buf[i+1].real;+    tmp_a_i = buf[i+1].imag;+    /* WARNING: im <-> re here ! */+    tmp_b_r = buf[i+3].imag;+    tmp_b_i = buf[i+3].real;+    buf[i+1].real = tmp_a_r + tmp_b_r;+    buf[i+1].imag =  tmp_a_i - tmp_b_i;+    buf[i+3].real = tmp_a_r - tmp_b_r;+    buf[i+3].imag =  tmp_a_i + tmp_b_i;+#else+    vector float buf01, buf23, temp1, temp2;+	+    buf01 = vec_ld((i + 0) << 3, (float*)buf);+    buf23 = vec_ld((i + 2) << 3, (float*)buf);+    buf23 = vec_perm(buf23,buf23,vcprm(0,1,3,2));++    temp1 = vec_madd(buf23, vcii(p,p,p,n), buf01);+    temp2 = vec_madd(buf23, vcii(n,n,n,p), buf01);++    vec_st(temp1, (i + 0) << 3, (float*)buf);+    vec_st(temp2, (i + 2) << 3, (float*)buf);+#endif+  }++  /* 3. iteration */+  for(i = 0; i < 128; i += 8) {+#if 0+    tmp_a_r = buf[i].real;+    tmp_a_i = buf[i].imag;+    tmp_b_r = buf[i+4].real;+    tmp_b_i = buf[i+4].imag;+    buf[i].real = tmp_a_r + tmp_b_r;+    buf[i].imag =  tmp_a_i + tmp_b_i;+    buf[i+4].real = tmp_a_r - tmp_b_r;+    buf[i+4].imag =  tmp_a_i - tmp_b_i;+    tmp_a_r = buf[1+i].real;+    tmp_a_i = buf[1+i].imag;+    tmp_b_r = (buf[i+5].real + buf[i+5].imag) * w[2][1].real;+    tmp_b_i = (buf[i+5].imag - buf[i+5].real) * w[2][1].real;+    buf[1+i].real = tmp_a_r + tmp_b_r;+    buf[1+i].imag =  tmp_a_i + tmp_b_i;+    buf[i+5].real = tmp_a_r - tmp_b_r;+    buf[i+5].imag =  tmp_a_i - tmp_b_i;+    tmp_a_r = buf[i+2].real;+    tmp_a_i = buf[i+2].imag;+    /* WARNING re <-> im & sign */+    tmp_b_r = buf[i+6].imag;+    tmp_b_i = - buf[i+6].real;+    buf[i+2].real = tmp_a_r + tmp_b_r;+    buf[i+2].imag =  tmp_a_i + tmp_b_i;+    buf[i+6].real = tmp_a_r - tmp_b_r;+    buf[i+6].imag =  tmp_a_i - tmp_b_i;+    tmp_a_r = buf[i+3].real;+    tmp_a_i = buf[i+3].imag;+    tmp_b_r = (buf[i+7].real - buf[i+7].imag) * w[2][3].imag;+    tmp_b_i = (buf[i+7].imag + buf[i+7].real) * w[2][3].imag;+    buf[i+3].real = tmp_a_r + tmp_b_r;+    buf[i+3].imag =  tmp_a_i + tmp_b_i;+    buf[i+7].real = tmp_a_r - tmp_b_r;+    buf[i+7].imag =  tmp_a_i - tmp_b_i;+#else+    vector float buf01, buf23, buf45, buf67;++    buf01 = vec_ld((i + 0) << 3, (float*)buf);+    buf23 = vec_ld((i + 2) << 3, (float*)buf);++    tmp_b_r = (buf[i+5].real + buf[i+5].imag) * w[2][1].real;+    tmp_b_i = (buf[i+5].imag - buf[i+5].real) * w[2][1].real;+    buf[i+5].real = tmp_b_r;+    buf[i+5].imag = tmp_b_i;+    tmp_b_r = (buf[i+7].real - buf[i+7].imag) * w[2][3].imag;+    tmp_b_i = (buf[i+7].imag + buf[i+7].real) * w[2][3].imag;+    buf[i+7].real = tmp_b_r;+    buf[i+7].imag = tmp_b_i;++    buf23 = vec_ld((i + 2) << 3, (float*)buf);+    buf45 = vec_ld((i + 4) << 3, (float*)buf);+    buf67 = vec_ld((i + 6) << 3, (float*)buf);+    buf67 = vec_perm(buf67, buf67, vcprm(1,0,2,3));+	+    vec_st(vec_add(buf01, buf45), (i + 0) << 3, (float*)buf);+    vec_st(vec_madd(buf67, vcii(p,n,p,p), buf23), (i + 2) << 3, (float*)buf);+    vec_st(vec_sub(buf01, buf45), (i + 4) << 3, (float*)buf);+    vec_st(vec_nmsub(buf67, vcii(p,n,p,p), buf23), (i + 6) << 3, (float*)buf);+#endif+  }+    +  /* 4-7. iterations */+  for (m=3; m < 7; m++) {+    two_m = (1 << m);++    two_m_plus_one = two_m<<1;++    for(i = 0; i < 128; i += two_m_plus_one) {+      for(k = 0; k < two_m; k+=2) {+#if 0+        int p = k + i;+        int q = p + two_m;+        tmp_a_r = buf[p].real;+        tmp_a_i = buf[p].imag;+        tmp_b_r =+          buf[q].real * w[m][k].real -+          buf[q].imag * w[m][k].imag;+        tmp_b_i =+          buf[q].imag * w[m][k].real ++          buf[q].real * w[m][k].imag;+        buf[p].real = tmp_a_r + tmp_b_r;+        buf[p].imag =  tmp_a_i + tmp_b_i;+        buf[q].real = tmp_a_r - tmp_b_r;+        buf[q].imag =  tmp_a_i - tmp_b_i;++        tmp_a_r = buf[(p + 1)].real;+        tmp_a_i = buf[(p + 1)].imag;+        tmp_b_r =+          buf[(q + 1)].real * w[m][(k + 1)].real -+          buf[(q + 1)].imag * w[m][(k + 1)].imag;+        tmp_b_i =+          buf[(q + 1)].imag * w[m][(k + 1)].real ++          buf[(q + 1)].real * w[m][(k + 1)].imag;+        buf[(p + 1)].real = tmp_a_r + tmp_b_r;+        buf[(p + 1)].imag =  tmp_a_i + tmp_b_i;+        buf[(q + 1)].real = tmp_a_r - tmp_b_r;+        buf[(q + 1)].imag =  tmp_a_i - tmp_b_i;+#else+        int p = k + i;+        int q = p + two_m;+        vector float vecp, vecq, vecw, temp1, temp2, temp3, temp4;+        const vector float vczero = (const vector float)FOUROF(0.);+        // first compute buf[q] and buf[q+1]+        vecq = vec_ld(q << 3, (float*)buf);+        vecw = vec_ld(0, (float*)&(w[m][k]));+        temp1 = vec_madd(vecq, vecw, vczero);+        temp2 = vec_perm(vecq, vecq, vcprm(1,0,3,2));+        temp2 = vec_madd(temp2, vecw, vczero);+        temp3 = vec_perm(temp1, temp2, vcprm(0,s0,2,s2));+        temp4 = vec_perm(temp1, temp2, vcprm(1,s1,3,s3));+        vecq = vec_madd(temp4, vcii(n,p,n,p), temp3);+        // then butterfly with buf[p] and buf[p+1]+        vecp = vec_ld(p << 3, (float*)buf);+        +        temp1 = vec_add(vecp, vecq);+        temp2 = vec_sub(vecp, vecq);+                +        vec_st(temp1, p << 3, (float*)buf);+        vec_st(temp2, q << 3, (float*)buf);+#endif+      }+    }+  }++  /* Post IFFT complex multiply  plus IFFT complex conjugate*/+  for( i=0; i < 128; i+=4) {+    /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */+#if 0+    tmp_a_r =        buf[(i + 0)].real;+    tmp_a_i = -1.0 * buf[(i + 0)].imag;+    buf[(i + 0)].real =+      (tmp_a_r * xcos1[(i + 0)])  -  (tmp_a_i  * xsin1[(i + 0)]);+    buf[(i + 0)].imag =+      (tmp_a_r * xsin1[(i + 0)])  +  (tmp_a_i  * xcos1[(i + 0)]);++    tmp_a_r =        buf[(i + 1)].real;+    tmp_a_i = -1.0 * buf[(i + 1)].imag;+    buf[(i + 1)].real =+      (tmp_a_r * xcos1[(i + 1)])  -  (tmp_a_i  * xsin1[(i + 1)]);+    buf[(i + 1)].imag =+      (tmp_a_r * xsin1[(i + 1)])  +  (tmp_a_i  * xcos1[(i + 1)]);++    tmp_a_r =        buf[(i + 2)].real;+    tmp_a_i = -1.0 * buf[(i + 2)].imag;+    buf[(i + 2)].real =+      (tmp_a_r * xcos1[(i + 2)])  -  (tmp_a_i  * xsin1[(i + 2)]);+    buf[(i + 2)].imag =+      (tmp_a_r * xsin1[(i + 2)])  +  (tmp_a_i  * xcos1[(i + 2)]);++    tmp_a_r =        buf[(i + 3)].real;+    tmp_a_i = -1.0 * buf[(i + 3)].imag;+    buf[(i + 3)].real =+      (tmp_a_r * xcos1[(i + 3)])  -  (tmp_a_i  * xsin1[(i + 3)]);+    buf[(i + 3)].imag =+      (tmp_a_r * xsin1[(i + 3)])  +  (tmp_a_i  * xcos1[(i + 3)]);+#else+    vector float bufv_0, bufv_2, cosv, sinv, temp1, temp2;+    vector float temp0022, temp1133, tempCS01;+    const vector float vczero = (const vector float)FOUROF(0.);++    bufv_0 = vec_ld((i + 0) << 3, (float*)buf);+    bufv_2 = vec_ld((i + 2) << 3, (float*)buf);++    cosv = vec_ld(i << 2, xcos1);+    sinv = vec_ld(i << 2, xsin1);++    temp0022 = vec_perm(bufv_0, bufv_0, vcprm(0,0,2,2));+    temp1133 = vec_perm(bufv_0, bufv_0, vcprm(1,1,3,3));+    tempCS01 = vec_perm(cosv, sinv, vcprm(0,s0,1,s1));+    temp1 = vec_madd(temp0022, tempCS01, vczero);+    tempCS01 = vec_perm(cosv, sinv, vcprm(s0,0,s1,1));+    temp2 = vec_madd(temp1133, tempCS01, vczero);+    bufv_0 = vec_madd(temp2, vcii(p,n,p,n), temp1);+    +    vec_st(bufv_0, (i + 0) << 3, (float*)buf);++    /* idem with bufv_2 and high-order cosv/sinv */++    temp0022 = vec_perm(bufv_2, bufv_2, vcprm(0,0,2,2));+    temp1133 = vec_perm(bufv_2, bufv_2, vcprm(1,1,3,3));+    tempCS01 = vec_perm(cosv, sinv, vcprm(2,s2,3,s3));+    temp1 = vec_madd(temp0022, tempCS01, vczero);+    tempCS01 = vec_perm(cosv, sinv, vcprm(s2,2,s3,3));+    temp2 = vec_madd(temp1133, tempCS01, vczero);+    bufv_2 = vec_madd(temp2, vcii(p,n,p,n), temp1);++    vec_st(bufv_2, (i + 2) << 3, (float*)buf);+    +#endif+  }+  +  data_ptr = data;+  delay_ptr = delay;+  window_ptr = imdct_window;++  /* Window and convert to real valued signal */+  for(i=0; i< 64; i++) { +    *data_ptr++   = -buf[64+i].imag   * *window_ptr++ + *delay_ptr++ + bias; +    *data_ptr++   =  buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias; +  }+    +  for(i=0; i< 64; i++) { +    *data_ptr++  = -buf[i].real       * *window_ptr++ + *delay_ptr++ + bias; +    *data_ptr++  =  buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias; +  }+    +  /* The trailing edge of the window goes into the delay line */+  delay_ptr = delay;++  for(i=0; i< 64; i++) { +    *delay_ptr++  = -buf[64+i].real   * *--window_ptr; +    *delay_ptr++  =  buf[64-i-1].imag * *--window_ptr; +  }+    +  for(i=0; i<64; i++) {+    *delay_ptr++  =  buf[i].imag       * *--window_ptr; +    *delay_ptr++  = -buf[128-i-1].real * *--window_ptr; +  }+}+#endif+++// Stuff below this line is borrowed from libac3+#include "srfftp.h"+#ifdef ARCH_X86+#ifndef HAVE_3DNOW+#define HAVE_3DNOW 1+#endif+#include "srfftp_3dnow.h"++const i_cmplx_t x_plus_minus_3dnow __attribute__ ((aligned (8))) = {{ 0x00000000UL, 0x80000000UL }}; +const i_cmplx_t x_minus_plus_3dnow __attribute__ ((aligned (8))) = {{ 0x80000000UL, 0x00000000UL }}; +const complex_t HSQRT2_3DNOW __attribute__ ((aligned (8))) = { 0.707106781188, 0.707106781188 };++#undef HAVE_3DNOWEX+#include "imdct_3dnow.h"+#define HAVE_3DNOWEX+#include "imdct_3dnow.h"++void+imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)+{+/*	int i,k;+    int p,q;*/+    int m;+    int two_m;+    int two_m_plus_one;++/*  sample_t tmp_a_i;+    sample_t tmp_a_r;+    sample_t tmp_b_i;+    sample_t tmp_b_r;*/++    sample_t *data_ptr;+    sample_t *delay_ptr;+    sample_t *window_ptr;+	+    /* 512 IMDCT with source and dest data in 'data' */+    /* see the c version (dct_do_512()), its allmost identical, just in C */ ++    /* Pre IFFT complex multiply plus IFFT cmplx conjugate */+    /* Bit reversed shuffling */+	asm volatile(+		"xorl %%esi, %%esi			\n\t"+		"leal "MANGLE(bit_reverse_512)", %%eax	\n\t"+		"movl $1008, %%edi			\n\t"+		"pushl %%ebp				\n\t" //use ebp without telling gcc+		".balign 16				\n\t"+		"1:					\n\t"+		"movlps (%0, %%esi), %%xmm0		\n\t" // XXXI+		"movhps 8(%0, %%edi), %%xmm0		\n\t" // RXXI+		"movlps 8(%0, %%esi), %%xmm1		\n\t" // XXXi+		"movhps (%0, %%edi), %%xmm1		\n\t" // rXXi+		"shufps $0x33, %%xmm1, %%xmm0		\n\t" // irIR+		"movaps "MANGLE(sseSinCos1c)"(%%esi), %%xmm2\n\t"+		"mulps %%xmm0, %%xmm2			\n\t"+		"shufps $0xB1, %%xmm0, %%xmm0		\n\t" // riRI+		"mulps "MANGLE(sseSinCos1d)"(%%esi), %%xmm0\n\t"+		"subps %%xmm0, %%xmm2			\n\t"+		"movzbl (%%eax), %%edx			\n\t"+		"movzbl 1(%%eax), %%ebp			\n\t"+		"movlps %%xmm2, (%1, %%edx,8)		\n\t"+		"movhps %%xmm2, (%1, %%ebp,8)		\n\t"+		"addl $16, %%esi			\n\t"+		"addl $2, %%eax				\n\t" // avoid complex addressing for P4 crap+		"subl $16, %%edi			\n\t"+		" jnc 1b				\n\t"+		"popl %%ebp				\n\t"//no we didnt touch ebp *g*+		:: "b" (data), "c" (buf)+		: "%esi", "%edi", "%eax", "%edx"+	);+++    /* FFT Merge */+/* unoptimized variant+    for (m=1; m < 7; m++) {+	if(m)+	    two_m = (1 << m);+	else+	    two_m = 1;++	two_m_plus_one = (1 << (m+1));++	for(i = 0; i < 128; i += two_m_plus_one) {+	    for(k = 0; k < two_m; k++) {+		p = k + i;+		q = p + two_m;+		tmp_a_r = buf[p].real;+		tmp_a_i = buf[p].imag;+		tmp_b_r = buf[q].real * w[m][k].real - buf[q].imag * w[m][k].imag;+		tmp_b_i = buf[q].imag * w[m][k].real + buf[q].real * w[m][k].imag;+		buf[p].real = tmp_a_r + tmp_b_r;+		buf[p].imag =  tmp_a_i + tmp_b_i;+		buf[q].real = tmp_a_r - tmp_b_r;+		buf[q].imag =  tmp_a_i - tmp_b_i;+	    }+	}+    }+*/+    +    /* 1. iteration */+	// Note w[0][0]={1,0}+	asm volatile(+		"xorps %%xmm1, %%xmm1	\n\t"+		"xorps %%xmm2, %%xmm2	\n\t"+		"movl %0, %%esi		\n\t"+		".balign 16				\n\t"+		"1:			\n\t"+		"movlps (%%esi), %%xmm0	\n\t" //buf[p]+		"movlps 8(%%esi), %%xmm1\n\t" //buf[q]+		"movhps (%%esi), %%xmm0	\n\t" //buf[p]+		"movhps 8(%%esi), %%xmm2\n\t" //buf[q]+		"addps %%xmm1, %%xmm0	\n\t"+		"subps %%xmm2, %%xmm0	\n\t"+		"movaps %%xmm0, (%%esi)	\n\t"+		"addl $16, %%esi	\n\t"+		"cmpl %1, %%esi		\n\t"+		" jb 1b			\n\t"+		:: "g" (buf), "r" (buf + 128)+		: "%esi"+	);+        +    /* 2. iteration */+	// Note w[1]={{1,0}, {0,-1}}+	asm volatile(+		"movaps "MANGLE(ps111_1)", %%xmm7\n\t" // 1,1,1,-1+		"movl %0, %%esi			\n\t"+		".balign 16				\n\t"+		"1:				\n\t"+		"movaps 16(%%esi), %%xmm2	\n\t" //r2,i2,r3,i3+		"shufps $0xB4, %%xmm2, %%xmm2	\n\t" //r2,i2,i3,r3+		"mulps %%xmm7, %%xmm2		\n\t" //r2,i2,i3,-r3+		"movaps (%%esi), %%xmm0		\n\t" //r0,i0,r1,i1+		"movaps (%%esi), %%xmm1
上一页 1 2 3 45
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -