📄 imdct.c
字号:
#define WORD_0 0x00,0x01,0x02,0x03#define WORD_1 0x04,0x05,0x06,0x07#define WORD_2 0x08,0x09,0x0a,0x0b#define WORD_3 0x0c,0x0d,0x0e,0x0f#define WORD_s0 0x10,0x11,0x12,0x13#define WORD_s1 0x14,0x15,0x16,0x17#define WORD_s2 0x18,0x19,0x1a,0x1b#define WORD_s3 0x1c,0x1d,0x1e,0x1f#ifdef SYS_DARWIN#define vcprm(a,b,c,d) (const vector unsigned char)(WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d)#else#define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d}#endif// vcprmle is used to keep the same index as in the SSE version.// it's the same as vcprm, with the index inversed// ('le' is Little Endian)#define vcprmle(a,b,c,d) vcprm(d,c,b,a)// used to build inverse/identity vectors (vcii)// n is _n_egative, p is _p_ositive#define FLOAT_n -1.#define FLOAT_p 1.#ifdef SYS_DARWIN#define vcii(a,b,c,d) (const vector float)(FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d)#else#define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d}#endif#ifdef SYS_DARWIN#define FOUROF(a) (a)#else#define FOUROF(a) {a,a,a,a}#endifvoidimdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias){ int i; int k; int p,q; int m; int two_m; int two_m_plus_one; sample_t tmp_b_i; sample_t tmp_b_r; sample_t tmp_a_i; sample_t tmp_a_r; sample_t *data_ptr; sample_t *delay_ptr; sample_t *window_ptr; /* 512 IMDCT with source and dest data in 'data' */ /* Pre IFFT complex multiply plus IFFT cmplx conjugate & reordering*/ for( i=0; i < 128; i++) { /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */ int j= bit_reverse_512[i]; buf[i].real = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]); buf[i].imag = -1.0 * ((data[2*j] * xcos1[j]) + (data[256-2*j-1] * xsin1[j])); } /* 1. iteration */ for(i = 0; i < 128; i += 2) {#if 0 tmp_a_r = buf[i].real; tmp_a_i = buf[i].imag; tmp_b_r = buf[i+1].real; tmp_b_i = buf[i+1].imag; buf[i].real = tmp_a_r + tmp_b_r; buf[i].imag = tmp_a_i + tmp_b_i; buf[i+1].real = tmp_a_r - tmp_b_r; buf[i+1].imag = tmp_a_i - tmp_b_i;#else vector float temp, bufv; bufv = vec_ld(i << 3, (float*)buf); temp = vec_perm(bufv, bufv, vcprm(2,3,0,1)); bufv = vec_madd(bufv, vcii(p,p,n,n), temp); vec_st(bufv, i << 3, (float*)buf);#endif } /* 2. iteration */ // Note w[1]={{1,0}, {0,-1}} for(i = 0; i < 128; i += 4) {#if 0 tmp_a_r = buf[i].real; tmp_a_i = buf[i].imag; tmp_b_r = buf[i+2].real; tmp_b_i = buf[i+2].imag; buf[i].real = tmp_a_r + tmp_b_r; buf[i].imag = tmp_a_i + tmp_b_i; buf[i+2].real = tmp_a_r - tmp_b_r; buf[i+2].imag = tmp_a_i - tmp_b_i; tmp_a_r = buf[i+1].real; tmp_a_i = buf[i+1].imag; /* WARNING: im <-> re here ! */ tmp_b_r = buf[i+3].imag; tmp_b_i = buf[i+3].real; buf[i+1].real = tmp_a_r + tmp_b_r; buf[i+1].imag = tmp_a_i - tmp_b_i; buf[i+3].real = tmp_a_r - tmp_b_r; buf[i+3].imag = tmp_a_i + tmp_b_i;#else vector float buf01, buf23, temp1, temp2; buf01 = vec_ld((i + 0) << 3, (float*)buf); buf23 = vec_ld((i + 2) << 3, (float*)buf); buf23 = vec_perm(buf23,buf23,vcprm(0,1,3,2)); temp1 = vec_madd(buf23, vcii(p,p,p,n), buf01); temp2 = vec_madd(buf23, vcii(n,n,n,p), buf01); vec_st(temp1, (i + 0) << 3, (float*)buf); vec_st(temp2, (i + 2) << 3, (float*)buf);#endif } /* 3. iteration */ for(i = 0; i < 128; i += 8) {#if 0 tmp_a_r = buf[i].real; tmp_a_i = buf[i].imag; tmp_b_r = buf[i+4].real; tmp_b_i = buf[i+4].imag; buf[i].real = tmp_a_r + tmp_b_r; buf[i].imag = tmp_a_i + tmp_b_i; buf[i+4].real = tmp_a_r - tmp_b_r; buf[i+4].imag = tmp_a_i - tmp_b_i; tmp_a_r = buf[1+i].real; tmp_a_i = buf[1+i].imag; tmp_b_r = (buf[i+5].real + buf[i+5].imag) * w[2][1].real; tmp_b_i = (buf[i+5].imag - buf[i+5].real) * w[2][1].real; buf[1+i].real = tmp_a_r + tmp_b_r; buf[1+i].imag = tmp_a_i + tmp_b_i; buf[i+5].real = tmp_a_r - tmp_b_r; buf[i+5].imag = tmp_a_i - tmp_b_i; tmp_a_r = buf[i+2].real; tmp_a_i = buf[i+2].imag; /* WARNING re <-> im & sign */ tmp_b_r = buf[i+6].imag; tmp_b_i = - buf[i+6].real; buf[i+2].real = tmp_a_r + tmp_b_r; buf[i+2].imag = tmp_a_i + tmp_b_i; buf[i+6].real = tmp_a_r - tmp_b_r; buf[i+6].imag = tmp_a_i - tmp_b_i; tmp_a_r = buf[i+3].real; tmp_a_i = buf[i+3].imag; tmp_b_r = (buf[i+7].real - buf[i+7].imag) * w[2][3].imag; tmp_b_i = (buf[i+7].imag + buf[i+7].real) * w[2][3].imag; buf[i+3].real = tmp_a_r + tmp_b_r; buf[i+3].imag = tmp_a_i + tmp_b_i; buf[i+7].real = tmp_a_r - tmp_b_r; buf[i+7].imag = tmp_a_i - tmp_b_i;#else vector float buf01, buf23, buf45, buf67; buf01 = vec_ld((i + 0) << 3, (float*)buf); buf23 = vec_ld((i + 2) << 3, (float*)buf); tmp_b_r = (buf[i+5].real + buf[i+5].imag) * w[2][1].real; tmp_b_i = (buf[i+5].imag - buf[i+5].real) * w[2][1].real; buf[i+5].real = tmp_b_r; buf[i+5].imag = tmp_b_i; tmp_b_r = (buf[i+7].real - buf[i+7].imag) * w[2][3].imag; tmp_b_i = (buf[i+7].imag + buf[i+7].real) * w[2][3].imag; buf[i+7].real = tmp_b_r; buf[i+7].imag = tmp_b_i; buf23 = vec_ld((i + 2) << 3, (float*)buf); buf45 = vec_ld((i + 4) << 3, (float*)buf); buf67 = vec_ld((i + 6) << 3, (float*)buf); buf67 = vec_perm(buf67, buf67, vcprm(1,0,2,3)); vec_st(vec_add(buf01, buf45), (i + 0) << 3, (float*)buf); vec_st(vec_madd(buf67, vcii(p,n,p,p), buf23), (i + 2) << 3, (float*)buf); vec_st(vec_sub(buf01, buf45), (i + 4) << 3, (float*)buf); vec_st(vec_nmsub(buf67, vcii(p,n,p,p), buf23), (i + 6) << 3, (float*)buf);#endif } /* 4-7. iterations */ for (m=3; m < 7; m++) { two_m = (1 << m); two_m_plus_one = two_m<<1; for(i = 0; i < 128; i += two_m_plus_one) { for(k = 0; k < two_m; k+=2) {#if 0 int p = k + i; int q = p + two_m; tmp_a_r = buf[p].real; tmp_a_i = buf[p].imag; tmp_b_r = buf[q].real * w[m][k].real - buf[q].imag * w[m][k].imag; tmp_b_i = buf[q].imag * w[m][k].real + buf[q].real * w[m][k].imag; buf[p].real = tmp_a_r + tmp_b_r; buf[p].imag = tmp_a_i + tmp_b_i; buf[q].real = tmp_a_r - tmp_b_r; buf[q].imag = tmp_a_i - tmp_b_i; tmp_a_r = buf[(p + 1)].real; tmp_a_i = buf[(p + 1)].imag; tmp_b_r = buf[(q + 1)].real * w[m][(k + 1)].real - buf[(q + 1)].imag * w[m][(k + 1)].imag; tmp_b_i = buf[(q + 1)].imag * w[m][(k + 1)].real + buf[(q + 1)].real * w[m][(k + 1)].imag; buf[(p + 1)].real = tmp_a_r + tmp_b_r; buf[(p + 1)].imag = tmp_a_i + tmp_b_i; buf[(q + 1)].real = tmp_a_r - tmp_b_r; buf[(q + 1)].imag = tmp_a_i - tmp_b_i;#else int p = k + i; int q = p + two_m; vector float vecp, vecq, vecw, temp1, temp2, temp3, temp4; const vector float vczero = (const vector float)FOUROF(0.); // first compute buf[q] and buf[q+1] vecq = vec_ld(q << 3, (float*)buf); vecw = vec_ld(0, (float*)&(w[m][k])); temp1 = vec_madd(vecq, vecw, vczero); temp2 = vec_perm(vecq, vecq, vcprm(1,0,3,2)); temp2 = vec_madd(temp2, vecw, vczero); temp3 = vec_perm(temp1, temp2, vcprm(0,s0,2,s2)); temp4 = vec_perm(temp1, temp2, vcprm(1,s1,3,s3)); vecq = vec_madd(temp4, vcii(n,p,n,p), temp3); // then butterfly with buf[p] and buf[p+1] vecp = vec_ld(p << 3, (float*)buf); temp1 = vec_add(vecp, vecq); temp2 = vec_sub(vecp, vecq); vec_st(temp1, p << 3, (float*)buf); vec_st(temp2, q << 3, (float*)buf);#endif } } } /* Post IFFT complex multiply plus IFFT complex conjugate*/ for( i=0; i < 128; i+=4) { /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */#if 0 tmp_a_r = buf[(i + 0)].real; tmp_a_i = -1.0 * buf[(i + 0)].imag; buf[(i + 0)].real = (tmp_a_r * xcos1[(i + 0)]) - (tmp_a_i * xsin1[(i + 0)]); buf[(i + 0)].imag = (tmp_a_r * xsin1[(i + 0)]) + (tmp_a_i * xcos1[(i + 0)]); tmp_a_r = buf[(i + 1)].real; tmp_a_i = -1.0 * buf[(i + 1)].imag; buf[(i + 1)].real = (tmp_a_r * xcos1[(i + 1)]) - (tmp_a_i * xsin1[(i + 1)]); buf[(i + 1)].imag = (tmp_a_r * xsin1[(i + 1)]) + (tmp_a_i * xcos1[(i + 1)]); tmp_a_r = buf[(i + 2)].real; tmp_a_i = -1.0 * buf[(i + 2)].imag; buf[(i + 2)].real = (tmp_a_r * xcos1[(i + 2)]) - (tmp_a_i * xsin1[(i + 2)]); buf[(i + 2)].imag = (tmp_a_r * xsin1[(i + 2)]) + (tmp_a_i * xcos1[(i + 2)]); tmp_a_r = buf[(i + 3)].real; tmp_a_i = -1.0 * buf[(i + 3)].imag; buf[(i + 3)].real = (tmp_a_r * xcos1[(i + 3)]) - (tmp_a_i * xsin1[(i + 3)]); buf[(i + 3)].imag = (tmp_a_r * xsin1[(i + 3)]) + (tmp_a_i * xcos1[(i + 3)]);#else vector float bufv_0, bufv_2, cosv, sinv, temp1, temp2; vector float temp0022, temp1133, tempCS01; const vector float vczero = (const vector float)FOUROF(0.); bufv_0 = vec_ld((i + 0) << 3, (float*)buf); bufv_2 = vec_ld((i + 2) << 3, (float*)buf); cosv = vec_ld(i << 2, xcos1); sinv = vec_ld(i << 2, xsin1); temp0022 = vec_perm(bufv_0, bufv_0, vcprm(0,0,2,2)); temp1133 = vec_perm(bufv_0, bufv_0, vcprm(1,1,3,3)); tempCS01 = vec_perm(cosv, sinv, vcprm(0,s0,1,s1)); temp1 = vec_madd(temp0022, tempCS01, vczero); tempCS01 = vec_perm(cosv, sinv, vcprm(s0,0,s1,1)); temp2 = vec_madd(temp1133, tempCS01, vczero); bufv_0 = vec_madd(temp2, vcii(p,n,p,n), temp1); vec_st(bufv_0, (i + 0) << 3, (float*)buf); /* idem with bufv_2 and high-order cosv/sinv */ temp0022 = vec_perm(bufv_2, bufv_2, vcprm(0,0,2,2)); temp1133 = vec_perm(bufv_2, bufv_2, vcprm(1,1,3,3)); tempCS01 = vec_perm(cosv, sinv, vcprm(2,s2,3,s3)); temp1 = vec_madd(temp0022, tempCS01, vczero); tempCS01 = vec_perm(cosv, sinv, vcprm(s2,2,s3,3)); temp2 = vec_madd(temp1133, tempCS01, vczero); bufv_2 = vec_madd(temp2, vcii(p,n,p,n), temp1); vec_st(bufv_2, (i + 2) << 3, (float*)buf); #endif } data_ptr = data; delay_ptr = delay; window_ptr = imdct_window; /* Window and convert to real valued signal */ for(i=0; i< 64; i++) { *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias; *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias; } for(i=0; i< 64; i++) { *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias; *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias; } /* The trailing edge of the window goes into the delay line */ delay_ptr = delay; for(i=0; i< 64; i++) { *delay_ptr++ = -buf[64+i].real * *--window_ptr; *delay_ptr++ = buf[64-i-1].imag * *--window_ptr; } for(i=0; i<64; i++) { *delay_ptr++ = buf[i].imag * *--window_ptr; *delay_ptr++ = -buf[128-i-1].real * *--window_ptr; }}#endif// Stuff below this line is borrowed from libac3#include "srfftp.h"#ifdef ARCH_X86#ifndef HAVE_3DNOW#define HAVE_3DNOW 1#endif#include "srfftp_3dnow.h"const i_cmplx_t x_plus_minus_3dnow __attribute__ ((aligned (8))) = {{ 0x00000000UL, 0x80000000UL }}; const i_cmplx_t x_minus_plus_3dnow __attribute__ ((aligned (8))) = {{ 0x80000000UL, 0x00000000UL }}; const complex_t HSQRT2_3DNOW __attribute__ ((aligned (8))) = { 0.707106781188, 0.707106781188 };#undef HAVE_3DNOWEX#include "imdct_3dnow.h"#define HAVE_3DNOWEX#include "imdct_3dnow.h"voidimdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias){/* int i,k; int p,q;*/ int m; int two_m; int two_m_plus_one;/* sample_t tmp_a_i; sample_t tmp_a_r; sample_t tmp_b_i; sample_t tmp_b_r;*/ sample_t *data_ptr; sample_t *delay_ptr; sample_t *window_ptr; /* 512 IMDCT with source and dest data in 'data' */ /* see the c version (dct_do_512()), its allmost identical, just in C */ /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ /* Bit reversed shuffling */ asm volatile( "xorl %%esi, %%esi \n\t" "leal "MANGLE(bit_reverse_512)", %%eax \n\t" "movl $1008, %%edi \n\t" "pushl %%ebp \n\t" //use ebp without telling gcc ".balign 16 \n\t" "1: \n\t" "movlps (%0, %%esi), %%xmm0 \n\t" // XXXI "movhps 8(%0, %%edi), %%xmm0 \n\t" // RXXI "movlps 8(%0, %%esi), %%xmm1 \n\t" // XXXi "movhps (%0, %%edi), %%xmm1 \n\t" // rXXi
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -