📄 liba52_changes.diff
字号:
--- include/a52.h 2005-03-22 19:58:53.000000000 +0100+++ a52.h 2004-03-19 01:15:49.000000000 +0100@@ -19,6 +25,9 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#ifndef A52_H+#define A52_H+ #ifndef LIBA52_DOUBLE typedef float sample_t; #else@@ -113,3 +122,10 @@ void a52_dynrng (a52_state_t * state, sample_t (* call) (sample_t, void *), void * data); int a52_block (a52_state_t * state, sample_t * samples);++void* a52_resample_init(uint32_t mm_accel,int flags,int chans);+extern int (* a52_resample) (float * _f, int16_t * s16);++uint16_t crc16_block(uint8_t *data,uint32_t num_bytes);++#endif /* A52_H */--- liba52/a52_internal.h 2005-03-22 19:59:35.000000000 +0100+++ a52_internal.h 2004-03-19 01:15:49.000000000 +0100@@ -41,11 +43,12 @@ int downmix_init (int input, int flags, sample_t * level, sample_t clev, sample_t slev);+void downmix_accel_init(uint32_t mm_accel); int downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level, sample_t clev, sample_t slev);-void downmix (sample_t * samples, int acmod, int output, sample_t bias,+extern void (*downmix) (sample_t * samples, int acmod, int output, sample_t bias, sample_t clev, sample_t slev);-void upmix (sample_t * samples, int acmod, int output);+extern void (*upmix) (sample_t * samples, int acmod, int output); void imdct_init (uint32_t mm_accel); extern void (* imdct_256) (sample_t * data, sample_t * delay, sample_t bias);--- liba52/bitstream.c 2005-03-22 19:59:35.000000000 +0100+++ bitstream.c 2004-03-19 01:15:49.000000000 +0100@@ -29,7 +35,12 @@ #define BUFFER_SIZE 4096 +#ifdef ALT_BITSTREAM_READER+int indx=0;+uint32_t * buffer_start;+#else static uint32_t * buffer_start;+#endif uint32_t bits_left; uint32_t current_word;@@ -41,6 +52,9 @@ align = (int)buf & 3; buffer_start = (uint32_t *) (buf - align); bits_left = 0;+#ifdef ALT_BITSTREAM_READER+ indx=0;+#endif bitstream_get (align * 8); } --- liba52/bitstream.h 2005-03-22 19:59:35.000000000 +0100+++ bitstream.h 2004-03-19 01:15:49.000000000 +0100@@ -19,6 +25,48 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +/* code from ffmpeg/libavcodec */+#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC_ == 3 && __GNUC_MINOR__ > 0)+# define always_inline __attribute__((always_inline)) inline+#else+# define always_inline inline+#endif++#if defined(__sparc__) || defined(hpux)+/*+ * the alt bitstream reader performs unaligned memory accesses; that doesn't work+ * on sparc/hpux. For now, disable ALT_BITSTREAM_READER.+ */+#undef ALT_BITSTREAM_READER+#else+// alternative (faster) bitstram reader (reades upto 3 bytes over the end of the input)+#define ALT_BITSTREAM_READER++/* used to avoid missaligned exceptions on some archs (alpha, ...) */+#if defined (ARCH_X86) || defined(ARCH_ARMV4L)+# define unaligned32(a) (*(uint32_t*)(a))+#else+# ifdef __GNUC__+static always_inline uint32_t unaligned32(const void *v) {+ struct Unaligned {+ uint32_t i;+ } __attribute__((packed));++ return ((const struct Unaligned *) v)->i;+}+# elif defined(__DECC)+static inline uint32_t unaligned32(const void *v) {+ return *(const __unaligned uint32_t *) v;+}+# else+static inline uint32_t unaligned32(const void *v) {+ return *(const uint32_t *) v;+}+# endif+#endif //!ARCH_X86++#endif+ /* (stolen from the kernel) */ #ifdef WORDS_BIGENDIAN @@ -29,7 +77,7 @@ # if defined (__i386__) # define swab32(x) __i386_swab32(x)- static inline const uint32_t __i386_swab32(uint32_t x)+ static always_inline const uint32_t __i386_swab32(uint32_t x) { __asm__("bswap %0" : "=r" (x) : "0" (x)); return x;@@ -37,25 +85,42 @@ # else -# define swab32(x)\-((((uint8_t*)&x)[0] << 24) | (((uint8_t*)&x)[1] << 16) | \- (((uint8_t*)&x)[2] << 8) | (((uint8_t*)&x)[3]))-+# define swab32(x) __generic_swab32(x)+ static always_inline const uint32_t __generic_swab32(uint32_t x)+ {+ return ((((uint8_t*)&x)[0] << 24) | (((uint8_t*)&x)[1] << 16) |+ (((uint8_t*)&x)[2] << 8) | (((uint8_t*)&x)[3]));+ } # endif #endif +#ifdef ALT_BITSTREAM_READER+extern uint32_t *buffer_start; +extern int indx;+#else extern uint32_t bits_left; extern uint32_t current_word;+#endif void bitstream_set_ptr (uint8_t * buf); uint32_t bitstream_get_bh(uint32_t num_bits); int32_t bitstream_get_bh_2(uint32_t num_bits); + static inline uint32_t -bitstream_get(uint32_t num_bits)+bitstream_get(uint32_t num_bits) // note num_bits is practically a constant due to inlineing {+#ifdef ALT_BITSTREAM_READER+ uint32_t result= swab32( unaligned32(((uint8_t *)buffer_start)+(indx>>3)) );++ result<<= (indx&0x07);+ result>>= 32 - num_bits;+ indx+= num_bits;+ + return result;+#else uint32_t result;- + if(num_bits < bits_left) { result = (current_word << (32 - bits_left)) >> (32 - num_bits); bits_left -= num_bits;@@ -63,11 +128,30 @@ } return bitstream_get_bh(num_bits);+#endif+}++static inline void bitstream_skip(int num_bits)+{+#ifdef ALT_BITSTREAM_READER+ indx+= num_bits;+#else+ bitstream_get(num_bits);+#endif } static inline int32_t bitstream_get_2(uint32_t num_bits) {+#ifdef ALT_BITSTREAM_READER+ int32_t result= swab32( unaligned32(((uint8_t *)buffer_start)+(indx>>3)) );++ result<<= (indx&0x07);+ result>>= 32 - num_bits;+ indx+= num_bits;+ + return result;+#else int32_t result; if(num_bits < bits_left) {@@ -77,4 +161,5 @@ } return bitstream_get_bh_2(num_bits);+#endif }--- liba52/downmix.c 2005-03-22 19:59:35.000000000 +0100+++ downmix.c 2004-04-12 18:42:14.000000000 +0200@@ -17,18 +23,46 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA+ *+ * SSE optimizations from Michael Niedermayer (michaelni@gmx.at) */ #include "config.h" -#include <inttypes.h> #include <string.h>+#include <inttypes.h> #include "a52.h" #include "a52_internal.h"+#include "mm_accel.h" #define CONVERT(acmod,output) (((output) << 3) + (acmod)) ++void (*downmix)(sample_t * samples, int acmod, int output, sample_t bias,+ sample_t clev, sample_t slev)= NULL;+void (*upmix)(sample_t * samples, int acmod, int output)= NULL;++static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias,+ sample_t clev, sample_t slev);+static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias,+ sample_t clev, sample_t slev);+static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias,+ sample_t clev, sample_t slev);+static void upmix_MMX (sample_t * samples, int acmod, int output);+static void upmix_C (sample_t * samples, int acmod, int output);++void downmix_accel_init(uint32_t mm_accel)+{+ upmix= upmix_C;+ downmix= downmix_C;+#ifdef ARCH_X86 + if(mm_accel & MM_ACCEL_X86_MMX) upmix= upmix_MMX;+ if(mm_accel & MM_ACCEL_X86_SSE) downmix= downmix_SSE;+ if(mm_accel & MM_ACCEL_X86_3DNOW) downmix= downmix_3dnow;+#endif+}+ int downmix_init (int input, int flags, sample_t * level, sample_t clev, sample_t slev) {@@ -61,7 +95,7 @@ output = flags & A52_CHANNEL_MASK; if (output > A52_DOLBY) return -1;-+ output = table[output][input & 7]; if ((output == A52_STEREO) &&@@ -145,7 +179,6 @@ *level *= 1 / (1 + 3 * LEVEL_3DB); break; }- return output; } @@ -440,12 +473,11 @@ static void zero (sample_t * samples) { int i;- for (i = 0; i < 256; i++) samples[i] = 0; } -void downmix (sample_t * samples, int acmod, int output, sample_t bias,+static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, sample_t clev, sample_t slev) { switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {@@ -557,7 +589,7 @@ break; case CONVERT (A52_3F2R, A52_2F1R):- mix3to2 (samples, bias);+ mix3to2 (samples, bias); //FIXME possible bug? (output doesnt seem to be used) move2to1 (samples + 768, samples + 512, bias); break; @@ -581,12 +613,12 @@ break; case CONVERT (A52_3F1R, A52_3F2R):- memcpy (samples + 1027, samples + 768, 256 * sizeof (sample_t));+ memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); break; } } -void upmix (sample_t * samples, int acmod, int output)+static void upmix_C (sample_t * samples, int acmod, int output) { switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { @@ -651,3 +683,1137 @@ goto mix_31to21; } }++#ifdef ARCH_X86+static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias)+{+ asm volatile(+ "movlps %2, %%xmm7 \n\t"+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"+ "movl $-1024, %%esi \n\t"+ ".balign 16\n\t"+ "1: \n\t"+ "movaps (%0, %%esi), %%xmm0 \n\t" + "movaps 16(%0, %%esi), %%xmm1 \n\t" + "addps (%1, %%esi), %%xmm0 \n\t" + "addps 16(%1, %%esi), %%xmm1 \n\t" + "addps %%xmm7, %%xmm0 \n\t"+ "addps %%xmm7, %%xmm1 \n\t"+ "movaps %%xmm0, (%1, %%esi) \n\t"+ "movaps %%xmm1, 16(%1, %%esi) \n\t"+ "addl $32, %%esi \n\t"+ " jnz 1b \n\t"+ :: "r" (src+256), "r" (dest+256), "m" (bias)+ : "%esi"+ );+}++static void mix3to1_SSE (sample_t * samples, sample_t bias)+{+ asm volatile(+ "movlps %1, %%xmm7 \n\t"+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"+ "movl $-1024, %%esi \n\t"+ ".balign 16\n\t"+ "1: \n\t"+ "movaps (%0, %%esi), %%xmm0 \n\t" + "movaps 1024(%0, %%esi), %%xmm1 \n\t" + "addps 2048(%0, %%esi), %%xmm0 \n\t" + "addps %%xmm7, %%xmm1 \n\t"+ "addps %%xmm1, %%xmm0 \n\t"+ "movaps %%xmm0, (%0, %%esi) \n\t"+ "addl $16, %%esi \n\t"+ " jnz 1b \n\t"+ :: "r" (samples+256), "m" (bias)+ : "%esi"+ );+}++static void mix4to1_SSE (sample_t * samples, sample_t bias)+{+ asm volatile(+ "movlps %1, %%xmm7 \n\t"+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"+ "movl $-1024, %%esi \n\t"+ ".balign 16\n\t"+ "1: \n\t"+ "movaps (%0, %%esi), %%xmm0 \n\t" + "movaps 1024(%0, %%esi), %%xmm1 \n\t" + "addps 2048(%0, %%esi), %%xmm0 \n\t" + "addps 3072(%0, %%esi), %%xmm1 \n\t" + "addps %%xmm7, %%xmm0 \n\t"+ "addps %%xmm1, %%xmm0 \n\t"+ "movaps %%xmm0, (%0, %%esi) \n\t"+ "addl $16, %%esi \n\t"+ " jnz 1b \n\t"+ :: "r" (samples+256), "m" (bias)+ : "%esi"+ );+}++static void mix5to1_SSE (sample_t * samples, sample_t bias)+{+ asm volatile(+ "movlps %1, %%xmm7 \n\t"+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"+ "movl $-1024, %%esi \n\t"+ ".balign 16\n\t"+ "1: \n\t"+ "movaps (%0, %%esi), %%xmm0 \n\t" + "movaps 1024(%0, %%esi), %%xmm1 \n\t" + "addps 2048(%0, %%esi), %%xmm0 \n\t" + "addps 3072(%0, %%esi), %%xmm1 \n\t" + "addps %%xmm7, %%xmm0 \n\t"+ "addps 4096(%0, %%esi), %%xmm1 \n\t" + "addps %%xmm1, %%xmm0 \n\t"+ "movaps %%xmm0, (%0, %%esi) \n\t"+ "addl $16, %%esi \n\t"+ " jnz 1b \n\t"+ :: "r" (samples+256), "m" (bias)+ : "%esi"+ );+}++static void mix3to2_SSE (sample_t * samples, sample_t bias)+{+ asm volatile(+ "movlps %1, %%xmm7 \n\t"+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"+ "movl $-1024, %%esi \n\t"+ ".balign 16\n\t"+ "1: \n\t"+ "movaps 1024(%0, %%esi), %%xmm0 \n\t" + "addps %%xmm7, %%xmm0 \n\t" //common+ "movaps (%0, %%esi), %%xmm1 \n\t" + "movaps 2048(%0, %%esi), %%xmm2 \n\t"+ "addps %%xmm0, %%xmm1 \n\t"+ "addps %%xmm0, %%xmm2 \n\t"+ "movaps %%xmm1, (%0, %%esi) \n\t"+ "movaps %%xmm2, 1024(%0, %%esi) \n\t"+ "addl $16, %%esi \n\t"+ " jnz 1b \n\t"+ :: "r" (samples+256), "m" (bias)+ : "%esi"+ );+}++static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias)+{+ asm volatile(+ "movlps %2, %%xmm7 \n\t"+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"+ "movl $-1024, %%esi \n\t"+ ".balign 16\n\t"+ "1: \n\t"+ "movaps 1024(%1, %%esi), %%xmm0 \n\t" + "addps %%xmm7, %%xmm0 \n\t" //common+ "movaps (%0, %%esi), %%xmm1 \n\t" + "movaps (%1, %%esi), %%xmm2 \n\t"+ "addps %%xmm0, %%xmm1 \n\t"+ "addps %%xmm0, %%xmm2 \n\t"+ "movaps %%xmm1, (%0, %%esi) \n\t"+ "movaps %%xmm2, (%1, %%esi) \n\t"+ "addl $16, %%esi \n\t"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -