📄 decode_altivec.c
字号:
/* decode.c: decoding samples... copyright 1995-2006 by the mpg123 project - free software under the terms of the LGPL 2.1 see COPYING and AUTHORS files in distribution or http://mpg123.de initially written by Michael Hipp altivec optimization by tmkk*/#include <stdlib.h>#include <math.h>#include <string.h>#include "config.h"#include "mpg123.h"#ifndef __APPLE__#include <altivec.h>#endif#define WRITE_SAMPLE(samples,sum,clip) \ if( (sum) > REAL_PLUS_32767) { *(samples) = 0x7fff; (clip)++; } \ else if( (sum) < REAL_MINUS_32768) { *(samples) = -0x8000; (clip)++; } \ else { *(samples) = REAL_TO_SHORT(sum); }int synth_1to1_8bit(real *bandPtr,int channel,unsigned char *samples,int *pnt){ short samples_tmp[64]; short *tmp1 = samples_tmp + channel; int i,ret; int pnt1=0; ret = synth_1to1(bandPtr,channel,(unsigned char *) samples_tmp,&pnt1); samples += channel + *pnt; for(i=0;i<32;i++) { *samples = conv16to8[*tmp1>>AUSHIFT]; samples += 2; tmp1 += 2; } *pnt += 64; return ret;}int synth_1to1_8bit_mono(real *bandPtr,unsigned char *samples,int *pnt){ short samples_tmp[64]; short *tmp1 = samples_tmp; int i,ret; int pnt1 = 0; ret = synth_1to1(bandPtr,0,(unsigned char *) samples_tmp,&pnt1); samples += *pnt; for(i=0;i<32;i++) { *samples++ = conv16to8[*tmp1>>AUSHIFT]; tmp1 += 2; } *pnt += 32; return ret;}int synth_1to1_8bit_mono2stereo(real *bandPtr,unsigned char *samples,int *pnt){ short samples_tmp[64]; short *tmp1 = samples_tmp; int i,ret; int pnt1 = 0; ret = synth_1to1(bandPtr,0,(unsigned char *) samples_tmp,&pnt1); samples += *pnt; for(i=0;i<32;i++) { *samples++ = conv16to8[*tmp1>>AUSHIFT]; *samples++ = conv16to8[*tmp1>>AUSHIFT]; tmp1 += 2; } *pnt += 64; return ret;}int synth_1to1_mono(real *bandPtr,unsigned char *samples,int *pnt){ short samples_tmp[64]; short *tmp1 = samples_tmp; int i,ret; int pnt1 = 0; ret = synth_1to1(bandPtr,0,(unsigned char *) samples_tmp,&pnt1); samples += *pnt; for(i=0;i<32;i++) { *( (short *)samples) = *tmp1; samples += 2; tmp1 += 2; } *pnt += 64; return ret;}int synth_1to1_mono2stereo(real *bandPtr,unsigned char *samples,int *pnt){ int i,ret; ret = synth_1to1(bandPtr,0,samples,pnt); samples = samples + *pnt - 128; for(i=0;i<32;i++) { ((short *)samples)[1] = ((short *)samples)[0]; samples+=4; } return ret;}int synth_1to1(real *bandPtr,int channel,unsigned char *out,int *pnt){ static real __attribute__ ((aligned (16))) buffs[4][4][0x110]; static const int step = 2; static int bo = 1; short *samples = (short *) (out+*pnt); real *b0,(*buf)[0x110]; int clip = 0; int bo1; if(have_eq_settings) do_equalizer(bandPtr,channel); if(!channel) { bo--; bo &= 0xf; buf = buffs[0]; } else { samples++; buf = buffs[1]; } if(bo & 0x1) { b0 = buf[0]; bo1 = bo; dct64(buf[1]+((bo+1)&0xf),buf[0]+bo,bandPtr); } else { b0 = buf[1]; bo1 = bo+1; dct64(buf[0]+bo,buf[1]+bo+1,bandPtr); } { register int j; real *window = decwin + 16 - bo1; int __attribute__ ((aligned (16))) clip_tmp[4]; vector float v1,v2,v3,v4,v5,v6,v7,v8,v9; vector unsigned char vperm1,vperm2,vperm3,vperm4,vperm5; vector float vsum,vsum2,vsum3,vsum4,vmin,vmax; vector signed int vclip; vector signed short vsample1,vsample2; vclip = vec_xor(vclip,vclip);#ifdef __APPLE__ vmax = (vector float)(32767.0f); vmin = (vector float)(-32768.0f); vperm5 = (vector unsigned char)(0,1,18,19,2,3,22,23,4,5,26,27,6,7,30,31);#else vmax = (vector float){32767.0f,32767.0f,32767.0f,32767.0f}; vmin = (vector float){-32768.0f,-32768.0f,-32768.0f,-32768.0f}; vperm5 = (vector unsigned char){0,1,18,19,2,3,22,23,4,5,26,27,6,7,30,31};#endif vperm1 = vec_lvsl(0,window); vperm3 = vec_lvsl(0,samples); vperm4 = vec_lvsr(0,samples); for (j=4;j;j--) { vsum = vec_xor(vsum,vsum); vsum2 = vec_xor(vsum2,vsum2); vsum3 = vec_xor(vsum3,vsum3); vsum4 = vec_xor(vsum4,vsum4); v1 = vec_ld(0,window); v2 = vec_ld(16,window); v3 = vec_ld(32,window); v4 = vec_ld(48,window); v5 = vec_ld(64,window); v1 = vec_perm(v1,v2,vperm1); v6 = vec_ld(0,b0); v2 = vec_perm(v2,v3,vperm1); v7 = vec_ld(16,b0); v3 = vec_perm(v3,v4,vperm1); v8 = vec_ld(32,b0); v4 = vec_perm(v4,v5,vperm1); v9 = vec_ld(48,b0); vsum = vec_madd(v1,v6,vsum); vsum = vec_madd(v2,v7,vsum); vsum = vec_madd(v3,v8,vsum); vsum = vec_madd(v4,v9,vsum); window += 32; b0 += 16; v1 = vec_ld(0,window); v2 = vec_ld(16,window); v3 = vec_ld(32,window); v4 = vec_ld(48,window); v5 = vec_ld(64,window); v1 = vec_perm(v1,v2,vperm1); v6 = vec_ld(0,b0); v2 = vec_perm(v2,v3,vperm1); v7 = vec_ld(16,b0); v3 = vec_perm(v3,v4,vperm1); v8 = vec_ld(32,b0); v4 = vec_perm(v4,v5,vperm1); v9 = vec_ld(48,b0); vsum2 = vec_madd(v1,v6,vsum2); vsum2 = vec_madd(v2,v7,vsum2); vsum2 = vec_madd(v3,v8,vsum2); vsum2 = vec_madd(v4,v9,vsum2); window += 32; b0 += 16; v1 = vec_ld(0,window); v2 = vec_ld(16,window); v3 = vec_ld(32,window); v4 = vec_ld(48,window); v5 = vec_ld(64,window); v1 = vec_perm(v1,v2,vperm1); v6 = vec_ld(0,b0); v2 = vec_perm(v2,v3,vperm1); v7 = vec_ld(16,b0); v3 = vec_perm(v3,v4,vperm1); v8 = vec_ld(32,b0); v4 = vec_perm(v4,v5,vperm1); v9 = vec_ld(48,b0); vsum3 = vec_madd(v1,v6,vsum3); vsum3 = vec_madd(v2,v7,vsum3); vsum3 = vec_madd(v3,v8,vsum3); vsum3 = vec_madd(v4,v9,vsum3); window += 32; b0 += 16; v1 = vec_ld(0,window); v2 = vec_ld(16,window); v3 = vec_ld(32,window); v4 = vec_ld(48,window); v5 = vec_ld(64,window); v1 = vec_perm(v1,v2,vperm1); v6 = vec_ld(0,b0); v2 = vec_perm(v2,v3,vperm1); v7 = vec_ld(16,b0); v3 = vec_perm(v3,v4,vperm1); v8 = vec_ld(32,b0); v4 = vec_perm(v4,v5,vperm1); v9 = vec_ld(48,b0); vsum4 = vec_madd(v1,v6,vsum4); vsum4 = vec_madd(v2,v7,vsum4); vsum4 = vec_madd(v3,v8,vsum4); vsum4 = vec_madd(v4,v9,vsum4); window += 32; b0 += 16; v1 = vec_mergeh(vsum,vsum3); v2 = vec_mergeh(vsum2,vsum4); v3 = vec_mergel(vsum,vsum3); v4 = vec_mergel(vsum2,vsum4); v5 = vec_mergeh(v1,v2); v6 = vec_mergel(v1,v2); v7 = vec_mergeh(v3,v4); v8 = vec_mergel(v3,v4); vsum = vec_sub(v5,v6); v9 = vec_sub(v7,v8); vsum = vec_add(vsum,v9); v3 = (vector float)vec_cts(vsum,0); v1 = (vector float)vec_cmpgt(vsum,vmax); v2 = (vector float)vec_cmplt(vsum,vmin); vsample1 = vec_ld(0,samples); vsample2 = vec_ld(15,samples);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -