📄 filtbank.c
字号:
#ifdef PROFILE int64_t count = faad_get_ts();#endif#ifdef LD_DEC if (object_type == LD) { window_long = fb->ld_window[window_shape]; window_long_prev = fb->ld_window[window_shape_prev]; } else {#endif window_long = fb->long_window[window_shape]; window_long_prev = fb->long_window[window_shape_prev]; window_short = fb->short_window[window_shape]; window_short_prev = fb->short_window[window_shape_prev];#ifdef LD_DEC }#endif switch (window_sequence) { case ONLY_LONG_SEQUENCE: imdct_long_sse(fb, freq_in, transf_buf, 2*nlong); for (i = 0; i < nlong; i+=4) { __m128 m1, m2, m3, m4, m5, m6, m7, m8; m1 = _mm_load_ps(&transf_buf[i]); m2 = _mm_load_ps(&window_long_prev[i]); m6 = _mm_load_ps(&window_long[nlong-4-i]); m3 = _mm_load_ps(&time_out[nlong+i]); m5 = _mm_load_ps(&transf_buf[nlong+i]); m4 = _mm_mul_ps(m1, m2); m7 = _mm_shuffle_ps(m6, m6, _MM_SHUFFLE(0, 1, 2, 3)); m4 = _mm_add_ps(m4, m3); m8 = _mm_mul_ps(m5, m7); _mm_store_ps(&time_out[i], m4); _mm_store_ps(&time_out[nlong+i], m8); } break; case LONG_START_SEQUENCE: imdct_long_sse(fb, freq_in, transf_buf, 2*nlong); for (i = 0; i < nlong; i+=4) { __m128 m1 = _mm_load_ps(&transf_buf[i]); __m128 m2 = _mm_load_ps(&window_long_prev[i]); __m128 m3 = _mm_load_ps(&time_out[nlong+i]); __m128 m4 = _mm_mul_ps(m1, m2); m4 = _mm_add_ps(m4, m3); _mm_store_ps(&time_out[i], m4); } for (i = 0; i < nflat_ls; i+=4) { __m128 m1 = _mm_load_ps(&transf_buf[nlong+i]); _mm_store_ps(&time_out[nlong+i], m1); } for (i = 0; i < nshort; i+=4) { __m128 m1 = _mm_load_ps(&transf_buf[nlong+nflat_ls+i]); __m128 m2 = _mm_load_ps(&window_short[nshort-4-i]); __m128 m3, m4; m3 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3)); m4 = _mm_mul_ps(m1, m3); _mm_store_ps(&time_out[nlong+nflat_ls+i], m4); } for (i = 0; i < nflat_ls; i+=4) { __m128 m1 = _mm_setzero_ps(); _mm_store_ps(&time_out[nlong+nflat_ls+nshort+i], m1); } break; case EIGHT_SHORT_SEQUENCE: faad_imdct_sse(fb->mdct256, &freq_in[0*nshort], &transf_buf[2*nshort*0]); faad_imdct_sse(fb->mdct256, &freq_in[1*nshort], &transf_buf[2*nshort*1]); faad_imdct_sse(fb->mdct256, &freq_in[2*nshort], &transf_buf[2*nshort*2]); faad_imdct_sse(fb->mdct256, &freq_in[3*nshort], &transf_buf[2*nshort*3]); faad_imdct_sse(fb->mdct256, &freq_in[4*nshort], &transf_buf[2*nshort*4]); faad_imdct_sse(fb->mdct256, &freq_in[5*nshort], &transf_buf[2*nshort*5]); faad_imdct_sse(fb->mdct256, &freq_in[6*nshort], &transf_buf[2*nshort*6]); faad_imdct_sse(fb->mdct256, &freq_in[7*nshort], &transf_buf[2*nshort*7]); for (i = 0; i < nflat_ls; i+=4) { __m128 m1 = _mm_load_ps(&time_out[nlong+i]); _mm_store_ps(&time_out[i], m1); } for (i = 0; i < nshort; i+=4) { __m128 m1 = _mm_load_ps(&transf_buf[nshort*0+i]); __m128 m2 = _mm_load_ps(&window_short_prev[i]); __m128 m3 = _mm_load_ps(&time_out[nlong+nflat_ls+i]); __m128 m4 = _mm_mul_ps(m1, m2); m4 = _mm_add_ps(m4, m3); _mm_store_ps(&time_out[nflat_ls+i], m4); } for (i = 0; i < nshort; i+=4) { __m128 m1, m2, m3, m4, m5, m6, m7, m8; m1 = _mm_load_ps(&transf_buf[nshort*1+i]); m2 = _mm_load_ps(&window_short[nshort-4-i]); m3 = _mm_load_ps(&time_out[nlong+nflat_ls+nshort*1+i]); m6 = _mm_load_ps(&transf_buf[nshort*2+i]); m7 = _mm_load_ps(&window_short[i]); m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3)); m4 = _mm_mul_ps(m1, m5); m8 = _mm_mul_ps(m6, m7); m4 = _mm_add_ps(m4, m3); m4 = _mm_add_ps(m4, m8); _mm_store_ps(&time_out[nflat_ls+1*nshort+i], m4); } for (i = 0; i < nshort; i+=4) { __m128 m1, m2, m3, m4, m5, m6, m7, m8; m1 = _mm_load_ps(&transf_buf[nshort*3+i]); m2 = _mm_load_ps(&window_short[nshort-4-i]); m3 = _mm_load_ps(&time_out[nlong+nflat_ls+nshort*2+i]); m6 = _mm_load_ps(&transf_buf[nshort*4+i]); m7 = _mm_load_ps(&window_short[i]); m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3)); m4 = _mm_mul_ps(m1, m5); m8 = _mm_mul_ps(m6, m7); m4 = _mm_add_ps(m4, m3); m4 = _mm_add_ps(m4, m8); _mm_store_ps(&time_out[nflat_ls+2*nshort+i], m4); } for (i = 0; i < nshort; i+=4) { __m128 m1, m2, m3, m4, m5, m6, m7, m8; m1 = _mm_load_ps(&transf_buf[nshort*5+i]); m2 = _mm_load_ps(&window_short[nshort-4-i]); m3 = _mm_load_ps(&time_out[nlong+nflat_ls+nshort*3+i]); m6 = _mm_load_ps(&transf_buf[nshort*6+i]); m7 = _mm_load_ps(&window_short[i]); m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3)); m4 = _mm_mul_ps(m1, m5); m8 = _mm_mul_ps(m6, m7); m4 = _mm_add_ps(m4, m3); m4 = _mm_add_ps(m4, m8); _mm_store_ps(&time_out[nflat_ls+3*nshort+i], m4); } for(i = 0; i < trans; i+=4) { __m128 m1, m2, m3, m4, m5, m6, m7, m8; m1 = _mm_load_ps(&transf_buf[nshort*7+i]); m2 = _mm_load_ps(&window_short[nshort-4-i]); m3 = _mm_load_ps(&time_out[nlong+nflat_ls+nshort*4+i]); m6 = _mm_load_ps(&transf_buf[nshort*8+i]); m7 = _mm_load_ps(&window_short[i]); m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3)); m4 = _mm_mul_ps(m1, m5); m8 = _mm_mul_ps(m6, m7); m4 = _mm_add_ps(m4, m3); m4 = _mm_add_ps(m4, m8); _mm_store_ps(&time_out[nflat_ls+4*nshort+i], m4); } for (i = trans; i < nshort; i+=4) { __m128 m1, m2, m3, m4, m5, m6, m7, m8; m1 = _mm_load_ps(&transf_buf[nshort*7+i]); m2 = _mm_load_ps(&window_short[nshort-4-i]); m6 = _mm_load_ps(&transf_buf[nshort*8+i]); m7 = _mm_load_ps(&window_short[i]); m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3)); m4 = _mm_mul_ps(m1, m5); m8 = _mm_mul_ps(m6, m7); m3 = _mm_add_ps(m4, m8); _mm_store_ps(&time_out[nflat_ls+4*nshort+i], m3); } for (i = 0; i < nshort; i+=4) { __m128 m1, m2, m3, m4, m5, m6, m7, m8; m1 = _mm_load_ps(&transf_buf[nshort*9+i]); m2 = _mm_load_ps(&window_short[nshort-4-i]); m6 = _mm_load_ps(&transf_buf[nshort*10+i]); m7 = _mm_load_ps(&window_short[i]); m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3)); m4 = _mm_mul_ps(m1, m5); m8 = _mm_mul_ps(m6, m7); m3 = _mm_add_ps(m4, m8); _mm_store_ps(&time_out[nflat_ls+5*nshort+i], m3); } for (i = 0; i < nshort; i+=4) { __m128 m1, m2, m3, m4, m5, m6, m7, m8; m1 = _mm_load_ps(&transf_buf[nshort*11+i]); m2 = _mm_load_ps(&window_short[nshort-4-i]); m6 = _mm_load_ps(&transf_buf[nshort*12+i]); m7 = _mm_load_ps(&window_short[i]); m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3)); m4 = _mm_mul_ps(m1, m5); m8 = _mm_mul_ps(m6, m7); m3 = _mm_add_ps(m4, m8); _mm_store_ps(&time_out[nflat_ls+6*nshort+i], m3); } for (i = 0; i < nshort; i+=4) { __m128 m1, m2, m3, m4, m5, m6, m7, m8; m1 = _mm_load_ps(&transf_buf[nshort*13+i]); m2 = _mm_load_ps(&window_short[nshort-4-i]); m6 = _mm_load_ps(&transf_buf[nshort*14+i]); m7 = _mm_load_ps(&window_short[i]); m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3)); m4 = _mm_mul_ps(m1, m5); m8 = _mm_mul_ps(m6, m7); m3 = _mm_add_ps(m4, m8); _mm_store_ps(&time_out[nflat_ls+7*nshort+i], m3); } for (i = 0; i < nshort; i+=4) { __m128 m1, m2, m3, m5; m1 = _mm_load_ps(&transf_buf[nshort*15+i]); m2 = _mm_load_ps(&window_short[nshort-4-i]); m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3)); m3 = _mm_mul_ps(m1, m5); _mm_store_ps(&time_out[nflat_ls+8*nshort+i], m3); } for (i = 0; i < nflat_ls; i+=4) { __m128 m1 = _mm_setzero_ps(); _mm_store_ps(&time_out[nlong+nflat_ls+nshort+i], m1); } break; case LONG_STOP_SEQUENCE: imdct_long_sse(fb, freq_in, transf_buf, 2*nlong); for (i = 0; i < nflat_ls; i+=4) { __m128 m1 = _mm_load_ps(&time_out[nlong+i]); _mm_store_ps(&time_out[i], m1); } for (i = 0; i < nshort; i+=4) { __m128 m1 = _mm_load_ps(&transf_buf[nflat_ls+i]); __m128 m2 = _mm_load_ps(&window_short_prev[i]); __m128 m3 = _mm_load_ps(&time_out[nlong+nflat_ls+i]); __m128 m4 = _mm_mul_ps(m1, m2); m4 = _mm_add_ps(m4, m3); _mm_store_ps(&time_out[nflat_ls+i], m4); } for (i = 0; i < nflat_ls; i+=4) { __m128 m1 = _mm_load_ps(&transf_buf[nflat_ls+nshort+i]); __m128 m2 = _mm_load_ps(&time_out[nlong+nflat_ls+nshort+i]); __m128 m3 = _mm_add_ps(m1, m2); _mm_store_ps(&time_out[nflat_ls+nshort+i], m3); } for (i = 0; i < nlong; i+=4) { __m128 m1 = _mm_load_ps(&transf_buf[nlong+i]); __m128 m2 = _mm_load_ps(&window_long[nlong-4-i]); __m128 m3, m4; m3 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3)); m4 = _mm_mul_ps(m1, m3); _mm_store_ps(&time_out[nlong+i], m4); } break; }#ifdef PROFILE count = faad_get_ts() - count; fb->cycles += count;#endif}#endif#ifdef LTP_DEC/* only works for LTP -> no overlapping, no short blocks */void filter_bank_ltp(fb_info *fb, uint8_t window_sequence, uint8_t window_shape, uint8_t window_shape_prev, real_t *in_data, real_t *out_mdct, uint8_t object_type, uint16_t frame_len){ int16_t i; ALIGN real_t windowed_buf[2*1024] = {0}; const real_t *window_long = NULL; const real_t *window_long_prev = NULL; const real_t *window_short = NULL; const real_t *window_short_prev = NULL; uint16_t nlong = frame_len; uint16_t nshort = frame_len/8; uint16_t nflat_ls = (nlong-nshort)/2; assert(window_sequence != EIGHT_SHORT_SEQUENCE);#ifdef LD_DEC if (object_type == LD) { window_long = fb->ld_window[window_shape]; window_long_prev = fb->ld_window[window_shape_prev]; } else {#endif window_long = fb->long_window[window_shape]; window_long_prev = fb->long_window[window_shape_prev]; window_short = fb->short_window[window_shape]; window_short_prev = fb->short_window[window_shape_prev];#ifdef LD_DEC }#endif switch(window_sequence) { case ONLY_LONG_SEQUENCE: for (i = nlong-1; i >= 0; i--) { windowed_buf[i] = MUL_F(in_data[i], window_long_prev[i]); windowed_buf[i+nlong] = MUL_F(in_data[i+nlong], window_long[nlong-1-i]); } mdct(fb, windowed_buf, out_mdct, 2*nlong); break; case LONG_START_SEQUENCE: for (i = 0; i < nlong; i++) windowed_buf[i] = MUL_F(in_data[i], window_long_prev[i]); for (i = 0; i < nflat_ls; i++) windowed_buf[i+nlong] = in_data[i+nlong]; for (i = 0; i < nshort; i++) windowed_buf[i+nlong+nflat_ls] = MUL_F(in_data[i+nlong+nflat_ls], window_short[nshort-1-i]); for (i = 0; i < nflat_ls; i++) windowed_buf[i+nlong+nflat_ls+nshort] = 0; mdct(fb, windowed_buf, out_mdct, 2*nlong); break; case LONG_STOP_SEQUENCE: for (i = 0; i < nflat_ls; i++) windowed_buf[i] = 0; for (i = 0; i < nshort; i++) windowed_buf[i+nflat_ls] = MUL_F(in_data[i+nflat_ls], window_short_prev[i]); for (i = 0; i < nflat_ls; i++) windowed_buf[i+nflat_ls+nshort] = in_data[i+nflat_ls+nshort]; for (i = 0; i < nlong; i++) windowed_buf[i+nlong] = MUL_F(in_data[i+nlong], window_long[nlong-1-i]); mdct(fb, windowed_buf, out_mdct, 2*nlong); break; }}#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -