📄 cfft.c
字号:
/*** FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding** Copyright (C) 2003-2004 M. Bakker, Ahead Software AG, http://www.nero.com**** This program is free software; you can redistribute it and/or modify** it under the terms of the GNU General Public License as published by** the Free Software Foundation; either version 2 of the License, or** (at your option) any later version.**** This program is distributed in the hope that it will be useful,** but WITHOUT ANY WARRANTY; without even the implied warranty of** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the** GNU General Public License for more details.**** You should have received a copy of the GNU General Public License** along with this program; if not, write to the Free Software** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.**** Any non-GPL usage of this software or parts of this software is strictly** forbidden.**** Commercial non-GPL licensing of this software is possible.** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.**** $Id: cfft.c,v 1.27 2004/06/30 12:45:55 menno Exp $**//* * Algorithmically based on Fortran-77 FFTPACK * by Paul N. Swarztrauber(Version 4, 1985). * * Does even sized fft only *//* isign is +1 for backward and -1 for forward transforms */#include "common.h"#include "structs.h"#include <stdlib.h>#include "cfft.h"#include "cfft_tab.h"/* static function declarations */#ifdef USE_SSEstatic void passf2pos_sse(const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa);static void passf2pos_sse_ido(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa);static void passf4pos_sse_ido(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa1, const complex_t *wa2, const complex_t *wa3);#endifstatic void passf2pos(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa);static void passf2neg(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa);static void passf3(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa1, const complex_t *wa2, const int8_t isign);static void passf4pos(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa1, const complex_t *wa2, const complex_t *wa3);static void passf4neg(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa1, const complex_t *wa2, const complex_t *wa3);static void passf5(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa1, const complex_t *wa2, const complex_t *wa3, const complex_t *wa4, const int8_t isign);INLINE void cfftf1(uint16_t n, complex_t *c, complex_t *ch, const uint16_t *ifac, const complex_t *wa, const int8_t isign);static void cffti1(uint16_t n, complex_t *wa, uint16_t *ifac);/*---------------------------------------------------------------------- passf2, passf3, passf4, passf5. Complex FFT passes fwd and bwd. ----------------------------------------------------------------------*/#if 0 //def USE_SSEstatic void passf2pos_sse(const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa){ uint16_t k, ah, ac; for (k = 0; k < l1; k++) { ah = 2*k; ac = 4*k; RE(ch[ah]) = RE(cc[ac]) + RE(cc[ac+1]); IM(ch[ah]) = IM(cc[ac]) + IM(cc[ac+1]); RE(ch[ah+l1]) = RE(cc[ac]) - RE(cc[ac+1]); IM(ch[ah+l1]) = IM(cc[ac]) - IM(cc[ac+1]); }}static void passf2pos_sse_ido(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa){ uint16_t i, k, ah, ac; for (k = 0; k < l1; k++) { ah = k*ido; ac = 2*k*ido; for (i = 0; i < ido; i+=4) { __m128 m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14; __m128 m15, m16, m17, m18, m19, m20, m21, m22, m23, m24; __m128 w1, w2, w3, w4; m1 = _mm_load_ps(&RE(cc[ac+i])); m2 = _mm_load_ps(&RE(cc[ac+ido+i])); m5 = _mm_load_ps(&RE(cc[ac+i+2])); m6 = _mm_load_ps(&RE(cc[ac+ido+i+2])); w1 = _mm_load_ps(&RE(wa[i])); w3 = _mm_load_ps(&RE(wa[i+2])); m3 = _mm_add_ps(m1, m2); m15 = _mm_add_ps(m5, m6); m4 = _mm_sub_ps(m1, m2); m16 = _mm_sub_ps(m5, m6); _mm_store_ps(&RE(ch[ah+i]), m3); _mm_store_ps(&RE(ch[ah+i+2]), m15); w2 = _mm_shuffle_ps(w1, w1, _MM_SHUFFLE(2, 3, 0, 1)); w4 = _mm_shuffle_ps(w3, w3, _MM_SHUFFLE(2, 3, 0, 1)); m7 = _mm_mul_ps(m4, w1); m17 = _mm_mul_ps(m16, w3); m8 = _mm_mul_ps(m4, w2); m18 = _mm_mul_ps(m16, w4); m9 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(2, 0, 2, 0)); m19 = _mm_shuffle_ps(m17, m18, _MM_SHUFFLE(2, 0, 2, 0)); m10 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(3, 1, 3, 1)); m20 = _mm_shuffle_ps(m17, m18, _MM_SHUFFLE(3, 1, 3, 1)); m11 = _mm_add_ps(m9, m10); m21 = _mm_add_ps(m19, m20); m12 = _mm_sub_ps(m9, m10); m22 = _mm_sub_ps(m19, m20); m13 = _mm_shuffle_ps(m11, m11, _MM_SHUFFLE(0, 0, 3, 2)); m23 = _mm_shuffle_ps(m21, m21, _MM_SHUFFLE(0, 0, 3, 2)); m14 = _mm_unpacklo_ps(m12, m13); m24 = _mm_unpacklo_ps(m22, m23); _mm_store_ps(&RE(ch[ah+i+l1*ido]), m14); _mm_store_ps(&RE(ch[ah+i+2+l1*ido]), m24); } }}#endifstatic void passf2pos(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa){ uint16_t i, k, ah, ac; if (ido == 1) { for (k = 0; k < l1; k++) { ah = 2*k; ac = 4*k; RE(ch[ah]) = RE(cc[ac]) + RE(cc[ac+1]); RE(ch[ah+l1]) = RE(cc[ac]) - RE(cc[ac+1]); IM(ch[ah]) = IM(cc[ac]) + IM(cc[ac+1]); IM(ch[ah+l1]) = IM(cc[ac]) - IM(cc[ac+1]); } } else { for (k = 0; k < l1; k++) { ah = k*ido; ac = 2*k*ido; for (i = 0; i < ido; i++) { complex_t t2; RE(ch[ah+i]) = RE(cc[ac+i]) + RE(cc[ac+i+ido]); RE(t2) = RE(cc[ac+i]) - RE(cc[ac+i+ido]); IM(ch[ah+i]) = IM(cc[ac+i]) + IM(cc[ac+i+ido]); IM(t2) = IM(cc[ac+i]) - IM(cc[ac+i+ido]);#if 1 ComplexMult(&IM(ch[ah+i+l1*ido]), &RE(ch[ah+i+l1*ido]), IM(t2), RE(t2), RE(wa[i]), IM(wa[i]));#else ComplexMult(&RE(ch[ah+i+l1*ido]), &IM(ch[ah+i+l1*ido]), RE(t2), IM(t2), RE(wa[i]), IM(wa[i]));#endif } } }}static void passf2neg(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa){ uint16_t i, k, ah, ac; if (ido == 1) { for (k = 0; k < l1; k++) { ah = 2*k; ac = 4*k; RE(ch[ah]) = RE(cc[ac]) + RE(cc[ac+1]); RE(ch[ah+l1]) = RE(cc[ac]) - RE(cc[ac+1]); IM(ch[ah]) = IM(cc[ac]) + IM(cc[ac+1]); IM(ch[ah+l1]) = IM(cc[ac]) - IM(cc[ac+1]); } } else { for (k = 0; k < l1; k++) { ah = k*ido; ac = 2*k*ido; for (i = 0; i < ido; i++) { complex_t t2; RE(ch[ah+i]) = RE(cc[ac+i]) + RE(cc[ac+i+ido]); RE(t2) = RE(cc[ac+i]) - RE(cc[ac+i+ido]); IM(ch[ah+i]) = IM(cc[ac+i]) + IM(cc[ac+i+ido]); IM(t2) = IM(cc[ac+i]) - IM(cc[ac+i+ido]);#if 1 ComplexMult(&RE(ch[ah+i+l1*ido]), &IM(ch[ah+i+l1*ido]), RE(t2), IM(t2), RE(wa[i]), IM(wa[i]));#else ComplexMult(&IM(ch[ah+i+l1*ido]), &RE(ch[ah+i+l1*ido]), IM(t2), RE(t2), RE(wa[i]), IM(wa[i]));#endif } } }}static void passf3(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, const complex_t *wa1, const complex_t *wa2, const int8_t isign){ static real_t taur = FRAC_CONST(-0.5); static real_t taui = FRAC_CONST(0.866025403784439); uint16_t i, k, ac, ah; complex_t c2, c3, d2, d3, t2; if (ido == 1) { if (isign == 1) { for (k = 0; k < l1; k++) { ac = 3*k+1; ah = k; RE(t2) = RE(cc[ac]) + RE(cc[ac+1]); IM(t2) = IM(cc[ac]) + IM(cc[ac+1]); RE(c2) = RE(cc[ac-1]) + MUL_F(RE(t2),taur); IM(c2) = IM(cc[ac-1]) + MUL_F(IM(t2),taur); RE(ch[ah]) = RE(cc[ac-1]) + RE(t2); IM(ch[ah]) = IM(cc[ac-1]) + IM(t2); RE(c3) = MUL_F((RE(cc[ac]) - RE(cc[ac+1])), taui); IM(c3) = MUL_F((IM(cc[ac]) - IM(cc[ac+1])), taui); RE(ch[ah+l1]) = RE(c2) - IM(c3); IM(ch[ah+l1]) = IM(c2) + RE(c3); RE(ch[ah+2*l1]) = RE(c2) + IM(c3); IM(ch[ah+2*l1]) = IM(c2) - RE(c3); } } else { for (k = 0; k < l1; k++) { ac = 3*k+1; ah = k; RE(t2) = RE(cc[ac]) + RE(cc[ac+1]); IM(t2) = IM(cc[ac]) + IM(cc[ac+1]); RE(c2) = RE(cc[ac-1]) + MUL_F(RE(t2),taur); IM(c2) = IM(cc[ac-1]) + MUL_F(IM(t2),taur); RE(ch[ah]) = RE(cc[ac-1]) + RE(t2); IM(ch[ah]) = IM(cc[ac-1]) + IM(t2); RE(c3) = MUL_F((RE(cc[ac]) - RE(cc[ac+1])), taui); IM(c3) = MUL_F((IM(cc[ac]) - IM(cc[ac+1])), taui); RE(ch[ah+l1]) = RE(c2) + IM(c3); IM(ch[ah+l1]) = IM(c2) - RE(c3); RE(ch[ah+2*l1]) = RE(c2) - IM(c3); IM(ch[ah+2*l1]) = IM(c2) + RE(c3); } } } else { if (isign == 1) { for (k = 0; k < l1; k++) { for (i = 0; i < ido; i++) { ac = i + (3*k+1)*ido; ah = i + k * ido; RE(t2) = RE(cc[ac]) + RE(cc[ac+ido]); RE(c2) = RE(cc[ac-ido]) + MUL_F(RE(t2),taur); IM(t2) = IM(cc[ac]) + IM(cc[ac+ido]); IM(c2) = IM(cc[ac-ido]) + MUL_F(IM(t2),taur); RE(ch[ah]) = RE(cc[ac-ido]) + RE(t2); IM(ch[ah]) = IM(cc[ac-ido]) + IM(t2); RE(c3) = MUL_F((RE(cc[ac]) - RE(cc[ac+ido])), taui); IM(c3) = MUL_F((IM(cc[ac]) - IM(cc[ac+ido])), taui); RE(d2) = RE(c2) - IM(c3); IM(d3) = IM(c2) - RE(c3); RE(d3) = RE(c2) + IM(c3); IM(d2) = IM(c2) + RE(c3);#if 1 ComplexMult(&IM(ch[ah+l1*ido]), &RE(ch[ah+l1*ido]), IM(d2), RE(d2), RE(wa1[i]), IM(wa1[i])); ComplexMult(&IM(ch[ah+2*l1*ido]), &RE(ch[ah+2*l1*ido]), IM(d3), RE(d3), RE(wa2[i]), IM(wa2[i]));#else ComplexMult(&RE(ch[ah+l1*ido]), &IM(ch[ah+l1*ido]), RE(d2), IM(d2), RE(wa1[i]), IM(wa1[i])); ComplexMult(&RE(ch[ah+2*l1*ido]), &IM(ch[ah+2*l1*ido]), RE(d3), IM(d3), RE(wa2[i]), IM(wa2[i]));#endif } } } else { for (k = 0; k < l1; k++) { for (i = 0; i < ido; i++) { ac = i + (3*k+1)*ido;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -