📄 fft_1024.asm
字号:
/*****************************************************************************Copyright (c) 2005 Analog Devices. All Rights Reserved.Developed by Analog Devices Australia - Unit 3, 97 Lewis Road,Wantirna, Victoria, Australia, 3152. Email: ada.info@analog.comTHIS SOFTWARE IS PROPRIETARY & CONFIDENTIAL. By using this module youagree to the terms of the associated Analog Devices License Agreement.******************************************************************************$Revision: 2438 $$Date: 2005-09-13 15:51:40 +1000 (Tue, 13 Sep 2005) $Project: IEEE 802.16 LibraryTitle: 1024-FFTAuthor(s): Andrew Heale (andrew.heale@analog.com)Revised by: Marek Pendrakowski (marek.pendrakowski@analog.com)Description: Radix-4 algorithm.******************************************************************************Target Processor: ADSP-TS201Target Tools Revision: easmts 1.6.1.11*****************************************************************************//***** Globals***/.global _fft_1024;/***** Code***/.section program;// void fft_1024(const cfract32 *input, cfract32 *output);// J4 Input, 1024 aligned// J5 Output// J6 fft_config. Actually a pointer to the twiddle table..align_code 4;_fft_1024: // Preserve R24..31 on the stack... Q[J27 += -4] = XR27:24; Q[K27 += -4] = YR27:24;; Q[J27 += -4] = XR31:28; Q[K27 += -4] = YR31:28;;// --------------------------------------------------------------------------//stage 1 begins here//-------------------// This loop reads the fft data from the input buffer in bit-reverse order,// performs the first stage radix-4 butterfly without scaling,// and writes to the output buffer.K14 = [J6+10];; // A value from twiddles to decide fft or ifftJ0 = J4 + 0; J1 = 512;;R31 = 0xFFFF0000;; // R31 = -j used as twiddle factorK14 = K14 - K31;;if nkle; do, SR31 = -R31; K6 = J5;; // Pointers used to save data into int_buffer R1:0 = BR Q[J0+=J1]; K7 = K6 + 256;; R5:4 = BR Q[J0+=J1]; K8 = K6 + 512;; R3:2 = BR Q[J0+=J1]; K9 = K6 + 768;; R7:6 = BR Q[J0+=J1]; SR1:0 = R1:0+R5:4, SR5:4 = R1:0-R5:4;; LC0 = 31; SR3:2 = R3:2+R7:6, SR7:6 = R3:2-R7:6;; R09:08 = BR Q[J0+=J1]; R1 = lshift R4 by 0; R4 = R1;; R13:12 = BR Q[J0+=J1]; MR1:0 += R6 ** R31 (CI);; R11:10 = BR Q[J0+=J1]; R6 = pass R3; R3 = MR1:0, MR1:0 += R7 ** R31 (CI);; R15:14 = BR Q[J0+=J1]; SR09:08 = R09:08+R13:12, SR13:12 = R09:08-R13:12; R7 = MR1:0, MR1:0 += R7 ** R31 (CI);; SR11:10 = R11:10+R15:14, SR15:14 = R11:10-R15:14;;.align_code 4;stage_1_loop: R1:0 = BR Q[J0+=J1]; SR17:16 = R1:0+R3:2, SR19:18 = R1:0-R3:2; R9 = lshift R12 by 0; R12 = R9;; R5:4 = BR Q[J0+=J1]; SR21:20 = R5:4+R7:6, SR23:22 = R5:4-R7:6; MR1:0 += R14 ** R31 (CI); Q[K6 += 4] = YR19:16;; R3:2 = BR Q[J0+=J1]; R14 = pass R11; R11 = MR1:0, MR1:0 += R15 ** R31 (CI); Q[K7 += 4] = XR19:16;; R7:6 = BR Q[J0+=J1]; SR1:0 = R1:0+R5:4, SR5:4 = R1:0-R5:4; R15 = MR1:0, MR1:0 += R15 ** R31 (CI); Q[K8 += 4] = YR23:20;; SR3:2 = R3:2+R7:6, SR7:6 = R3:2-R7:6; Q[K9 += 4] = XR23:20;; R09:08 = BR Q[J0+=J1]; SR17:16 = R09:08+R11:10, SR19:18 = R09:08-R11:10; R1 = lshift R4 by 0; R4 = R1;; R13:12 = BR Q[J0+=J1]; SR21:20 = R13:12+R15:14, SR23:22 = R13:12-R15:14; MR1:0 += R6 ** R31 (CI); Q[K6 += 4] = YR19:16;; R11:10 = BR Q[J0+=J1]; R6 = pass R3; R3 = MR1:0, MR1:0 += R7 ** R31 (CI); Q[K7 += 4] = XR19:16;; R15:14 = BR Q[J0+=J1]; SR09:08 = R09:08+R13:12, SR13:12 = R09:08-R13:12; R7 = MR1:0, MR1:0 += R7 ** R31 (CI); Q[K8 += 4] = YR23:20;;.align_code 4;if nlc0e, jump stage_1_loop; SR11:10 = R11:10+R15:14, SR15:14 = R11:10-R15:14; Q[K9 += 4] = XR23:20;; SR17:16 = R1:0+R3:2, SR19:18 = R1:0-R3:2; R9 = lshift R12 by 0; R12 = R9;; SR21:20 = R5:4+R7:6, SR23:22 = R5:4-R7:6; MR1:0 += R14 ** R31 (CI); Q[K6 += 4] = YR19:16;; R14 = pass R11; R11 = MR1:0, MR1:0 += R15 ** R31 (CI); Q[K7 += 4] = XR19:16;; R15 = MR1:0, MR1:0 += R15 ** R31 (CI); Q[K8 += 4] = YR23:20;; Q[K9 += 4] = XR23:20;; SR17:16 = R09:08+R11:10, SR19:18 = R09:08-R11:10;; Q[K6 += 4] = YR19:16;; SR21:20 = R13:12+R15:14, SR23:22 = R13:12-R15:14; Q[K7 += 4] = XR19:16;; Q[K8 += 4] = YR23:20;; Q[K9 += 4] = XR23:20;;// --------------------------------------------------------------------------// All stages 2 to 5 use the same code design: stages 3 to 5 run in a loop// but stage 2 is optimised separately. Each stage has an inner loop that// processes four radix-4 butterflies in parallel.//// Processing each butterfly does// - read data (a,b,c,d) and twiddles (f,g,h) (e is constant)// - multiply by twiddles, which applies scaling, giving (k,l,m,n)// - sum/diff for first butterfly (o,p,q,r)// - permute to get rotation of r by -j (s)// - sum/diff for second butterfly (t,u,v,w)// - write data back to same buffer//// a * e = k o t// \/ \ /// b * f = l /\ p \/ u// \/\/// c * g = m q /\/\ v// \/ /\// d * h = n /\r->s/ \ w//// The operations are arranged in a loop, with data passing through the loop// three times to complete its processing.// |Regs 11111222223// -- First pass ---------------|-- Second pass -----------|-- Third pass --|-0246802468024680// 1 read d, read h | k = a*e (1), q:r = m+:-n | | a De Q LHRtuvw// 2 read c, read g | k = a*e (2) | | Cde GqKlhrtuvw// 3 read b, read f, n = d*h (1) | k = ... (3) | | BcdeFgqKlhrtuvw// 4 read a, n = d*h (2) | s = perm(r) | D2 = D1 | AbcNefgqklSStuvw
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -