// File: cfftrad2x2non_scaled.asm
// [code-viewer residue] Font size:
// instant)
// -----------------------------------------------------------------------------
// MIDDLE_STAGE: one pass of the butterfly computation per entry; the code
// jumps back here from GROUP1_END until the exit test below sends it to
// LAST_STAGE.
//
// State read on entry (as used by the code below):
//   P1        - halved on every entry; the pass exits to LAST_STAGE once the
//               halved value is <= 2.
//   B2        - per-stage step (the original comments call it 2^stage); it is
//               doubled at GROUP1_END before the next pass.
//   B3        - base address added to every computed group offset.
//   [SP + 60] - value the original comments treat as N -- NOTE(review): the
//               prologue is outside this view, confirm against the caller.
//   [SP]      - base pointer loaded into P4/P5 and stepped by P0 to fetch the
//               twiddle factors.
//
// Stack scratch slots written/read by this pass:
//   [SP + 4]  - 2 * M0 (per-inner-group byte step)
//   [SP + 8]  - halved P1, reloaded at GROUP1_END
//   [SP + 12] - inner group index P3 (P3 is reused inside the hardware loop)
//   [SP + 16] - group count, reloaded into R2 at GROUP2_END
//
// The add/subtract pairs in this stage use the (ASR) option, i.e. each
// sum/difference is arithmetically shifted right before being written back.
// NOTE(review): the file name says "non_scaled" -- confirm this scaling is
// intended.
// -----------------------------------------------------------------------------
MIDDLE_STAGE:
P1 = P1 >> 1;
CC = P1 <= 2;
IF CC JUMP LAST_STAGE;
// ---- Per-stage setup of modifier (M) registers and loop bounds ----
R0 = B2;
R0 = R0 << 2 || R1 = [SP + 60];
M0 = R0; // Offset for the I row Butterfly inputs (4*2^stage)
R2 = R0.L * R1.L (IS) || [SP + 8] = P1;
M1 = R2; // Offset for the II row Butterfly inputs
// (4*N*2^stage)
R2 <<= 1;
B0 = R2; // Offset in bytes for the first element of every
// group
R2 = R0 << 1;
R1 = R1 << 2 || [SP + 4] = R2;
// 4*(2*2^stage)
R2 = R1 - R0; // Offset to bring the butterfly pointer to the
// start ...
M2 = R2; // of the next row 4*(N - 2^stage)
R2 += -4;
M3 = R2; // M2 - 4: used at END_BFLY_OUT to step I0/I1
P2 = P1 >> 1; // group count (N/2*2^stage)
P0 = P1 << 1; // Twiddle factor offset 4*(N/2*2^stage)
R2 = P2; // group count
R3 = R3 - R3 (NS) || [SP + 16] = R2; // R3 = 0: outer group index
P1 = B2; // hardware loop count for both LC0 and LC1
// ---- Outer group loop: R3 counts 0 .. R2-1 ----
REP_GROUP_1:
CC = R2 <= R3; // Outer group loop
IF CC JUMP GROUP1_END;
P3 = -1; // pre-decremented; REP_GROUP_2 increments first
// ---- Inner group loop: P3 counts 0 .. P2-1 ----
REP_GROUP_2:
P3 += 1; // Inner group loop
CC = P2 <= P3;
IF CC JUMP GROUP2_END;
// Group start address = B3 + B0*R3 + [SP+4]*P3; P4 = twiddle base from [SP].
R0 = B0;
R0 = R0.L*R3.L (IS) || R1 = [SP + 4];
R4 = P3;
R1 = R1.L * R4.L (IS) || P4 = [SP];
R0 = R0 + R1 (NS) || [SP + 12] = P3; // save P3: it is overwritten in the loop
R1 = B3;
R0 = R0 + R1;
I0 = R0;
I1 = R0;
I3 = R0;
LSETUP(ST_BFLY_OUT, END_BFLY_OUT) LC0 = P1;
// Loop count - 2^stage
// The next four instructions run once, before the first LC0 iteration:
// I1 and I3 are advanced by M1, and I2/I3 are moved back by M2.
I2 = I0;
R0 = [I1 ++ M1] || I2 -= M2;
I3 += M1;
I3 -= M2 || NOP;
// ---- LC0 body: prime the pipeline for one row, then run the LC1 loop ----
// Complex multiply pattern used throughout (data D, twiddle W, packed 16-bit):
//   D.L = D.L*W.L - D.H*W.H ;  D.H = D.L*W.H + D.H*W.L
ST_BFLY_OUT:
P3 = P4;
R6 = [I1 ++ M0] || R0 = [P4 ++ P0];
R5 = R5 << 0 || R7 = [I1++] || P5 = [SP];
A1 = R6.L * R0.H,A0 = R6.L * R0.L || R4 = [I0 ++ M0] || R2 = [P3 ++ P0];
// Twiddle factor mult. for the ...
R6.H = (A1 += R6.H * R0.L), R6.L = (A0 -= R6.H * R0.H) || R5 = [I0++]
|| I1 -= M0;
// second and third butterfly inputs
A1 = R7.L * R2.H, A0 = R7.L * R2.L || I0 -= M0 || R1 = [I3 ++ M2];
// Dummy fetch to increment output pointer
R7.H =(A1 += R7.H * R2.L), R7.L = (A0 -= R7.H * R2.H) || R1 = [P5 ++ P0]
|| R2 = [I2 ++ M2];
// Dummy fetch to increment the twiddle factor
// pointer
LSETUP(ST_BFLY_IN, END_BFLY_IN) LC1 = P1;
// Loop count - 2^stage
// ---- LC1 body: one 2x2 butterfly on R4, R5, R6, R7 per iteration ----
// Results are stored through I3 and I2 while the loads and twiddle
// multiplies for the following iteration are issued in the parallel slots.
ST_BFLY_IN:
R4 = R4 +|+ R5, R5 = R4 -|- R5 (ASR);
// Compute butterfly outputs
R6 = R6 +|+ R7, R7 = R6 -|- R7 (ASR) || NOP;
R4 = R4 +|+ R6, R2 = R4 -|- R6 (ASR) || R6 = [I1 ++ M0]
|| R1 = [P5 ++ P0];
R5 = R5 +|+ R7, R7 = R5 -|- R7 (ASR) || [I3 ++ M0] = R2
|| R2 = [P3 ++ P0];
A1 = R6.L * R0.H, A0 = R6.L * R0.L || [I3++] = R7 || I2 += M0;
R6.H = (A1 += R6.H * R0.L), R6.L = (A0 -= R6.H * R0.H)
|| R7 = [I1++] || I0 += M0;
A1 = R7.L * R2.H, A0 = R7.L * R2.L || [I2] = R5 || I2 -= M0;
R7.H = (A1 += R7.H * R2.L), R7.L = (A0 -= R7.H * R2.H) || R5 = [I0]
|| I0 -= M0;
A1 = R5.L * R1.H, A0 = R5.L * R1.L || [I2++] = R4 || I3 -= M0;
// Twiddle factor mult. for the ...
END_BFLY_IN:
R5.H = (A1 += R5.H * R1.L), R5.L = (A0 -= R5.H * R1.H)
|| R4 = [I0++] || I1 -= M0;
// second butterfly input
// Last instruction of the LC0 body: step I1 and I0 by M3 (the load into R1
// is a dummy fetch used only for the I0 post-modify).
END_BFLY_OUT:
I1 += M3 || R1 = [I0 ++ M3];
P3 = [SP + 12]; // restore the inner group index saved above
JUMP REP_GROUP_2;
GROUP2_END:
R2 = [SP + 16]; // reload group count (R2 is reused inside the loops)
R3 += 1;
JUMP REP_GROUP_1;
// ---- End of pass: double B2, restore P1, and start the next pass ----
GROUP1_END:
R0 = B2;
R0 = R0 << 1 || P1 = [SP + 8];
B2 = R0;
JUMP MIDDLE_STAGE;
/*********************** LAST STAGE ********************************************
This section performs the butterfly computation for the last stage of the FFT.
It is handled separately from the middle stages because its output must not be
scaled: the add/subtract pairs here use the saturating (S) option, whereas the
middle stages use (ASR).

Unlike the middle stages there are no group loops: the pointers start directly
at B3 and a single pair of nested hardware loops (LC0 outer, LC1 inner), each
counting B2 iterations, covers the data.
*******************************************************************************/
LAST_STAGE:
I0 = B3; // I0 and I1 are used for reading the input ...
I1 = B3; // to the butterfly
I3 = B3; // I2 and I3 are used for writing the output
R0 = B2;
// [SP + 60] is the value the original comments treat as N
// NOTE(review): prologue not visible here -- confirm.
R0 = R0 << 2 || R1 = [SP + 60];
M0 = R0; // 4*(2^stage)
R2 = R0.L * R1.L (IS) || P4 = [SP]; // P4 = twiddle base pointer from [SP]
M1 = R2; // 4*N*2^stage
P0 = P1 << 1; // Twiddle factor offset
P1 = B2; // Loop count
R1 <<= 2;
R2 = R1 - R0 (NS) || I1 += M1;
M2 = R2; // 4*(N-2^stage)
R2 += -4;
M3 = R2; // M2 - 4: used at END_LAST_STAGE_OUT to step I0/I1
I2 = I0;
I2 -= M2 || R0 = [I3 ++ M1]; // load into R0 only serves to advance I3 by M1
I3 -= M2 || NOP;
LSETUP(ST_LAST_STAGE_OUT, END_LAST_STAGE_OUT) LC0 = P1;
// Loop count - 2^stage
// ---- LC0 body: prime the pipeline for one row, then run the LC1 loop ----
// Complex multiply pattern (data D, twiddle W, packed 16-bit halves):
//   D.L = D.L*W.L - D.H*W.H ;  D.H = D.L*W.H + D.H*W.L
ST_LAST_STAGE_OUT:
P3 = P4;
R6 = [I1 ++ M0] || R0 = [P4 ++ P0];
R5 = R5 << 0 || R7 = [I1++] || P5 = [SP];
A1 = R6.L * R0.H, A0 = R6.L * R0.L || R4 = [I0 ++ M0]
|| R2 = [P3 ++ P0]; // Twiddle factor mult. for the ...
R6.H = (A1 += R6.H * R0.L), R6.L = (A0 -= R6.H * R0.H) || R5 = [I0++]
|| I1 -= M0; // second and third butterfly inputs
A1 = R7.L * R2.H, A0 = R7.L * R2.L || I0 -= M0 || R1 = [I3 ++ M2];
// Dummy fetch to increment output pointer
R7.H =(A1 += R7.H * R2.L), R7.L = (A0 -= R7.H * R2.H) || R1 = [P5 ++ P0]
|| R2 = [I2 ++ M2]; // Dummy fetch to increment the twiddle factor
// pointer
LSETUP(ST_LAST_STAGE_IN, END_LAST_STAGE_IN) LC1 = P1;
// Loop count - 2^stage
// ---- LC1 body: one 2x2 butterfly on R4, R5, R6, R7 per iteration ----
// Results are stored through I3 and I2 while the loads and twiddle
// multiplies for the following iteration are issued in the parallel slots.
ST_LAST_STAGE_IN:
R4 = R4 +|+ R5, R5 = R4 -|- R5 (S);
// Compute butterfly outputs
R6 = R6 +|+ R7, R7 = R6 -|- R7 (S) || NOP;
R4 = R4 +|+ R6, R2 = R4 -|- R6 (S) || R6 = [I1 ++ M0]
|| R1 = [P5 ++ P0];
R5 = R5 +|+ R7, R7 = R5 -|- R7 (S) || [I3 ++ M0] = R2
|| R2 = [P3 ++ P0];
A1 = R6.L * R0.H, A0 = R6.L * R0.L || [I3++] = R7 || I2 += M0;
R6.H = (A1 += R6.H * R0.L), R6.L = (A0 -= R6.H * R0.H)
|| R7 = [I1++] || I0 += M0;
A1 = R7.L * R2.H, A0 = R7.L * R2.L || [I2] = R5 || I2 -= M0;
R7.H = (A1 += R7.H * R2.L), R7.L = (A0 -= R7.H * R2.H) || R5 = [I0]
|| I0 -= M0;
A1 = R5.L * R1.H, A0 = R5.L * R1.L || [I2++] = R4 || I3 -= M0;
// Twiddle factor mult. for the ...
END_LAST_STAGE_IN:
R5.H = (A1 += R5.H * R1.L), R5.L = (A0 -= R5.H * R1.H)
|| R4 = [I0++] || I1 -= M0;
// second butterfly input
// Last instruction of the LC0 body: step I1 and I0 by M3 (the load into R1
// is a dummy fetch used only for the I0 post-modify).
END_LAST_STAGE_OUT:
I1 += M3 || R1 = [I0 ++ M3];
// -----------------------------------------------------------------------------
// FINISH: routine epilogue.
// Releases the 20 bytes of stack scratch space (the slots [SP] .. [SP + 16]
// used by the stages above), pops R7-R4 and P5-P3, and returns to the caller.
// NOTE(review): the matching allocation and push are presumably in the
// prologue, which is outside this view -- confirm they mirror this sequence.
// -----------------------------------------------------------------------------
FINISH:
SP += 20;
(R7:4,P5:3) = [SP++]; // Retrieve Call preserved registers
RTS;
NOP; //to avoid one stall if LINK or UNLINK happens to be
//the next instruction after RTS in the memory.
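// --- Code-viewer page residue (not part of the assembly source) ---
// Keyboard shortcuts:
//   Copy code            Ctrl + C
//   Search code          Ctrl + F
//   Full-screen mode     F11
//   Toggle theme         Ctrl + Shift + D
//   Show shortcuts       ?
//   Increase font size   Ctrl + =
//   Decrease font size   Ctrl + -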