📄 cr2fftn_outplace.asm
字号:
/*
 * First two stages, fused (labels Stage12_strt / Stage12_end / ESC_12).
 * Input words are fetched through I0, which is stepped by M0 with the
 * bit-reversed (BREV) option alongside every load. Results are written
 * sequentially through I2. Each group consumes eight 32-bit words and
 * produces eight 32-bit words. Every add/subtract pair carries the (ASR)
 * option, so each result is halved before it is kept.
 * The hardware loop runs P0 times and the tail at ESC_12 handles one more
 * group, so P0 + 1 groups are processed in total.
 * NOTE(review): P0, M0, I0 and I2 are initialised before this excerpt -
 * confirm their values against the prologue.
 */
// Prime the pipeline: fetch the first three words of the first group.
I0 += M0 (BREV) || R2 = [I0];
//R2 receives the word addressed by I0.
I0 += M0 (BREV)|| R3 = [I0];
I0 += M0 (BREV)|| R4 = [I0];
// Unsigned compare, so CC is set only when P0 == 0. In that case the loop is
// skipped and the single primed group is finished at ESC_12.
CC = P0 <= 0 (IU);
If CC Jump ESC_12;
lsetup(Stage12_strt, Stage12_end) LC0 = P0;
Stage12_strt:
// First half of the group (R2, R3, R4, R5). The remaining input words are
// loaded in the parallel slots while the arithmetic proceeds.
R2 = R2 +|+ R3, R3 = R2 -|- R3 (ASR) || I0 += M0 (BREV) || R5 = [I0];
// (CO) is the cross option of the vector add/subtract.
// NOTE(review): presumably used here to swap result halves for the +/-j
// rotation - confirm against the Blackfin instruction reference.
R4 = R4 +|+ R5, R5 = R4 -|- R5 (ASR, CO)|| I0 += M0 (BREV) || R0 = [I0];
R2 = R2 +|+ R4, R4 = R2 -|- R4 (ASR) || I0 += M0 (BREV) || R1 = [I0];
R5 = R3 +|- R5, R3 = R3 -|+ R5 (ASR) || I0 += M0 (BREV) || R6 = [I0];
// Second half of the group (R0, R1, R6, R7). The results of the first half
// are stored through I2 in the parallel slots, in the order R2, R3, R4, R5.
R0 = R0 +|+ R1, R1 = R0 -|- R1 (ASR) || R7 = [I0] || [I2++] = R2;
R6 = R6 +|+ R7, R7 = R6 -|- R7 (ASR, CO)|| I0 += M0 (BREV)
|| [I2++] = R3;
// R2, R3 and R4 are reloaded below with the first three words of the next
// group, mirroring the priming loads ahead of the loop.
R0 = R0 +|+ R6, R6 = R0 -|- R6 (ASR) || R2 = [I0] || [I2++] = R4;
R7 = R1 +|- R7, R1 = R1 -|+ R7 (ASR) || I0 += M0 (BREV) || [I2++] = R5;
// Store the second-half results in the order R0, R1, R6, R7.
R3 = [I0] || [I2++] = R0;
I0 += M0 (BREV) || [I2++] = R1;
R4 = [I0] || [I2++] = R6;
Stage12_end:
I0 += M0 (BREV) || [I2++] = R7;
// Tail: same arithmetic and store order as one loop pass, applied to the
// group already held in R2, R3, R4. No words of a following group are
// preloaded here.
ESC_12:
R2 = R2 +|+ R3, R3 = R2 -|- R3 (ASR) || I0 += M0 (BREV)|| R5 = [I0];
R4 = R4 +|+ R5, R5 = R4 -|- R5 (ASR, CO) || I0 += M0 (BREV)|| R0 = [I0];
R2 = R2 +|+ R4, R4 = R2 -|- R4 (ASR) || I0 += M0 (BREV)|| R1 = [I0];
R5 = R3 +|- R5, R3 = R3 -|+ R5 (ASR) || I0 += M0 (BREV)|| R6 = [I0];
R0 = R0 +|+ R1, R1 = R0 -|- R1 (ASR) || [I2++] = R2 || R7 = [I0];
R6 = R6 +|+ R7, R7 = R6 -|- R7 (ASR, CO) || [I2++] = R3;
R0 = R0 +|+ R6, R6 = R0 -|- R6 (ASR) || [I2++] = R4;
R7 = R1 +|- R7, R1 = R1 -|+ R7 (ASR) || [I2++] = R5;
[I2++] = R0;
[I2++] = R1;
[I2++] = R6;
[I2++] = R7;
/*
 * Register values are prepared here for the middle stages.
 * The middle stages are skipped through the Esc_mid label when the input
 * size is equal to 8.
 * On exit from this section (when the size is not 8):
 *   P3 = 4, R7.L = low 16 bits of (P1 * 4) * P5, R0 = number of middle stages.
 */
R1 = P1; //R1 = wst.
R1 = R1 << 2; //R1 = wst * 4
P3 = 4; //P3 holds the number of lines in each butterfly at
// stage 3.
R7 = P5;
// Integer (IS) multiply of the low halves. Only R7.L is written, so R7.H
// still holds the upper half of the value copied from P5.
// NOTE(review): presumably P5 and the product both fit in 16 bits, since R7
// is later shifted and used as a whole 32-bit value - confirm.
R7.L = R1.L * R7.L (IS);//R7 = wst * 4 * twiddle offset
R2 = P4;
R3 = 8;
CC = R2 == R3; //If the input array size is equal to 8, go straight to the
// last stage,
If CC Jump Esc_mid; //because the middle stages do not occur.
R0 = 0; //Counter for number of stages.
// R2 is halved until it equals 8, counting the halvings in R0.
// NOTE(review): this only terminates when P4 is 8 times a power of two. A
// size below 8 or a non-power-of-two size would loop forever here, so it is
// presumably validated before this point - confirm against the prologue.
Find_m: //The computation of number of stages is done here.
R2 >>= 1;
R0 += 1;
CC = R2 == R3;
If !CC Jump Find_m (BP);//R0 = m-3. R0 must stay reserved until the stage loop ends.
/************************* Implementation of Middle Part **********************/
/*
 * A loop over the (number of stages - 3) middle stages, counted down in R0.
 * It is a general implementation of the butterfly computation. The first
 * nested loop (LC0 = P5) runs once per butterfly group of the stage. The
 * second nested loop (LC1 = P3) runs over the lines of one butterfly. Results
 * are halved by the (ASR) option for scaling purposes.
 * Each stage reads from the buffer at B2 and writes to the buffer at B0. The
 * two base registers are swapped at the end of every stage, so the output of
 * one stage becomes the input of the next.
 * Per-stage register roles, as set up below:
 *   M2 = P3 * 4      byte offset from a line to its counterpart line
 *   M0 = M2 - 4      used to step I0 back after the counterpart store
 *   M1 = R7          byte stride through the twiddle table (halved per stage)
 *   P5               group count (halved per stage)
 *   P3               lines per butterfly (doubled per stage)
 */
//B2 Input, B0 output.
Loopfor_m:
I0 = B0;
I2 = B2;
I3 = B2; //I3 also reads the input buffer, M2 bytes ahead of I2 (see below).
P0 = P3 << 2;
M2 = P0; //M2 holds the offset of counterpart line.
P0 += -4;
M0 = P0;
P5 = P5 >> 1;
R7 = R7 >>> 1 || I3 += M2;
M1 = R7;
// One butterfly is handled outside the inner loop (after Loop2_end), so the
// inner loop count is P3 - 1. P3 is restored after Loop1_end.
P3 += -1;
lsetup(Loop1_strt, Loop1_end) LC0 = P5;
//Loop is set for number of the butterfly
Loop1_strt:
I1 = B3; //Address of twiddle factor.
// Prime the inner loop with the first line (R2) and its counterpart (R4).
// The twiddle loaded into R3 here is replaced by the load in the first
// inner-loop instruction before it is used, so the first counterpart line
// is combined without a twiddle multiply.
R2 = [I2++];
R3 = [I1++M1] || R4 = [I3++];
lsetup(Loop2_strt, Loop2_end) LC1 = P3;
//Loop is set for the number of lines
Loop2_strt:
// Butterfly on the current pair: R5 = sum, R6 = difference, both halved.
// The next twiddle and the next counterpart line are loaded in parallel.
R5 = R2 +|+ R4, R6 = R2 -|- R4 (ASR) || R3 = [I1++M1]
|| R4 = [I3++];
// Complex multiply of the next counterpart line by its twiddle, treating
// the low half as the real part and the high half as the imaginary part:
//   R4.L = R3.L*R4.L - R3.H*R4.H
//   R4.H = R3.L*R4.H + R3.H*R4.L
// The sum is stored at I0, which then moves M2 bytes to the counterpart
// slot. The difference is stored there while I0 steps back by M0 = M2 - 4,
// leaving I0 one word past the sum just written.
A1 = R3.L * R4.H, A0 = R3.L * R4.L || [I0++M2] = R5 || R2 = [I2++];
Loop2_end: R4.H = (A1 += R3.H * R4.L), R4.L = ( A0 -= R3.H * R4.H)
|| I0 -= M0 || [I0] = R6;
// Last butterfly of the group. I2 and I3 each skip M2 bytes to reach the
// next group, and I0 ends one word past the last counterpart slot.
R5 = R2 +|+ R4, R6 = R2 -|- R4 (ASR) || I2 += M2;
I3 += M2 || [I0++M2] = R5;
Loop1_end:
[I0++] = R6;
// Undo the -1 applied above, then double the line count for the next stage.
P3 += 1;
P3 = P3 << 1;
R0 += -1;
// Swap B0 and B2 through B1 so the next stage reads what this one wrote.
B1 = B0;
B0 = B2;
B2 = B1;
CC = R0 == 0;
If !CC Jump Loopfor_m (BP);
//Loop for m.
/************************* Implementation of Last Part *********************/
/*
 * This part implements the last stage of the butterfly. The label Esc_mid is
 * the entry point when the size of the input data is 8, in which case the
 * middle stages are skipped. In the last stage there is only one butterfly
 * group, so there is no outer loop over groups.
 *
 * The input is read from the buffer at B2 and the result is written to the
 * buffer at B0.
 *
 * NOTE(review): the original header stated that the loop is set for n/4 and
 * that 4 data are processed at the same time. The loop visible here runs
 * P3 - 1 times with one butterfly per pass, plus one butterfly after the
 * loop. Confirm which description is intended.
 */
//B2 Input , B0 Output.
Esc_mid:
I0 = B0;
I2 = B2;
I3 = B2; //I3 also reads the input buffer, M2 bytes ahead of I2 (see below).
P0 = P3 << 2;
M2 = P0; //M2 holds the offset of counterpart line.
P0 += -4;
M0 = P0; //M0 = M2 - 4, used to step I0 back after the counterpart store.
R7 = R7 >>> 1 || I3 += M2;
M1 = R7; //M1 = byte stride through the twiddle table for this stage.
// One butterfly is handled after the loop, so the loop count is P3 - 1.
P3 += -1;
I1 = B3; //Address of twiddle factor.
// Prime the loop with the first line (R2) and its counterpart (R4). The
// twiddle loaded into R3 here is replaced by the load in the first loop
// instruction before it is used.
R2 = [I2++];
R3 = [I1++M1] || R4 = [I3++];
lsetup(Last_strt, Last_end) LC1 = P3;
//Loop is set for the number of lines
Last_strt:
// Butterfly on the current pair: R5 = sum, R6 = difference, both halved.
R5 = R2 +|+ R4, R6 = R2 -|- R4 (ASR) || R3 = [I1++M1] || R4 = [I3++];
// Complex multiply of the next counterpart line by its twiddle:
//   R4.L = R3.L*R4.L - R3.H*R4.H
//   R4.H = R3.L*R4.H + R3.H*R4.L
// The sum goes to I0 and the difference goes M2 bytes further on. I0 then
// ends up one word past the sum just written.
A1 = R3.L * R4.H, A0 = R3.L * R4.L|| [I0++M2] = R5 || R2 = [I2++];
Last_end:
R4.H = (A1 += R3.H * R4.L), R4.L = ( A0 -= R3.H * R4.H) || I0 -= M0
|| [I0] = R6;
// Final butterfly of the stage, stored without further pointer bookkeeping.
R5 = R2 +|+ R4, R6 = R2 -|- R4 (ASR);
[I0++M2] = R5;
[I0] = R6;
// The last stage wrote its result to the buffer at B0. If that address
// differs from the one held in M3, copy P4 words from B0 to the address in
// M3 before returning.
// NOTE(review): M3 presumably holds the caller's output buffer address, set
// up before this excerpt - confirm against the prologue.
R0 = B0;
R1 = M3;
CC = R0 == R1;
If CC Jump Terminate;
I0 = B0;
I1 = M3;
// Pipelined copy: one word is loaded ahead, then a single-instruction
// hardware loop stores the current word and loads the next, P4 times.
// NOTE(review): this issues P4 + 1 loads in total, so one word past the P4
// copied words is read from the source. The extra word is never stored.
R0 = [I0++];
lsetup(Copy_strt, Copy_strt) LC0 = P4;
Copy_strt:
[I1++] = R0 || R0 = [I0++];
Terminate:
(R7:4, P5:3) = [SP++]; //Pop the registers before returning.
RTS; //Return.
NOP; // If link or unlink happens to be the next
// instruction after RTS in memory, RTS takes one
// extra cycle than expected. NOP is put to avoid
// this.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -