📄 r8x8invdct_ieee.asm
字号:
R7 = (A1 -= R7.H * R6.L), R6 = (A0 -= R7.L * R6.L) (IS) || I2 += M1;
/*
* The following 8 instructions do the Stage 1 computation and write the data
* to the buffer in a transposed way.
* Meanwhile, they read the input data for the next iteration.
* The following operations are performed.
*
* Element 0 = Element 0 + Element 7.
* Element 7 = Element 7 - Element 0.
* Element 2 = Element 2 + Element 5.
* Element 5 = Element 2 - Element 5.
* Element 1 = Element 6 - Element 1.
* Element 6 = Element 6 + Element 1.
* Element 3 = Element 3 - Element 4.
* Element 4 = Element 3 + Element 4.
*/
R5.H = R4 + R2 (RND12);
// Compute 0
R5.L = R4 - R2 (RND12) || R4 = [SP + 36];
// Compute 7
R2.H = R1 + R7 (RND12) || W[P0++P3] = R5.H;
// Compute 2, Write 0
R2.L = R1 - R7 (RND12) || W[P1++P4] = R5.L || R7 = [I3++];
// Compute 5, Write 7
R5.H = R0 - R3 (RND12) || W[P0++P3] = R2.H || R1.L = W[I1++];
// Compute 1, Write 2
R5.L = R0 + R3 (RND12) || W[P1++P4] = R2.L || R0.H = W[I0++];
// Compute 6, Write 5
R3.H = R4 - R6 (RND12) || W[P0++P3] = R5.H || R0.L = W[I2++];
// Compute 3, Write 1
R3.L = R4 + R6 (RND12) || W[P1++P4] = R5.L || R1.H = W[I0++];
// Compute 4, Write 6
/*
* This is the same operation as the initial two instructions before the loop.
* Elements 3 and 4 are written.
*/
A1 = R7.H * R0.H, A0 = R7.H * R0.H (IS) || W[P0++P2] = R3.H
|| R1.H = W[I0--];
ROW1_END:
R3 = (A1 += R7.H * R0.L), R2 = ( A0 -= R7.H * R0.L) (IS)
|| W[P1++P5] = R3.L || R7 = [I3++];
/*
 * Set-up for the ROW2 loop (ROW2_START .. ROW2_END).
 * The read pointers I0/I1/I2 are reloaded from B2, the coefficient pointer I3
 * from B3 and the write pointers P0/P1 from B0. The first two multiply
 * instructions of the butterfly are then issued once here, because inside the
 * loop they are issued at the end of each pass for the following pass.
 * NOTE(review): B0, B2, B3, M3 and P3..P5 are initialised outside this view;
 * presumably B2 is the buffer written by the ROW1 loop - confirm.
 */
I0 = B2; // I0 points to Input Element (0, 0)
I2 = B2; // I2 points to Input Element (0, 0)
I2 += M3 || R0.H = W[I0];
// Element 0 is read in R0.H.
// I2 is advanced by M3 (value set outside this view) so that it points to
// input Element (0, 4).
I1 = I2; // I1 starts as a copy of I2
I1 += 4 || R0.L = W[I2++];
// I1 is moved 4 bytes on, to input Element (0, 6).
// Element 4 is read in R0.L through I2, which is then post-incremented.
P2 = 8 (X); // Loop count, consumed by LSETUP below
I3 = B3; // I3 points to Coefficients
P0 = B0; // P0 points to array Element (0, 0) for writing
// output
P1 = B0; // Second write pointer; offset by 112 bytes below
R7 = [I3++]; // R7.H = C4 and R7.L = C4
NOP;
/*
* The following operation is done in 2 instructions.
* A1 = Element 0 * cos(pi/4)
* A0 = Element 0 * cos(pi/4)
* A1 = A1 + Element 4 * cos(pi/4)
* A0 = A0 - Element 4 * cos(pi/4)
* At the same time the values of Element 2 and 6 are read in R1.H and R1.L
* respectively.
* The coefficients C2 and C6 are read in R7.H and R7.L.
* In the end R3 holds the 0th and R2 holds the 4th Element.
*/
A1 = R7.H * R0.H, A0 = R7.H * R0.H (IS) || I0 += 4 || R1.L = W[I1++];
R3 = (A1 += R7.H * R0.L), R2 = ( A0 -= R7.H * R0.L) (IS) || R1.H = W[I0--]
|| R7 = [I3++];
LSETUP (ROW2_START, ROW2_END) LC0 = P2;
// Hardware loop, LC0 = P2 = 8 passes (loop for 8 rows).
// The three instructions below sit between LSETUP and ROW2_START, so they
// execute once, before the first pass.
P2 = 112 (X);
P1 = P1 + P2;
// P1 now points 112 bytes past the B0 base.
// NOTE(review): 112 = 7 * 16 bytes, presumably the last row/column of an 8x8
// array of 16-bit elements - confirm against the buffer layout.
P2 = -94(X);
// P2 is reused from here on as the post-modify of the W[P0++P2] store in the
// loop body.
/*
 * ROW2 loop body (hardware loop ROW2_START .. ROW2_END, counter LC0).
 * On entry to each pass, as left by the two priming instructions that precede
 * the loop and by the last two instructions of the previous pass:
 *   R3 = Element 0 term, R2 = Element 4 term,
 *   R1.H = Element 2, R1.L = Element 6, R7.H = C2, R7.L = C6.
 * [SP + 32] and [SP + 36] are used as scratch slots for Element 0 and
 * Element 4 while R3 and R2 are reused.
 * NOTE(review): I3 is only ever post-incremented here; presumably it wraps
 * through circular addressing configured outside this view - confirm.
 */
ROW2_START:
/*
* The following two instructions do -
* A1 = Element 2 * cos(3pi/8)
* A0 = Element 2 * cos(pi/8)
* A1 = A1 - Element 6 * cos(pi/8)
* A0 = A0 + Element 6 * cos(3pi/8)
* Element 1 and 7 are read in R5.H and R5.L.
* Coefficients C1 and C7 are loaded into R7.H and R7.L respectively.
* In the end R1 holds the 2nd and R0 holds the 6th element.
*/
A1= R7.L * R1.H, A0 = R7.H * R1.H (IS) || I0 += 4 || R5.H = W[I0];
R1 = (A1 -= R7.H * R1.L) , R0 = (A0 += R7.L * R1.L) (IS)
|| R5.L = W[I1--] || R7 = [I3++];
/*
* The following two instructions do -
* Element 0 = Element 0 + Element 6.
* Element 4 = Element 4 + Element 2.
* Element 2 = Element 4 - Element 2.
* Element 6 = Element 0 - Element 6.
* The register R3 is saved at [SP + 32] to make it free. Element 3 is read in
* R6.L.
* At this stage Element 0 is in R3, 4 is in R2, 2 is in R1 and 6 is in R0.
*/
R3 = R3 + R0, R0 = R3 - R0;
R2 = R2 + R1, R1 = R2 - R1 || [SP + 32] = R3 || R6.L = W[I0--];
/*
* In the following 8 instructions the Stage 4, 3 and 2 computations of the
* butterfly for elements 1, 5, 3 and 7 have been combined.
* R5.H and R5.L hold data 1 and 7 respectively.
* R6.H and R6.L hold data 5 and 3 respectively.
* For the first two instructions R7.H has C1 and R7.L has C7.
* For the next four instructions R7.H has C3 and R7.L has C5.
* For the last two instructions R7.H has C1 and R7.L has C7 again.
* After combining stages 4, 3 and 2 the final four equations are
* obtained. These give the output of Stage 2 directly.
*
* Element 1 = C7 * Element1 - C1 * Element 7 + C3 * Element 5 - C5 * Element 3.
* Element 7 = C1 * Element1 + C7 * Element 7 + C5 * Element 5 + C3 * Element 3.
* Element 5 = C5 * Element1 + C3 * Element 7 + C7 * Element 5 - C1 * Element 3.
* Element 3 = C3 * Element1 - C5 * Element 7 - C1 * Element 5 - C7 * Element 3.
*
* The first 4 instructions implement the first two equations. The next four
* instructions implement the last two equations.
* At the end Element 1 is in R3, 7 in R2, 5 in R7 and 3 in R6.
* Meanwhile the address offsets are modified (I0, I1 and I2 are each advanced
* by M1; M1 is set outside this view).
* Element 4 (R2) is saved at [SP + 36] before R2 is overwritten, Element 5 is
* read in R6.H, and Element 0 is reloaded from [SP + 32] into R4.
*/
A1 = R7.L * R5.H, A0 = R7.H * R5.H (IS) || [SP + 36] = R2
|| R6.H = W[I2--];
A1 -= R7.H * R5.L, A0 += R7.L * R5.L (IS) || I0 -= 4 || R7 = [I3++];
A1 += R7.H * R6.H, A0 += R7.L * R6.H (IS) || I0 += M1;
R3 = (A1 -= R7.L * R6.L), R2 = (A0 += R7.H * R6.L) (IS);
A1 = R7.L * R5.H, A0 = R7.H * R5.H (IS) || R4 = [SP + 32];
A1 += R7.H * R5.L, A0 -= R7.L * R5.L (IS) || I1 += M1 || R7 = [I3++];
A1 += R7.L * R6.H, A0 -= R7.H * R6.H (IS);
R7 = (A1 -= R7.H * R6.L), R6 = (A0 -= R7.L * R6.L) (IS) || I2 += M1;
/*
* The following 8 instructions do the Stage 1 computation and write the data
* to the buffer in a transposed way.
* Meanwhile they read the input data for the next iteration.
* The following operations are performed (operand order as coded below).
*
* Element 0 = Element 0 + Element 7.
* Element 7 = Element 0 - Element 7.
* Element 2 = Element 2 + Element 5.
* Element 5 = Element 2 - Element 5.
* Element 1 = Element 6 - Element 1.
* Element 6 = Element 6 + Element 1.
* Element 3 = Element 4 - Element 3.
* Element 4 = Element 4 + Element 3.
*
* R4 holds Element 0 (from [SP + 32]) for the first two instructions and
* Element 4 (from [SP + 36]) for the last two.
* Each (RND20) add/subtract leaves its result in a 16-bit half register, which
* is stored by the following instruction through P0 or P1.
* NOTE(review): the post-modify registers P3, P4 and P5 are set outside this
* view; the transposed layout depends on their values.
*/
R5.H = R4 + R2 (RND20);
// Compute 0
R5.L = R4 - R2 (RND20) || R4 = [SP + 36];
// Compute 7, reload Element 4 into R4
R2.H = R1 + R7 (RND20) || W[P0++P3] = R5.H;
// Compute 2, Write 0
R2.L = R1 - R7 (RND20) || W[P1++P4] = R5.L || R7 = [I3++];
// Compute 5, Write 7, load the next coefficient pair into R7
R5.H = R0 - R3 (RND20) || W[P0++P3] = R2.H || R1.L = W[I1++];
// Compute 1, Write 2
R5.L = R0 + R3 (RND20) || W[P1++P4] = R2.L || R0.H = W[I0++];
// Compute 6, Write 5
R3.H = R4 - R6 (RND20) || W[P0++P3] = R5.H || R0.L = W[I2++];
// Compute 3, Write 1
R3.L = R4 + R6 (RND20) || W[P1++P4] = R5.L || R1.H = W[I0++];
// Compute 4, Write 6
// NOTE(review): R1.H is loaded again by the next instruction before it is
// read, so this load presumably serves only to post-increment I0 - confirm.
/*
* This is the same operation as the two priming instructions issued before the
* loop: it starts the Element 0 / Element 4 computation for the next pass.
* Elements 3 and 4 are written.
*/
A1 = R7.H * R0.H, A0 = R7.H * R0.H (IS) || W[P0++P2] = R3.H
|| R1.H = W[I0--];
ROW2_END:
R3 = (A1 += R7.H * R0.L), R2 = ( A0 -= R7.H * R0.L) (IS)
|| W[P1++P5] = R3.L || R7 = [I3++];
/*
 * Routine exit: restore the registers saved on the stack and return.
 * NOTE(review): the matching push and any stack space used for the
 * [SP + 32] / [SP + 36] scratch slots are outside this view.
 */
TERMINATE:
(R7:4,P5:3)=[SP++]; // Pop R7-R4 and P5-P3 before returning.
RTS; // Return.
NOP; // To avoid one stall if LINK or UNLINK happens to be
// the next instruction after RTS in memory.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -