📄 imdct12.asm
字号:
/*******************************************************************************
Copyright(c) 2000 - 2002 Analog Devices. All Rights Reserved.
Developed by Joint Development Software Application Team, IPDC, Bangalore, India
for Blackfin DSPs ( Micro Signal Architecture 1.0 specification).
By using this module you agree to the terms of the Analog Devices License
Agreement for DSP Software.
********************************************************************************
Module Name : imdct12.asm
Label name : __imdct12
Version : 1.3
Change History :
Version Date Author Comments
1.3 11/18/2002 Swarnalatha Tested with VDSP++ 3.0
compiler 6.2.2 on
ADSP-21535 Rev.0.2
1.2 11/13/2002 Swarnalatha Tested with VDSP++ 3.0
on ADSP-21535 Rev. 0.2
1.1 01/22/2002 Vijay Modified to match silicon
cycle count
1.0 06/26/2001 Vijay Original
Description : This asm routine is a 16-bit implementation of the 12-point
inverse modified discrete cosine transform (IMDCT) which is
used in the MPEG Layer III audio codec. The equation of the
IMDCT for a sequence X of length N/2 is given below :
y(n) = (2/N) * sum_{k=0}^{N/2-1} X(k) * cos( pi*(2k + 1)*(2n + 1 + N/2) / (2N) )
where
k = 0 to N/2-1 (summation index over the input coefficients)
n = 0 to N-1 (index of the output sample)
Note : The MDCT exhibits antisymmetry. So the first six MDCT
values are enough to compute the 12 point IMDCT
The algorithm used to implement the IMDCT is given as a
pseudocode below :
First stage computations :
X1 = sqrt(2)*[X(0) X(2) X(4) -X(5) -X(3) -X(1)];
y1(0) = X1(0); y2(0) = X1(3);
y1(1) = X1(5) - X1(1);y2(1) = -(X1(2) + X1(4));
y1(2) = X1(2) - X1(4);y2(2) = X1(5) + X1(1);
3 Point IDCT :
a(2) = y1(0) + y1(1)*sqrt(3)/2 + y1(2)/2;
a(1) = y1(0) - y1(2);
a(0) = y1(0) - y1(1)*sqrt(3)/2 + y1(2)/2;
3 Point IDST :
b(0) = y2(0) + y2(1)*sqrt(3)/2 + y2(2)/2;
b(1) = -(y2(0) - y2(2));
b(2) = y2(0) - y2(1)*sqrt(3)/2 + y2(2)/2;
Twiddle factor multiplications :
W1 = cos(pi/24*(5:-2:1));
W2 = sin(pi/24*(5:-2:1));
c(0) = a(0)*W1(0) + b(2)*W2(0);
c(1) = a(1)*W1(1) + b(1)*W2(1);
c(2) = a(2)*W1(2) + b(0)*W2(2);
d(2) = -a(0)*W2(0) + b(2)*W1(0);
d(1) = -a(1)*W2(1) + b(1)*W1(1);
d(0) = -a(2)*W2(2) + b(0)*W1(2);
Last stage :
x(0) = d(0) + c(2);
x(11) = d(0) - c(2);
x(1) = d(1) + c(1);
x(10) = d(1) - c(1);
x(2) = d(2) + c(0);
x(9) = d(2) - c(0);
x(3) = -(d(2) + c(0));
x(8) = d(2) - c(0);
x(4) = -(d(1) + c(1));
x(7) = d(1) - c(1);
x(5) = -(d(0) + c(2));
x(6) = d(0) - c(2);
x = (x/12)
Prototype : void _imdct12(fract16 *input, fract16 *output);
Assumptions : The input buffer is assumed to be aligned to a 4 byte boundary
The length of the input and output buffers are always 6 and
12, respectively
Registers Used : A0, A1, R0-R3, I0, I1, L0, L1.
Performance :
Code size : 228 bytes
Cycle count : 52 cycles
*******************************************************************************/
// __imdct12 : 12-point IMDCT on 16-bit fractional data (straight-line code,
// no loops, no calls).
// C prototype (from the file header):
// void _imdct12(fract16 *input, fract16 *output);
// In : R0 = input pointer (6 values, read as three 32-bit words)
// R1 = output pointer (12 values, written as 16-bit words)
// Uses : A0, A1, R0-R3, I0, I1, L0, L1
// Notation in the comments below: a register pair is written (high, low),
// i.e. (Rn.H, Rn.L). X1, y1, y2, a, b, c, d, x are the quantities of the
// pseudocode in the file header. Scale factors are left out of the comments:
// the first stage carries 1/3 (constant) * 1/2 (shift / ASR) and the twiddle
// constants carry another 1/2, which together give the x/12 of the header.
.section L1_code;
.global __imdct12;
.align 8;
__imdct12:
// L0/L1 = 0 : I0/I1 are meant to be used as plain linear pointers
// (see the Blackfin circular-addressing rules).
L0 = 0;
I0 = R0; // I0 = input pointer
L1 = 0;
I1 = R1; // I1 = output pointer
/************************* FIRST STAGE *************************************/
R3.L = 0x3C56; // sqrt(2)/3, applied to the even-indexed inputs (low halves)
R3.H = 0xC3AA; // -sqrt(2)/3, applied to the odd-indexed inputs (high halves)
A1 = A0 = 0 || R0 = [I0++];
// Clear both accumulators (A1 is used with -= in the IDCT/IDST stage)
// and fetch the first two data: R0 = (X(1), X(0))
R0.H = R0.H * R3.H, R0.L = R0.L * R3.L || R1 = [I0++];
// R0 = (X1(5), X1(0)); fetch the next two data: R1 = (X(3), X(2))
R1.H = R1.H * R3.H, R1.L = R1.L * R3.L || R2 = [I0++];
// R1 = (X1(4), X1(1)); fetch the last two data: R2 = (X(5), X(4))
R2.H = R2.H * R3.H, R2.L = R2.L * R3.L;
// R2 = (X1(3), X1(2))
R3 = PACK(R2.H, R0.L);
// R3 = (X1(3), X1(0)) = (y2(0), y1(0))
R3 = R3 >>> 1 (V);
// Divide both halves by 2 to match the (ASR) scaling applied below
R0 = PACK(R2.L, R0.H);
// R0 = (X1(2), X1(5))
R0 = R0 +|+ R1, R1 = R0 -|- R1 (ASR);
// Both results are formed from the same R0/R1 inputs and divided by 2:
// R0 = (X1(2)+X1(4), X1(5)+X1(1)) = (-y2(1), y2(2))
// R1 = (X1(2)-X1(4), X1(5)-X1(1)) = ( y1(2), y1(1))
R2 = PACK(R0.H, R1.L);
// R2 = (-y2(1), y1(1)) : the terms that get multiplied by sqrt(3)/2
R1 = PACK(R0.L, R1.H);
// R1 = ( y2(2), y1(2))
/********************************* 3 POINT IDCT & IDST ************************/
R0.L = 0x6ED9; //sqrt(3)/2
R2.H = (A1 -= R2.H*R0.L), R2.L = (A0 = R2.L*R0.L);
// A1 was cleared above, so the -= negates the product:
// R2 = (y2(1)*sqrt(3)/2, y1(1)*sqrt(3)/2)
R0 = R1 >>> 1 (V); // R0 = (y2(2)/2, y1(2)/2)
R1 = R3 -|- R1 (S); // R1 = (y2(0)-y2(2), y1(0)-y1(2)) = (-b(1), a(1))
R2 = R2 +|+ R0, R0 = R2 -|- R0 (S);
// R2 = (y2(1)*sqrt(3)/2 + y2(2)/2, y1(1)*sqrt(3)/2 + y1(2)/2)
// R0 = (y2(1)*sqrt(3)/2 - y2(2)/2, y1(1)*sqrt(3)/2 - y1(2)/2)
R2 = R3 +|+ R2 (S); // R2 = (b(0), a(2))
R0 = R3 -|- R0 (S); // R0 = (b(2), a(0))
/************************ TWIDDLE FACTOR MULTIPLICATION ***********************/
R3.L = 0x32C6; //Cos(5*pi/24)/2 = W1(0)/2
R3.H = 0x26F6; //Sin(5*pi/24)/2 = W2(0)/2
A1 = R0.H * R3.L, A0 = R0.H * R3.H;
// A1 = b(2)*W1(0), A0 = b(2)*W2(0)
R0.H = (A1 -= R0.L * R3.H), R0.L = (A0 += R0.L * R3.L);
// R0 = (d(2), c(0)) :
// d(2) = b(2)*W1(0) - a(0)*W2(0), c(0) = b(2)*W2(0) + a(0)*W1(0)
R3.L = 0x3B21; //Cos(3*pi/24)/2 = W1(1)/2
R3.H = 0x187E; //Sin(3*pi/24)/2 = W2(1)/2
A1 = R1.H * R3.L, A0 = R1.L * R3.L;
// A1 = -b(1)*W1(1), A0 = a(1)*W1(1)
R1.H = (A1 += R1.L * R3.H), R1.L = (A0 -= R1.H * R3.H);
// R1 = (-d(1), c(1)); the A0 update must use the R1.H value from before
// this instruction (-b(1)), so subtracting it adds b(1)*W2(1)
R3.L = 0x3F74; // Cos(pi/24)/2 = W1(2)/2
R3.H = 0x85A; // Sin(pi/24)/2 = W2(2)/2
A1 = R2.H * R3.L, A0 = R2.H * R3.H;
// A1 = b(0)*W1(2), A0 = b(0)*W2(2)
R2.H = (A1 -= R2.L * R3.H), R2.L = (A0 += R2.L * R3.L);
// R2 = (d(0), c(2)) :
// d(0) = b(0)*W1(2) - a(2)*W2(2), c(2) = b(0)*W2(2) + a(2)*W1(2)
/***************************** LAST STAGE *************************************/
// Register contents on entry to this stage:
// R0 = (d(2), c(0)), R1 = (-d(1), c(1)), R2 = (d(0), c(2))
// Each store paired with || is relied upon to write the value its source
// register held before the paired ALU operation, so every output is stored
// one instruction after it is computed.
R3.H = R2.L + R2.H (S); // R3.H = x(0) = d(0) + c(2)
R3.L = R1.L - R1.H (S) || W[I1++] = R3.H;
// R3.L = x(1) = d(1) + c(1) (R1.H holds -d(1)); store x(0)
R3.L = R0.L + R0.H (S) || W[I1++] = R3.L;
// R3.L = x(2) = d(2) + c(0); store x(1)
R3 = -R3 (V) || W[I1++] = R3.L;
// R3 = (-x(0), -x(2)) = (x(5), x(3)); store x(2)
R3.L = R1.H - R1.L (S) || W[I1++] = R3.L;
// R3.L = x(4) = -(d(1) + c(1)); store x(3) = -(d(2) + c(0))
R1 = -R1 (V) || W[I1++] = R3.L;
// R1 = (d(1), -c(1)); store x(4)
R2.L = R2.H - R2.L (S) || W[I1++] = R3.H;
// R2.L = x(6) = d(0) - c(2); store x(5) = -(d(0) + c(2))
R1.L = R1.H + R1.L (S) || W[I1++] = R2.L;
// R1.L = x(7) = d(1) - c(1); store x(6)
R0.L = R0.H - R0.L (S) || W[I1++] = R1.L;
// R0.L = x(8) = d(2) - c(0); store x(7)
W[I1++] = R0.L;
// store x(8) = d(2) - c(0)
W[I1++] = R0.L;
// store x(9) = d(2) - c(0) (same value as x(8))
W[I1++] = R1.L;
// store x(10) = d(1) - c(1) (same value as x(7))
W[I1++] = R2.L;
// store x(11) = d(0) - c(2) (same value as x(6))
RTS;
NOP; //to avoid one stall if LINK or UNLINK happens to be
//the next instruction after RTS in the memory.
__imdct12.end:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -