📄 mdct12.asm
字号:
/*******************************************************************************
Copyright(c) 2000 - 2002 Analog Devices. All Rights Reserved.
Developed by Joint Development Software Application Team, IPDC, Bangalore, India
for Blackfin DSPs ( Micro Signal Architecture 1.0 specification).
By using this module you agree to the terms of the Analog Devices License
Agreement for DSP Software.
********************************************************************************
Module Name : mdct12.asm
Label name : __mdct12
Version : 1.3
Change History :
Version Date Author Comments
1.3 11/18/2002 Swarnalatha Tested with VDSP++ 3.0
compiler 6.2.2 on
ADSP-21535 Rev.0.2
1.2 11/13/2002 Swarnalatha Tested with VDSP++ 3.0
on ADSP-21535 Rev. 0.2
1.1 01/22/2002 Vijay Modified to match
silicon cycle count
1.0 06/26/2001 Vijay Original
Description : This asm routine is a 16-bit implementation of the 12-point
modified discrete cosine transform (MDCT) which is used in
the MPEG Layer III audio codec. The equation of the MDCT for
a sequence x of length N is given below:
y(k) = sum over n of { x(n)*cos(pi*(2k + 1)*(2n + 1 + N/2)/(2N)) }
n = 0 to N-1
k = 0 to N/2-1
Note : MDCT exhibits the antisymmetry property given by
y(N - k - 1) = -y(k), k = 0 to N/2 - 1
Algorithm : The algorithm used to implement MDCT is given as a pseudocode
below:
First stage computations:
c1(0) = x(8) + x(9) ;d1(2) = c1(0);
c1(1) = x(7) + x(10);d1(1) = c1(1);
c1(2) = x(6) + x(11);d1(0) = c1(2);
c2(0) = x(2) - x(3);d2(2) = c2(0);
c2(1) = x(1) - x(4);d2(1) = c2(1);
c2(2) = x(0) - x(5);d2(0) = c2(2);
c = c2 - c1;d = d2 + d1;
Twiddle factor multiplications:
W1 = cos(pi/24*(5:-2:1));
W2 = sin(pi/24*(5:-2:1));
a(2) = c(0)*W1(0) - d(2)*W2(0);
a(1) = c(1)*W1(1) - d(1)*W2(1);
a(0) = c(2)*W1(2) - d(0)*W2(2);
b(2) = c(0)*W2(0) + d(2)*W1(0);
b(1) = c(1)*W2(1) + d(1)*W1(1);
b(0) = c(2)*W2(2) + d(0)*W1(2);
3-Point DCT computation:
y1(0) = a(0) + a(1) + a(2);
y1(1) = (a(0) - a(2))*sqrt(3)/2;
y1(2) = (a(0) + a(2))/2 - a(1);
3-Point DST computation:
y2(0) = b(0) - b(1) + b(2);
y2(1) = (b(0) - b(2))*sqrt(3)/2;
y2(2) = (b(0) + b(2))/2 + b(1);
Last stage:
y(0) = y1(0);
y(2) = -(y1(1) - y2(2));
y(4) = y1(2) - y2(1);
y(6) = y2(0);
y(8) = -y1(2) - y2(1);
y(10) = -(-y1(1) - y2(2));
y(N - k - 1) = -y(k)
y = y/sqrt(2);
Prototype : void _mdct12(fract16 *input, fract16 *output);
Assumptions : The input buffer is assumed to be aligned to a 4-byte boundary
The lengths of the input and output buffers are always 12 and
6, respectively
The input data must have sufficient guard bits to avoid
overflow
Registers used : A0, A1, R0-R3, R7, I0, I1, M0, L0, L1.
Performance :
Code size : 208 bytes
Cycle count : 54 cycles
*******************************************************************************/
.section L1_code;
.align 8;
.global __mdct12;
// __mdct12 : 12-point MDCT on 16-bit fractional data.
// On entry R0 holds the input pointer and R1 the output pointer.
// Twelve input halfwords are read as six 32-bit words through I0; six output
// halfwords y(0)..y(5) are written through I1.
// R7 is pushed on entry and popped before RTS. R0-R3, A0, A1, I0, I1 and M0
// are overwritten, and L0/L1 are left at 0.
__mdct12:
// Length registers = 0 so that I0/I1 address linearly (no circular wrap).
L0 = 0;
L1 = 0;
I0 = R0; // Read input pointer
I1 = R1; // Read output pointer
// 10 bytes = 5 halfwords: used once in the output stage to step I1 from
// output[0] to output[5].
M0 = 10;
// R7 is used as a working register below, so save the caller's value.
[--SP] = R7;
/************************ FIRST STAGE **********************************/
// Each 32-bit load fetches two consecutive samples; the comments below assume
// the lower-indexed sample lands in the low half (little-endian halfwords).
R0 = [I0++]; // Read x(0) & x(1)
R1 = [I0++]; // Read x(2) & x(3)
R2 = [I0++]; // Read x(4) & x(5)
// Swap the halves of R2 so that R2.H = x(4), R2.L = x(5); in parallel read
// x(6) & x(7) into R3.
R2 = PACK(R2.L,R2.H) || R3 = [I0++];
// R0.H = c2(1) = x(1) - x(4), R0.L = c2(2) = x(0) - x(5); in parallel read
// x(8) & x(9) into R2.
R0 = R0 -|- R2 (S)|| R2 = [I0++];
// R1.H = x(3) + x(9), R1.L = x(2) - x(8);
// R7.H = x(3) - x(9), R7.L = x(2) + x(8); in parallel read x(10) & x(11).
R1 = R1 +|- R2, R7 = R1 -|+ R2 (S) || R2 = [I0++];
R2 = PACK(R2.L,R2.H); // Interchange x(10) & x(11): R2.H = x(10), R2.L = x(11)
R2 = R2 +|+ R3 (S); // R2.H = c1(1) = x(7) + x(10), R2.L = c1(2) = x(6) + x(11)
// Regroup the partial sums so that one vector subtract yields both c(0) and d(2).
R3 = PACK(R7.L, R1.L); // R3.H = x(2) + x(8), R3.L = x(2) - x(8)
R7 = PACK(R7.H,R1.H); // R7.H = x(3) - x(9), R7.L = x(3) + x(9)
R3 = R3 -|- R7 (S); // R3.H = d(2) = x(2)+x(8)-(x(3)-x(9)),
// R3.L = c(0) = x(2)-x(8)-(x(3)+x(9))
// R2.H = d(1) = x(1)-x(4)+x(7)+x(10), R2.L = d(0) = x(0)-x(5)+x(6)+x(11);
// R0.H = c(1) = x(1)-x(4)-x(7)-x(10), R0.L = c(2) = x(0)-x(5)-x(6)-x(11)
R2 = R0 +|+ R2, R0 = R0 -|- R2 (S);
/******************** TWIDDLE FACTOR MULTIPLICATION *************************/
// Each pair below is a rotation by the given angle, using the two accumulators:
//   a = c*cos - d*sin (built in A0, written to the low half)
//   b = c*sin + d*cos (built in A1, written to the high half)
// Constants are 16-bit fractional values; (T) truncates the accumulator result.
R1.L = 0x658D; // Cos(5*pi/24)
R1.H = 0x4DEC; // Sin(5*pi/24)
// A1 = c(0)*sin, A0 = c(0)*cos
A1 = R3.L*R1.H, A0 = R3.L*R1.L;
// R3.H = b(2) = c(0)*sin + d(2)*cos, R3.L = a(2) = c(0)*cos - d(2)*sin
R3.H = (A1 += R3.H*R1.L), R3.L = (A0 -= R3.H*R1.H) (T);
R7.L = 0x7EE8; // Cos(pi/24)
R7.H = 0x10B5; // Sin(pi/24)
// A1 = c(2)*sin, A0 = c(2)*cos
A1 = R0.L*R7.H, A0 = R0.L*R7.L;
// R7.H = b(0) = c(2)*sin + d(0)*cos, R7.L = a(0) = c(2)*cos - d(0)*sin
R7.H = (A1 += R2.L*R7.L), R7.L = (A0 -= R2.L*R7.H) (T);
R1.L = 0x7642; // Cos(3*pi/24)
R1.H = 0x30FC; // Sin(3*pi/24)
// A1 = c(1)*sin, A0 = c(1)*cos
A1 = R0.H*R1.H, A0 = R0.H*R1.L;
// R1.H = b(1) = c(1)*sin + d(1)*cos, R1.L = a(1) = c(1)*cos - d(1)*sin
R1.H = (A1 += R2.H*R1.L), R1.L = (A0 -= R2.H*R1.H) (T);
/*********************** 3-POINT DCT & DST ********************************/
// Low halves carry the DCT (a / y1) path, high halves the DST (b / y2) path.
// R2.H = b(0) + b(2), R2.L = a(0) + a(2); R0.H = b(0) - b(2), R0.L = a(0) - a(2)
R2 = R7 +|+ R3, R0 = R7 -|- R3 (S);
R7.L = 0x6EDA; // sqrt(3)/2
// R3.H = y2(1) = (b(0)-b(2))*sqrt(3)/2, R3.L = y1(1) = (a(0)-a(2))*sqrt(3)/2
R3.H = R0.H*R7.L, R3.L = R0.L*R7.L (T);
// Build the mixed-sign pair (-b(1), +a(1)) so that one vector add gives both
// y2(0) (which subtracts b(1)) and y1(0) (which adds a(1)).
R7 = -R1 (V);
R7 = PACK(R7.H,R1.L);
R1 = R2 +|+ R7 (S); // R1.H = y2(0) = b(0)-b(1)+b(2), R1.L = y1(0) = a(0)+a(1)+a(2)
// Flip the pair to (+b(1), -a(1)) and halve the sums for y2(2) / y1(2).
R7 = -R7 (V);
R2 = R2 >>> 1 (V);
R2 = R2 +|+ R7 (S); // R2.H = y2(2) = (b(0)+b(2))/2 + b(1),
// R2.L = y1(2) = (a(0)+a(2))/2 - a(1)
/**************************** OUTPUT STORAGE *******************************/
// All results are scaled by 1/sqrt(2) before being stored. output[0] is
// written first; I1 then jumps to output[5] and walks down to output[1].
R0.L = 0x5A82; // 1/sqrt(2)
// R1.H = y2(0)/sqrt(2), R1.L = y1(0)/sqrt(2)
R1.H = R1.H*R0.L, R1.L = R1.L*R0.L (T);
// Store output[0] = y(0) = y1(0)/sqrt(2), advance I1 by M0 to output[5], and
// negate R1 so that R1.H = -y2(0)/sqrt(2) for the next store.
// NOTE(review): this relies on the parallel store reading R1.L and I1 before
// the negate and the index update take effect - confirm against the ISA manual.
R1 = -R1 (V) || W[I1] = R1.L || I1 += M0;
// Scale y2(2) / y1(2); in parallel store output[5] = -y2(0)/sqrt(2)
// (= -y(6) = y(5) by the antisymmetry y(N-k-1) = -y(k)).
R2.H = R2.H*R0.L, R2.L = R2.L*R0.L (T) || W[I1--] = R1.H;
// Scale and swap halves: R3.H = y1(1)/sqrt(2), R3.L = y2(1)/sqrt(2), so that
// y1(1) lines up with y2(2) and y2(1) with y1(2) in R2.
R3.H = R3.L*R0.L, R3.L = R3.H*R0.L (T);
// R0.H = y2(2) + y1(1), R0.L = y1(2) + y2(1);
// R7.H = y2(2) - y1(1), R7.L = y1(2) - y2(1)
R0 = R2 +|+ R3, R7 = R2 -|- R3 (S);
// R2.H = -(y1(1) + y2(2)) for the final store; in parallel store
// output[4] = y(4) = y1(2) - y2(1).
R2 = -R0 (V) || W[I1--] = R7.L;
W[I1--] = R0.L; // output[3] = y1(2) + y2(1) = -y(8) = y(3)
W[I1--] = R7.H; // output[2] = y(2) = -(y1(1) - y2(2))
W[I1--] = R2.H; // output[1] = -(y1(1) + y2(2)) = -y(10) = y(1)
// Restore the caller's R7 and return.
R7 = [SP++];
RTS;
NOP; //to avoid one stall if LINK or UNLINK happens to be
//the next instruction after RTS in the memory.
__mdct12.end:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -