📄 mdct12.asm

📁 嵌入式系统开发中
💻 ASM
字号:
/*******************************************************************************
Copyright(c) 2000 - 2002 Analog Devices. All Rights Reserved.
Developed by Joint Development Software Application Team, IPDC, Bangalore, India
for Blackfin DSPs  ( Micro Signal Architecture 1.0 specification).

By using this module you agree to the terms of the Analog Devices License
Agreement for DSP Software. 
********************************************************************************
Module Name     : mdct12.asm
Label name      :  __mdct12
Version         :   1.3
Change History  :

                Version     Date          Author        Comments
                1.3         11/18/2002    Swarnalatha   Tested with VDSP++ 3.0
                                                        compiler 6.2.2 on 
                                                        ADSP-21535 Rev.0.2
                1.2         11/13/2002    Swarnalatha   Tested with VDSP++ 3.0
                                                        on ADSP-21535 Rev. 0.2
                1.1         01/22/2002    Vijay         Modified to match
                                                        silicon cycle count
                1.0         06/26/2001    Vijay         Original 

Description     : This asm routine is a 16-bit implementation of the 12-point 
                  modified discrete cosine transform (MDCT) which is used in
                  the MPEG Layer III audio codec. The equation of the MDCT for
                  a sequence x of length N is given below:

                    y(k) = sum 0 to N-1 { x(n)cos(pi*(2k + 1)*(2n + 1 + N/2)/(2N)) }

                            n = 0 to N-1
                            k = 0 to N/2-1

                    Note : MDCT exhibits the antisymmetry property given by
                    y(N - k - 1) = -y(k), k = 0 to N/2 - 1

Algorithm       : The algorithm used to implement MDCT is given as a pseudocode
                  below:    

                First stage computations:

                c1(0) = x(8) + x(9) ;d1(2) = c1(0);
                c1(1) = x(7) + x(10);d1(1) = c1(1);
                c1(2) = x(6) + x(11);d1(0) = c1(2);

                c2(0) = x(2) - x(3);d2(2) = c2(0);
                c2(1) = x(1) - x(4);d2(1) = c2(1);
                c2(2) = x(0) - x(5);d2(0) = c2(2);

                c = c2 - c1;d = d2 + d1;

                Twiddle factor multiplications:

                W1 = cos(pi/24*(5:-2:1));
                W2 = sin(pi/24*(5:-2:1));

                a(2) = c(0)*W1(0) - d(2)*W2(0);
                a(1) = c(1)*W1(1) - d(1)*W2(1);
                a(0) = c(2)*W1(2) - d(0)*W2(2);

                b(2) = c(0)*W2(0) + d(2)*W1(0);
                b(1) = c(1)*W2(1) + d(1)*W1(1);
                b(0) = c(2)*W2(2) + d(0)*W1(2);
    
                3-Point DCT computation:

                y1(0) = a(0) + a(1) + a(2);
                y1(1) = (a(0) - a(2))*sqrt(3)/2;
                y1(2) = (a(0) + a(2))/2 - a(1);

                3-Point DST computation:

                y2(0) = b(0) - b(1) + b(2);
                y2(1) = (b(0) - b(2))*sqrt(3)/2;
                y2(2) = (b(0) + b(2))/2 + b(1);

                Last stage:
        
                y(0) = y1(0);
                y(2) = -(y1(1) - y2(2));
                y(4) = y1(2) - y2(1);
                y(6) = y2(0);
                y(8) = -y1(2) - y2(1);
                y(10) = -(-y1(1) - y2(2));
                y(N - k - 1) = -y(k)

                y = y/sqrt(2);

Prototype       : void _mdct12(fract16 *input, fract16 *output);

Assumptions     : The input buffer is assumed to be aligned to a 4 byte boundary
                  The lengths of the input and output buffers are always 12 and
                  6, respectively
                  The input data must have sufficient guard bits to avoid
                  overflow

Registers used  : A0, A1, R0-R3, R7, I0, I1, M0, L0, L1.

Performance     :
                Code size   :   208 bytes
                Cycle count :   54 cycles
    
*******************************************************************************/
.section L1_code;
.align 8;
.global __mdct12;
    
__mdct12:
    // 12-point MDCT of 16-bit fractional data.
    //   In : R0 = pointer to 12 input samples x(0)..x(11), read as six 32-bit words
    //        R1 = pointer to 6 output values y(0)..y(5), written as 16-bit halfwords
    //   R7 is saved on the stack and restored before returning
    //   (NOTE(review): presumably because it is callee-preserved in the C ABI - confirm).
    //
    // Notation used below: "Rn.H:L = p : q" means Rn.H = p and Rn.L = q.
    // A 32-bit load puts the lower-address sample in the low half.
    // In a parallel-issue line (a || b || c) every source register is read
    // before any destination is written, so loads/stores/MACs that share a
    // register with the ALU operation see its old value.
    // (S) = saturating vector add/sub, (T) = truncating fractional multiply.
    // Names c(), d(), a(), b(), y1(), y2(), y() follow the file-header pseudo-code.

    L0 = 0;                 // Length 0: I0 acts as a linear (non-circular) pointer
    L1 = 0;                 // Length 0: I1 acts as a linear (non-circular) pointer
    I0 = R0;                // I0 = input pointer (first argument)
    I1 = R1;                // I1 = output pointer (second argument)
    M0 = 10;                // Byte offset from output[0] to output[5] (5 halfwords)
    [--SP] = R7;            // Push R7; popped again just before RTS
    
/************************ FIRST STAGE **********************************/
    
    R0 = [I0++];            // R0.H:L = x(1) : x(0)
    R1 = [I0++];            // R1.H:L = x(3) : x(2)
    R2 = [I0++];            // R2.H:L = x(5) : x(4)
    R2 = PACK(R2.L,R2.H) || R3 = [I0++];
                            // Swap halves: R2.H:L = x(4) : x(5)
                            // Load R3.H:L = x(7) : x(6)
    R0 = R0 -|- R2 (S)|| R2 = [I0++];
                            // R0.H:L = x(1)-x(4) : x(0)-x(5)   (= c2(1) : c2(2))
                            // Load R2.H:L = x(9) : x(8)
    R1 = R1 +|- R2, R7 = R1 -|+ R2 (S) || R2 = [I0++];
                            // R1.H:L = x(3)+x(9) : x(2)-x(8)
                            // R7.H:L = x(3)-x(9) : x(2)+x(8)
                            // Load R2.H:L = x(11) : x(10) (ALU used the old R2)
    R2 = PACK(R2.L,R2.H);   // Swap halves: R2.H:L = x(10) : x(11)
    R2 = R2 +|+ R3 (S);     // R2.H:L = x(7)+x(10) : x(6)+x(11)  (= c1(1) : c1(2))
    R3 = PACK(R7.L, R1.L);  // R3.H:L = x(2)+x(8) : x(2)-x(8)
    R7 = PACK(R7.H,R1.H);   // R7.H:L = x(3)-x(9) : x(3)+x(9)
    R3 = R3 -|- R7 (S);     // R3.H:L = d(2) : c(0), where
                            //   d(2) = {x(2)+x(8)} - {x(3)-x(9)}
                            //   c(0) = {x(2)-x(8)} - {x(3)+x(9)}
    R2 = R0 +|+ R2, R0 = R0 -|- R2 (S);
                            // R2.H:L = d(1) : d(0)
                            //   d(1) = x(1)-x(4)+x(7)+x(10), d(0) = x(0)-x(5)+x(6)+x(11)
                            // R0.H:L = c(1) : c(2)
                            //   c(1) = x(1)-x(4)-x(7)-x(10), c(2) = x(0)-x(5)-x(6)-x(11)
    
/******************** TWIDDLE FACTOR MULTIPLICATION *************************/ 
    
    // Each pair (c, d) is rotated by its twiddle angle:
    //   a = c*cos - d*sin   (low half / A0)
    //   b = c*sin + d*cos   (high half / A1)

    R1.L = 0x658D;          // Cos(5*pi/24)
    R1.H = 0x4DEC;          // Sin(5*pi/24)
    A1 = R3.L*R1.H, A0 = R3.L*R1.L;
                            // A1 = c(0)*sin, A0 = c(0)*cos
    R3.H = (A1 += R3.H*R1.L), R3.L = (A0 -= R3.H*R1.H) (T);
                            // R3.H:L = b(2) : a(2)  (both MACs read the old R3.H = d(2))
    
    R7.L = 0x7EE8;          // Cos(pi/24)
    R7.H = 0x10B5;          // Sin(pi/24)
    A1 = R0.L*R7.H, A0 = R0.L*R7.L;
                            // A1 = c(2)*sin, A0 = c(2)*cos
    R7.H = (A1 += R2.L*R7.L), R7.L = (A0 -= R2.L*R7.H) (T);
                            // R7.H:L = b(0) : a(0)  (twiddles in R7 are consumed
                            // before being overwritten by the results)
    
    R1.L = 0x7642;          // Cos(3*pi/24)
    R1.H = 0x30FC;          // Sin(3*pi/24)
    A1 = R0.H*R1.H, A0 = R0.H*R1.L;
                            // A1 = c(1)*sin, A0 = c(1)*cos
    R1.H = (A1 += R2.H*R1.L), R1.L = (A0 -= R2.H*R1.H) (T);
                            // R1.H:L = b(1) : a(1)
    
/*********************** 3-POINT DCT & DST ********************************/  
    
    R2 = R7 +|+ R3, R0 = R7 -|- R3 (S);
                            // R2.H:L = b(0)+b(2) : a(0)+a(2)
                            // R0.H:L = b(0)-b(2) : a(0)-a(2)
    R7.L = 0x6EDA;          // sqrt(3)/2 (a(0) in R7.L is no longer needed)
    R3.H = R0.H*R7.L, R3.L = R0.L*R7.L (T);
                            // R3.H = y2(1) = (b(0)-b(2))*sqrt(3)/2
                            // R3.L = y1(1) = (a(0)-a(2))*sqrt(3)/2
    R7 = -R1 (V);           // R7.H:L = -b(1) : -a(1)
    R7 = PACK(R7.H,R1.L);   // R7.H:L = -b(1) :  a(1)
    R1 = R2 +|+ R7 (S);     // R1.H = y2(0) = b(0)-b(1)+b(2)
                            // R1.L = y1(0) = a(0)+a(1)+a(2)
    R7 = -R7 (V);           // R7.H:L =  b(1) : -a(1)
    R2 = R2 >>> 1 (V);      // R2.H:L = (b(0)+b(2))/2 : (a(0)+a(2))/2
    R2 = R2 +|+ R7 (S);     // R2.H = y2(2) = (b(0)+b(2))/2 + b(1)
                            // R2.L = y1(2) = (a(0)+a(2))/2 - a(1)
    
/**************************** OUTPUT STORAGE *******************************/
    
    // All values below are scaled by 1/sqrt(2) before being stored.
    // output[0] is written first, then I1 jumps to output[5] and the
    // remaining values are stored walking backwards to output[1].
    // Odd-indexed outputs come from the antisymmetry y(N-k-1) = -y(k).

    R0.L = 0x5A82;          // 1/sqrt(2)
    R1.H = R1.H*R0.L, R1.L = R1.L*R0.L (T);
                            // R1.H:L = y(6) : y(0)
    R1 = -R1 (V) || W[I1] = R1.L || I1 += M0;
                            // output[0] = y(0) (store sees the un-negated R1.L)
                            // R1.H:L = -y(6) : -y(0); I1 -> output[5]
    R2.H = R2.H*R0.L, R2.L = R2.L*R0.L (T) || W[I1--] = R1.H;
                            // output[5] = y(5) = -y(6)
                            // R2.H:L = scaled y2(2) : y1(2)
    R3.H = R3.L*R0.L, R3.L = R3.H*R0.L (T);
                            // Scale and swap halves: R3.H:L = y1(1) : y2(1)
    R0 = R2 +|+ R3, R7 = R2 -|- R3 (S);
                            // R0.H:L = y2(2)+y1(1) : y1(2)+y2(1) = y(10) : y(3)
                            // R7.H:L = y2(2)-y1(1) : y1(2)-y2(1) = y(2)  : y(4)
    R2 = -R0 (V) || W[I1--] = R7.L;
                            // output[4] = y(4); R2.H = -y(10) = y(1)
    W[I1--] = R0.L;         // output[3] = y(3) = -y(8)
    W[I1--] = R7.H;         // output[2] = y(2)
    W[I1--] = R2.H;         // output[1] = y(1) = -y(10)
    
    R7 = [SP++];            // Pop the R7 value pushed at entry
    RTS;
    NOP;                    //to avoid one stall if LINK or UNLINK happens to be
                            //the next instruction after RTS in the memory.
__mdct12.end:
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -