⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mdct36.asm

📁 嵌入式系统开发中
💻 ASM
字号:
/*******************************************************************************
Copyright(c) 2000 - 2002 Analog Devices. All Rights Reserved.
Developed by Joint Development Software Application Team, IPDC, Bangalore, India
for Blackfin DSPs  ( Micro Signal Architecture 1.0 specification).

By using this module you agree to the terms of the Analog Devices License
Agreement for DSP Software. 
********************************************************************************
Module Name     : mdct36.asm
Label name      :  __mdct36
Version         :   1.3
Change History  :

                Version     Date          Author      Comments
                1.3         11/18/2002    Swarnalatha Tested with VDSP++ 3.0
                                                      compiler 6.2.2 on 
                                                      ADSP-21535 Rev.0.2
                1.2         11/13/2002    Swarnalatha Tested with VDSP++ 3.0
                                                      on ADSP-21535 Rev. 0.2
                1.1         01/22/2002    Vijay       Modified to match silicon
                                                      cycle count
                1.0         06/26/2001    Vijay       Original 

Description     : This asm routine is a 16-bit implementation of the 36-point 
                  modified discrete cosine transform (MDCT) which is used in the
                  MPEG Layer III audio codec. 
                  The equation of the MDCT for a sequence x of length N is 
                  given below:

                    y(k) = sum 0 to N-1 { x(n)cos(pi*(2k + 1)*(2n + 1 + N/2)/2N) }

                            n = 0 to N-1
                            k = 0 to N/2-1

                    Note :  MDCT exhibits the antisymmetry property given by
                            y(N - k - 1) = -y(k), k = 0 to N/2 - 1

Algorithm       :
                  The algorithm used to implement MDCT is given as a pseudocode
                  below:

                First stage computations:
                for i=0:8
                    d1(i) = x(i) - x(17-i); c1(8-i) = d1(i);
                    d2(i) = x(i+18) + x(35-i); c2(8-i) = d2(i);
                end
                c = c1 - c2;
                d = d1 + d2;

                Twiddle factor multiplications:
                W1 = cos(pi/72*(17:-2:1));
                W2 = sin(pi/72*(17:-2:1));
                for i=0:8
                    a(8-i) = c(i)*W1(i) - d(8-i)*W2(i);
                    b(8-i) = c(i)*W2(i) + d(8-i)*W1(i);
                end

                9-Point DCT computation:
                WC = cos(pi/18*(0:8)'*(2*(0:8)+1));
                y1 = WC*a';
                y1 = y1/sqrt(2);

                9-Point DST computation:
                WS = sin(pi/18*((0:8)'+1)*(2*(0:8)+1));
                y2(9:-1:1) = WS*b';
                y2 = y2/sqrt(2);

                Last stage:
                y(0)  =  y1(0);
                y(1)  = -(y1(1) + y2(8)); y(2)  = -(y1(1) - y2(8));
                y(3)  =  (y1(2) + y2(7)); y(4)  =  (y1(2) - y2(7));
                y(5)  = -(y1(3) + y2(6)); y(6)  = -(y1(3) - y2(6));
                y(7)  =  (y1(4) + y2(5)); y(8)  =  (y1(4) - y2(5));
                y(9)  = -(y1(5) + y2(4)); y(10) = -(y1(5) - y2(4));
                y(11) =  (y1(6) + y2(3)); y(12) =  (y1(6) - y2(3));
                y(13) = -(y1(7) + y2(2)); y(14) = -(y1(7) - y2(2));
                y(15) =  (y1(8) + y2(1)); y(16) =  (y1(8) - y2(1));
                y(17) = -y2(0);
        

Prototype       : void _mdct36( fract16 *input, fract16 *output,
                                fract16 *twid_coef);

Assumptions     : The input buffer is assumed to be aligned to a 4 byte boundary
                  The length of the input and output buffers are always 36 and
                  18, respectively
                  The input data must have sufficient guard bits to avoid overflow
                  The twid_coef array that is passed to the routine must have
                  9x9 cosine and sine coefficients interleaved in the following
                  manner:

                      i = 0;
                      for k = 0 to 8
                        for n = 0 to 8
                            twid_coef[i]   = (1/sqrt(2))*cos(PI/18*k*(2*n + 1));
                            twid_coef[i+1] = (1/sqrt(2))*sin(PI/18*(k + 1)*(2*n 
                                              + 1));
                            i = i + 2;
                        end
                      end

Registers used  : A0, A1, R0-R3, R6, R7, I0-I3, M0, M1, B0, L0-L3, P0-P2, P5, 
                  LC0.

Performance     :
                Code size   :    490 bytes
                Cycle count :    239 cycles

*******************************************************************************/
.section L1_code;
.global __mdct36;
.align 8;

/*
 * __mdct36 : 36-point MDCT on packed 16-bit fractional data.
 *
 * Inputs, as consumed by the code below:
 *   R0 = input pointer.  Read with 32-bit loads as pairs of 16-bit samples
 *        x(0)..x(35).
 *   R1 = output pointer. 18 halfwords y(0)..y(17) are written.  The halfword
 *        immediately BEFORE the output buffer is also read and then written
 *        back with the value just read (priming store of the output loop).
 *   R2 = pointer to the interleaved cosine/sine table used by the 9-point
 *        DCT/DST (cosine in the low half, sine in the high half of each
 *        32-bit word).
 *
 * R6, R7 and P5 are pushed on entry and popped on exit.  L0-L3 are left at 0.
 *
 * Stack frame.  F is the address copied into I2, I3 and P2 in the prologue:
 *
 *   F+44 ...       registers pushed by the prologue
 *   F+40           pad word: takes the first (priming) [I3--] store of the
 *                  first-stage loop, which writes a not-yet-computed R7
 *   F+24 .. F+39   "c" pairs produced by the first stage
 *   F+0  .. F+15   "d" pairs produced by the first stage
 *   F-4            takes the first (priming) [I2++] store of the first stage
 *   F-4  .. F+31   later reused for the nine packed DCT/DST results
 *   F-40 .. F-5    nine packed (W1,W2) twiddles, overwritten in place by the
 *                  nine packed (a,b) values; read circularly by the DCT/DST
 *   SP = F-40      while the routine runs
 *
 * The routine reserves 44 + 40 bytes so that every address above lies at or
 * above SP and none of them overlaps the pushed registers.  With a single
 * 40-byte reservation the priming store at F+40 would land on the most
 * recently pushed register slot, and everything from F-40 to F-1 would sit
 * below the stack pointer.  This costs two extra "SP += imm" instructions
 * compared with a single 40-byte reservation.
 */
__mdct36:
    
    [--SP] = (R7:6, P5:5);  // Save call-preserved registers used below
    L0 = 0;                 // Linear addressing on I0..I3 for now
    L1 = 0;
    L2 = 0;
    L3 = 0;
    SP += -44;              // 40 bytes of temporaries + 1 pad word (see above)
    I2 = SP;                // F: base of the "d" temporaries
    I3 = SP;                // F: advanced below to the top of the "c" area
    P2 = SP;                // F: stepped down below to build the twiddle table
    SP += -40;              // Reserve F-40..F-1 (twiddles, a/b, priming slot)
    I0 = R0;                // Input pointer
    I1 = R0;                        
    P1 = R1;                // Output pointer
    P5 = R2;                // 9 point cosine-sine coefficient array pointer
    M0 = 36;                // +18 samples: jump from x(i) to x(i+18)
    M1 = -32 (X);           // -16 samples: jump back, net +2 samples per pair
    I3 += M0;               // I3 = F+36
    
               
/********************** PUSH COEFFICIENTS IN STACK ****************************/
    // Each word packs cos (low half) and sin (high half) of one angle.  They
    // are stored from F-8 downwards, so in ascending memory order starting at
    // F-40 the table reads W1(i) = cos((17:-2:1)*pi/72) and
    // W2(i) = sin((17:-2:1)*pi/72).
    
    P2 += -8;               // P2 = F-8
    R0.L = 0x7FE0;
    R0.H = 0x0596;
    [P2--] = R0;
    R0.L = 0x7EE7;
    R0.H = 0x10B5;
    [P2--] = R0;
    R0.L = 0x7CF7;
    R0.H = 0x1BB5;
    [P2--] = R0;
    R0.L = 0x7A13;
    R0.H = 0x267E;
    [P2--] = R0;
    R0.L = 0x7641;
    R0.H = 0x30FC;
    [P2--] = R0;
    R0.L = 0x7189;
    R0.H = 0x3B1B;
    [P2--] = R0;
    R0.L = 0x6BF4;
    R0.H = 0x44C6;
    [P2--] = R0;
    R0.L = 0x658C;
    R0.H = 0x4DEC;
    [P2--] = R0;
    R0.L = 0x5E5F;
    R0.H = 0x567A;
    [P2] = R0;              // Last entry, no post-decrement: P2 = F-40
/*********************** FIRST STAGE *************************************/
    // Software-pipelined: each iteration handles two values of i and stores
    // the results of the PREVIOUS iteration, so the first iteration's stores
    // are priming writes of stale R6/R7 into F-4 and the pad word at F+40.
    P0 = 4;                 // 4 iterations x 2 samples cover i = 0..7
    R0 = [I0 ++ M0] || I1 -= M1;    // R0 = x(0),x(1);  I1 -> x(16)
    R2 = [I0 ++ M1] || I2 -= 4;     // R2 = x(18),x(19); I2 = F-4
    R1 = [I1 ++ M0] || I3 += 4;     // R1 = x(16),x(17); I3 = F+40
    
    
    LSETUP(ST_FIRST_STAGE, END_FIRST_STAGE) LC0 = P0;            
ST_FIRST_STAGE:
        R1 = PACK(R1.L, R1.H) || R3 = [I1 ++ M1] || [I2++] = R6;
                            // Swap halves so x(17-i) lines up with x(i);
                            // store previous d pair
        R3 = PACK(R3.L, R3.H) || [I3--] = R7  || I1 -= 4;
                            // Swap halves so x(35-i) lines up with x(i+18);
                            // store previous c pair
        R6 = R0 -|- R1 (S) || R0 = [I0 ++ M0] || I1 -= 4;
                            // d1(i) = x(i) - x(17-i); 
        R7 = R2 +|+ R3 (S) || R2 = [I0 ++ M1];
                            // d2(i) = x(i+18) + x(35-i); 
END_FIRST_STAGE:
        R6 = R6 +|+ R7, R7 = R6 -|- R7 (S) || R1 = [I1 ++ M0];
                            // R6 = d1 + d2 (d pair); R7 = d1 - d2 (c pair)
    I0 = P2;                // I0 = F-40: a/b values overwrite the twiddles
    R1.L = R0.L - R0.H (S) || [I2--] = R6;
                            // d1(8) = x(8) - x(9); store last d pair
    R1.H = R2.L + R2.H (S) || [I3++] = R7;
                            // d2(8) = x(26) + x(27); store last c pair
    R0.H = R1.L + R1.H (S) || R3 = [P2++];
                            // d(8); fetch first (W1,W2) pair
    R0.L = R1.L - R1.H (S); // c(0)
/************************ TWIDDLE FACTOR MULTIPLICATION **********************/
    // Results are packed as R1.L = a, R1.H = b.  The two instructions between
    // LSETUP and ST_TWID_MULT handle the odd ninth element; the loop then
    // consumes the d/c pairs from the last one computed back to the first,
    // starting with the pair still held in R6/R7.

    LSETUP(ST_TWID_MULT, END_TWID_MULT) LC0 = P0;
    A1 = R0.L*R3.H, A0 = R0.L*R3.L;
    R1.H = (A1 += R0.H*R3.L), R1.L = (A0 -= R0.H*R3.H);
ST_TWID_MULT:
        R0 = PACK(R6.H, R7.H) || [I0++] = R1;
                            // R0.H = d, R0.L = c from the high halves
        R2 = PACK(R6.L, R7.L) || R3 = [P2++];
                            // R2.H = d, R2.L = c from the low halves
        A1 = R0.L*R3.H, A0 = R0.L*R3.L || R7 = [P2++];
                            // a(8-i) = c(i)*W1(i) - d(8-i)*W2(i); 
        R1.H = (A1 += R0.H*R3.L), R1.L = (A0 -= R0.H*R3.H) || R6 = [I2--];
        A1 = R2.L*R7.H, A0 = R2.L*R7.L || [I0++] = R1;
                            // b(8-i) = c(i)*W2(i) + d(8-i)*W1(i); 
END_TWID_MULT:
        R1.H = (A1 += R2.H*R7.L), R1.L = (A0 -= R2.H*R7.H) || R7 = [I3++];
                            // The final iteration's R6/R7 loads fetch the
                            // priming slots (F-4, F+40); the values are unused
    P2 += -36;              // P2 = F-40, base of the a/b vector
    I2 += 4;                // I2 = F-4: first DCT/DST result slot
/*********************** 9 POINT DCT AND DST *******************************/
    A1 = A0 = 0 || R3 = [P5++] || [I0--] = R1;
                            // Store the ninth a/b word and fetch the first
                            // coefficient from the twid_coef array 
    L0 = 36;                // Make the DCT/DST input array circular
    B0 = P2;
    P0 = 9;
    
    // Each outer iteration forms one DCT output in A0 (low halves) and one
    // DST output in A1 (high halves) with nine unrolled MACs.  Nine circular
    // reads bring I0 back to where it started, so the next iteration sees the
    // same a/b sequence.
    // NOTE(review): R3 = [P5++] executes 1 + 9*9 = 82 times, so one 32-bit
    // word beyond the 81-word coefficient table is read; its value is unused.
    LSETUP(ST_OUT_LOOP, END_OUT_LOOP) LC0 = P0;
                            // DCT in A0 and DST in A1
ST_OUT_LOOP:
        R0.H = (A1 += R1.H*R3.H), R0.L = (A0 += R1.L*R3.L)|| R1 = [I0--]
        || R3 = [P5++];
        R0.H = (A1 += R1.H*R3.H), R0.L = (A0 += R1.L*R3.L)|| R1 = [I0--]
        || R3 = [P5++];
        R0.H = (A1 += R1.H*R3.H), R0.L = (A0 += R1.L*R3.L)|| R1 = [I0--]
        || R3 = [P5++];
        R0.H = (A1 += R1.H*R3.H), R0.L = (A0 += R1.L*R3.L)|| R1 = [I0--]
        || R3 = [P5++];
        R0.H = (A1 += R1.H*R3.H), R0.L = (A0 += R1.L*R3.L)|| R1 = [I0--]
        || R3 = [P5++];
        R0.H = (A1 += R1.H*R3.H), R0.L = (A0 += R1.L*R3.L)|| R1 = [I0--]
        || R3 = [P5++];
        R0.H = (A1 += R1.H*R3.H), R0.L = (A0 += R1.L*R3.L)|| R1 = [I0--]
        || R3 = [P5++];
        R0.H = (A1 += R1.H*R3.H), R0.L = (A0 += R1.L*R3.L)|| R1 = [I0--] 
        || R3 = [P5++];
        R0.H = (A1 += R1.H*R3.H), R0.L = (A0 += R1.L*R3.L)|| R1 = [I0--] 
        || R3 = [P5++];
END_OUT_LOOP:
        A1 = A0 = 0 || [I2++] = R0;
                            // Store y1(i) and y2(8-i); clear accumulators
    
/************************** LAST STAGE **********************************/
    I1 = P1;
    I1 -= 2;                // Start one halfword before the output buffer to
                            // account for the first dummy write in the loop
    L0 = 0;                 // Make the buffer linear
    I2 -= M0;               // Rewind I2 to the first DCT/DST result (F-4)
    R3.L = W[I1]||R0 = [I2++];
                            // Read the halfword before the output so the dummy
                            // write puts the same value back
    R3 = PACK(R0.L, R3.L) || R1 = [I2++];
                            // R3.H = y(0) = y1(0); R3.L = value to write back

    // Each iteration emits four outputs: a negated sum/difference pair and a
    // non-negated pair, while storing the pair computed just before.
    LSETUP(ST_OUT_COPY, END_OUT_COPY) LC0 = P0 >> 1;
ST_OUT_COPY:
        R2.L = R1.L + R0.H (S) || W[I1++] = R3.L;
                            // Compute output and store previous output 
        R2.H = R1.L - R0.H (S) || W[I1++] = R3.H;
        R2 = -R2 (V) || R0 = [I2++];
                            // Odd-numbered y1 terms are negated
        R3.L = R0.L + R1.H (S) || W[I1++] = R2.L;
END_OUT_COPY:
        R3.H = R0.L - R1.H (S) || R1 = [I2++] || W[I1++] = R2.H;        
    
    R0 = -R0 (V) || W[I1++] = R3.L;
    W[I1++] = R3.H;                                 
    W[I1++] = R0.H;         // Store last output, y(17) = -y2(0)
    SP += 40;               // Release the region below the frame base
    SP += 44;               // Release the temporaries and the pad word
    (R7:6, P5:5) = [SP++];  // Retrieve call preserved register contents
    
    RTS;
    NOP;                    //to avoid one stall if LINK or UNLINK happens to be
                            //the next instruction after RTS in the memory.
__mdct36.end:

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -