📄 dct8.txt

📁 平台：ADSP21xx 编程语言：ASM 说明：ADSP21xx-离散余弦变换在16位定点DSP上实现
💻 TXT
字号:
*********************************************************************

The following file contains information pertaining to the Discrete 
Cosine Transform. It consists of several source code programs as well
as batch files for assembly and linking.


*****  The following module is the calling shell for the one dimensional
8-point DCT.  *****

{  ONE DIMENSIONAL, FAST, DISCRETE COSINE TRANSFORM, 8 POINTS

   Implementation:
        as described by Hsieh S. Hou in IEEE Transactions on Acoustics, 
        Speech, and Signal Processing, Vol. ASSP-35, No. 10, October 1987

   Target Processor:
        ADSP-2100 family of DSP processors from Analog Devices, Inc.

   Execution Benchmark:
        160 instruction cycles -- ADSP-2100A -- 12.8 us at CLKOUT=12.5 MHz

   Memory Storage Requirement:
        149 PM = 142 program memory code, 7 program memory data (coefficients)
        16 DM = 8 data memory scratch pad, 8 data memory (8-pt vector)
        Note: resulting transform coefficients written over original input data
        assumes: unsigned 8-bit input data, signed 16-bit output coefficients

   Author of Code:
        Christoph D. Cavigioli -- DSP Applications Engineer -- (617) 461-3058

   Release History:
        27-March-1989
        this release: 23-July-1989

   Analog Devices, Inc., DSP Division, P.O.Box 9106, Norwood, MA 02062, USA
}   
{ Calling shell for a single 8-point DCT on x[0..7].
  DIF8_8 reads x through i2/i3 and writes the scratch buffer tmp; DIF4_8,
  DIF2_8, RLR4_8 and RLR8_8 then work on tmp in place, and DC_AND_BREV_8
  scales tmp[0] and copies the eight results back over x through i5/m5.

  Fix in this revision: the first coefficient .init statement had lost its
  last two values and its terminating semicolon.  cosvals[2] and cosvals[3]
  are restored from the identical table in the 8x8 shell; DIF8_8 fetches
  four coefficients (cosvals[0..3]) through i6. }
.module/ram/abs=0   fast_8pt_dct;
.var/pm/ram         cosvals[7];         { cosine coefficients }
.var/circ/abs=0     tmp[8];             { temporary scratch memory }
.var                x[8];               { 8-pt vector to transform }
.global    tmp;
.external  DIF8_8, DIF4_8, DIF2_8, RLR4_8, RLR8_8, DC_AND_BREV_8;
.init      x: <x.dat>;
{ 16-bit coefficients left-justified in 24-bit PM words.
  Fetch order through i6 (m6=1): [0..3] by DIF8_8, [4..5] by DIF4_8,
  [6] by DIF2_8. }
.init  cosvals[0]: h#7D8A00, h#471C00, h#E70800, h#959300;
.init  cosvals[4]: h#764100, h#CF0500;
.init  cosvals[6]: h#5A8200;

        { NOTE(review): presumably fills the four interrupt vectors at the
          start of program memory (module is /abs=0) -- confirm for the
          target part }
        rti; rti; rti; rti;

setup:  l0=0; l1=0; l2=0; l3=0;         { zero length: i0-i3 are used as linear pointers }
        l5=0; l6=0;                     { same for i5 (output) and i6 (coefficients) }
        m6=1;                           { coefficient pointer advances one word per fetch }
        se=1;                           { shift count used by the ashift in RLR4_8 / RLR8_8 }

dct8:   i2=^x;                          { ascending input pointer:  x[0], x[2], x[4], x[6] }
        m2=2;
        i3=^x+7;                        { descending input pointer: x[7], x[5], x[3], x[1] }
        m3=-2;
        i6=^cosvals;                    { coefficients are consumed in table order }

        call DIF8_8;                    { x   -> tmp }
        call DIF4_8;                    { tmp -> tmp }
        call DIF2_8;
        call RLR4_8;
        call RLR8_8;

        i5=^x;                          { results overwrite the input vector }
        m5=1;
        call DC_AND_BREV_8;             { scale tmp[0], copy tmp -> x in bit-reversed read order }
        trap;
.endmod;



*** The following module is the calling shell for the two dimensional
8x8 DCT.  *******


{  TWO DIMENSIONAL, FAST, DISCRETE COSINE TRANSFORM, 8 x 8 POINTS

   Implementation:
        as described by Hsieh S. Hou in IEEE Transactions on Acoustics, 
        Speech, and Signal Processing, Vol. ASSP-35, No. 10, October 1987

   Target Processor:
        ADSP-2100 family of DSP processors from Analog Devices, Inc.

   Execution Benchmark:
        2540 instruction cycles -- ADSP-2100A -- 0.2032 ms at CLKOUT=12.5 MHz

   Memory Storage Requirement:
        149 PM = 142 program memory code, 7 program memory data (coefficients)
        16 DM = 8 data memory scratch pad, 8 data memory (8-pt vector)
        Note: resulting transform coefficients written over original input data
        assumes: unsigned 8-bit input data, signed 16-bit output coefficients

   Author of Code:
        Christoph D. Cavigioli -- DSP Applications Engineer -- (617) 461-3058

   Release History:
        27-March-1989
        this release: 23-July-1989

   Analog Devices, Inc., DSP Division, P.O.Box 9106, Norwood, MA 02062, USA
}
{ Calling shell for the 8x8 DCT on the 64-word block x, computed in place.

  Pass "rows": eight 8-point DCTs over groups of 8 consecutive words
               (element stride 1, next group = +8).
  Pass "cols": eight 8-point DCTs over words 8 apart
               (element stride 8, next group = +1).

  xadr  holds the address of the first element of the current group,
  xadr2 the address of its last element; both are advanced after each
  8-point transform.  m2/m3 are reloaded on every iteration because the
  called stages overwrite them (DIF4_8 sets m2, RLR8_8 sets m2 and m3).

  Fixes in this revision:
   1. In the row loop the comment after "m2=2;" was truncated to an
      unterminated brace, so the comment ran on to the closing brace of
      the next line and hid "m3=-2;" from the assembler.
   2. The column loop had lost its "call DIF4_8;" (a blank line was left
      in its place), so the column pass skipped a stage that the row pass
      and the one-dimensional shell both execute.

  NOTE(review): cosvals is declared with 15 words but only 7 are
  initialised and the header above quotes 7 PM data words -- confirm
  whether 15 is intentional. }
.module/ram/abs=0   fast_8x8_dct;
.var/pm/ram         cosvals[15];        { cosine coefficients }
.var/circ/abs=0     tmp[8];             { temporary scratch memory }
.var                xadr, xadr2;        { first / last element address of current group }
.var                x[64];              { 8x8 block to transform }
.global    tmp;
.external  DIF8_8, DIF4_8, DIF2_8, RLR4_8, RLR8_8, DC_AND_BREV_8;
.init      x: <xx.dat>;
.init  cosvals[0]: h#7D8A00, h#471C00, h#E70800, h#959300;
.init  cosvals[4]: h#764100, h#CF0500;
.init  cosvals[6]: h#5A8200;

        { NOTE(review): presumably fills the four interrupt vectors at the
          start of program memory (module is /abs=0) -- confirm for the
          target part }
        rti; rti; rti; rti;

setup:  l0=0; l1=0; l2=0; l3=0;         { zero length: i0-i3 are used as linear pointers }
        l5=0; l6=0;                     { same for i5 (output) and i6 (coefficients) }
        m6=1;                           { coefficient pointer advances one word per fetch }
        se=1;                           { shift count used by the ashift in RLR4_8 / RLR8_8 }

{ ---- pass 1: rows ---- }
rows:   si=^x;                  { cols: ^x }
        dm(xadr)=si;
        i2=si;
        si=^x+7;                { cols: ^x+56 }
        dm(xadr2)=si;
        i3=si;
        m5=1;                   { cols: 8 }
        cntr=8;
        do rowdcts until ce;
                i6=^cosvals;    { restart the coefficient table for every transform }
                m2=2;           { cols: 16 }
                m3=-2;          { cols: -16 }
                call DIF8_8;
                call DIF4_8;
                call DIF2_8;
                call RLR4_8;
                call RLR8_8;
                si=dm(xadr);
                i5=si;          { write results back over the group just read }
                call DC_AND_BREV_8;
nextrow:        ay0=8;          { cols: 1 }
                ax0=dm(xadr);
                ar=ax0+ay0;
                dm(xadr)=ar;
                i2=ar;
                ax0=dm(xadr2);
                ar=ax0+ay0;
                dm(xadr2)=ar;
rowdcts:        i3=ar;

{ ---- pass 2: columns ---- }
cols:   si=^x;                  { first element of column 0 }
        dm(xadr)=si;
        i2=si;
        si=^x+56;               { last element of column 0 }
        dm(xadr2)=si;
        i3=si;
        m5=8;                   { output stride: one row down }
        cntr=8;
        do coldcts until ce;
                i6=^cosvals;    { restart the coefficient table for every transform }
                m2=16;          { every other element, going down }
                m3=-16;         { every other element, going up }
                call DIF8_8;
                call DIF4_8;
                call DIF2_8;
                call RLR4_8;
                call RLR8_8;
                si=dm(xadr);
                i5=si;          { write results back over the column just read }
                call DC_AND_BREV_8;
nextcol:        ay0=1;          { next column is one word further on }
                ax0=dm(xadr);
                ar=ax0+ay0;
                dm(xadr)=ar;
                i2=ar;
                ax0=dm(xadr2);
                ar=ax0+ay0;
                dm(xadr2)=ar;
coldcts:        i3=ar;
        trap;

.endmod;




***** The following modules are called by the calling shells for both
one and two dimensional DCTs.   ************


.module/ram do_DIF8_8;        {*** 1 8-way DIFs ***}
.external tmp;
.entry DIF8_8;

{ DIF8_8 -- first stage: four sum/difference pairs from the input vector.

  On entry (set by the caller):
     i2,m2  pointer/step for the first operand of each pair
     i3,m3  pointer/step for the second operand of each pair
     i6,m6  program-memory coefficient pointer/step
  The shells in this file pass i2=^x (or a row/column start) stepping
  forward and i3 = last element stepping backward.

  On exit:
     tmp[0..3]  the four sums        (first operand + second operand)
     tmp[4..7]  mr1 of (first - second) * coefficient, one coefficient
                fetched from pm(i6,m6) per pair
     i0,i1,m1 are overwritten (m1 is left at 1); i2,i3,i6 have advanced.

  The code is software-pipelined.  In every
  "ar=ax0-af, dm(i0,m1)=ar" the store is expected to write the SUM left
  in AR by the preceding instruction, and "dm(i1,m1)=mr1" writes the
  product of the previous pair -- per ADSP-21xx multifunction semantics
  (source registers are read before results are updated); confirm
  against the user's manual before reordering anything. }
DIF8_8:
        i0=^tmp;
        i1=^tmp+4;
        m1=1;

        { pair 1 }
        ax1=dm(i3,m3);
        af=pass ax1, ax0=dm(i2,m2);
        ar=ax0+af, ax1=dm(i3,m3), my0=pm(i6,m6);
        ar=ax0-af, dm(i0,m1)=ar;
        mr=ar*my0(ss);
        { pair 2 }
        af=pass ax1, ax0=dm(i2,m2);
        ar=ax0+af, ax1=dm(i3,m3), my0=pm(i6,m6);
        ar=ax0-af, dm(i0,m1)=ar;
        mr=ar*my0(ss), dm(i1,m1)=mr1;
        { pair 3 }
        af=pass ax1, ax0=dm(i2,m2);
        ar=ax0+af, ax1=dm(i3,m3), my0=pm(i6,m6);
        ar=ax0-af, dm(i0,m1)=ar;
        mr=ar*my0(ss), dm(i1,m1)=mr1;
        { pair 4 -- no further operand prefetch through i3 }
        af=pass ax1, ax0=dm(i2,m2);
        ar=ax0+af,                my0=pm(i6,m6);
        ar=ax0-af, dm(i0,m1)=ar;
        mr=ar*my0(ss), dm(i1,m1)=mr1;
        { drain the pipeline: store the last product }
        dm(i1,m1)=mr1;

        rts;                               {*** end 1 8-way DIFs ***}
.endmod;





.module/ram do_DIF4_8;        {*** 2 4-way DIFs ***}
.external tmp;
.entry DIF4_8;

{ DIF4_8 -- second stage: the same sum/difference step applied in place to
  each half of tmp (tmp[0..3], then tmp[4..7]).

  For a half h[0..3]:
     h[0] = h[0]+h[2]          h[1] = h[1]+h[3]
     h[2] = mr1 of (h[0]-h[2])*my0
     h[3] = mr1 of (h[1]-h[3])*my1
  (right-hand sides use the values present on entry).

  Two coefficients are fetched from pm(i6,m6) while processing the first
  half (into my0 and my1) and are reused unchanged for the second half.

  Relies on m1 = 1, which this routine does not set; in this file DIF8_8
  leaves m1 at 1 and is always called first.
  Overwrites i0-i3 and m2 (m2 is left at 3); i6 advances by two fetches.

  Pointer steps alternate m1 (+1) and m2 (+3) so that each pointer walks
  0,1,4,5 (i0,i2) or 2,3,6,7 (i1,i3) across the buffer.  As in DIF8_8 the
  stores are pipelined one instruction behind the computation that
  produced the value. }
DIF4_8:
        i0=^tmp;
        i1=^tmp+2;
        i2=^tmp;
        i3=^tmp+2;
        m2=3;

        { first half: tmp[0..3]; loads my0 and my1 }
        ax1=dm(i3,m1);
        af=pass ax1, ax0=dm(i2,m1);
        ar=ax0+af, ax1=dm(i3,m2), my0=pm(i6,m6);
        ar=ax0-af, dm(i0,m1)=ar;
        mr=ar*my0(ss);
        af=pass ax1, ax0=dm(i2,m2);
        ar=ax0+af,                my1=pm(i6,m6);
        ar=ax0-af, dm(i0,m2)=ar;
        mr=ar*my1(ss), dm(i1,m1)=mr1;
        dm(i1,m2)=mr1;

        { second half: tmp[4..7]; my0 and my1 are reused, no PM fetches }
        ax1=dm(i3,m1);
        af=pass ax1, ax0=dm(i2,m1);
        ar=ax0+af, ax1=dm(i3,m2);
        ar=ax0-af, dm(i0,m1)=ar;
        mr=ar*my0(ss);
        af=pass ax1, ax0=dm(i2,m2);
        ar=ax0+af;
        ar=ax0-af, dm(i0,m2)=ar;
        mr=ar*my1(ss), dm(i1,m1)=mr1;
        dm(i1,m2)=mr1;

        rts;                               {*** end 2 4-way DIFs ***}
.endmod;




.module/ram do_DIF2_8;        {*** 4 2-way DIFs ***}
.external tmp;
.entry DIF2_8;

{ DIF2_8 -- third stage: sum/difference on each adjacent pair of tmp,
  in place.  For k = 0, 2, 4, 6 (right-hand sides are entry values):
     tmp[k]   = tmp[k] + tmp[k+1]
     tmp[k+1] = mr1 of (tmp[k] - tmp[k+1]) * my0

  A single coefficient is fetched from pm(i6,m6) during the first pair
  and reused for the other three.

  Overwrites i0-i3 and m0 (m0 is left at 2); i6 advances by one fetch.
  The final store post-modifies i1 by m1, which this routine does not
  set; i1 is not used again before the return.
  Stores are pipelined one instruction behind the computation, as in the
  other DIF stages. }
DIF2_8:
        i0=^tmp;
        i1=^tmp+1;
        i2=^tmp;
        i3=^tmp+1;
        m0=2;

        { pair (0,1); loads the coefficient }
        ax1=dm(i3,m0);
        af=pass ax1, ax0=dm(i2,m0);
        ar=ax0+af, ax1=dm(i3,m0), my0=pm(i6,m6);
        ar=ax0-af, dm(i0,m0)=ar;
        mr=ar*my0(ss);

        { pair (2,3) }
        af=pass ax1, ax0=dm(i2,m0);
        ar=ax0+af, ax1=dm(i3,m0);
        ar=ax0-af, dm(i0,m0)=ar;
        mr=ar*my0(ss), dm(i1,m0)=mr1;

        { pair (4,5) }
        af=pass ax1, ax0=dm(i2,m0);
        ar=ax0+af, ax1=dm(i3,m0);
        ar=ax0-af, dm(i0,m0)=ar;
        mr=ar*my0(ss), dm(i1,m0)=mr1;

        { pair (6,7) }
        af=pass ax1, ax0=dm(i2,m0);
        ar=ax0+af;
        ar=ax0-af, dm(i0,m0)=ar;
        mr=ar*my0(ss), dm(i1,m0)=mr1;

        { drain the pipeline: store the last product into tmp[7] }
        dm(i1,m1)=mr1;
        rts;                                       {*** end 4 2-way DIFs ***}
.endmod;






.module do_RLR4_8;
.external tmp;
.entry RLR4_8;

{ RLR4_8 -- updates tmp[3] and tmp[7] in place:
     tmp[3] = sr1(tmp[3]) - tmp[2]
     tmp[7] = sr1(tmp[7]) - tmp[6]
  where sr1(v) is the high word of "ashift v (hi)".  The shift count comes
  from SE, which this routine does not set; the calling shells in this
  file load se=1, i.e. a one-bit left shift (v doubled).

  i0 reads tmp[3] then tmp[7], i1 reads tmp[2] then tmp[6], and i2 (a copy
  of i0) writes the results; all three step by m0=4.
  Overwrites i0, i1, i2, m0, si, ay0, sr and ar.
  The load of the second operand pair is overlapped with the arithmetic
  on the first pair. }
RLR4_8:
        i0=^tmp+3;
        i1=^tmp+2;
        i2=i0;
        m0=4;
                                si=dm(i0,m0);
                                ay0=dm(i1,m0);
        sr=ashift si (hi),      si=dm(i0,m0);
        ar=sr1-ay0,             ay0=dm(i1,m0);
        dm(i2,m0)=ar;
        sr=ashift si (hi);
        ar=sr1-ay0;
        dm(i2,m0)=ar;
        rts;
.endmod;






.module do_RLR8_8;
.external tmp;
.entry RLR8_8;

{ RLR8_8 -- chained update of tmp[6], tmp[5], tmp[7] in place, each step
  using the result of the one before:
     tmp[6] = sr1(tmp[6]) - tmp[4]
     tmp[5] = sr1(tmp[5]) - (new tmp[6])
     tmp[7] = sr1(tmp[7]) - (new tmp[5])
  where sr1(v) is the high word of "ashift v (hi)".  The shift count comes
  from SE, which this routine does not set; the calling shells in this
  file load se=1, i.e. a one-bit left shift (v doubled).

  The previous result is carried forward negated in AF ("af=-ar") so the
  next step can be written as an addition ("ar=sr1+af").

  A single pointer, i0, does all reads and writes; the mix of modifiers
  m0..m3 (+1, +2, -1, -2) makes it visit indices 4,6,5,6,7,5,7 in turn.
  Overwrites i0, i1, m0-m3, si, ay0, af, sr and ar.  Because m2 and m3
  are changed here, a caller must reload them before the next DIF8_8.

  NOTE(review): i1 is loaded with ^tmp+12 but is never referenced in this
  routine. }
RLR8_8:
        i0=^tmp+4;
        i1=^tmp+12;
        m0=1;
        m1=2;
        m2=-1;
        m3=-2;
                                ay0=dm(i0,m1);
                                si=dm(i0,m2);
        sr=ashift si (hi);
        ar=sr1-ay0,             si=dm(i0,m0);
        af=-ar,                 dm(i0,m0)=ar;
        sr=ashift si (hi);
        ar=sr1+af,              si=dm(i0,m3);
        af=-ar,                 dm(i0,m1)=ar;
        sr=ashift si (hi);
        ar=sr1+af;
                                dm(i0,m0)=ar;
        rts;
.endmod;






.module do_DC_and_brev_8;
.const sqrt2div2=h#5A82;
.external tmp;
.entry DC_AND_BREV_8;

{ DC_AND_BREV_8 -- final step of the 8-point transform.
   1. Multiplies tmp[0] by the constant sqrt2div2 (h#5A82) with rounding
      and stores mr1 back into tmp[0].
   2. Copies the eight words of tmp to the caller's buffer, reading tmp
      with bit-reversed addressing on i0 and writing through i5 with
      post-modify m5.

  On entry (set by the caller): i5 = destination address, m5 = destination
  stride (1 for the 1-D and row passes, 8 for the column pass in the
  shells of this file).
  Overwrites mx0, my0, mr, i0, m0, ax0 and cntr; i5 advances by 8*m5.
  Bit-reverse mode is enabled only around the copy loop and is disabled
  again before returning.

  The literal start address h#0000 is only correct while tmp is located
  at data-memory address 0 (the shells declare it /abs=0). }
DC_AND_BREV_8:  mx0=dm(tmp);
DCterm:         my0=sqrt2div2;
                mr=mx0*my0(rnd);        { calculate DC term using sqrt(2)/2 }
                dm(tmp)=mr1;
descramble:     i0=h#0000;      { h#0000 = bitrev(^tmp) when ^tmp=h#0000 }
                m0=2048;        { bitrev modifier = 16384/N = 2048 }
                cntr=8;
                ena bit_rev;
                do unbrev until ce;
                   ax0=dm(i0,m0);       { read from bit-reversed tmp buffer }
unbrev:            dm(i5,m5)=ax0;       { write to normal ordered x buffer }
                dis bit_rev;
                rts;
.endmod;




****** The following two batch files can be used to assemble the
proper modules for the one dimensional 8-pt DCT and the two dimensional 
8x8 DCT.   *******

dsppa 8PTDCT
dsppa DIF8_8 -s
dsppa DIF4_8 -s
dsppa DIF2_8 -s
dsppa RLR4_8
dsppa RLR8_8
dsppa DCBREV_8



dsppa 8x8
dsppa DIF8_8 -s
dsppa DIF4_8 -s
dsppa DIF2_8 -s
dsppa RLR4_8
dsppa RLR8_8
dsppa DCBREV_8



***** And these batch files can be used for linking .... *****

dsppl 8ptdct dif8_8 dif4_8 dif2_8 rlr4_8 rlr8_8 dcbrev_8 -g -x



dsppl 8x8 dif8_8 dif4_8 dif2_8 rlr4_8 rlr8_8 dcbrev_8 -g -x



***** 



***********************************************************************
This information is believed to be accurate and Analog Devices assumes
no responsibility for its use.  Also, no license is granted by implication
or otherwise under the patent rights of Analog Devices.

No responsibility is assumed for any infringement of patents or other
rights of third parties which may result from its use.


For more information concerning DCTs, contact the DSP Applications Assistance
Line at (617) 461-3672.


Code and applications information is also available by contacting DSP
Applications at the number mentioned above.


*****************************************************************************
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -