📄 dct.vhd
字号:
--*********************************************************************
-- *********************************************************************
-- ** -----------------------------------------------------------------------------**
-- ** dct.vhd
-- **
-- ** 8x8 discrete Cosine Transform
-- **
-- **
-- **
-- ** Author: Latha Pillai
-- ** Senior Applications Engineer
-- **
-- ** Video Applications
-- ** Advanced Products Group
-- ** Xilinx, Inc.
-- **
-- ** Copyright (c) 2001 Xilinx, Inc.
-- ** All rights reserved
-- **
-- ** Date: Feb. 10, 2002
-- **
-- ** RESTRICTED RIGHTS LEGEND
-- **
-- ** This software has not been published by the author, and
-- ** has been disclosed to others for the purpose of enhancing
-- ** and promoting design productivity in Xilinx products.
-- **
-- ** Therefore use, duplication or disclosure, now and in the
-- ** future should give consideration to the productivity
-- ** enhancements afforded the user of this code by the author's
-- ** efforts. Thank you for using our products !
-- **
-- ** Disclaimer: THESE DESIGNS ARE PROVIDED "AS IS" WITH NO WARRANTY
-- ** WHATSOEVER AND XILINX SPECIFICALLY DISCLAIMS ANY
-- ** IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR
-- ** A PARTICULAR PURPOSE, OR AGAINST INFRINGEMENT.
-- ** Module: dct8x8 :
-- ** A 1D-DCT is implemented on the input pixels first. The output of this,
-- ** called the intermediate value, is stored in a RAM. The 2nd 1D-DCT operation
-- ** is done on this stored value to give the final 2D-DCT output dct_2d. The
-- ** inputs are 8 bits wide and the 2D-DCT outputs are 12 bits wide.
-- ** 1st 1D section
-- ** The input signals are taken one pixel at a time in the order x00 to x07,
-- ** x10 to x17 and so on up to x77. These inputs are fed into an 8-bit shift
-- ** register. The outputs of the 8-bit shift registers are registered by the
-- ** div8clk which is the CLK signal divided by 8. This will enable us to
-- ** register in 8 pixels (one row) at a time. The pixels are paired up in an
-- ** adder/subtractor in the order xk0,xk7:xk1,xk6:xk2,xk5:xk3,xk4. The adder/
-- ** subtractor is tied to CLK. For every clk, the adder/subtractor module
-- ** alternately chooses addition and subtraction. This selection is done by
-- ** the toggle flop. The output of the addsub is fed into a multiplier whose
-- ** other input is connected to stored values in registers which act as
-- ** memory. The outputs of the 4 multipliers are added at every CLK in the
-- ** final adder. The output of the adder, z_out, is the 1D-DCT values given
-- ** out in the order in which the inputs were read in.
-- ** It takes 8 clks to read in the first set of inputs, 1 clk to register
-- ** inputs, 1 clk to do add/sub, 1 clk to get absolute value,
-- ** 1 clk for multiplication, 2 clks for the final adder. Total = 14 clks to get
-- ** the 1st z_out value. Every subsequent clk gives out the next z_out value.
-- ** So to get all the 64 values we need 14+63=77 clks.
-- ** Storage / RAM section
-- ** The outputs z_out of the adder are stored in RAMs. Two RAMs are used so
-- ** that data write can be continuous. The 1st valid input for RAM1 is
-- ** available at the 15th clk. So the RAM1 enable is active after 15 clks.
-- ** After this the write operation continues for 64 clks. At the 65th clock,
-- ** since z_out is continuous, we get the next valid z_out_00. This 2nd set of
-- ** valid 1D-DCT coefficients is written into RAM2, which is enabled at 15+64
-- ** clks. So at the 65th clk, RAM1 goes into read mode for the next 64 clks and
-- ** RAM2 is in write mode. After this, for every 64 clks, the read and write
-- ** switch between the 2 RAMs.
-- ** 2nd 1D-DCT section
-- ** After the first 79 clks, when RAM1 is full, the 2nd 1D calculations can
-- ** start. The second 1D implementation is the same as the 1st 1D
-- ** implementation with the inputs now coming from either RAM1 or RAM2. Also,
-- ** the inputs are read in one column at a time in the order z00 to z70, z01 to
-- ** z71 and so on up to z77. The outputs from the adder in the 2nd section are
-- ** the 2D-DCT coefficients.
-- **********************************************************************
library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.std_logic_arith.all;
use IEEE.std_logic_unsigned.all;
use IEEE.numeric_std.all;
--library virtex;
--use virtex.components.all;
--library synplify;
--use synplify.attributes.all;
library unisims_ver; -- include this for modelsim simulation
-- when using mult18x18
-- Top-level interface of the 8x8 2D-DCT core.
entity dct is
    port (
        CLK     : in  std_logic;                      -- clock; the visible processes act on its rising edge
        RST     : in  std_logic;                      -- asynchronous reset, active high
        xin     : in  std_logic_vector(7 downto 0);   -- 8-bit pixel input, one sample per clock
        dct_2d  : out std_logic_vector(11 downto 0);  -- 12-bit 2D-DCT output
        rdy_out : out std_logic                       -- NOTE(review): presumably flags valid dct_2d data; driver not visible here
    );
end dct;
ARCHITECTURE logic OF dct IS
-- The max value of a pixel after processing (to make their expected mean
-- zero) is 127. If all the values in a row are 127, the max value of
-- the product terms would be (127*8)*(23170/256) and that of z_out_int would
-- be (127*8)*23170/65536. Dividing this value by 2 raised to the power 16 is
-- equivalent to ignoring the 16 least significant bits of the value.
-- 1D section
-- Stages of the input shift register (8 bits each): xa0_in loads xin on every
-- clock and each later stage takes the value of the stage before it.
signal xa0_in, xa1_in, xa2_in, xa3_in,
xa4_in, xa5_in, xa6_in, xa7_in: std_logic_vector(7 downto 0);
-- 9-bit sign-extended copies of the shift-register stages, captured when
-- cntr8 equals "1000".
signal xa0_reg, xa1_reg, xa2_reg, xa3_reg,
xa4_reg, xa5_reg, xa6_reg, xa7_reg: std_logic_vector (8 downto 0);
-- NOTE(review): the add/sub, sign, product and accumulation signals below are
-- driven by logic outside the visible part of this file; presumably they form
-- the adder/subtractor -> multiplier -> final adder path described in the
-- file header. Confirm against the rest of the architecture.
signal addsub1a_comp,addsub2a_comp,
addsub3a_comp,addsub4a_comp : std_logic_vector (7 downto 0);
signal add_sub1a,add_sub2a,add_sub3a,add_sub4a : std_logic_vector (9 downto 0);
signal save_sign1a,save_sign2a,
save_sign3a,save_sign4a : std_logic;
signal xor1a,xor2a,xor3a,xor4a: std_logic;
signal p1a,p2a,p3a,p4a : std_logic_vector (18 downto 0);
signal p1a_all,p2a_all,p3a_all,p4a_all : std_logic_vector (14 downto 0); -- declare as a 36-bit wide signal (NOTE(review): presumably when using mult18x18)
signal i_wait : std_logic_vector(1 downto 0);
-- Inverted on every rising CLK edge; the file header describes it as the
-- add/subtract select.
signal toggleA : std_logic;
signal z_out_int1 : std_logic_vector(18 downto 0);
signal z_out_int2 : std_logic_vector(18 downto 0);
signal z_out_int : std_logic_vector(18 downto 0);
signal z_out_rnd : std_logic_vector(10 downto 0);
signal z_out : std_logic_vector(10 downto 0);
-- Selects which set of coefficients (0 to 7) is loaded into
-- memory1a..memory4a; its driver is outside the visible part of this file.
signal indexi : integer;
-- clks and counters
signal cntr12 : std_logic_vector(3 downto 0);
-- Reset to 0, then counts 1..8 repeatedly; the value "1000" enables the
-- capture of the shift-register stages into xa0_reg..xa7_reg.
signal cntr8 : std_logic_vector(3 downto 0);
signal cntr79 : std_logic_vector(6 downto 0);
signal wr_cntr : std_logic_vector(6 downto 0);
signal rd_cntr : std_logic_vector(6 downto 0);
signal cntr92 : std_logic_vector(6 downto 0);
-- memory section
-- Registered multiplier coefficients, one per multiplier, chosen by indexi.
-- Per the inline comments at the assignments, negative coefficients are held
-- in sign-magnitude form (MSB = sign, lower 7 bits = magnitude).
signal memory1a, memory2a, memory3a, memory4a: std_logic_vector(7 downto 0);
signal data_out : std_logic_vector(10 downto 0);
signal en_ram1 : std_logic;
signal en_dct2d : std_logic;
signal en_ram1reg : std_logic;
signal en_dct2d_reg : std_logic;
-- Two 64-entry x 11-bit arrays; the file header describes them as the two
-- RAMs that alternate between write and read every 64 clocks.
type ram1a_mem is array (0 to 63) of std_logic_vector(10 downto 0);
signal ram1_mem : ram1a_mem;
-- add the following to infer block RAM in Synplicity
-- synthesis syn_ramstyle = "block_ram" //shd be within /*..*/
type ram2a_mem is array (0 to 63) of std_logic_vector(10 downto 0);
signal ram2_mem : ram2a_mem;
-- add the following to infer block RAM in Synplicity
-- synthesis syn_ramstyle = "block_ram" //shd be within /*..*/
-- 2D section
-- NOTE(review): the signals below mirror the 1D-section names with wider
-- vectors; their drivers are outside the visible part of this file.
signal data_out_final : std_logic_vector(10 downto 0);
signal xb0_in, xb1_in, xb2_in, xb3_in,
xb4_in, xb5_in, xb6_in, xb7_in : std_logic_vector(10 downto 0);
signal xb0_reg, xb1_reg, xb2_reg, xb3_reg,
xb4_reg, xb5_reg, xb6_reg, xb7_reg : std_logic_vector(11 downto 0);
signal add_sub1b,add_sub2b,add_sub3b,add_sub4b: std_logic_vector(11 downto 0);
signal addsub1b_comp,addsub2b_comp,
addsub3b_comp,addsub4b_comp : std_logic_vector (10 downto 0);
signal save_sign1b,save_sign2b,save_sign3b,save_sign4b : std_logic;
signal xor1b,xor2b,xor3b,xor4b: std_logic;
signal p1b,p2b,p3b,p4b: std_logic_vector(19 downto 0);
signal p1b_all,p2b_all,p3b_all,p4b_all : std_logic_vector (17 downto 0);
-- declare as a 36-bit wide signal if using mult18x18
signal toggleB : std_logic;
signal dct2d_int1 : std_logic_vector(19 downto 0);
signal dct2d_int2 : std_logic_vector(19 downto 0);
signal dct_2d_int : std_logic_vector(19 downto 0);
signal dct_2d_rnd : std_logic_vector(11 downto 0);
-- rounding of the value
BEGIN
-- 1D-DCT BEGIN
-- store 1D-DCT constant coefficient values for multipliers
-- Coefficient registers for the four 1D-DCT multipliers.
-- RST (asynchronous, active high) clears all four registers. On each rising
-- CLK edge, when indexi is in 0..7, the matching set of the table below is
-- loaded into memory1a..memory4a; for any other indexi the registers hold.
-- Negative coefficients are stored in sign-magnitude form: MSB = sign,
-- lower 7 bits = magnitude (e.g. "10110001" is -49).
PROCESS (RST, CLK)
    subtype coef_word  is std_logic_vector(7 downto 0);
    type    coef_set   is array (1 to 4) of coef_word;
    type    coef_table is array (0 to 7) of coef_set;
    --                           memory1a    memory2a    memory3a    memory4a
    constant DCT_COEF : coef_table := (
        0 => ("01011011", "01011011", "01011011", "01011011"),  --  91,   91,   91,   91
        1 => ("01111110", "01101010", "01000111", "00011001"),  -- 126,  106,   71,   25
        2 => ("01110110", "00110001", "10110001", "11110110"),  -- 118,   49,  -49, -118
        3 => ("01101010", "10011001", "11111110", "11000111"),  -- 106,  -25, -126,  -71
        4 => ("01011011", "11011011", "11011011", "01011011"),  --  91,  -91,  -91,   91
        5 => ("01000111", "11111110", "00011001", "01101010"),  --  71, -126,   25,  106
        6 => ("00110001", "11110110", "01110110", "10110001"),  --  49, -118,  118,  -49
        7 => ("00011001", "11000111", "01101010", "11111110")   --  25,  -71,  106, -126
    );
BEGIN
    if RST = '1' then
        memory1a <= (others => '0');
        memory2a <= (others => '0');
        memory3a <= (others => '0');
        memory4a <= (others => '0');
    elsif CLK'event and CLK = '1' then
        -- Out-of-range index values leave the registers unchanged.
        if indexi >= 0 and indexi <= 7 then
            memory1a <= DCT_COEF(indexi)(1);
            memory2a <= DCT_COEF(indexi)(2);
            memory3a <= DCT_COEF(indexi)(3);
            memory4a <= DCT_COEF(indexi)(4);
        end if;
    end if;
END PROCESS;
-- 8-bit input shifted 8 times through a shift register
-- Input shift register: eight 8-bit stages clocked by CLK.
-- RST (asynchronous, active high) clears every stage. On each rising CLK
-- edge xin enters xa0_in and every other stage takes the value its
-- predecessor held before the edge.
PROCESS (CLK, RST)
BEGIN
    if RST = '1' then
        xa0_in <= (others => '0');
        xa1_in <= (others => '0');
        xa2_in <= (others => '0');
        xa3_in <= (others => '0');
        xa4_in <= (others => '0');
        xa5_in <= (others => '0');
        xa6_in <= (others => '0');
        xa7_in <= (others => '0');
    elsif CLK'event and CLK = '1' then
        -- Signal assignments take effect after the process suspends, so the
        -- order in which the stages are listed does not matter.
        xa7_in <= xa6_in;
        xa6_in <= xa5_in;
        xa5_in <= xa4_in;
        xa4_in <= xa3_in;
        xa3_in <= xa2_in;
        xa2_in <= xa1_in;
        xa1_in <= xa0_in;
        xa0_in <= xin;
    end if;
END PROCESS;
-- shifted inputs registered every 8th clk (using cntr8)
-- Row counter cntr8.
-- RST (asynchronous, active high) sets it to 0. After that it advances by one
-- on each rising CLK edge until it reaches 8 and then restarts from 1, so the
-- sequence is 0, 1..8, 1..8, ...
PROCESS (CLK, RST)
BEGIN
    if RST = '1' then
        cntr8 <= (others => '0');
    elsif CLK'event and CLK = '1' then
        if not (cntr8 < "1000") then
            -- wrap to 1, not 0: the value 0 only occurs right after reset
            cntr8 <= "0001";
        else
            cntr8 <= cntr8 + "0001";
        end if;
    end if;
END PROCESS;
-- Row capture registers xa0_reg..xa7_reg.
-- RST (asynchronous, active high) clears them. On a rising CLK edge with
-- cntr8 = "1000", each shift-register stage is copied into its register with
-- bit 7 replicated on top (8-bit -> 9-bit sign extension). On all other
-- edges the registers keep their value.
PROCESS (CLK, RST)
BEGIN
    if RST = '1' then
        xa0_reg <= (others => '0');
        xa1_reg <= (others => '0');
        xa2_reg <= (others => '0');
        xa3_reg <= (others => '0');
        xa4_reg <= (others => '0');
        xa5_reg <= (others => '0');
        xa6_reg <= (others => '0');
        xa7_reg <= (others => '0');
    elsif CLK'event and CLK = '1' then
        if cntr8 = "1000" then
            xa0_reg <= xa0_in(7) & xa0_in;
            xa1_reg <= xa1_in(7) & xa1_in;
            xa2_reg <= xa2_in(7) & xa2_in;
            xa3_reg <= xa3_in(7) & xa3_in;
            xa4_reg <= xa4_in(7) & xa4_in;
            xa5_reg <= xa5_in(7) & xa5_in;
            xa6_reg <= xa6_in(7) & xa6_in;
            xa7_reg <= xa7_in(7) & xa7_in;
        end if;
    end if;
END PROCESS;
-- Toggle flip-flop toggleA.
-- RST (asynchronous, active high) forces it to '0'; every rising CLK edge
-- inverts it.
PROCESS (CLK, RST)
BEGIN
    if RST = '1' then
        toggleA <= '0';
    elsif CLK'event and CLK = '1' then
        toggleA <= not toggleA;
    end if;
END PROCESS;
-- adder / subtractor block
PROCESS (CLK, RST)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -