📄 matrix3x3.vhd
字号:
--*********************************************************************/
-- matrix3x3.vhd - 3X3 Matrix Multiply
-- Implementation using basic equations
--
-- Author: Latha Pillai
-- Senior Applications Engineer
-- Video Applications
-- Advanced Products Division
-- Xilinx, Inc.
--
-- Copyright (c) 1999 Xilinx, Inc.
-- All rights reserved
--
-- Date: Aug. 7, 2001
-- For: Application note XAPP284
--
-- RESTRICTED RIGHTS LEGEND
--
-- This software has not been published by the author, and
-- has been disclosed to others for the purpose of enhancing
-- and promoting design productivity in Xilinx products.
--
-- Therefore use, duplication or disclosure, now and in the
-- future should give consideration to the productivity
-- enhancements afforded the user of this code by the author's
-- efforts. Thank you for using our products !
--
-- Disclaimer: THESE DESIGNS ARE PROVIDED "AS IS" WITH NO WARRANTY
-- WHATSOEVER AND XILINX SPECIFICALLY DISCLAIMS ANY
-- IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR
-- A PARTICULAR PURPOSE, OR AGAINST INFRINGEMENT.
--
-- Revision:
-- Aug. 7, 2001 Creation
-- Aug. 7, 2001 Rev1. Gregg Hawkes
--
--
-- Other modules instanced in this design:
--
-- MULT18X18
--
--BRIEF DESCRIPTION
--This code describes using a technique called time multiplexing to
--leverage a fast hardware multiply in a relatively slow operation,
--thereby increasing the efficiency of the implementation.
--The operation being shown is a 3X3 matrix of constants times a 3
--component vector. The equations look like:
--KA1 * A + KB1 * B + KC1 * C = X
--KA2 * A + KB2 * B + KC2 * C = Y
--KA3 * A + KB3 * B + KC3 * C = Z
--The hardware to accomplish this task consists of a multiplier fed by 3
--input registers and an accumulator to compute the three terms in each
--line above.
--DETAILED DESCRIPTION:
--The multiplier output is fed into the adder A input. It takes 3 clk
--cycles for the first valid multiplier output to reach the adder input A. The
--B input of the adder can be a zero or the adder's accumulating register.
--By selecting a zero on the B input the adder just passes the input A
--through to the accumulating register. By selecting the accumulating
--register, the contents of the previous add can be added to the output of
--the multiplier.
--The repeating flow for the accumulating register will be for the 1st clk,
--the mux output is '0', so we always pass the first argument through to
--the accumulator register. For the 2nd and 3rd clks, the accumulator
--register is fed back and added to the output of the multiply. This is
--made possible by using the cntr3 outputs as the select lines.
--The following text describes the condition of the internal nodes after
--consecutive clocks. The state of the nodes assumes the clock has
--occurred and data is stable.
--clock    multiplier    adder                  adder
--number   output        output                 output
--         register                             register
---------------------------------------------------------------------------
--rst      X             X                      0
--1        X             X                      0
--2        X             X                      0
--3        KA1*A         KA1*A                  0
--4        KB1*B         KA1*A+KB1*B            KA1*A
--5        KC1*C         KA1*A+KB1*B+KC1*C      KA1*A+KB1*B
--6        KA2*A         KA2*A                  KA1*A+KB1*B+KC1*C   answer 1
--7        KB2*B         KA2*A+KB2*B            KA2*A
--8        KC2*C         KA2*A+KB2*B+KC2*C      KA2*A+KB2*B
--9        KA3*A         KA3*A                  KA2*A+KB2*B+KC2*C   answer 2
--10       KB3*B         KA3*A+KB3*B            KA3*A
--11       KC3*C         KA3*A+KB3*B+KC3*C      KA3*A+KB3*B
--12       next KA1*A    next KA1*A             KA3*A+KB3*B+KC3*C   answer 3
--*/
--/***********************************************************************/
library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.std_logic_arith.all;
use IEEE.std_logic_unsigned.all;
--library virtex;
--use virtex.components.all;
--library synplify;
--use synplify.attributes.all;
library unisims_ver; -- include this for modelsim simulation
-- when using mult18x18
-- matrix3x3: port interface of the time-multiplexed 3x3 matrix multiply.
--   A, B, C    : 12-bit data inputs (the three vector components).
--   CLK        : clock; all registers update on its rising edge.
--   RST        : asynchronous reset, active high.
--   CWEL       : coefficient write select. After one register stage,
--                "01"/"10"/"11" load KA,KB,KC into coefficient row 1/2/3;
--                "00" leaves the stored coefficients unchanged.
--   KA, KB, KC : 10-bit coefficient inputs.
--   X, Y, Z    : 12-bit results (low 12 bits of the accumulator).
entity matrix3x3 is
  port (
    A, B, C    : in  std_logic_vector(11 downto 0);
    CLK, RST   : in  std_logic;
    CWEL       : in  std_logic_vector(1 downto 0);
    KA, KB, KC : in  std_logic_vector(9 downto 0);
    X, Y, Z    : out std_logic_vector(11 downto 0)
  );
end matrix3x3;
architecture model of matrix3x3 is
-- Two-stage input data pipeline. The *_reg1 stage samples A/B/C when j = 0;
-- the *_reg stage copies it on the next clock and feeds the data mux.
signal A_reg, B_reg, C_reg: std_logic_vector (11 downto 0);
signal A_reg1, B_reg1, C_reg1: std_logic_vector (11 downto 0);
-- CWEL_reg: registered copy of CWEL. i_wait: start-up delay counter that
-- gates indexi.
signal CWEL_reg,i_wait: std_logic_vector(1 downto 0);
-- Start-up delay counter that gates cntr9_out.
signal cnt9_wait: std_logic_vector (2 downto 0);
-- 18-bit multiplier operands (zero-padded coeff_mux and data_mux).
signal ain, bin: std_logic_vector (17 downto 0);
-- Stored coefficients, three rows of three, loaded from KA/KB/KC.
signal KA1, KB1, KC1, KA2, KB2, KC2, KA3, KB3, KC3: std_logic_vector (9 downto 0);
-- Registered outputs of the 9:1 data / coefficient selection.
signal data_mux: std_logic_vector (11 downto 0);
signal coeff_mux: std_logic_vector (9 downto 0);
-- cntr9 is only read by its own counter process; cntr9_out sequences the
-- X/Y/Z output registers.
signal cntr9, cntr9_out : std_logic_vector (3 downto 0);
-- P1_reg: registered product. adder_mux: adder B input (zero or sum).
-- sum: accumulator register.
signal P1_reg,adder_mux,sum: std_logic_vector (35 downto 0);
-- P1 is the multiplier output; P2 and P3 are not referenced by this
-- architecture.
signal P1, P2, P3: std_logic_vector (35 downto 0);
-- Accumulate phase counter; selects the adder mux.
signal cntr3 : std_logic_vector (1 downto 0);
-- j is driven through the nine values 0..8 by its counter process, so the
-- range must include 8. The previous "0 to 7" caused a range violation in
-- simulation when the counter reached 8.
signal j : integer range 0 to 8;
-- Index of the coefficient/data pair currently selected (0..8).
signal indexi : integer range 0 to 8;
signal i : integer range 0 to 3;
-- Locally declared 18x18 multiplier primitive.
component MULT18X18
port(
A,B: in std_logic_vector (17 downto 0);
P: out std_logic_vector (35 downto 0));
end component;
begin
--/* ----------DATA INPUT SECTION------------- */
--/* The 3:1 data mux. To match the pipeline of the data inputs with that of
--the coefficient inputs, the data values are registered first
--at the input of the 3:1 mux and then again at the output of the 3:1 mux.
--To make sure that the inputs don't change in the middle of a set of vector
--summations, the inputs are held constant for 9 clks. This ensures that
--the input values seen by the 3x3 matrix are the same for a whole set of
--answers. */
--/* cntr3 to count 0-1-2-3-1-2-3 */
-- Phase counter: cleared to 0 by reset, then cycles 1, 2, 3, 1, 2, 3, ...
cntr3_proc : process (CLK, RST)
begin
  if RST = '1' then
    cntr3 <= (others => '0');
  elsif rising_edge(CLK) then
    if cntr3 = "11" then
      -- wrap back to 1 (not 0) so that 0 only occurs right after reset
      cntr3 <= "01";
    else
      cntr3 <= cntr3 + 1;
    end if;
  end if;
end process cntr3_proc;
--/* inputs registered twice to match the pipe length of the coefficients */
-- First input register stage: captures A, B and C only when j = 0 and
-- holds the captured values on every other clock.
in_stage1_proc : process (CLK, RST)
begin
  if RST = '1' then
    A_reg1 <= (others => '0');
    B_reg1 <= (others => '0');
    C_reg1 <= (others => '0');
  elsif rising_edge(CLK) then
    if j = 0 then
      A_reg1 <= A;
      B_reg1 <= B;
      C_reg1 <= C;
    end if;
  end if;
end process in_stage1_proc;
-- Second input register stage: copies the first-stage registers on every
-- rising clock edge.
in_stage2_proc : process (CLK, RST)
begin
  if RST = '1' then
    A_reg <= (others => '0');
    B_reg <= (others => '0');
    C_reg <= (others => '0');
  elsif rising_edge(CLK) then
    A_reg <= A_reg1;
    B_reg <= B_reg1;
    C_reg <= C_reg1;
  end if;
end process in_stage2_proc;
-- Input-hold counter: j steps 0, 1, ..., 8 and then wraps to 0 (nine
-- states). The first input register stage loads new data when j = 0.
j_cnt_proc : process (CLK, RST)
begin
  if RST = '1' then
    j <= 0;
  elsif rising_edge(CLK) then
    if j >= 8 then
      j <= 0;
    else
      j <= j + 1;
    end if;
  end if;
end process j_cnt_proc;
--/* ----------MODE SELECT SECTION------------- */
--/* mode select should be constant for 3 clk cycles to complete one set
--of coefficients. So modeselect is updated every 3rd clk */
--/* cntr3 used to hold CWEL constant for 3 clocks. */
-- Registers the coefficient write select and runs the auxiliary counter i.
-- CWEL is sampled into CWEL_reg on every rising clock edge; no multi-clock
-- hold is applied in this process.
--
-- The i counter now lives inside the clocked branch. Previously it sat
-- after the reset/clock "if", so it ran on every CLK or RST event
-- (including falling edges), overrode the reset assignment, and counted
-- up to 4 although i is declared "integer range 0 to 3".
cwel_reg_proc : process (CLK, RST)
begin
  if RST = '1' then
    CWEL_reg <= "00";
    i        <= 0;
  elsif rising_edge(CLK) then
    CWEL_reg <= CWEL;
    -- wrap at 3 so i never leaves its declared range
    if i < 3 then
      i <= i + 1;
    else
      i <= 0;
    end if;
  end if;
end process cwel_reg_proc;
--/* coefficient register update. The register shd hold the
--value for 3 clks to get the right output. */
-- Coefficient storage. CWEL_reg selects which row of three coefficients is
-- overwritten with the current KA/KB/KC inputs; "00" (and any other value)
-- leaves all nine registers unchanged.
coeff_load_proc : process (CLK, RST)
begin
  if RST = '1' then
    KA1 <= (others => '0');
    KB1 <= (others => '0');
    KC1 <= (others => '0');
    KA2 <= (others => '0');
    KB2 <= (others => '0');
    KC2 <= (others => '0');
    KA3 <= (others => '0');
    KB3 <= (others => '0');
    KC3 <= (others => '0');
  elsif rising_edge(CLK) then
    case CWEL_reg is
      when "01" =>          -- row 1
        KA1 <= KA;
        KB1 <= KB;
        KC1 <= KC;
      when "10" =>          -- row 2
        KA2 <= KA;
        KB2 <= KB;
        KC2 <= KC;
      when "11" =>          -- row 3
        KA3 <= KA;
        KB3 <= KB;
        KC3 <= KC;
      when others =>
        null;
    end case;
  end if;
end process coeff_load_proc;
--/* ----------COEFFIECIENT MUX SECTION------------- */
--/*cntr9 to count 0-1-2-3-4-5-6-7-8-9-1 */
-- Free-running counter: cleared to 0 by reset, then counts 1..9 and wraps
-- from 9 back to 1.
cntr9_proc : process (CLK, RST)
begin
  if RST = '1' then
    cntr9 <= (others => '0');
  elsif rising_edge(CLK) then
    if cntr9 = "1001" then
      cntr9 <= "0001";
    else
      cntr9 <= cntr9 + 1;
    end if;
  end if;
end process cntr9_proc;
-- Registered operand selection. indexi picks one coefficient/data pair per
-- clock: 0..2 select row 1, 3..5 row 2, 6..8 row 3, and within each row
-- the data operand steps through A_reg, B_reg, C_reg.
operand_mux_proc : process (CLK, RST)
begin
  if RST = '1' then
    coeff_mux <= (others => '0');
    data_mux  <= (others => '0');
  elsif rising_edge(CLK) then
    case indexi is
      -- row 1
      when 0 =>
        coeff_mux <= KA1;
        data_mux  <= A_reg;
      when 1 =>
        coeff_mux <= KB1;
        data_mux  <= B_reg;
      when 2 =>
        coeff_mux <= KC1;
        data_mux  <= C_reg;
      -- row 2
      when 3 =>
        coeff_mux <= KA2;
        data_mux  <= A_reg;
      when 4 =>
        coeff_mux <= KB2;
        data_mux  <= B_reg;
      when 5 =>
        coeff_mux <= KC2;
        data_mux  <= C_reg;
      -- row 3
      when 6 =>
        coeff_mux <= KA3;
        data_mux  <= A_reg;
      when 7 =>
        coeff_mux <= KB3;
        data_mux  <= B_reg;
      when 8 =>
        coeff_mux <= KC3;
        data_mux  <= C_reg;
      when others =>
        null;
    end case;
  end if;
end process operand_mux_proc;
-- Start-up delay for indexi: loaded with 1 by reset, counts down to 0 and
-- then stays at 0 until the next reset.
i_wait_proc : process (CLK, RST)
begin
  if RST = '1' then
    i_wait <= "01";
  elsif rising_edge(CLK) then
    if i_wait > "00" then
      i_wait <= i_wait - 1;
    end if;
    -- otherwise the register simply keeps its value
  end if;
end process i_wait_proc;
-- Operand index: preset to 8 by reset so the first enabled step wraps to 0.
-- Once i_wait has reached "00" it advances 0, 1, ..., 8, 0, ... every clock.
indexi_proc : process (CLK, RST)
begin
  if RST = '1' then
    indexi <= 8;
  elsif rising_edge(CLK) then
    if i_wait = "00" then
      if indexi < 8 then
        indexi <= indexi + 1;
      else
        indexi <= 0;
      end if;
    end if;
  end if;
end process indexi_proc;
--/* ----------MULTIPLIER SECTION------------- */
--/* 9x pumped multiplier; P3 registered twice to match the pipelining
-- of the first adder */
-- Pad the selected operands with leading zeros to the 18-bit multiplier
-- width: 8 zeros + 10-bit coefficient, 6 zeros + 12-bit data.
ain <= "00000000" & coeff_mux;
bin <= "000000" & data_mux;

-- Single shared multiplier; its 36-bit product P1 is combinational here and
-- is registered separately as P1_reg.
MULT1 : MULT18X18
  port map (
    A => ain,
    B => bin,
    P => P1
  );
-- registering multiplier outputs --
-- Pipeline register on the multiplier product.
p1_reg_proc : process (CLK, RST)
begin
  if RST = '1' then
    P1_reg <= (others => '0');
  elsif rising_edge(CLK) then
    P1_reg <= P1;
  end if;
end process p1_reg_proc;
--/* ----------ADDER SECTION------------- */
--/* Adder mux. Inputs a '0' every 3rd clk */
-- Adder B-input select (combinational): zero when cntr3 = "01", so the
-- accumulator restarts with the incoming product alone; otherwise the
-- current accumulator value is fed back.
adder_mux <= (others => '0') when cntr3 = "01" else sum;
-- Final adder -
-- Accumulator register: registered product plus the adder mux output
-- (either zero or the previous accumulator value).
sum_proc : process (CLK, RST)
begin
  if RST = '1' then
    sum <= (others => '0');
  elsif rising_edge(CLK) then
    sum <= P1_reg + adder_mux;
  end if;
end process sum_proc;
--/* ----------OUTPUT SECTION------------- */
--/* At the output of the adder, the first valid X value appears at the 6th clk
--after reset. After this, at every 3rd clk, valid output values are obtained for
--Y, Z, X, Y, Z and so on. This function is realised using an enable cntr. The cntr,
--after reset, counts up to 3, at which point another output counter is enabled. The
--output of the enable counter holds its value of 3 as long as it is not reset. */
--/* output cntr starts after 4 clk to match the initial pipe
--delays of inputs/coefficients */
-- Start-up delay for the output sequencer: loaded with 4 by reset, counts
-- down to 0 and then stays at 0 until the next reset.
cnt9_wait_proc : process (CLK, RST)
begin
  if RST = '1' then
    cnt9_wait <= "100";
  elsif rising_edge(CLK) then
    if cnt9_wait > "000" then
      cnt9_wait <= cnt9_wait - 1;
    end if;
    -- otherwise the register simply keeps its value
  end if;
end process cnt9_wait_proc;
--/*cntr9_out to count 0-1-2-3-4-5-6-7-8-9-1-2- */
-- Output sequencer: held at 0 after reset until cnt9_wait reaches "000",
-- then counts 1..9 and wraps from 9 back to 1.
cntr9_out_proc : process (CLK, RST)
begin
  if RST = '1' then
    cntr9_out <= (others => '0');
  elsif rising_edge(CLK) then
    if cnt9_wait = "000" then
      if cntr9_out = "1001" then
        cntr9_out <= "0001";
      else
        cntr9_out <= cntr9_out + 1;
      end if;
    end if;
  end if;
end process cntr9_out_proc;
--/* adder output assigned to X,Y and Z */
-- Output registers. The low 12 bits of the accumulator are captured into
-- X, Y and Z when cntr9_out is 3, 6 and 9 respectively; in every other
-- state all three outputs hold their previous values.
xyz_out_proc : process (CLK, RST)
begin
  if RST = '1' then
    X <= (others => '0');
    Y <= (others => '0');
    Z <= (others => '0');
  elsif rising_edge(CLK) then
    case cntr9_out is
      when "0011" =>
        X <= sum(11 downto 0);
      when "0110" =>
        Y <= sum(11 downto 0);
      when "1001" =>
        Z <= sum(11 downto 0);
      when others =>
        null;
    end case;
  end if;
end process xyz_out_proc;
end model;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -