-- File: h264cavlc.vhd
-------------------------------------------------------------------------
-- H264 CAVLC encoding - VHDL
--
-- Written by Andy Henson
-- Copyright (c) 2008 Zexia Access Ltd
-- All rights reserved.
--
-- Redistribution and use in source and binary forms, with or without
-- modification, are permitted provided that the following conditions are met:
-- * Redistributions of source code must retain the above copyright
-- notice, this list of conditions and the following disclaimer.
-- * Redistributions in binary form must reproduce the above copyright
-- notice, this list of conditions and the following disclaimer in the
-- documentation and/or other materials provided with the distribution.
-- * Neither the name of the Zexia Access Ltd nor the
-- names of its contributors may be used to endorse or promote products
-- derived from this software without specific prior written permission.
--
-- THIS SOFTWARE IS PROVIDED BY ZEXIA ACCESS LTD ``AS IS'' AND ANY
-- EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-- DISCLAIMED. IN NO EVENT SHALL ZEXIA ACCESS LTD OR ANDY HENSON BE LIABLE FOR ANY
-- DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-------------------------------------------------------------------------
-- This is the CAVLC encoding for H264
-- Context Adaptive Variable Length Coding (CAVLC) encodes all co-efficients
-- from 4x4 (or 2x2) residuals in an efficient way. It is context dependent in that
-- it uses different tables depending on recent 4x4 encodings (or 2x2 encoding)
-- Input: VIN - value to be encoded in reverse zigzag order (1 per clock)
-- also: NIN - number of coefficients in adjacent blocks (Nu+Nl/2)
-- Output: VE,VL - encoded value as "wide" bits
-- also: NOUT - number of coefficients this block (set 1clk after ENABLE low)
-- Wide bits output format consists of words of:
-- 25 bits of data (VE) (aligned right), plus 5 bits of length (VL)
-- valid lengths (VL) are 1..31 (unspecified bits are zero)
-- ENABLE should be high for duration of 4x4 (or 2x2) subblock encoding
-- and must remain high for exactly 16 (4x4), 15 (4x4-1) or 4 (2x2) clocks
-- then must be low for at least 5 clocks.
-- Latency for output of all data is <= 20 clocks from last VIN.
-- If switching to ChromaDC 2x2 blocks after 4x4 blocks, allow an extra 12
-- clocks for data to be output, but similar size blocks may be pipelined.
-- READY is 1 when ENABLE may be set, but it goes to 0 a CLK2 after ENABLE
-- is set; ENABLE must continue to be set for entire 4x4 (2x2) block.
-- Typically 18 CLK2's to input (4x4) subblock - 16 + 2 idle.
-- Worst case is 18 CLK's to output all parameters (CTOKEN + 16 COEFFS +
-- RUNBF state, or <=15 COEFFS and TZEROS, or <=15 COEFFS and T1SIGN).
-- This is unlikely and most will be output in 9 CLKs
-- VALID is set when output VE/VL is valid
-- Internal operation:
--
-- When ENABLE goes high: STATE_READ is entered, data is read in and parsed, setting
-- parameters maxcoeffs totalcoeffs totalzeros trailingones t1signs and loading
-- tables with raw non-zero/non-t1 coeffs and raw run lengths
-- when ENABLE falls, STATE_CTOKEN is entered, and the coeff_token computed and output.
-- the next clock enters STATE_T1SIGN where all T1 sign bits are output (if any)
-- the next clock enters STATE_COEFFS where all the coefficient levels are computed
-- and output, if any, this might take up to 16 clocks
-- then STATE_TZEROS is entered, and if needed totalzeros is output
-- the next clock enters STATE_RUNBF and, once the runbefore subprocessor has completed,
-- this outputs the result of the runbefore string in a single clock.
-- then STATE_IDLE is entered, and the totals are zeroed ready for the next 4x4 block.
--
-- Runbefore subprocessor:
-- starts when ENABLE falls, uses the run information collected to compute the run
-- length string; this might take up to 16 clocks to run, so it is done simultaneously with
-- the other states. The entire string is only about 18 bits long worst case.
-- when the system enters STATE_RUNBF and the subprocessor has finished, the word is output
-- and totals are zeroed ready for next time.
-- Spartan3: 800 slices; 224MHz/91MHz (CLK2/CLK); Xpower: 21mW @ 180MHz/90Mhz
-- CycloneIII: 2012 LEs; 187MHz/90MHz (CLK2/CLK); Power:
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
use ieee.numeric_std.ALL;
entity h264cavlc is
	port (
		-- Clocks
		CLK    : in  std_logic;                      -- main clock; also the output clock
		CLK2   : in  std_logic;                      -- input clock (typically twice CLK)

		-- Input handshake
		ENABLE : in  std_logic;                      -- values are transferred only while this is 1;
		                                             -- per the file header it stays high for the whole sub-block
		READY  : out std_logic;                      -- per the file header: 1 when ENABLE may be set, drops to 0
		                                             -- one CLK2 after ENABLE is set.
		                                             -- NOTE(review): the earlier inline note read
		                                             -- "enable can fall when this 1" - confirm which is intended

		-- Input data
		VIN    : in  std_logic_vector(11 downto 0);  -- value to encode, 12 bits max (original note: +/- 2048;
		                                             -- NOTE(review): 12-bit two's complement tops out at +2047)
		NIN    : in  std_logic_vector(4 downto 0);   -- number of coefficients in adjacent blocks (see file header)
		SIN    : in  std_logic := '0';               -- stream/strobe flag, copied to VS

		-- Encoded output ("wide bits": right-aligned data word plus its length)
		VS     : out std_logic := '0';               -- stream/strobe flag, synchronised with VL/VE
		VE     : out std_logic_vector(24 downto 0) := (others => '0');  -- 25 bits of encoded data, aligned right
		VL     : out std_logic_vector(4 downto 0)  := (others => '0');  -- 5-bit length of the valid data in VE
		VALID  : out std_logic := '0';               -- 1 when VE/VL carry a valid word

		-- Status
		XSTATE : out std_logic_vector(2 downto 0);   -- internal state, for debug only
		NOUT   : out std_logic_vector(4 downto 0) := (others => '0')    -- number of coefficients in this block
	);
end h264cavlc;
architecture cavlc of h264cavlc is
-- Input-side ("e"-prefixed) registers: information collected from the input when ENABLE=1.
-- All these are in the "CLK2" timing domain.
-- The file header describes the read phase as parsing the input into maxcoeffs,
-- totalcoeffs, totalzeros, trailingones and t1signs; the e-prefixed signals of the
-- same names below are the CLK2-domain versions of those parameters.
signal eenable : std_logic := '0'; --1 if ENABLE=1 seen
signal eparity : std_logic := '0'; --which register bank to use
signal emaxcoeffs : std_logic_vector(4 downto 0) := b"00000"; --presumably the block size (16, 15 or 4 per header) - TODO confirm
signal etotalcoeffs : std_logic_vector(4 downto 0) := b"00000"; --5-bit total-coefficient count for the block being read
signal etotalzeros : std_logic_vector(4 downto 0) := b"00000"; --5-bit total-zeros count for the block being read
signal etrailingones : std_logic_vector(1 downto 0) := b"00"; --trailing-ones count; max 3 allowed
signal ecnz : std_logic := '0'; --flag set if a non-zero coeff has been seen so far
signal ecgt1 : std_logic := '0'; --flag set if a coeff >1 has been seen so far
signal et1signs : std_logic_vector(2 downto 0) := b"000"; --sign bits, one per trailing one (1 = negative)
signal erun : std_logic_vector(3 downto 0) := b"0000"; --run before next coeff
signal eindex : std_logic_vector(3 downto 0) := b"0000"; --index into coeff table
signal etable : std_logic_vector(1 downto 0); --no initial value; presumably a table selector derived from NIN - TODO confirm
signal es : std_logic := '0'; --s (stream) flag; presumably captured from SIN - TODO confirm
-- Holding buffer ("h"-prefixed); "CLK2" timing domain.
-- Mirrors the e-prefixed input-side parameters one-for-one (same names and widths).
-- NOTE(review): presumably loaded from the e* registers when a block has been read,
-- and held until the CLK-domain engine takes it - confirm against the transfer process.
signal hvalidi : std_logic := '0'; --1 if holding buffer valid
signal hvalid : std_logic := '0'; --1 if holding buffer valid (delayed 1 clk)
signal hparity : std_logic := '0'; --which register bank to use
signal hmaxcoeffs : std_logic_vector(4 downto 0) := b"00000"; --held copy, same width as emaxcoeffs
signal htotalcoeffs : std_logic_vector(4 downto 0) := b"00000"; --held copy, same width as etotalcoeffs
signal htotalzeros : std_logic_vector(4 downto 0) := b"00000"; --held copy, same width as etotalzeros
signal htrailingones : std_logic_vector(1 downto 0) := b"00"; --trailing-ones count; max 3 allowed
signal htable : std_logic_vector(1 downto 0); --no initial value; same width as etable
signal hs : std_logic := '0'; --s (stream) flag
signal t1signs : std_logic_vector(2 downto 0) := b"000"; --sign bits, one per trailing one (1 = negative); note: no "h" prefix
--
-- Working copies of the block parameters used by the output engine.
-- Information copied from the signals above during STATE_IDLE or STATE_RUNBF.
-- This is in the "CLK" domain.
-- trailingones and totalcoeffs (together with ctable) index the coeff_token lookup.
signal maxcoeffs : std_logic_vector(4 downto 0) := b"00000"; --same width as hmaxcoeffs
signal totalcoeffs : std_logic_vector(4 downto 0) := b"00000"; --total-coefficient count for the block being output
signal totalzeros : std_logic_vector(4 downto 0) := b"00000"; --total-zeros count for the block being output
signal trailingones : std_logic_vector(1 downto 0) := b"00"; --trailing-ones count; max 3 allowed
signal parity : std_logic := '0'; --which register bank to use
--
-- States private to this processing engine (3-bit encoding; b"111" is not assigned).
-- The sequence below follows the "Internal operation" notes in the file header.
constant STATE_IDLE : std_logic_vector(2 downto 0) := b"000"; --totals zeroed, ready for the next block
constant STATE_READ : std_logic_vector(2 downto 0) := b"001"; --entered when ENABLE goes high; data is read in and parsed
constant STATE_CTOKEN : std_logic_vector(2 downto 0) := b"010"; --entered when ENABLE falls; coeff_token computed and output
constant STATE_T1SIGN : std_logic_vector(2 downto 0) := b"011"; --all T1 sign bits are output (if any)
constant STATE_COEFFS : std_logic_vector(2 downto 0) := b"100"; --coefficient levels computed and output (up to 16 clocks)
constant STATE_TZEROS : std_logic_vector(2 downto 0) := b"101"; --totalzeros is output, if needed
constant STATE_RUNBF : std_logic_vector(2 downto 0) := b"110"; --runbefore string output once the subprocessor has completed
signal state : std_logic_vector(2 downto 0) := STATE_IDLE; --current state; also driven out on XSTATE for debug
--
-- Runbefore subprocessor state.
-- Per the file header, this subprocessor starts when ENABLE falls and runs
-- alongside the main state machine.
signal rbstate : std_logic := '0'; --1=running 0=done
--
-- Scratch signals used during processing.
signal cindex : std_logic_vector(3 downto 0) := b"0000"; --index into coeff table
signal abscoeff : std_logic_vector(10 downto 0); --11 bits; presumably the magnitude of the current coeff - TODO confirm
signal abscoeffa : std_logic_vector(10 downto 0); --adjusted version of abscoeff
signal signcoeff : std_logic := '0'; --presumably the sign of the current coeff - TODO confirm
signal suffixlen : std_logic_vector(2 downto 0); --0..6
signal rbindex : std_logic_vector(3 downto 0) := b"0000"; --index into coeff table
signal runb : std_logic_vector(3 downto 0) := b"0000"; --run before next coeff
signal rbzerosleft : std_logic_vector(4 downto 0) := b"00000"; --presumably zeros still to be accounted for by the runbefore pass - TODO confirm
signal rbve : std_logic_vector(24 downto 0) := (others => '0'); --same width as VE; presumably the accumulated runbefore bits - TODO confirm
signal rbvl : std_logic_vector(4 downto 0) := b"00000"; --same width as VL; presumably the length of rbve - TODO confirm
-- Lookup-table signals.
-- coeff_token is driven by a concurrent lookup on trailingones, totalcoeffs and ctable.
signal coeff_token : std_logic_vector(5 downto 0); --6-bit code value from the coeff_token lookup
signal ctoken_len : std_logic_vector(4 downto 0); --presumably the bit length of the coeff_token code - TODO confirm
-- Selector values for ctable (3-bit; five tables defined).
constant CTABLE0 : std_logic_vector(2 downto 0) := b"000";
constant CTABLE1 : std_logic_vector(2 downto 0) := b"001";
constant CTABLE2 : std_logic_vector(2 downto 0) := b"010";
constant CTABLE3 : std_logic_vector(2 downto 0) := b"011";
constant CTABLE4 : std_logic_vector(2 downto 0) := b"100";
signal ctable : std_logic_vector(2 downto 0) := CTABLE0; --which coeff_token table is in use
signal ztoken : std_logic_vector(2 downto 0); --presumably the totalzeros code value - TODO confirm
signal ztoken_len : std_logic_vector(3 downto 0); --presumably the bit length of ztoken - TODO confirm
signal ztable : std_logic := '0'; --presumably selects between two totalzeros tables - TODO confirm
signal rbtoken : std_logic_vector(2 downto 0); --presumably the runbefore code value - TODO confirm
-- Data arrays.
-- Per the file header, the read phase loads tables with the raw coefficients and
-- the raw run lengths; these are the storage for them.
-- NOTE(review): 32 entries with 4-bit index signals (eindex/cindex/rbindex) and
-- separate "register bank" parity bits suggests two banks of 16 - confirm at the
-- read/write sites.
type Tcoeffarray is array(31 downto 0) of std_logic_vector(11 downto 0); --32 entries x 12 bits (same width as VIN)
type Trunbarray is array(31 downto 0) of std_logic_vector(3 downto 0); --32 entries x 4 bits (same width as erun/runb)
signal coeffarray : Tcoeffarray := (others=>x"000"); --coefficient store, all entries initially zero
signal runbarray : Trunbarray := (others=>x"0"); --run-length store, all entries initially zero
--
begin
-- Debug tap: drives the XSTATE output port with the current value of the state register.
XSTATE <= state; --DEBUG only
--
-- tables for coeff_token
--
coeff_token <=
b"000001" when trailingones=0 and totalcoeffs=0 and ctable=0 else
b"000101" when trailingones=0 and totalcoeffs=1 and ctable=0 else
b"000001" when trailingones=1 and totalcoeffs=1 and ctable=0 else
b"000111" when trailingones=0 and totalcoeffs=2 and ctable=0 else
b"000100" when trailingones=1 and totalcoeffs=2 and ctable=0 else
b"000001" when trailingones=2 and totalcoeffs=2 and ctable=0 else
b"000111" when trailingones=0 and totalcoeffs=3 and ctable=0 else
b"000110" when trailingones=1 and totalcoeffs=3 and ctable=0 else
b"000101" when trailingones=2 and totalcoeffs=3 and ctable=0 else
b"000011" when trailingones=3 and totalcoeffs=3 and ctable=0 else
b"000111" when trailingones=0 and totalcoeffs=4 and ctable=0 else
b"000110" when trailingones=1 and totalcoeffs=4 and ctable=0 else
b"000101" when trailingones=2 and totalcoeffs=4 and ctable=0 else
b"000011" when trailingones=3 and totalcoeffs=4 and ctable=0 else
b"000111" when trailingones=0 and totalcoeffs=5 and ctable=0 else
b"000110" when trailingones=1 and totalcoeffs=5 and ctable=0 else
b"000101" when trailingones=2 and totalcoeffs=5 and ctable=0 else
b"000100" when trailingones=3 and totalcoeffs=5 and ctable=0 else
b"001111" when trailingones=0 and totalcoeffs=6 and ctable=0 else
b"000110" when trailingones=1 and totalcoeffs=6 and ctable=0 else
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -