📄 xfft_v4_1_timing_calculator_core_fft64.vhd
字号:
VARIABLE debug : INTEGER := 4*mult_gen_mults(A_WIDTH, B_WIDTH);
BEGIN
RETURN debug;
END cmpy_nov4_4_mults;
-- Returns the DSP48 block count for a multiply-add of the given operand
-- widths by picking one of three size classes:
--   * both operands below 19 bits          -> cmpy_mult18x18_DSP48s
--   * only the narrower one below 19 bits  -> cmpy_mult35x18_DSP48s
--   * neither below 19 bits                -> cmpy_mult35x35_DSP48s
-- The operand order does not matter; only the larger/smaller width is used.
FUNCTION cmpy_mult_add_DSP48s(A_WIDTH, B_WIDTH : INTEGER) RETURN INTEGER IS
  CONSTANT wide_operand   : INTEGER := max_i(A_WIDTH, B_WIDTH);
  CONSTANT narrow_operand : INTEGER := min_i(A_WIDTH, B_WIDTH);
BEGIN
  IF wide_operand < 19 THEN
    RETURN cmpy_mult18x18_DSP48s;
  ELSIF narrow_operand < 19 THEN
    RETURN cmpy_mult35x18_DSP48s;
  END IF;
  RETURN cmpy_mult35x35_DSP48s;
END cmpy_mult_add_DSP48s;
-- Returns the DSP48 block count of the 3-multiplier DSP48 complex multiplier.
-- The count is the sum of:
--   * one multiply-add sized (B_WIDTH+1) x A_WIDTH, and
--   * either two multiply-adds sized (A_WIDTH+1) x B_WIDTH, or a fixed 8
--     blocks when A_WIDTH+B_WIDTH+1 exceeds 48.
FUNCTION cmpy_3_DSP48_DSP48s(A_WIDTH, B_WIDTH : INTEGER) RETURN INTEGER IS
  CONSTANT first_term : INTEGER := cmpy_mult_add_DSP48s(B_WIDTH+1, A_WIDTH);
BEGIN
  IF (A_WIDTH+B_WIDTH+1 <= 48) THEN
    RETURN first_term + 2*cmpy_mult_add_DSP48s(A_WIDTH+1, B_WIDTH);
  END IF;
  -- Wider than 48 bits: mult_35x18 can not be used, so charge a fixed 8 blocks
  RETURN first_term + 8;
END cmpy_3_DSP48_DSP48s;
-- Selects the complex-multiplier architecture code (one of the ARCH_* values)
-- for a given device family, optimisation goal and pair of operand widths.
--
-- Parameters:
--   C_FAMILY, C_XDEVICEFAMILY : family strings, tested with derived()
--   OPTIMIZE                  : 0 takes the "optimize for speed" branch; any
--                               other value takes the DSP48-count comparison
--                               branch (only consulted on the first family branch)
--   LARGE_WIDTH, SMALL_WIDTH  : operand widths; cmpy_latency in this file passes
--                               max/min of the A and B widths
--   SINGLE_OUTPUT             : when 1, ARCH_cmpy_3 and ARCH_complex_mult3 are
--                               remapped to another architecture before returning
-- Returns: the selected ARCH_* value.
--
-- NOTE(review): mult_4_DSP48s, mult_3_cost and mult_4_cost are declared but
-- never referenced in this function body.
FUNCTION cmpy_arch(C_FAMILY, C_XDEVICEFAMILY : STRING; OPTIMIZE, LARGE_WIDTH, SMALL_WIDTH : INTEGER; SINGLE_OUTPUT : INTEGER := 0) RETURN INTEGER IS
VARIABLE result : INTEGER;
VARIABLE mult_3_DSP48s : INTEGER;
VARIABLE mult_4_DSP48s : INTEGER;
VARIABLE mult_3_cost : INTEGER;
VARIABLE mult_4_cost : INTEGER;
BEGIN
-- Family branch 1: C_FAMILY derived from "virtex4" or C_XDEVICEFAMILY derived from "spartan3adsp"
IF derived(c_family, "virtex4") OR derived(C_XDEVICEFAMILY, "spartan3adsp") THEN
IF (OPTIMIZE = 0) THEN -- optimize for speed
-- Pick purely by width thresholds; the ELSIF order matters because the ranges overlap
IF (LARGE_WIDTH < 19) THEN result := ARCH_cmpy_18x18;
ELSIF (LARGE_WIDTH < 36) AND (SMALL_WIDTH < 19) THEN result := ARCH_cmpy_35x18;
ELSIF (LARGE_WIDTH < 53) AND (SMALL_WIDTH < 19) THEN result := ARCH_cmpy_52x18;
ELSIF (LARGE_WIDTH < 36) THEN result := ARCH_cmpy_35x35;
-- Widths beyond the thresholds above: choose between the 3- and 4-multiplier
-- architectures by comparing cmpy_nov4_3_mults against cmpy_nov4_4_mults
ELSIF (cmpy_nov4_3_mults(LARGE_WIDTH, SMALL_WIDTH) < cmpy_nov4_4_mults(LARGE_WIDTH, SMALL_WIDTH))
THEN result := ARCH_complex_mult3;
ELSE result := ARCH_complex_mult4;
END IF;
ELSE
-- OPTIMIZE /= 0: within each width class, use ARCH_cmpy_3 only when its DSP48
-- count is strictly lower than that of the dedicated architecture
mult_3_DSP48s := cmpy_3_DSP48_DSP48s(LARGE_WIDTH, SMALL_WIDTH);
IF (LARGE_WIDTH < 19) THEN result := when_else((mult_3_DSP48s < cmpy18x18_DSP48s), ARCH_cmpy_3, ARCH_cmpy_18x18);
ELSIF (SMALL_WIDTH < 19) AND (LARGE_WIDTH < 35) THEN result := when_else((mult_3_DSP48s < cmpy35x18_DSP48s), ARCH_cmpy_3, ARCH_cmpy_35x18); -- Was (LARGE_WIDTH<36). Broken into two lines
-- LARGE_WIDTH = 35 never selects ARCH_cmpy_3 (this line and the "= 35" line below)
ELSIF (SMALL_WIDTH < 19) AND (LARGE_WIDTH = 35) THEN result := ARCH_cmpy_35x18;
ELSIF (LARGE_WIDTH < 35) THEN result := when_else((mult_3_DSP48s < cmpy35x35_DSP48s), ARCH_cmpy_3, ARCH_cmpy_35x35);
ELSIF (LARGE_WIDTH = 35) THEN result := ARCH_cmpy_35x35;
-- LARGE_WIDTH > 35: same 3-vs-4 multiplier comparison as the speed branch.
-- Note there is no ARCH_cmpy_52x18 option on this path.
ELSIF (cmpy_nov4_3_mults(LARGE_WIDTH, SMALL_WIDTH) < cmpy_nov4_4_mults(LARGE_WIDTH, SMALL_WIDTH))
THEN result := ARCH_complex_mult3;
ELSE result := ARCH_complex_mult4;
END IF;
END IF;
-- Family branch 2: C_FAMILY derived from "virtex5"
ELSIF derived(c_family, "virtex5") THEN
-- ignore OPTIMIZE generic - assume always optimize for speed
IF (LARGE_WIDTH < 26) AND (SMALL_WIDTH < 19) THEN result := ARCH_cmpy_18x18; -- 25x18 : 1x1 DSP48E
ELSIF (LARGE_WIDTH < 36) AND (SMALL_WIDTH < 26) THEN result := ARCH_cmpy_35x18; -- 35x25 : 2x1 DSP48E
ELSIF (LARGE_WIDTH < 53) AND (SMALL_WIDTH < 26) THEN result := ARCH_cmpy_52x18; -- 52x25 : 3x1 DSP48E
ELSIF (LARGE_WIDTH < 43) AND (SMALL_WIDTH < 36) THEN result := ARCH_cmpy_35x35; -- 42x35 : 2x2 DSP48E
ELSIF (cmpy_nov4_3_mults(LARGE_WIDTH, SMALL_WIDTH) < cmpy_nov4_4_mults(LARGE_WIDTH, SMALL_WIDTH))
THEN result := ARCH_complex_mult3;
ELSE result := ARCH_complex_mult4;
END IF;
-- All other families: only the 3- or 4-multiplier architectures are candidates
ELSE
IF (cmpy_nov4_3_mults(LARGE_WIDTH, SMALL_WIDTH) < cmpy_nov4_4_mults(LARGE_WIDTH, SMALL_WIDTH))
THEN result := ARCH_complex_mult3;
ELSE result := ARCH_complex_mult4;
END IF;
END IF;
-- If single output, map architectures that don't support single output onto ones that do
IF SINGLE_OUTPUT = 1 THEN
IF result = ARCH_cmpy_3 THEN -- DSP48-based architecture
-- Same thresholds as the speed branch above. There is no ELSE here, but
-- ARCH_cmpy_3 is only ever assigned when LARGE_WIDTH < 35, so one of the
-- first, second or fourth conditions always matches.
IF (LARGE_WIDTH < 19) THEN result := ARCH_cmpy_18x18;
ELSIF (LARGE_WIDTH < 36) AND (SMALL_WIDTH < 19) THEN result := ARCH_cmpy_35x18;
ELSIF (LARGE_WIDTH < 53) AND (SMALL_WIDTH < 19) THEN result := ARCH_cmpy_52x18;
ELSIF (LARGE_WIDTH < 36) THEN result := ARCH_cmpy_35x35;
END IF;
ELSIF result = ARCH_complex_mult3 THEN -- 18x18 mult based architecture
result := ARCH_complex_mult4;
END IF;
END IF;
RETURN result;
END cmpy_arch;
-- Returns the latency reported by calc_fully_pipelined_latency (from
-- mult_gen_v10_0.pkg_mult_gen_v10_0) for an A_WIDTH x B_WIDTH multiplier on
-- C_XDEVICEFAMILY, with the remaining settings fixed as annotated below.
-- C_FAMILY is accepted for interface compatibility but is not used here.
FUNCTION mult_latency(C_FAMILY, C_XDEVICEFAMILY : STRING; A_WIDTH, B_WIDTH : INTEGER) RETURN INTEGER IS
BEGIN
  RETURN calc_fully_pipelined_latency(
    C_XDEVICEFAMILY,
    A_WIDTH,  -- a_width
    0,        -- a_type
    B_WIDTH,  -- b_width
    0,        -- b_type
    1,        -- mult_type = embedded
    1,        -- opt_goal = speed
    0,        -- ccm_imp
    "");      -- b_value
END mult_latency;
-- Basic latency of the 18x18 complex multiplier.
-- Each PIPE_* argument is passed through min_i(max_i(0, x), 1) (presumably
-- clamping it to the range 0..1); the input and mid stages then count once
-- and the output stage counts twice.
FUNCTION cmpy18x18_latency(PIPE_IN, PIPE_MID, PIPE_OUT : INTEGER) RETURN INTEGER IS
  CONSTANT in_stage  : INTEGER := min_i(max_i(0, PIPE_IN), 1);
  CONSTANT mid_stage : INTEGER := min_i(max_i(0, PIPE_MID), 1);
  CONSTANT out_stage : INTEGER := min_i(max_i(0, PIPE_OUT), 1);
BEGIN
  RETURN in_stage + mid_stage + 2*out_stage;
END cmpy18x18_latency;
-- Basic latency of the 35x18 complex multiplier.
-- Each PIPE_* argument is passed through min_i(max_i(0, x), 1); the input and
-- mid stages count once and the output stage counts four times. One extra
-- cycle is added when C_XDEVICEFAMILY is derived from "spartan3adsp".
FUNCTION cmpy35x18_latency(C_XDEVICEFAMILY : STRING; PIPE_IN, PIPE_MID, PIPE_OUT : INTEGER) RETURN INTEGER IS
  CONSTANT in_stage  : INTEGER := min_i(max_i(0, PIPE_IN), 1);
  CONSTANT mid_stage : INTEGER := min_i(max_i(0, PIPE_MID), 1);
  CONSTANT out_stage : INTEGER := min_i(max_i(0, PIPE_OUT), 1);
  VARIABLE total     : INTEGER := in_stage + mid_stage + 4*out_stage;
BEGIN
  IF derived(C_XDEVICEFAMILY, "spartan3adsp") THEN
    total := total + 1;  -- DSP48A extra delay
  END IF;
  RETURN total;
END cmpy35x18_latency;
-- Basic latency of the 52x18 complex multiplier.
-- Each PIPE_* argument is passed through min_i(max_i(0, x), 1); the input and
-- mid stages count once and the output stage counts six times. Two extra
-- cycles are added when C_XDEVICEFAMILY is derived from "spartan3adsp".
FUNCTION cmpy52x18_latency(C_XDEVICEFAMILY : STRING; PIPE_IN, PIPE_MID, PIPE_OUT : INTEGER) RETURN INTEGER IS
  CONSTANT in_stage  : INTEGER := min_i(max_i(0, PIPE_IN), 1);
  CONSTANT mid_stage : INTEGER := min_i(max_i(0, PIPE_MID), 1);
  CONSTANT out_stage : INTEGER := min_i(max_i(0, PIPE_OUT), 1);
  VARIABLE total     : INTEGER := in_stage + mid_stage + 6*out_stage;
BEGIN
  IF derived(C_XDEVICEFAMILY, "spartan3adsp") THEN
    total := total + 2;  -- twice the DSP48A extra delay
  END IF;
  RETURN total;
END cmpy52x18_latency;
-- Basic latency of the 35x35 complex multiplier.
-- Each PIPE_* argument is passed through min_i(max_i(0, x), 1); the input and
-- mid stages count once and the output stage counts eight times. Two extra
-- cycles are added when C_XDEVICEFAMILY is derived from "spartan3adsp".
FUNCTION cmpy35x35_latency(C_XDEVICEFAMILY : STRING; PIPE_IN, PIPE_MID, PIPE_OUT : INTEGER) RETURN INTEGER IS
  CONSTANT in_stage  : INTEGER := min_i(max_i(0, PIPE_IN), 1);
  CONSTANT mid_stage : INTEGER := min_i(max_i(0, PIPE_MID), 1);
  CONSTANT out_stage : INTEGER := min_i(max_i(0, PIPE_OUT), 1);
  VARIABLE total     : INTEGER := in_stage + mid_stage + 8*out_stage;
BEGIN
  IF derived(C_XDEVICEFAMILY, "spartan3adsp") THEN
    total := total + 2;  -- twice the DSP48A extra delay
  END IF;
  RETURN total;
END cmpy35x35_latency;
-- Latency of a multiply-add stage for the given operand widths and pipeline
-- settings.
--
-- Common part: max_i(0, PIPE_IN) (not limited to 1) plus PIPE_MID passed
-- through min_i(max_i(0, x), 1). The output-stage contribution depends on the
-- size class, with PIPE_OUT passed through the same min_i/max_i pair:
--   * larger width < 19  : 1 x output stage
--   * smaller width < 19 : 2 x output stage + 1 x DSP48A penalty
--   * otherwise          : 4 x output stage + 2 x DSP48A penalty, plus a
--                          further 2 x output stage when cascade_mult35x35
--                          (MODE, A_WIDTH, B_WIDTH, C_WIDTH, ROUND_BITS)
--                          returns false
--
-- The DSP48A penalty is one cycle when C_XDEVICEFAMILY is derived from
-- "spartan3adsp", otherwise zero. It accounts for the DSP48A implementation
-- using the Creg rather than the PCIN port to pull in the 17-bit right-shifted
-- data from the upstream DSP48A, and is not required when the multiply-add is
-- <= 18x18.
FUNCTION cmpy_mult_add_latency(C_XDEVICEFAMILY : STRING; A_WIDTH, B_WIDTH, C_WIDTH, ROUND_BITS, MODE, PIPE_IN, PIPE_MID, PIPE_OUT : INTEGER) RETURN INTEGER IS
  CONSTANT wide_operand   : INTEGER := max_i(A_WIDTH, B_WIDTH);
  CONSTANT narrow_operand : INTEGER := min_i(A_WIDTH, B_WIDTH);
  CONSTANT in_stages      : INTEGER := max_i(0, PIPE_IN);
  CONSTANT mid_stage      : INTEGER := min_i(max_i(0, PIPE_MID), 1);
  CONSTANT out_stage      : INTEGER := min_i(max_i(0, PIPE_OUT), 1);
  CONSTANT dsp48a_penalty : INTEGER := boolean'pos(derived(C_XDEVICEFAMILY, "spartan3adsp"));
  VARIABLE total          : INTEGER := in_stages + mid_stage;
BEGIN
  IF wide_operand < 19 THEN
    total := total + out_stage;
  ELSIF narrow_operand < 19 THEN
    total := total + 2*out_stage + dsp48a_penalty;
  ELSE
    total := total + 4*out_stage + 2*dsp48a_penalty;
    -- Extra output-stage cycles when cascade_mult35x35 reports false
    IF NOT cascade_mult35x35(MODE, A_WIDTH, B_WIDTH, C_WIDTH, ROUND_BITS) THEN
      total := total + 2*out_stage;
    END IF;
  END IF;
  RETURN total;
END cmpy_mult_add_latency;
-- Latency of the 3-multiplier DSP48 complex multiplier.
--
-- Steps, all taken directly from the pipeline and width arguments:
--   1. Delay of adders 1 and 3: PIPE_IN + PIPE_MID*(A_WIDTH/18) + PIPE_OUT.
--   2. Delay of adder 2: same formula, using B_WIDTH, or A_WIDTH when
--      eval(PIPE_IN = 0) is 1 (presumably eval maps a boolean to 0/1 - verify).
--   3. Delay through adder 2 plus a multiply-add sized A_WIDTH x (B_WIDTH+1),
--      with ROUND passed as its MODE and (A_WIDTH+B_WIDTH+1)-P_WIDTH-1 as its
--      ROUND_BITS.
--   4. The PIPE_IN given to the other multiply-add is raised, if necessary, to
--      the difference between step 3 and step 1.
--   5. Result = step 1 + latency of a multiply-add sized (A_WIDTH+1) x B_WIDTH
--      (MODE 5, no rounding bits) + 1.
FUNCTION cmpy_3_DSP48_latency(C_XDEVICEFAMILY : STRING; A_WIDTH, B_WIDTH, P_WIDTH, ROUND, PIPE_IN, PIPE_MID, PIPE_OUT : INTEGER) RETURN INTEGER IS
  CONSTANT adder13_delay    : INTEGER := PIPE_IN+PIPE_MID*(A_WIDTH/18)+PIPE_OUT;
  CONSTANT adder2_delay     : INTEGER := PIPE_IN+PIPE_MID*((B_WIDTH+eval(PIPE_IN = 0)*(A_WIDTH-B_WIDTH))/18)+PIPE_OUT;
  CONSTANT mult2_out_width  : INTEGER := A_WIDTH+B_WIDTH+1;
  CONSTANT mult2_round_bits : INTEGER := mult2_out_width-P_WIDTH-1;
  VARIABLE after_mult2      : INTEGER;
  VARIABLE mult13_pipe_in   : INTEGER;
BEGIN
  after_mult2 := adder2_delay
                 + cmpy_mult_add_latency(C_XDEVICEFAMILY => C_XDEVICEFAMILY,
                                         A_WIDTH         => A_WIDTH,
                                         B_WIDTH         => B_WIDTH+1,
                                         C_WIDTH         => 0,
                                         ROUND_BITS      => mult2_round_bits,
                                         MODE            => ROUND,
                                         PIPE_IN         => PIPE_IN,
                                         PIPE_MID        => PIPE_MID,
                                         PIPE_OUT        => PIPE_OUT);

  -- Never use fewer input stages than requested; add more if the mult-2 path is longer
  mult13_pipe_in := max_i(PIPE_IN, after_mult2 - adder13_delay);

  RETURN adder13_delay
         + cmpy_mult_add_latency(C_XDEVICEFAMILY => C_XDEVICEFAMILY,
                                 A_WIDTH         => A_WIDTH+1,
                                 B_WIDTH         => B_WIDTH,
                                 C_WIDTH         => A_WIDTH+1+B_WIDTH,
                                 ROUND_BITS      => 0,
                                 MODE            => 5,
                                 PIPE_IN         => mult13_pipe_in,
                                 PIPE_MID        => PIPE_MID,
                                 PIPE_OUT        => PIPE_OUT)
         + 1;
END cmpy_3_DSP48_latency;
-- Total latency of the complex multiplier.
--
-- The architecture is chosen by cmpy_arch from the family strings, OPTIMIZE,
-- the larger/smaller of A_WIDTH and B_WIDTH, and SINGLE_OUTPUT; the latency is
-- then taken from the matching per-architecture function.
--
-- For ARCH_cmpy_18x18/35x18/52x18/35x35 and ARCH_complex_mult4, three extra
-- cycles are added when SINGLE_OUTPUT = 1 (two for synchronising Re and Im,
-- one for the registered input mux). ARCH_complex_mult3 and every other
-- architecture value (handled by cmpy_3_DSP48_latency) return without that
-- adjustment, as they do not support single output.
--
-- P_WIDTH and ROUND are only used on the cmpy_3_DSP48_latency path.
-- C_HAS_SCLR is not referenced in this function.
FUNCTION cmpy_latency(C_FAMILY, C_XDEVICEFAMILY : STRING; OPTIMIZE, A_WIDTH, B_WIDTH, P_WIDTH, ROUND, PIPE_IN, PIPE_MID, PIPE_OUT, C_HAS_SCLR : INTEGER; SINGLE_OUTPUT : INTEGER := 0) RETURN INTEGER IS
  CONSTANT wide_operand   : INTEGER := max_i(A_WIDTH, B_WIDTH);
  CONSTANT narrow_operand : INTEGER := min_i(A_WIDTH, B_WIDTH);
  CONSTANT chosen_arch    : INTEGER := cmpy_arch(C_FAMILY, C_XDEVICEFAMILY, OPTIMIZE, wide_operand, narrow_operand, SINGLE_OUTPUT);
  -- add two cycles of delay for SO for the sync'ing of Re and Im
  CONSTANT SO_CMPY_SYNC_LATENCY : INTEGER := 2;
  -- the muxing that provides the cross-optimisation in the cmpy components is
  -- registered, so this latency applies to all single-output cases and all families
  CONSTANT SO_INPUT_MUX_REG_LATENCY : INTEGER := 1;
  VARIABLE base_latency : INTEGER;
BEGIN
  IF chosen_arch = ARCH_complex_mult3 THEN
    -- No single-output support: return straight away
    RETURN max_i(MULT_LATENCY(C_FAMILY, C_XDEVICEFAMILY, A_WIDTH+1, B_WIDTH),
                 MULT_LATENCY(C_FAMILY, C_XDEVICEFAMILY, A_WIDTH, B_WIDTH+1))+2*PIPE_OUT+1;
  ELSIF chosen_arch = ARCH_cmpy_18x18 THEN
    base_latency := cmpy18x18_latency(PIPE_IN, PIPE_MID, PIPE_OUT);
  ELSIF chosen_arch = ARCH_cmpy_35x18 THEN
    base_latency := cmpy35x18_latency(C_XDEVICEFAMILY, PIPE_IN, PIPE_MID, PIPE_OUT);
  ELSIF chosen_arch = ARCH_cmpy_52x18 THEN
    base_latency := cmpy52x18_latency(C_XDEVICEFAMILY, PIPE_IN, PIPE_MID, PIPE_OUT);
  ELSIF chosen_arch = ARCH_cmpy_35x35 THEN
    base_latency := cmpy35x35_latency(C_XDEVICEFAMILY, PIPE_IN, PIPE_MID, PIPE_OUT);
  ELSIF chosen_arch = ARCH_complex_mult4 THEN
    base_latency := MULT_LATENCY(C_FAMILY, C_XDEVICEFAMILY, A_WIDTH, B_WIDTH)+PIPE_OUT+1;
  ELSE
    -- Any other architecture value: no single-output support, return straight away
    RETURN cmpy_3_DSP48_latency(C_XDEVICEFAMILY, wide_operand, narrow_operand, P_WIDTH, ROUND, PIPE_IN, PIPE_MID, PIPE_OUT);
  END IF;

  IF SINGLE_OUTPUT = 1 THEN
    base_latency := base_latency + SO_CMPY_SYNC_LATENCY + SO_INPUT_MUX_REG_LATENCY;
  END IF;
  RETURN base_latency;
END cmpy_latency;
END timing_model_pkg;
-- synthesis translate_on
-- synthesis translate_off
-- $Id: timing_pkg.vhd,v 1.1.4.1 2007/03/16 10:45:54 akennedy Exp $
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -