📄 xfft_v4_1_timing_calculator_fft64.vhd
字号:
-- Latency contributed by the rounding stage.
--   HAS_ROUNDING : 1 selects the rounder; every other value means "no rounder".
-- Returns 3 when HAS_ROUNDING = 1, otherwise 0.
FUNCTION so_round_latency(HAS_ROUNDING : INTEGER) RETURN INTEGER IS
  -- Start from the "rounder absent" answer and bump it only when needed
  VARIABLE cycles : INTEGER := 0;
BEGIN
  IF HAS_ROUNDING = 1 THEN
    cycles := 3;
  END IF;
  RETURN cycles;
END so_round_latency;
-- Total latency of the scale-generation path: the sum of the scaler, rounder,
-- ranger and max&hold latencies.
--   HAS_SCALING  : forwarded to so_scale_latency
--   HAS_ROUNDING : forwarded to so_round_latency
-- The ranger and max&hold contributions come from the constants
-- SO_BFP_RANGER_LATENCY and SO_BFP_MAXHOLD_LATENCY declared elsewhere.
FUNCTION so_bfp_scale_gen_latency(HAS_SCALING, HAS_ROUNDING : INTEGER) RETURN INTEGER IS
  VARIABLE total : INTEGER;
BEGIN
  total := so_scale_latency(HAS_SCALING);            -- scaler
  total := total + so_round_latency(HAS_ROUNDING);   -- rounder
  total := total + SO_BFP_RANGER_LATENCY;            -- ranger
  total := total + SO_BFP_MAXHOLD_LATENCY;           -- max & hold
  RETURN total;
END so_bfp_scale_gen_latency;
-- Latency of the processing element, built up from the individual stage
-- latencies: data memory (including its input mux), read-data alignment and
-- hold, complex multiplier, butterfly, scaler and rounder.
--   C_FAMILY, C_XDEVICEFAMILY : device family strings, forwarded to the helpers
--   C_DATA_MEM_TYPE           : forwarded to get_min_mem_delay
--   C_HAS_ROUNDING            : forwarded to so_round_latency
--   C_HAS_SCALING             : forwarded to so_scale_latency
--   C_NFFT_MAX                : forwarded to get_min_mem_delay; C_NFFT_MAX-1 goes to get_twiddle_latency
--   C_OUTPUT_WIDTH            : multiplier input width; the post-multiply width is this + 4
--   C_TWIDDLE_MEM_TYPE        : forwarded to get_twiddle_latency
--   EXPAND_TW_WIDTH           : forwarded to get_twiddle_latency and cmpy_latency
--   C_FAST_BFY                : forwarded to so_butterfly_latency
--   C_FAST_CMPY               : passed to cmpy_latency as 1-C_FAST_CMPY
FUNCTION so_pe_latency(C_FAMILY, C_XDEVICEFAMILY : STRING; C_DATA_MEM_TYPE, C_HAS_ROUNDING, C_HAS_SCALING, C_NFFT_MAX, C_OUTPUT_WIDTH, C_TWIDDLE_MEM_TYPE, EXPAND_TW_WIDTH, C_FAST_BFY, C_FAST_CMPY : INTEGER) RETURN INTEGER IS
  -- Fixed single-cycle stages
  CONSTANT MUX_CYCLES     : INTEGER := 1;
  CONSTANT HOLD_CYCLES    : INTEGER := 1;
  -- Width after complex mult, before butterfly
  CONSTANT CMPY_OUT_WIDTH : INTEGER := C_OUTPUT_WIDTH + 4;
  -- Stage latencies obtained from the helper functions
  CONSTANT TWIDDLE_CYCLES : INTEGER := get_twiddle_latency(C_FAMILY, C_XDEVICEFAMILY, C_TWIDDLE_MEM_TYPE, C_NFFT_MAX-1, EXPAND_TW_WIDTH, true);
  -- The memory figure deliberately includes the cycle introduced by the mux
  -- feeding the memory: all memory inputs are registered to stay in step with
  -- the data coming from that mux, so it behaves like one more cycle of
  -- memory latency.
  CONSTANT MEM_CYCLES     : INTEGER := MUX_CYCLES + get_min_mem_delay(C_FAMILY, C_XDEVICEFAMILY, C_DATA_MEM_TYPE, C_NFFT_MAX);
  CONSTANT CMPY_CYCLES    : INTEGER := cmpy_latency(C_FAMILY, C_XDEVICEFAMILY, 1-C_FAST_CMPY, C_OUTPUT_WIDTH, EXPAND_TW_WIDTH, CMPY_OUT_WIDTH, 0, 1, 1, 1, 0, 1);
  CONSTANT BFLY_CYCLES    : INTEGER := so_butterfly_latency(C_FAST_BFY, 1);
  -- Cycles by which the twiddle latency exceeds memory + hold latency,
  -- clamped so that it never goes negative
  CONSTANT ALIGN_CYCLES   : INTEGER := max_i(TWIDDLE_CYCLES - MEM_CYCLES - HOLD_CYCLES, 0);
  CONSTANT SCALE_CYCLES   : INTEGER := so_scale_latency(C_HAS_SCALING);
  CONSTANT ROUND_CYCLES   : INTEGER := so_round_latency(C_HAS_ROUNDING);
  VARIABLE total          : INTEGER;
BEGIN
  -- Read side: memory, alignment delay, hold register
  total := MEM_CYCLES + ALIGN_CYCLES + HOLD_CYCLES;
  -- Arithmetic: complex multiplier followed by the butterfly
  total := total + CMPY_CYCLES + BFLY_CYCLES;
  -- Output conditioning: scaler then rounder
  total := total + SCALE_CYCLES + ROUND_CYCLES;
  RETURN total;
END so_pe_latency;
-- Returns 2**(NFFT-1) - 1.
--   NFFT : must be at least 1, because the integer "**" operator does not
--          accept a negative exponent.
FUNCTION so_data_reuse(NFFT : INTEGER) RETURN INTEGER IS
BEGIN
  RETURN 2**(NFFT-1) - 1;
END so_data_reuse;
-- Smallest NFFT value supported by the core.
--   ARCH     : architecture selector (0 = deprecated arch A, 1 = arch B radix-4,
--              2 = arch C radix-2, 3 = arch D streaming, 4 = arch E single-output)
--   HAS_NFFT : 1 when NFFT is run-time configurable
--   NFFT_MAX : maximum NFFT value
-- Returns the architecture-dependent minimum when HAS_NFFT = 1, otherwise
-- NFFT_MAX (minimum equals maximum).
-- A deprecated or unknown ARCH raises a severity-error assertion. Because an
-- error-level assertion need not stop elaboration/simulation, the function
-- still returns in that case; it then yields NFFT_MAX rather than an
-- unassigned variable (which would read as INTEGER'LEFT).
FUNCTION get_nfft_min(ARCH, HAS_NFFT, NFFT_MAX : INTEGER) RETURN INTEGER IS
  -- The initial value serves two purposes: it is the answer for a core
  -- without run-time configurable NFFT, and it is the defined fallback for
  -- the error branches of the CASE below.
  VARIABLE result : INTEGER := NFFT_MAX;
BEGIN
  -- If run-time configurable NFFT: minimum depends on architecture
  IF HAS_NFFT = 1 THEN
    CASE ARCH IS
      WHEN 0 => -- deprecated arch A
        ASSERT false REPORT "xfft_v4_1 : deprecated architecture A specified in call to function get_nfft_min" SEVERITY error;
      WHEN 1 => -- arch B, radix-4
        result := 6;
      WHEN 2 => -- arch C, radix-2
        result := 3;
      WHEN 3 => -- arch D, streaming
        result := 3;
      WHEN 4 => -- arch E, single-output
        result := 3;
      WHEN OTHERS => -- unknown architecture
        ASSERT false REPORT "xfft_v4_1 : unknown architecture specified in call to function get_nfft_min" SEVERITY error;
    END CASE;
  END IF;
  RETURN result;
END get_nfft_min;
-- Converts a BOOLEAN to an INTEGER flag: 1 for true, 0 for false.
FUNCTION eval(condition : BOOLEAN)
  RETURN INTEGER IS
  VARIABLE flag : INTEGER := 0;
BEGIN
  IF condition THEN
    flag := 1;
  END IF;
  RETURN flag;
END eval;
-- Returns the larger of two integers (b when they are equal).
FUNCTION max_i(a, b : INTEGER) RETURN INTEGER IS
BEGIN
  IF b >= a THEN
    RETURN b;
  END IF;
  RETURN a;
END max_i;
-- Returns the smaller of two integers (a when they are equal).
FUNCTION min_i(a, b : INTEGER) RETURN INTEGER IS
BEGIN
  IF a <= b THEN
    RETURN a;
  END IF;
  RETURN b;
END min_i;
-- Function form of a conditional selection between two integers.
--   condition : selector
--   if_true   : value returned when condition is true
--   if_false  : value returned when condition is false
FUNCTION when_else(condition : BOOLEAN; if_true, if_false : INTEGER) RETURN INTEGER IS
  -- Assume the false case, override when the condition holds
  VARIABLE selected : INTEGER := if_false;
BEGIN
  IF condition THEN
    selected := if_true;
  END IF;
  RETURN selected;
END when_else;
-- Complex multiplier latency as reported by cmpy_latency, plus one extra
-- cycle on Virtex-4, Virtex-5 and Spartan-3A DSP derived families.
--   C_FAMILY, C_XDEVICEFAMILY : device family strings
--   OPTIMIZE .. C_HAS_SCLR    : forwarded unchanged to cmpy_latency
FUNCTION mult_latency_bc(C_FAMILY, C_XDEVICEFAMILY : STRING; OPTIMIZE, A_WIDTH, B_WIDTH, P_WIDTH, ROUND, PIPE_IN, PIPE_MID, PIPE_OUT, C_HAS_SCLR : INTEGER) RETURN INTEGER IS
  CONSTANT BASE_LATENCY     : INTEGER := cmpy_latency(C_FAMILY, C_XDEVICEFAMILY, OPTIMIZE, A_WIDTH, B_WIDTH, P_WIDTH, ROUND, PIPE_IN, PIPE_MID, PIPE_OUT, C_HAS_SCLR);
  -- A logic fabric register is needed after a V4, V5 or S3ADSP cmpy for speed
  CONSTANT NEEDS_FABRIC_REG : BOOLEAN := derived(c_family, "virtex4") OR derived(c_family, "virtex5") OR derived(c_xdevicefamily, "spartan3adsp");
BEGIN
  IF NEEDS_FABRIC_REG THEN
    RETURN BASE_LATENCY + 1;
  END IF;
  RETURN BASE_LATENCY;
END mult_latency_bc;
-- Output width of the complex multiplier: IO_WIDTH + TW_WIDTH + 1.
--   C_FAMILY    : not used in the calculation
--   DRFLY_WIDTH : not used in the calculation
--   IO_WIDTH    : first width term
--   TW_WIDTH    : second width term
FUNCTION cmult_out_width(C_FAMILY : STRING; DRFLY_WIDTH, IO_WIDTH, TW_WIDTH : INTEGER) RETURN INTEGER IS
  CONSTANT PRODUCT_WIDTH : INTEGER := IO_WIDTH + TW_WIDTH + 1;
BEGIN
  RETURN PRODUCT_WIDTH;
END cmult_out_width;
-- Processing-element latency for the radix-4 dragonfly datapath: the
-- dragonfly latency plus the optional multiplier, scaler and rounder.
--   C_FAST_BFY    : forwarded to radix4_dragonfly_latency
--   CMULT_DELAY   : multiplier latency, counted only when C_HAS_MULTS = 1
--   C_HAS_MULTS   : 1 when the multipliers are present
--   C_HAS_SCALER  : 1 when the scaler is present (adds 1 cycle)
--   C_HAS_ROUNDER : 1 when the rounder is present (adds 3 cycles)
FUNCTION PE_latency_b(C_FAST_BFY, CMULT_DELAY, C_HAS_MULTS, C_HAS_SCALER, C_HAS_ROUNDER : INTEGER) RETURN INTEGER IS
  CONSTANT SCALE_CYCLES : INTEGER := 1; -- delay of the scaler unit
  CONSTANT ROUND_CYCLES : INTEGER := 3; -- delay of the single rounder unit
  -- Per-stage contributions; each stays 0 unless its enable equals 1
  VARIABLE mult_part    : INTEGER := 0;
  VARIABLE scale_part   : INTEGER := 0;
  VARIABLE round_part   : INTEGER := 0;
BEGIN
  IF C_HAS_MULTS = 1 THEN
    mult_part := CMULT_DELAY;
  END IF;
  IF C_HAS_SCALER = 1 THEN
    scale_part := SCALE_CYCLES;
  END IF;
  IF C_HAS_ROUNDER = 1 THEN
    round_part := ROUND_CYCLES;
  END IF;
  RETURN radix4_dragonfly_latency(C_FAST_BFY) + mult_part + scale_part + round_part;
END PE_latency_b;
-- Latency of the radix-4 dragonfly.
--   C_FAST_BFY : multiplier applied to the DSP48 penalty, so 0 gives the
--                base latency of 2 and 1 gives 2 + 3 + 1 = 6.
FUNCTION radix4_dragonfly_latency(C_FAST_BFY : INTEGER) RETURN INTEGER IS
  CONSTANT BASE_CYCLES        : INTEGER := 2;
  -- Building the dragonfly from DSP48*s incurs extra latency - a fabric
  -- register is needed between DSPs as well
  CONSTANT DSP48_EXTRA_CYCLES : INTEGER := 3;
  -- Extra register after the 'fast' dragonfly to ease the route from the
  -- DSP48s to slice logic
  CONSTANT FABRIC_REG_CYCLES  : INTEGER := 1;
  -- Complete penalty charged per unit of C_FAST_BFY
  CONSTANT FAST_PENALTY       : INTEGER := DSP48_EXTRA_CYCLES + FABRIC_REG_CYCLES;
BEGIN
  RETURN BASE_CYCLES + C_FAST_BFY*FAST_PENALTY;
END FUNCTION radix4_dragonfly_latency;
-- Processing-element latency for the radix-2 datapath.
--   C_FAST_BFY    : adds itself twice: once in the butterfly term and once
--                   for the fabric output register
--   CMULT_DELAY   : multiplier latency, always included
--   C_HAS_SCALER  : added directly to the total
--   C_HAS_ROUNDER : adds 3 per unit
-- NOTE(review): the constant 1 in the butterfly term is presumably the base
-- butterfly latency - confirm against the butterfly implementation.
FUNCTION r2_pe_latency(C_FAST_BFY, CMULT_DELAY, C_HAS_SCALER, C_HAS_ROUNDER : INTEGER) RETURN INTEGER IS
  CONSTANT BFLY_CYCLES       : INTEGER := 1 + C_FAST_BFY;
  -- Need an extra register after the 'fast' butterfly to ease the route from
  -- the DSP48s to slice logic
  CONSTANT FABRIC_REG_CYCLES : INTEGER := C_FAST_BFY;
  CONSTANT ROUND_CYCLES      : INTEGER := 3*C_HAS_ROUNDER;
BEGIN
  RETURN CMULT_DELAY + BFLY_CYCLES + FABRIC_REG_CYCLES + C_HAS_SCALER + ROUND_CYCLES;
END r2_pe_latency;
-- Pipelining needed for the address decode of a distributed-RAM data memory.
--   c_family       : device family string, tested with derived(.., "virtex5")
--   data_mem_depth : valid range 1 to 10
--                    (NOTE(review): looks like an address width rather than a
--                    word count - confirm with the callers)
-- Returns 0 while the depth is at or below the direct-decode limit (7 on
-- Virtex-5 derived families, 4 otherwise) and 1 above it, up to 10.
-- Any other depth raises a severity-failure report; should execution
-- continue, the debug value 99 is returned.
FUNCTION calc_dist_mem_addr_latency (c_family : STRING; data_mem_depth : INTEGER) RETURN INTEGER IS
  CONSTANT IS_VIRTEX5        : BOOLEAN := derived(c_family, "virtex5");
  VARIABLE direct_decode_max : INTEGER;
  VARIABLE decode_cycles     : INTEGER := 99; -- debug value
BEGIN -- FUNCTION calc_dist_mem_addr_latency
  -- Largest depth that needs no decode pipelining
  IF IS_VIRTEX5 THEN
    direct_decode_max := 7; -- Assume RAM128X1D is used!
  ELSE
    direct_decode_max := 4;
  END IF;

  IF data_mem_depth >= 1 AND data_mem_depth <= direct_decode_max THEN
    decode_cycles := 0;
  ELSIF data_mem_depth > direct_decode_max AND data_mem_depth <= 10 THEN
    decode_cycles := 1; -- switch to a carry chain implementation
  ELSE
    REPORT "ERROR: xfft_v4_1: Invalid data_mem_depth caught in calc_dist_mem_addr_latency " & INTEGER'image(data_mem_depth)
      SEVERITY failure;
  END IF;
  RETURN decode_cycles;
END FUNCTION calc_dist_mem_addr_latency;
-- Pipelining needed for the output mux tree of a distributed-RAM data memory.
--   c_family       : device family string, tested with derived(.., "virtex5")
--   data_mem_depth : valid range 1 to 10
--                    (NOTE(review): looks like an address width rather than a
--                    word count - confirm with the callers)
-- Returns, on Virtex-5 derived families: 0 for depths 1-7, 1 for 8-9, 2 for 10.
-- Returns, on all other families: 0 for 1-4, 1 for 5-6, 2 for 7-8, 3 for 9-10.
-- Any other depth raises a severity-failure report; should execution
-- continue, the debug value 99 is returned.
FUNCTION calc_dist_mem_mux_latency (c_family : STRING; data_mem_depth : INTEGER) RETURN INTEGER IS
  CONSTANT IS_VIRTEX5 : BOOLEAN := derived(c_family, "virtex5");
  VARIABLE mux_cycles : INTEGER := 99; -- debug value
BEGIN -- FUNCTION calc_dist_mem_mux_latency
  IF data_mem_depth < 1 OR data_mem_depth > 10 THEN
    -- Out of range for every family
    REPORT "ERROR: xfft_v4_1: Invalid data_mem_depth caught in calc_dist_mem_mux_latency " & INTEGER'image(data_mem_depth)
      SEVERITY failure;
  ELSIF IS_VIRTEX5 THEN
    IF data_mem_depth <= 7 THEN
      mux_cycles := 0; -- no muxing
    ELSIF data_mem_depth <= 9 THEN
      mux_cycles := 1; -- 2:1 or 4:1 muxing
    ELSE
      mux_cycles := 2; -- 8:1 muxing
    END IF;
  ELSE
    IF data_mem_depth <= 4 THEN
      mux_cycles := 0; -- no muxing
    ELSIF data_mem_depth <= 6 THEN
      mux_cycles := 1; -- 2:1 or 4:1 muxing
    ELSIF data_mem_depth <= 8 THEN
      mux_cycles := 2; -- 8:1 or 16:1 muxing
    ELSE
      mux_cycles := 3; -- 32:1 or 64:1 muxing
    END IF;
  END IF;
  RETURN mux_cycles;
END FUNCTION calc_dist_mem_mux_latency;
FUNCTION get_min_mem_delay(c_family, c_xdevicefamily : STRING; data_mem_type, data_mem_depth : INTEGER) RETURN INTEGER IS
VARIABLE mem_latency : INTEGER := 1;
CONSTANT DISTRIBUTED_MEMORY : INTEGER := 0;
CONSTANT BLOCK_MEMORY : INTEGER := 1;
BEGIN
IF data_mem_type = DISTRIBUTED_MEMORY THEN
-- need one register stage after the RAM elements at least
mem_latency := 1;
-- work out how much pipelining is required for the address decoding for the write-enables
mem_latency := mem_latency + calc_dist_mem_addr_latency(c_family, data_mem_depth);
-- work out how much pipelining is required for the mux tree, if required
mem_latency := mem_latency + calc_dist_mem_mux_latency(c_family, data_mem_depth);
ELSIF data_mem_type = BLOCK_MEMORY THEN
IF (derived(c_family, "virtex4") OR derived(c_family, "virtex5") OR derived(c_xdevicefamily, "spartan3adsp")) THEN
-- need address register, output register and fabric speed-up register
mem_latency := 3;
ELSE
-- need address register and output register
mem_latency := 2;
END IF;
END IF;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -