📄 xfft_v4_1_timing_calculator_fft64.vhd
字号:
RETURN mem_latency;
END get_min_mem_delay;
FUNCTION get_mem_delay(c_family, c_xdevicefamily : STRING; data_mem_type, data_mem_depth, sin_cos_delay, tw_addr_gen_delay, rw_addr_gen_delay, mux_delay, switch_delay : INTEGER) RETURN INTEGER IS
  -- get_mem_delay - returns the data memory delay to use.
  -- Starts from the minimum delay reported by get_min_mem_delay and, when
  --   tw_addr_gen_delay + sin_cos_delay
  -- exceeds
  --   rw_addr_gen_delay + mux_delay + switch_delay + <minimum delay>,
  -- returns the larger value that makes the two sums equal, so the pipeline
  -- stays balanced against the other components.

  -- Smallest delay the memory itself can have for this family/type/depth
  CONSTANT MIN_DELAY       : INTEGER := get_min_mem_delay(c_family, c_xdevicefamily, data_mem_type, data_mem_depth);
  -- Sum of the two twiddle-side delays
  CONSTANT TWIDDLE_SUM     : INTEGER := tw_addr_gen_delay + sin_cos_delay;
  -- Sum of the remaining delays, excluding the memory itself
  CONSTANT NON_MEM_SUM     : INTEGER := rw_addr_gen_delay + mux_delay + switch_delay;
BEGIN
  -- The minimum memory delay is already long enough: nothing to add
  IF TWIDDLE_SUM <= (NON_MEM_SUM + MIN_DELAY) THEN
    RETURN MIN_DELAY;
  END IF;
  -- Otherwise stretch the memory delay to make up the difference
  RETURN TWIDDLE_SUM - NON_MEM_SUM;
END get_mem_delay;
FUNCTION r22_pe_width(scaling, nfft_max, input_bits : INTEGER)
  RETURN r22_const_array IS
  -- r22_pe_width - returns an array of bit widths, one entry per index
  -- 0 .. (nfft_max+1)/2.  Entries beyond that index are left unassigned.
  --   scaling = 1 : every entry equals input_bits.
  --   otherwise   : entry 0 is input_bits, entry 1 is input_bits+3, each
  --                 following entry is 2 more than its predecessor, and the
  --                 final entry is its predecessor + 1 + eval(...).
  -- NOTE(review): eval presumably converts a BOOLEAN to 0/1 - confirm against
  -- its declaration.
  CONSTANT LAST_ENTRY : INTEGER := (nfft_max+1)/2;
  VARIABLE widths     : r22_const_array;
BEGIN
  IF scaling /= 1 THEN
    -- Growing widths: first two entries are fixed offsets from the input
    widths(0) := input_bits;
    widths(1) := input_bits + 3;
    -- Middle entries each add 2 bits (null range when LAST_ENTRY <= 2)
    FOR pe IN 2 TO LAST_ENTRY-1 LOOP
      widths(pe) := widths(pe-1) + 2;
    END LOOP;
    -- Final entry: 1 extra bit, plus one more when nfft_max/2 equals LAST_ENTRY
    widths(LAST_ENTRY) := widths(LAST_ENTRY-1) + 1 + eval(nfft_max/2 = LAST_ENTRY);
  ELSE
    -- Constant width everywhere, including the final entry
    FOR pe IN 0 TO LAST_ENTRY LOOP
      widths(pe) := input_bits;
    END LOOP;
  END IF;
  RETURN widths;
END r22_pe_width;
FUNCTION r22_bf1_delay(OPT_DSP48s, pe_id, HAS_NFFT, NFFT_MAX_EVEN : INTEGER) RETURN INTEGER IS
  -- r22_bf1_delay - returns the latency of the first butterfly (BF1) of a PE.
  -- PE_ID is counted from the output towards the input!
  --   general BF1 (PE_ID > 2, or PE_ID = 2 with NFFT_MAX_EVEN = 1):
  --       BF1_MEM_LATENCY + 1 + OPT_DSP48s
  --   special BF1 (all other PEs):
  --       3 + HAS_NFFT + OPT_DSP48s + 2**BF_ID
  CONSTANT BF_ID           : INTEGER := 2*PE_ID+NFFT_MAX_EVEN;
  CONSTANT BF1_MEM_LATENCY : INTEGER := 3;  -- fixed memory latency in general BF1
  VARIABLE is_general      : BOOLEAN;
  VARIABLE latency         : INTEGER;
BEGIN
  is_general := (PE_ID > 2) OR ((PE_ID = 2) AND (NFFT_MAX_EVEN = 1));
  IF NOT is_general THEN
    -- special BF1: the 2**BF_ID term is only evaluated on this path
    latency := 3+HAS_NFFT+OPT_DSP48s+2**(BF_ID);
  ELSE
    -- general BF1
    latency := BF1_MEM_LATENCY+1+OPT_DSP48s;
  END IF;
  RETURN latency;
END r22_bf1_delay;
FUNCTION r22_bf2_delay(OPT_DSP48s, pe_id, HAS_NFFT, NFFT_MAX_EVEN : INTEGER) RETURN INTEGER IS
  -- r22_bf2_delay - returns the latency of the second butterfly (BF2) of a PE.
  -- PE_ID is counted from the output towards the input!
  --   general BF2 (PE_ID > 2): BF2_MEM_LATENCY + 1 + OPT_DSP48s
  --   special BF2 (PE_ID <= 2): 5 + HAS_NFFT + 2**BF_ID
  -- BF_ID is 2*PE_ID+NFFT_MAX_EVEN-1, clamped at 0 through max_i.
  CONSTANT BF_ID           : INTEGER := max_i(2*PE_ID+NFFT_MAX_EVEN-1, 0);
  CONSTANT BF2_MEM_LATENCY : INTEGER := 3;  -- fixed memory latency in general BF2
  VARIABLE latency         : INTEGER;
BEGIN
  IF (PE_ID <= 2) THEN
    -- special BF2: note that OPT_DSP48s does not contribute on this path
    latency := 5+HAS_NFFT+2**(BF_ID);
  ELSE
    -- general BF2
    latency := BF2_MEM_LATENCY+1+OPT_DSP48s;
  END IF;
  RETURN latency;
END r22_bf2_delay;
FUNCTION r22_mem_type(nfft_max, bram_stage : INTEGER)
  RETURN r22_const_array IS
  -- r22_mem_type - returns an array with one memory-type code per PE
  -- (indices 0 .. (nfft_max+1)/2 - 1; other entries are left unassigned).
  -- bram_stage is handed out from index 0 upwards, at most two per PE:
  --   2 : two stages were taken by this PE
  --   1 : one (the last remaining) stage was taken by this PE
  --   0 : no stages were left for this PE
  CONSTANT NUMBER_OF_PEs : INTEGER := (nfft_max+1)/2;
  VARIABLE stages_left   : INTEGER := bram_stage;
  VARIABLE taken         : INTEGER;
  VARIABLE kinds         : r22_const_array;
BEGIN
  FOR pe IN 0 TO NUMBER_OF_PEs-1 LOOP
    -- Decide how many of the remaining stages this PE consumes
    IF (stages_left >= 2) THEN
      taken := 2;
    ELSIF (stages_left = 1) THEN
      taken := 1;
    ELSE
      taken := 0;  -- nothing left (also covers a non-positive bram_stage)
    END IF;
    kinds(pe)   := taken;
    stages_left := stages_left - taken;
  END LOOP;
  RETURN kinds;
END r22_mem_type;
FUNCTION r22_pe_latency(c_family, C_XDEVICEFAMILY : STRING;
                        C_FAST_BFY, C_FAST_CMPY, c_fast_sincos, has_nfft, nfft_max, tw_bits, has_scaling, has_rounding, has_mux : INTEGER;
                        width_of_pe, memory_type : r22_const_array) RETURN r22_const_array IS
  -- r22_pe_latency - returns an array with the total latency of each PE
  -- (indices 0 .. (nfft_max+1)/2 - 1; other entries are left unassigned).
  -- Per PE the latency is accumulated from:
  --   * BF1 delay (always)
  --   * BF2 delay (every PE but the last; the last one only when power2 = 1)
  --   * 1 cycle for bypass muxes (HAS_NFFT = 1, HAS_MUX = 1, not one of the last two PEs)
  --   * twiddle synchronisation padding and complex multiplier delay (every PE but the last)
  --   * 1 cycle for the scaler / pipeline registers, 2 more cycles for the rounder
  CONSTANT NUMBER_OF_PEs : INTEGER := (nfft_max+1)/2;
  CONSTANT LAST_PE       : INTEGER := NUMBER_OF_PEs-1;
  CONSTANT power2        : INTEGER := (nfft_max+2)/2-NUMBER_OF_PEs;  -- NFFT_MAX EVEN
  VARIABLE out_pe_id     : INTEGER;  -- PE number counted from the output side
  VARIABLE tw_gen        : INTEGER;
  VARIABLE tw_path       : INTEGER;
  VARIABLE bf1           : INTEGER;
  VARIABLE bf2           : INTEGER;
  VARIABLE sync_gap      : INTEGER;
  VARIABLE cmpy          : INTEGER;
  VARIABLE not_last      : BOOLEAN;
  VARIABLE rounds        : BOOLEAN;
  VARIABLE total         : INTEGER;
  VARIABLE latency       : r22_const_array;
BEGIN
  FOR pe IN 0 TO LAST_PE LOOP
    out_pe_id := LAST_PE-pe;
    not_last  := (pe < LAST_PE);

    -- Component delays; all of them are evaluated for every PE
    tw_gen   := get_twiddle_latency(c_family, c_xdevicefamily, eval(memory_type(pe) = 2), nfft_max-2*pe-1, tw_bits);
    tw_path  := 2 + tw_gen;
    bf1      := r22_bf1_delay(C_FAST_BFY, out_pe_id, HAS_NFFT, power2);
    bf2      := r22_bf2_delay(C_FAST_BFY, out_pe_id, HAS_NFFT, power2);
    sync_gap := tw_path + has_nfft - bf1 - bf2;
    cmpy     := cmpy_latency(C_FAMILY, C_XDEVICEFAMILY, 1-C_FAST_CMPY, width_of_pe(pe)+2, tw_bits, width_of_pe(pe)+tw_bits+3, 0, 1, 1, 1, 0);
    rounds   := (has_rounding = 1) AND (not_last OR (has_scaling = 1));

    total := bf1;

    -- PE has BF2
    IF (not_last OR (power2 = 1)) THEN
      total := total + bf2;
    END IF;

    -- PE has bypass muxes.
    -- Though physically bypass muxes are before PEs 1 to NUMBER_OF_PEs-2,
    -- their delay is bundled with the previous PE
    IF (pe < LAST_PE-1) AND (HAS_NFFT = 1) AND (HAS_MUX = 1) THEN
      total := total + 1;
    END IF;

    -- PE has twiddle mult: pad only when the twiddle arrives later than the data
    IF not_last THEN
      total := total + max_i(sync_gap, 0);
      total := total + cmpy;
    END IF;

    -- Scaler (or pipeline registers before the mult) costs 1 cycle;
    -- the rounder costs 2 more on top of that
    IF rounds THEN
      total := total + 3;
    ELSIF (has_scaling = 1) THEN
      total := total + 1;
    END IF;

    latency(pe) := total;
  END LOOP;
  RETURN latency;
END r22_pe_latency;
FUNCTION get_twiddle_arch(MEM_TYPE, THETA_WIDTH, TWIDDLE_WIDTH : INTEGER; SINGLE_OUTPUT : BOOLEAN := false) RETURN T_TWGEN_ARCH IS
  -- get_twiddle_arch - selects the architecture used by twiddle_gen.
  -- For the meanings of return values, see the declaration of T_TWGEN_ARCH.
  --   MEM_TYPE = DIST_RAM : TW_DISTMEM_SO when SINGLE_OUTPUT, else TW_DISTMEM
  --   otherwise           : TW_BRAM_HALF_SINCOS when the THETA_WIDTH /
  --                         TWIDDLE_WIDTH pair is within the limits below,
  --                         else TW_BRAM_QUARTER_SIN
  VARIABLE half_wave_ok : BOOLEAN;
  VARIABLE chosen       : T_TWGEN_ARCH;
BEGIN
  IF MEM_TYPE /= DIST_RAM THEN  -- block memory
    -- Widest TWIDDLE_WIDTH accepted for the half-wave table at each THETA_WIDTH
    CASE THETA_WIDTH IS
      WHEN 9      => half_wave_ok := (TWIDDLE_WIDTH <= 18);
      WHEN 10     => half_wave_ok := (TWIDDLE_WIDTH <= 9);
      WHEN 11     => half_wave_ok := (TWIDDLE_WIDTH <= 4);
      WHEN OTHERS => half_wave_ok := (THETA_WIDTH <= 8);  -- any twiddle width up to 8 address bits
    END CASE;
    IF half_wave_ok THEN
      chosen := TW_BRAM_HALF_SINCOS;
    ELSE
      chosen := TW_BRAM_QUARTER_SIN;
    END IF;
  ELSIF SINGLE_OUTPUT THEN      -- distributed memory, single output
    chosen := TW_DISTMEM_SO;
  ELSE                          -- distributed memory
    chosen := TW_DISTMEM;
  END IF;
  RETURN chosen;
END get_twiddle_arch;
FUNCTION get_twiddle_latency(C_FAMILY, C_XDEVICEFAMILY : STRING; MEM_TYPE, THETA_WIDTH, TWIDDLE_WIDTH : INTEGER; SINGLE_OUTPUT : BOOLEAN := false) RETURN INTEGER IS
  -- get_twiddle_latency - returns the latency, in cycles, of the twiddle
  -- generator architecture that get_twiddle_arch selects for these parameters.
  --   TW_BRAM_HALF_SINCOS : 3 on virtex4 / virtex5 / spartan3adsp, else 2
  --   TW_BRAM_QUARTER_SIN : 4 on virtex4 / virtex5 / spartan3adsp, else 3
  --   TW_DISTMEM          : 2
  --   TW_DISTMEM_SO       : 5
  -- Any other architecture value raises a failure-severity assertion.
  CONSTANT ARCH : T_TWGEN_ARCH := get_twiddle_arch(MEM_TYPE, THETA_WIDTH, TWIDDLE_WIDTH, SINGLE_OUTPUT);
  VARIABLE latency : INTEGER;
BEGIN
  CASE ARCH IS
    WHEN TW_BRAM_HALF_SINCOS => -- block memory architecture - 1/2 wave
      IF derived(C_FAMILY, "virtex4") OR derived(C_FAMILY, "virtex5") OR derived(C_XDEVICEFAMILY, "spartan3adsp") THEN
        latency := 3;
      ELSE
        latency := 2;
      END IF;
    WHEN TW_BRAM_QUARTER_SIN => -- block memory architecture - 1/4 wave
      -- Add 1 extra cycle of latency for the registering of the sin_addr and
      -- cos_addr signals in twgen_quarter_sin.vhd
      IF derived(C_FAMILY, "virtex4") OR derived(C_FAMILY, "virtex5") OR derived(C_XDEVICEFAMILY, "spartan3adsp") THEN
        latency := 4;
      ELSE
        latency := 3;
      END IF;
    WHEN TW_DISTMEM => -- distributed memory architecture
      latency := 2;
    WHEN TW_DISTMEM_SO => -- distributed memory architecture, single output version
      latency := 5; -- Added one cycle for pipelining of sin_addr and cos_addr with c_addsubs
    WHEN OTHERS => -- unknown
      -- An assertion only reports when its condition is FALSE, so the condition
      -- must be false for this failure to be raised ("ASSERT true" never fires
      -- and would let an unassigned latency be returned silently).
      ASSERT false REPORT "Unknown twiddle generator architecture in function get_twiddle_latency" SEVERITY failure;
  END CASE;
  RETURN latency;
END get_twiddle_latency;
FUNCTION cascade_mult35x35(MODE, A_WIDTH, B_WIDTH, C_WIDTH, ROUND_BITS : INTEGER) RETURN BOOLEAN IS
  -- cascade_mult35x35 - returns true when the given MODE and operand widths
  -- satisfy the limits below; any MODE outside 0..5 yields false.
  --   MODE 0    : always true
  --   MODE 1    : ROUND_BITS < 46
  --   MODE 2    : A_WIDTH+B_WIDTH < 49
  --   MODE 3    : A_WIDTH+B_WIDTH < 49 and ROUND_BITS < 46
  --   MODE 4, 5 : A_WIDTH+B_WIDTH < 49 and C_WIDTH < 49
  VARIABLE fits : BOOLEAN;
BEGIN
  CASE MODE IS
    WHEN 0 =>
      fits := true;
    WHEN 1 =>
      fits := (ROUND_BITS < 46);
    WHEN 2 =>
      fits := (A_WIDTH+B_WIDTH < 49);
    WHEN 3 =>
      fits := (A_WIDTH+B_WIDTH < 49) AND (ROUND_BITS < 46);
    WHEN 4 | 5 =>
      fits := (A_WIDTH+B_WIDTH < 49) AND (C_WIDTH < 49);
    WHEN OTHERS =>
      fits := false;
  END CASE;
  RETURN fits;
END cascade_mult35x35;
FUNCTION mult_gen_mults(A_WIDTH, B_WIDTH : INTEGER) RETURN INTEGER IS
  -- mult_gen_mults - returns (1+(A_WIDTH-2)/17) * (1+(B_WIDTH-2)/17),
  -- using integer division.
  -- NOTE(review): presumably the number of multiplier primitives needed for an
  -- A_WIDTH x B_WIDTH product, each operand being cut into 17-bit pieces -
  -- confirm against the multiplier generator documentation.
  CONSTANT A_PIECES : INTEGER := 1+(A_WIDTH-2)/17;
  CONSTANT B_PIECES : INTEGER := 1+(B_WIDTH-2)/17;
BEGIN
  RETURN A_PIECES*B_PIECES;
END mult_gen_mults;
FUNCTION cmpy_nov4_3_mults(A_WIDTH, B_WIDTH : INTEGER) RETURN INTEGER IS
  -- cmpy_nov4_3_mults - returns the sum of mult_gen_mults over three products:
  -- two of size (A_WIDTH+1) x B_WIDTH and one of size A_WIDTH x (B_WIDTH+1).
  CONSTANT WIDE_A_PAIR : INTEGER := 2*mult_gen_mults(A_WIDTH+1, B_WIDTH);
  CONSTANT WIDE_B_ONE  : INTEGER := mult_gen_mults(A_WIDTH, B_WIDTH+1);
BEGIN
  RETURN WIDE_A_PAIR+WIDE_B_ONE;
END cmpy_nov4_3_mults;
FUNCTION cmpy_nov4_4_mults(A_WIDTH, B_WIDTH : INTEGER) RETURN INTEGER IS
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -