📄 path_delay.c
字号:
#include <stdio.h>#include "util.h"#include "vpr_types.h"#include "globals.h"#include "path_delay.h"#include "path_delay2.h"#include "net_delay.h"#include "vpr_utils.h"#include <assert.h>/****************** Timing graph Structure ************************************ * * * In the timing graph I create, input pads and constant generators have no * * inputs; everything else has inputs. Every input pad and output pad is * * represented by two tnodes -- an input pin and an output pin. For an input * * pad the input pin comes from off chip and has no fanin, while the output * * pin drives outpads and/or CLBs. For output pads, the input node is driven * * by a CLB or input pad, and the output node goes off chip and has no * * fanout (out-edges). I need two nodes to respresent things like pads * * because I mark all delay on tedges, not on tnodes. * * * * Every used (not OPEN) CLB pin becomes a timing node. As well, every used * * subblock pin within a CLB also becomes a timing node. Unused (OPEN) pins * * don't create any timing nodes. If a subblock is used in combinational mode * * (i.e. its clock pin is open), I just hook the subblock input tnodes to the * * subblock output tnode. If the subblock is used in sequential mode, I * * create two extra tnodes. One is just the subblock clock pin, which is * * connected to the subblock output. This means that FFs don't generate * * their output until their clock arrives. For global clocks coming from an * * input pad, the delay of the clock is 0, so the FFs generate their outputs * * at T = 0, as usual. For locally-generated or gated clocks, however, the * * clock will arrive later, and the FF output will be generated later. This * * lets me properly model things like ripple counters and gated clocks. The * * other extra node is the FF storage node (i.e. a sink), which connects to * * the subblock inputs and has no fanout. * * * * One other subblock that needs special attention is a constant generator. * * This has no used inputs, but its output is used. I create an extra tnode, * * a dummy input, in addition to the output pin tnode. The dummy tnode has * * no fanin. Since constant generators really generate their outputs at T = * * -infinity, I set the delay from the input tnode to the output to a large- * * magnitude negative number. This guarantees every block that needs the * * output of a constant generator sees it available very early. * * * * For this routine to work properly, subblocks whose outputs are unused must * * be completely empty -- all their input pins and their clock pin must be * * OPEN. Check_netlist checks the input netlist to guarantee this -- don't * * disable that check. * * * * NB: The discussion below is only relevant for circuits with multiple * * clocks. For circuits with a single clock, everything I do is exactly * * correct. * * * * A note about how I handle FFs: By hooking the clock pin up to the FF * * output, I properly model the time at which the FF generates its output. * * I don't do a completely rigorous job of modelling required arrival time at * * the FF input, however. I assume every FF and outpad needs its input at * * T = 0, which is when the earliest clock arrives. This can be conservative * * -- a fuller analysis would be to do a fast path analysis of the clock * * feeding each FF and subtract its earliest arrival time from the delay of * * the D signal to the FF input. This is too much work, so I'm not doing it. * * Alternatively, when one has N clocks, it might be better to just do N * * separate timing analyses, with only signals from FFs clocked on clock i * * being propagated forward on analysis i, and only FFs clocked on i being * * considered as sinks. This gives all the critical paths within clock * * domains, but ignores interactions. Instead, I assume all the clocks are * * more-or-less synchronized (they might be gated or locally-generated, but * * they all have the same frequency) and explore all interactions. Tough to * * say what's the better way. Since multiple clocks aren't important for my * * work, it's not worth bothering about much. * * * ******************************************************************************//***************** Variables local to this module ***************************//* Variables for "chunking" the tedge memory. If the head pointer is NULL, * * no timing graph exists now. */static struct s_linked_vptr *tedge_ch_list_head = NULL;static int tedge_ch_bytes_avail = 0;static char *tedge_ch_next_avail = NULL;/***************** Subroutines local to this module *************************/static int alloc_and_load_pin_mappings (int ***block_pin_to_tnode_ptr, int ****sblk_pin_to_tnode_ptr, t_subblock_data subblock_data, int **num_uses_of_sblk_opin); static void free_pin_mappings (int **block_pin_to_tnode, int ***sblk_pin_to_tnode, int *num_subblocks_per_block);static void alloc_and_load_fanout_counts (int ***num_uses_of_clb_ipin_ptr, int ***num_uses_of_sblk_opin_ptr, t_subblock_data subblock_data); static void free_fanout_counts (int **num_uses_of_clb_ipin, int **num_uses_of_sblk_opin); static float **alloc_net_slack (void); static void compute_net_slacks (float **net_slack);static void alloc_and_load_tnodes_and_net_mapping (int **num_uses_of_clb_ipin, int **num_uses_of_sblk_opin, int **block_pin_to_tnode, int *** sblk_pin_to_tnode, t_subblock_data subblock_data, t_timing_inf timing_inf); static void build_clb_tnodes (int iblk, int *n_uses_of_clb_ipin, int **block_pin_to_tnode, int **sub_pin_to_tnode, int subblock_lut_size, int num_subs, t_subblock *sub_inf, float T_clb_ipin_to_sblk_ipin, int *next_clb_ipin_edge); static void build_subblock_tnodes (int *n_uses_of_sblk_opin, int *blk_pin_to_tnode, int **sub_pin_to_tnode, int subblock_lut_size, int num_subs, t_subblock *sub_inf, float T_sblk_opin_to_sblk_ipin, float T_sblk_opin_to_clb_opin, t_T_subblock *T_subblock, int *next_sblk_opin_edge, int iblk); static void build_ipad_tnodes (int iblk, int **block_pin_to_tnode, float T_ipad, int *num_subblocks_per_block, t_subblock **subblock_inf);static boolean is_global_clock (int iblk, int *num_subblocks_per_block, t_subblock **subblock_inf);static void build_opad_tnodes (int *blk_pin_to_tnode, float T_opad, int iblk);static void build_block_output_tnode (int inode, int iblk, int ipin, int **block_pin_to_tnode); /********************* Subroutine definitions *******************************/void free_subblock_data (t_subblock_data *subblock_data_ptr) {/* Frees all the subblock data structures. */ free_chunk_memory (subblock_data_ptr->chunk_head_ptr); free (subblock_data_ptr->num_subblocks_per_block); free (subblock_data_ptr->subblock_inf);/* Mark as freed. */ subblock_data_ptr->num_subblocks_per_block = NULL; subblock_data_ptr->subblock_inf = NULL; subblock_data_ptr->chunk_head_ptr = NULL;}float **alloc_and_load_timing_graph (t_timing_inf timing_inf, t_subblock_data subblock_data) {/* This routine builds the graph used for timing analysis. Every clb or * * subblock pin is a timing node (tnode). The connectivity between pins is * * represented by timing edges (tedges). All delay is marked on edges, not * * on nodes. This routine returns an array that will store slack values: * * net_slack[0..num_nets-1][1..num_pins-1]. *//* The two arrays below are valid only for CLBs, not pads. */ int **num_uses_of_clb_ipin; /* [0..num_blocks-1][0..pins_per_clb-1] */ int **num_uses_of_sblk_opin; /* [0..num_blocks-1][0..num_subs_per[iblk]-1] *//* Array for mapping from a pin on a block to a tnode index. For pads, only * * the first two pin locations are used (input to pad is first, output of * * pad is second). For clbs, all OPEN pins on the clb have their mapping * * set to OPEN so I won't use it by mistake. */ int **block_pin_to_tnode; /* [0..num_blocks-1][0..pins_per_clb-1] *//* Array for mapping from a pin on a subblock to a tnode index. Unused * * or nonexistent subblock pins have their mapping set to OPEN. * * [0..num_blocks-1][0..num_subblocks_per_block-1][0..subblock_lut_size+1] */ int ***sblk_pin_to_tnode; int num_sinks; float **net_slack; /* [0..num_nets-1][1..num_pins-1]. *//************* End of variable declarations ********************************/ if (tedge_ch_list_head != NULL) { printf ("Error in alloc_and_load_timing_graph:\n" "\tAn old timing graph still exists.\n"); exit (1); }/* If either of the checks below ever fail, change the definition of * * tnode_descript to use ints instead of shorts for isubblk or ipin. */ if (subblock_data.max_subblocks_per_block > MAX_SHORT) { printf ("Error in alloc_and_load_timing_graph: max_subblocks_per_block" "\tis %d -- will cause short overflow in tnode_descript.\n", subblock_data.max_subblocks_per_block); exit (1); } if (pins_per_clb > MAX_SHORT) { printf ("Error in alloc_and_load_timing_graph: pins_per_clb is %d." "\tWill cause short overflow in tnode_descript.\n", pins_per_clb); exit (1); } alloc_and_load_fanout_counts (&num_uses_of_clb_ipin, &num_uses_of_sblk_opin, subblock_data); num_tnodes = alloc_and_load_pin_mappings (&block_pin_to_tnode, &sblk_pin_to_tnode, subblock_data, num_uses_of_sblk_opin); alloc_and_load_tnodes_and_net_mapping (num_uses_of_clb_ipin, num_uses_of_sblk_opin, block_pin_to_tnode, sblk_pin_to_tnode, subblock_data, timing_inf); num_sinks = alloc_and_load_timing_graph_levels (); check_timing_graph (subblock_data.num_const_gen, subblock_data.num_ff, num_sinks); free_fanout_counts (num_uses_of_clb_ipin, num_uses_of_sblk_opin); free_pin_mappings (block_pin_to_tnode, sblk_pin_to_tnode, subblock_data.num_subblocks_per_block); net_slack = alloc_net_slack (); return (net_slack);}static float **alloc_net_slack (void) {/* Allocates the net_slack structure. Chunk allocated to save space. */ float **net_slack; /* [0..num_nets-1][1..num_pins-1] */ float *tmp_ptr; int inet; net_slack = (float **) my_malloc (num_nets * sizeof (float *)); for (inet=0;inet<num_nets;inet++) { tmp_ptr = (float *) my_chunk_malloc ((net[inet].num_pins - 1) * sizeof (float), &tedge_ch_list_head, &tedge_ch_bytes_avail, &tedge_ch_next_avail); net_slack[inet] = tmp_ptr - 1; /* [1..num_pins-1] */ } return (net_slack);}static int alloc_and_load_pin_mappings (int ***block_pin_to_tnode_ptr, int ****sblk_pin_to_tnode_ptr, t_subblock_data subblock_data, int **num_uses_of_sblk_opin) {/* Allocates and loads the block_pin_to_tnode and sblk_pin_to_tnode * * structures, and computes num_tnodes. */ int iblk, isub, ipin, num_subblocks, out_pin, clk_pin; int curr_tnode; int ***sblk_pin_to_tnode, **block_pin_to_tnode; int subblock_lut_size; int *num_subblocks_per_block; t_subblock **subblock_inf; boolean has_inputs; subblock_lut_size = subblock_data.subblock_lut_size; num_subblocks_per_block = subblock_data.num_subblocks_per_block; subblock_inf = subblock_data.subblock_inf; block_pin_to_tnode = (int **) alloc_matrix (0, num_blocks - 1, 0, pins_per_clb - 1, sizeof (int)); sblk_pin_to_tnode = (int ***) my_malloc (num_blocks * sizeof (int **)); curr_tnode = 0; out_pin = subblock_lut_size; clk_pin = subblock_lut_size + 1; for (iblk=0;iblk<num_blocks;iblk++) { if (block[iblk].type == CLB) { /* First do the block mapping */ for (ipin=0;ipin<pins_per_clb;ipin++) { if (block[iblk].nets[ipin] == OPEN) { block_pin_to_tnode[iblk][ipin] = OPEN; } else { block_pin_to_tnode[iblk][ipin] = curr_tnode; curr_tnode++; } } /* Now do the subblock mapping. */ num_subblocks = num_subblocks_per_block[iblk]; sblk_pin_to_tnode[iblk] = (int **) alloc_matrix (0, num_subblocks - 1, 0, subblock_lut_size + 1, sizeof (int)); for (isub=0;isub<num_subblocks;isub++) { /* Pin ordering: inputs, output, clock. */ has_inputs = FALSE; for (ipin=0;ipin<subblock_lut_size;ipin++) { if (subblock_inf[iblk][isub].inputs[ipin] != OPEN) { has_inputs = TRUE; sblk_pin_to_tnode[iblk][isub][ipin] = curr_tnode; curr_tnode++; } else { sblk_pin_to_tnode[iblk][isub][ipin] = OPEN; } } /* subblock output */ /* If the subblock opin is unused the subblock is empty and we * * shoudn't count it. */ if (num_uses_of_sblk_opin[iblk][isub] != 0) { sblk_pin_to_tnode[iblk][isub][out_pin] = curr_tnode; if (has_inputs) /* Regular sblk */ curr_tnode++; else /* Constant generator. Make room for dummy input */ curr_tnode += 2; } else { sblk_pin_to_tnode[iblk][isub][out_pin] = OPEN; } if (subblock_inf[iblk][isub].clock != OPEN) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -