📄 path_delay.c
字号:
#include <stdio.h>
#include "util.h"
#include "vpr_types.h"
#include "globals.h"
#include "path_delay.h"
#include "path_delay2.h"
#include "net_delay.h"
#include "vpr_utils.h"
#include <assert.h>

/* TODO: Add option for registered inputs and outputs once this works;
 * currently, outputs only. */

/****************** Timing graph structure ***********************************
 *
 * In the timing graph created here, input pads and constant generators have
 * no inputs; everything else has inputs.  Every input pad and output pad is
 * represented by two tnodes -- an input pin and an output pin.  For an input
 * pad the input pin comes from off chip and has no fanin, while the output
 * pin drives outpads and/or CLBs.  For output pads, the input node is driven
 * by a FB or input pad, and the output node goes off chip and has no fanout
 * (out-edges).  Two nodes are needed to represent things like pads because
 * all delay is marked on tedges, not on tnodes.
 *
 * Every used (not OPEN) FB pin becomes a timing node.  As well, every used
 * subblock pin within a FB also becomes a timing node.  Unused (OPEN) pins
 * don't create any timing nodes.  If a subblock is used in combinational
 * mode (i.e. its clock pin is open), the subblock input tnodes are hooked
 * directly to the subblock output tnode.  If the subblock is used in
 * sequential mode, two extra tnodes are created.  One is just the subblock
 * clock pin, which is connected to the subblock output.  This means that FFs
 * don't generate their output until their clock arrives.  For global clocks
 * coming from an input pad, the delay of the clock is 0, so the FFs generate
 * their outputs at T = 0, as usual.  For locally-generated or gated clocks,
 * however, the clock will arrive later, and the FF output will be generated
 * later.  This properly models things like ripple counters and gated clocks.
 * The other extra node is the FF storage node (i.e. a sink), which connects
 * to the subblock inputs and has no fanout.
 *
 * One other subblock that needs special attention is a constant generator.
 * This has no used inputs, but its output is used.  An extra tnode, a dummy
 * input, is created in addition to the output pin tnode.  The dummy tnode
 * has no fanin.  Since constant generators really generate their outputs at
 * T = -infinity, the delay from the input tnode to the output is set to a
 * large-magnitude negative number.  This guarantees every block that needs
 * the output of a constant generator sees it available very early.
 *
 * For this routine to work properly, subblocks whose outputs are unused must
 * be completely empty -- all their input pins and their clock pin must be
 * OPEN.  Check_netlist checks the input netlist to guarantee this -- don't
 * disable that check.
 *
 * NB: The discussion below is only relevant for circuits with multiple
 * clocks.  For circuits with a single clock, everything done here is exactly
 * correct.
 *
 * A note about how FFs are handled: by hooking the clock pin up to the FF
 * output, the time at which the FF generates its output is properly
 * modelled.  The required arrival time at the FF input is not modelled
 * completely rigorously, however: every FF and outpad is assumed to need its
 * input at T = 0, which is when the earliest clock arrives.  This can be
 * conservative -- a fuller analysis would do a fast path analysis of the
 * clock feeding each FF and subtract its earliest arrival time from the
 * delay of the D signal to the FF input.  That is too much work, so it is
 * not done here.  Alternatively, with N clocks it might be better to just do
 * N separate timing analyses, with only signals from FFs clocked on clock i
 * being propagated forward on analysis i, and only FFs clocked on i being
 * considered as sinks.  This gives all the critical paths within clock
 * domains, but ignores interactions.  Instead, all the clocks are assumed to
 * be more-or-less synchronized (they might be gated or locally-generated,
 * but they all have the same frequency) and all interactions are explored.
 * Tough to say which is the better way; since multiple clocks aren't
 * important for this work, it's not worth bothering about much.
 *
 ******************************************************************************/

#define T_CONSTANT_GENERATOR -1000	/* Essentially -ve infinity */

/***************** Types local to this module ***************************/

/* The three categories of subblock pins; NUM_SUB_PIN_TYPES is the count and
 * is used to size the per-subblock pin-type dimension of the mappings. */
enum e_subblock_pin_type
{ SUB_INPUT = 0, SUB_OUTPUT, SUB_CLOCK, NUM_SUB_PIN_TYPES };

/***************** Variables local to this module ***************************/

/* Variables for "chunking" the tedge memory.  If the head pointer is NULL, *
 * no timing graph exists now.                                              */

static struct s_linked_vptr *tedge_ch_list_head = NULL;
static int tedge_ch_bytes_avail = 0;
static char *tedge_ch_next_avail = NULL;

/***************** Subroutines local to this module *************************/

static int alloc_and_load_pin_mappings(int ***block_pin_to_tnode_ptr,
				       int *****snode_block_pin_to_tnode_ptr,
				       t_subblock_data subblock_data,
				       int ***num_uses_of_sblk_opin);

static void free_pin_mappings(int **block_pin_to_tnode,
			      int ****snode_block_pin_to_tnode,
			      int *num_subblocks_per_block);

static void alloc_and_load_fanout_counts(int ***num_uses_of_fb_ipin_ptr,
					 int ****num_uses_of_sblk_opin_ptr,
					 t_subblock_data subblock_data);

static void free_fanout_counts(int **num_uses_of_fb_ipin,
			       int ***num_uses_of_sblk_opin);

static float **alloc_net_slack(void);

static void compute_net_slacks(float **net_slack);

static void alloc_and_load_tnodes_and_net_mapping(int **num_uses_of_fb_ipin,
						  int ***num_uses_of_sblk_opin,
						  int **block_pin_to_tnode,
						  int
						  ****snode_block_pin_to_tnode,
						  t_subblock_data
						  subblock_data,
						  t_timing_inf timing_inf);

static void build_fb_tnodes(int iblk,
			    int *n_uses_of_fb_ipin,
			    int **block_pin_to_tnode,
			    int ***sub_pin_to_tnode,
			    int num_subs,
			    t_subblock * sub_inf,
			    float T_fb_ipin_to_sblk_ipin);

static void build_subblock_tnodes(int **n_uses_of_sblk_opin,
				  int *node_block_pin_to_tnode,
				  int ***sub_pin_to_tnode,
				  int *num_subblocks_per_block,
				  t_subblock ** subblock_inf,
				  t_timing_inf timing_inf,
				  int iblk);

static boolean is_global_clock(int iblk,
			       int sub,
			       int subpin,
			       int *num_subblocks_per_block,
			       t_subblock ** subblock_inf);

static void build_block_output_tnode(int inode,
				     int iblk,
				     int ipin,
				     int **block_pin_to_tnode);

/********************* Subroutine definitions *******************************/

float **
alloc_and_load_timing_graph(t_timing_inf timing_inf,
			    t_subblock_data subblock_data)
{

/* This routine builds the graph used for timing analysis.  Every fb or    *
 * subblock pin is a timing node (tnode).  The connectivity between pins is *
 * represented by timing edges (tedges).  All delay is marked on edges, not *
 * on nodes.  This routine returns an array that will store slack values:   *
 * net_slack[0..num_nets-1][1..num_pins-1].                                 */

    /* The two arrays below are valid only for FBs, not pads. */

    int i;
    int **num_uses_of_fb_ipin;	/* [0..num_blocks-1][0..type->num_pins-1] */
    int ***num_uses_of_sblk_opin;	/* [0..num_blocks-1][0..type->num_subblocks][0..type->max_subblock_outputs] */

    /* Array for mapping from a pin on a block to a tnode index. For pads, only *
     * the first two pin locations are used (input to pad is first, output of   *
     * pad is second).  For fbs, all OPEN pins on the fb have their mapping     *
     * set to OPEN so I won't use it by mistake.                                */

    int **block_pin_to_tnode;	/* [0..num_blocks-1][0..num_pins-1] */

    /* Array for mapping from a pin on a subblock to a tnode index.  Unused    *
     * or nonexistent subblock pins have their mapping set to OPEN.            *
     * [0..num_blocks-1][0..num_subblocks_per_block-1][0..NUM_SUB_PIN_TYPES][0..total_subblock_pins-1] */

    int ****snode_block_pin_to_tnode;

    int num_sinks;
    float **net_slack;		/* [0..num_nets-1][1..num_pins-1]. */

/************* End of variable declarations ********************************/

    /* Refuse to build on top of an existing graph: a non-NULL chunk-list head *
     * means a previous graph was never freed.                                 */
    if(tedge_ch_list_head != NULL)
	{
	    printf("Error in alloc_and_load_timing_graph:\n"
		   "\tAn old timing graph still exists.\n");
	    exit(1);
	}

/* If either of the checks below ever fail, change the definition of        *
 * tnode_descript to use ints instead of shorts for isubblk or ipin.        */

    for(i = 0; i < num_types; i++)
	{
	    if(type_descriptors[i].num_pins > MAX_SHORT)
		{
		    printf
			("Error in alloc_and_load_timing_graph: pins for type %s is %d."
			 "\tWill cause short overflow in tnode_descript.\n",
			 type_descriptors[i].name,
			 type_descriptors[i].num_pins);
		    exit(1);
		}
	    if(type_descriptors[i].max_subblocks > MAX_SHORT)
		{
		    printf
			("Error in alloc_and_load_timing_graph: max_subblocks_per_block"
			 "\tis %d -- will cause short overflow in tnode_descript.\n",
			 type_descriptors[i].max_subblocks);
		    exit(1);
		}
	}

    /* Build the graph in stages: fanout counts -> pin-to-tnode mappings ->    *
     * tnodes + net mapping -> levelization -> sanity check.                   */
    alloc_and_load_fanout_counts(&num_uses_of_fb_ipin,
				 &num_uses_of_sblk_opin, subblock_data);

    num_tnodes = alloc_and_load_pin_mappings(&block_pin_to_tnode,
					     &snode_block_pin_to_tnode,
					     subblock_data,
					     num_uses_of_sblk_opin);

    alloc_and_load_tnodes_and_net_mapping(num_uses_of_fb_ipin,
					  num_uses_of_sblk_opin,
					  block_pin_to_tnode,
					  snode_block_pin_to_tnode,
					  subblock_data, timing_inf);

    num_sinks = alloc_and_load_timing_graph_levels();

    check_timing_graph(subblock_data.num_const_gen, subblock_data.num_ff,
		       num_sinks);

    /* The intermediate mapping structures are only needed during construction. */
    free_fanout_counts(num_uses_of_fb_ipin, num_uses_of_sblk_opin);
    free_pin_mappings(block_pin_to_tnode, snode_block_pin_to_tnode,
		      subblock_data.num_subblocks_per_block);

    net_slack = alloc_net_slack();
    return (net_slack);
}

static float **
alloc_net_slack(void)
{

/* Allocates the net_slack structure.  Chunk allocated to save space.      */

    float **net_slack;		/* [0..num_nets-1][1..num_pins-1] */
    float *tmp_ptr;
    int inet;

    net_slack = (float **)my_malloc(num_nets * sizeof(float *));

    for(inet = 0; inet < num_nets; inet++)
	{
	    /* One slack entry per sink pin; pin 0 (the driver) gets no slack.  *
	     * (num_sinks + 1) is the total pin count, so the allocation size   *
	     * is num_pins - 1 floats.                                          */
	    tmp_ptr =
		(float *)my_chunk_malloc(((net[inet].num_sinks + 1) -
					  1) * sizeof(float),
					 &tedge_ch_list_head,
					 &tedge_ch_bytes_avail,
					 &tedge_ch_next_avail);
	    /* Offset the row pointer so callers index it as [1..num_pins-1]. */
	    net_slack[inet] = tmp_ptr - 1;	/* [1..num_pins-1] */
	}

    return (net_slack);
}

static int
alloc_and_load_pin_mappings(int ***block_pin_to_tnode_ptr,
			    int *****snode_block_pin_to_tnode_ptr,
			    t_subblock_data subblock_data,
			    int ***num_uses_of_sblk_opin)
{

/* Allocates and loads the block_pin_to_tnode and snode_block_pin_to_tnode *
 * structures, and computes num_tnodes.                                    */

    int iblk, isub, ipin, num_subblocks, opin, clk_pin;
    int curr_tnode;		/* Running tnode counter; its final value is the total. */
    int ****snode_block_pin_to_tnode, **block_pin_to_tnode;
    int *num_subblocks_per_block;
    t_type_ptr type;
    t_subblock **subblock_inf;
    boolean has_inputs;

    num_subblocks_per_block = subblock_data.num_subblocks_per_block;
    subblock_inf = subblock_data.subblock_inf;

    block_pin_to_tnode = (int **)my_malloc(num_blocks * sizeof(int *));

    snode_block_pin_to_tnode =
	(int ****)my_malloc(num_blocks * sizeof(int ***));

    curr_tnode = 0;

    for(iblk = 0; iblk < num_blocks; iblk++)
	{
	    type = block[iblk].type;
	    block_pin_to_tnode[iblk] =
		(int *)my_malloc(type->num_pins * sizeof(int));

	    /* First do the block mapping: every used (non-OPEN) block pin gets *
	     * one tnode; unused pins map to OPEN.                              */

	    for(ipin = 0; ipin < block[iblk].type->num_pins; ipin++)
		{
		    if(block[iblk].nets[ipin] == OPEN)
			{
			    block_pin_to_tnode[iblk][ipin] = OPEN;
			}
		    else
			{
			    block_pin_to_tnode[iblk][ipin] = curr_tnode;
			    curr_tnode++;
			}
		}

	    /* Now do the subblock mapping. */

	    num_subblocks = num_subblocks_per_block[iblk];
	    snode_block_pin_to_tnode[iblk] =
		(int ***)alloc_matrix(0, num_subblocks - 1, 0,
				      NUM_SUB_PIN_TYPES - 1, sizeof(int *));
	    /* [0..max_subblocks_for_type - 1][0..SUB_NUM_PIN_TYPES - 1] */

	    for(isub = 0; isub < num_subblocks; isub++)
		{
		    /* Allocate space for each type of subblock pin */
		    snode_block_pin_to_tnode[iblk][isub][SUB_INPUT] =
			(int *)my_malloc(type->max_subblock_inputs *
					 sizeof(int));
		    snode_block_pin_to_tnode[iblk][isub][SUB_OUTPUT] =
			(int *)my_malloc(type->max_subblock_outputs *
					 sizeof(int));
		    snode_block_pin_to_tnode[iblk][isub][SUB_CLOCK] =
			(int *)my_malloc(sizeof(int));

		    /* Pin ordering: inputs, outputs, clock. */

		    has_inputs = FALSE;
		    for(ipin = 0; ipin < type->max_subblock_inputs; ipin++)
			{
			    if(subblock_inf[iblk][isub].inputs[ipin] != OPEN)
				{
				    has_inputs = TRUE;
				    snode_block_pin_to_tnode[iblk][isub]
					[SUB_INPUT][ipin] = curr_tnode;
				    curr_tnode++;
				    if(type == IO_TYPE)
					curr_tnode++;	/* Output pad needs additional dummy sink node */
				}
			    else
				{
				    snode_block_pin_to_tnode[iblk][isub]
					[SUB_INPUT][ipin] = OPEN;
				}
			}

		    /* subblock output */
		    /* If the subblock opin is unused the subblock is empty and we *
		     * shouldn't count it.                                         */
		    for(opin = 0; opin < type->max_subblock_outputs; opin++)
			{
			    if(num_uses_of_sblk_opin[iblk][isub][opin] != 0)
				{
				    snode_block_pin_to_tnode[iblk][isub]
					[SUB_OUTPUT][opin] = curr_tnode;
				    if(type == IO_TYPE)
					curr_tnode += 2;	/* Input pad needs a dummy source node */
				    else if(has_inputs)	/* Regular sblk */
					curr_tnode++;
				    else	/* Constant generator. Make room for dummy input */
					curr_tnode += 2;
				}
			    else
				{
				    snode_block_pin_to_tnode[iblk][isub]
					[SUB_OUTPUT][opin] = OPEN;
				}
			}

		    clk_pin = 0;
		    if(subblock_inf[iblk][isub].clock != OPEN)
			{
			    /* If this is a sequential block, we have two more pins per used output: #1: the *
			     * clock input (connects to the subblock output node) and #2: the                *
			     * sequential sink (which the subblock LUT inputs will connect to).              */
			    snode_block_pin_to_tnode[iblk][isub][SUB_CLOCK]
				[clk_pin] = curr_tnode;
			    for(opin = 0; opin < type->max_subblock_outputs;
				opin++)
				{
				    if(subblock_inf[iblk][isub].
				       outputs[opin] != OPEN)
					curr_tnode += 2;
				}
			}
		    else
			{
			    snode_block_pin_to_tnode[iblk][isub][SUB_CLOCK]
				[clk_pin] = OPEN;
			}
		}
	}			/* End for all blocks */

    *snode_block_pin_to_tnode_ptr = snode_block_pin_to_tnode;
    *block_pin_to_tnode_ptr = block_pin_to_tnode;
    return (curr_tnode);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -