📄 path_delay.c
字号:
#include <stdio.h>
#include "util.h"
#include "vpr_types.h"
#include "globals.h"
#include "path_delay.h"
#include "path_delay2.h"
#include "net_delay.h"
#include "vpr_utils.h"
#include <assert.h>

/* TODO: Add option for registered inputs and outputs once this works;
 * currently, outputs only. */

/****************** Timing graph structure ***********************************
 *
 * In the timing graph created here, input pads and constant generators have
 * no inputs; everything else has inputs.  Every input pad and output pad is
 * represented by two tnodes -- an input pin and an output pin.  For an input
 * pad the input pin comes from off chip and has no fanin, while the output
 * pin drives outpads and/or CLBs.  For output pads, the input node is driven
 * by a FB or input pad, and the output node goes off chip and has no fanout
 * (out-edges).  Two nodes are needed to represent things like pads because
 * all delay is marked on tedges, not on tnodes.
 *
 * Every used (not OPEN) FB pin becomes a timing node.  As well, every used
 * subblock pin within a FB also becomes a timing node.  Unused (OPEN) pins
 * don't create any timing nodes.  If a subblock is used in combinational
 * mode (i.e. its clock pin is open), the subblock input tnodes are hooked
 * directly to the subblock output tnode.  If the subblock is used in
 * sequential mode, two extra tnodes are created.  One is just the subblock
 * clock pin, which is connected to the subblock output.  This means that FFs
 * don't generate their output until their clock arrives.  For global clocks
 * coming from an input pad, the delay of the clock is 0, so the FFs generate
 * their outputs at T = 0, as usual.  For locally-generated or gated clocks,
 * however, the clock will arrive later, and the FF output will be generated
 * later.  This properly models things like ripple counters and gated clocks.
 * The other extra node is the FF storage node (i.e. a sink), which connects
 * to the subblock inputs and has no fanout.
 *
 * One other subblock that needs special attention is a constant generator.
 * This has no used inputs, but its output is used.  An extra tnode, a dummy
 * input, is created in addition to the output pin tnode.  The dummy tnode
 * has no fanin.  Since constant generators really generate their outputs at
 * T = -infinity, the delay from the input tnode to the output is set to a
 * large-magnitude negative number.  This guarantees every block that needs
 * the output of a constant generator sees it available very early.
 *
 * For this routine to work properly, subblocks whose outputs are unused must
 * be completely empty -- all their input pins and their clock pin must be
 * OPEN.  Check_netlist checks the input netlist to guarantee this -- don't
 * disable that check.
 *
 * NB: The discussion below is only relevant for circuits with multiple
 * clocks.  For circuits with a single clock, everything done here is exactly
 * correct.
 *
 * A note about how FFs are handled: by hooking the clock pin up to the FF
 * output, the time at which the FF generates its output is properly
 * modelled.  The required arrival time at the FF input is not modelled
 * completely rigorously, however: every FF and outpad is assumed to need its
 * input at T = 0, which is when the earliest clock arrives.  This can be
 * conservative -- a fuller analysis would do a fast path analysis of the
 * clock feeding each FF and subtract its earliest arrival time from the
 * delay of the D signal to the FF input.  That is too much work, so it is
 * not done here.  Alternatively, with N clocks it might be better to just do
 * N separate timing analyses, with only signals from FFs clocked on clock i
 * being propagated forward on analysis i, and only FFs clocked on i being
 * considered as sinks.  This gives all the critical paths within clock
 * domains, but ignores interactions.  Instead, all the clocks are assumed to
 * be more-or-less synchronized (they might be gated or locally-generated,
 * but they all have the same frequency) and all interactions are explored.
 * Tough to say which is the better way; since multiple clocks aren't
 * important for this work, it's not worth bothering about much.
 *
 ******************************************************************************/

#define T_CONSTANT_GENERATOR -1000	/* Essentially -ve infinity */

/***************** Types local to this module ***************************/

/* The three categories of subblock pins; NUM_SUB_PIN_TYPES is the count and
 * is used to size the per-subblock pin-type dimension of the mappings. */
enum e_subblock_pin_type
{ SUB_INPUT = 0, SUB_OUTPUT, SUB_CLOCK, NUM_SUB_PIN_TYPES };

/***************** Variables local to this module ***************************/

/* Variables for "chunking" the tedge memory.  If the head pointer is NULL, *
 * no timing graph exists now.                                              */

static struct s_linked_vptr *tedge_ch_list_head = NULL;
static int tedge_ch_bytes_avail = 0;
static char *tedge_ch_next_avail = NULL;

/***************** Subroutines local to this module *************************/

static int alloc_and_load_pin_mappings(int ***block_pin_to_tnode_ptr,
				       int *****snode_block_pin_to_tnode_ptr,
				       t_subblock_data subblock_data,
				       int ***num_uses_of_sblk_opin);

static void free_pin_mappings(int **block_pin_to_tnode,
			      int ****snode_block_pin_to_tnode,
			      int *num_subblocks_per_block);

static void alloc_and_load_fanout_counts(int ***num_uses_of_fb_ipin_ptr,
					 int ****num_uses_of_sblk_opin_ptr,
					 t_subblock_data subblock_data);

static void free_fanout_counts(int **num_uses_of_fb_ipin,
			       int ***num_uses_of_sblk_opin);

static float **alloc_net_slack(void);

static void compute_net_slacks(float **net_slack);

static void alloc_and_load_tnodes_and_net_mapping(int **num_uses_of_fb_ipin,
						  int ***num_uses_of_sblk_opin,
						  int **block_pin_to_tnode,
						  int
						  ****snode_block_pin_to_tnode,
						  t_subblock_data
						  subblock_data,
						  t_timing_inf timing_inf);

static void build_fb_tnodes(int iblk,
			    int *n_uses_of_fb_ipin,
			    int **block_pin_to_tnode,
			    int ***sub_pin_to_tnode,
			    int num_subs,
			    t_subblock * sub_inf,
			    float T_fb_ipin_to_sblk_ipin);

static void build_subblock_tnodes(int **n_uses_of_sblk_opin,
				  int *node_block_pin_to_tnode,
				  int ***sub_pin_to_tnode,
				  int *num_subblocks_per_block,
				  t_subblock ** subblock_inf,
				  t_timing_inf timing_inf,
				  int iblk);

static boolean is_global_clock(int iblk,
			       int sub,
			       int subpin,
			       int *num_subblocks_per_block,
			       t_subblock ** subblock_inf);

static void build_block_output_tnode(int inode,
				     int iblk,
				     int ipin,
				     int **block_pin_to_tnode);

/********************* Subroutine definitions *******************************/

float **
alloc_and_load_timing_graph(t_timing_inf timing_inf,
			    t_subblock_data subblock_data)
{

/* This routine builds the graph used for timing analysis.  Every fb or    *
 * subblock pin is a timing node (tnode).  The connectivity between pins is *
 * represented by timing edges (tedges).  All delay is marked on edges, not *
 * on nodes.  This routine returns an array that will store slack values:   *
 * net_slack[0..num_nets-1][1..num_pins-1].                                 */

    /* The two arrays below are valid only for FBs, not pads. */

    int i;
    int **num_uses_of_fb_ipin;	/* [0..num_blocks-1][0..type->num_pins-1] */
    int ***num_uses_of_sblk_opin;	/* [0..num_blocks-1][0..type->num_subblocks][0..type->max_subblock_outputs] */

    /* Array for mapping from a pin on a block to a tnode index. For pads, only *
     * the first two pin locations are used (input to pad is first, output of   *
     * pad is second).  For fbs, all OPEN pins on the fb have their mapping     *
     * set to OPEN so I won't use it by mistake.                                */

    int **block_pin_to_tnode;	/* [0..num_blocks-1][0..num_pins-1] */

    /* Array for mapping from a pin on a subblock to a tnode index.  Unused    *
     * or nonexistent subblock pins have their mapping set to OPEN.            *
     * [0..num_blocks-1][0..num_subblocks_per_block-1][0..NUM_SUB_PIN_TYPES][0..total_subblock_pins-1] */

    int ****snode_block_pin_to_tnode;

    int num_sinks;
    float **net_slack;		/* [0..num_nets-1][1..num_pins-1]. */

/************* End of variable declarations ********************************/

    /* Refuse to build on top of an existing graph: a non-NULL chunk-list head *
     * means a previous graph was never freed.                                 */
    if(tedge_ch_list_head != NULL)
	{
	    printf("Error in alloc_and_load_timing_graph:\n"
		   "\tAn old timing graph still exists.\n");
	    exit(1);
	}

/* If either of the checks below ever fail, change the definition of        *
 * tnode_descript to use ints instead of shorts for isubblk or ipin.        */

    for(i = 0; i < num_types; i++)
	{
	    if(type_descriptors[i].num_pins > MAX_SHORT)
		{
		    printf
			("Error in alloc_and_load_timing_graph: pins for type %s is %d."
			 "\tWill cause short overflow in tnode_descript.\n",
			 type_descriptors[i].name,
			 type_descriptors[i].num_pins);
		    exit(1);
		}
	    if(type_descriptors[i].max_subblocks > MAX_SHORT)
		{
		    printf
			("Error in alloc_and_load_timing_graph: max_subblocks_per_block"
			 "\tis %d -- will cause short overflow in tnode_descript.\n",
			 type_descriptors[i].max_subblocks);
		    exit(1);
		}
	}

    /* Build the graph in stages: fanout counts -> pin-to-tnode mappings ->    *
     * tnodes + net mapping -> levelization -> sanity check.                   */
    alloc_and_load_fanout_counts(&num_uses_of_fb_ipin,
				 &num_uses_of_sblk_opin, subblock_data);

    num_tnodes = alloc_and_load_pin_mappings(&block_pin_to_tnode,
					     &snode_block_pin_to_tnode,
					     subblock_data,
					     num_uses_of_sblk_opin);

    alloc_and_load_tnodes_and_net_mapping(num_uses_of_fb_ipin,
					  num_uses_of_sblk_opin,
					  block_pin_to_tnode,
					  snode_block_pin_to_tnode,
					  subblock_data, timing_inf);

    num_sinks = alloc_and_load_timing_graph_levels();

    check_timing_graph(subblock_data.num_const_gen, subblock_data.num_ff,
		       num_sinks);

    /* The intermediate mapping structures are only needed during construction. */
    free_fanout_counts(num_uses_of_fb_ipin, num_uses_of_sblk_opin);
    free_pin_mappings(block_pin_to_tnode, snode_block_pin_to_tnode,
		      subblock_data.num_subblocks_per_block);

    net_slack = alloc_net_slack();
    return (net_slack);
}

static float **
alloc_net_slack(void)
{

/* Allocates the net_slack structure.  Chunk allocated to save space.      */

    float **net_slack;		/* [0..num_nets-1][1..num_pins-1] */
    float *tmp_ptr;
    int inet;

    net_slack = (float **)my_malloc(num_nets * sizeof(float *));

    for(inet = 0; inet < num_nets; inet++)
	{
	    /* One slack entry per sink pin; pin 0 (the driver) gets no slack.  *
	     * (num_sinks + 1) is the total pin count, so the allocation size   *
	     * is num_pins - 1 floats.                                          */
	    tmp_ptr =
		(float *)my_chunk_malloc(((net[inet].num_sinks + 1) -
					  1) * sizeof(float),
					 &tedge_ch_list_head,
					 &tedge_ch_bytes_avail,
					 &tedge_ch_next_avail);
	    /* Offset the row pointer so callers index it as [1..num_pins-1]. */
	    net_slack[inet] = tmp_ptr - 1;	/* [1..num_pins-1] */
	}

    return (net_slack);
}

static int
alloc_and_load_pin_mappings(int ***block_pin_to_tnode_ptr,
			    int *****snode_block_pin_to_tnode_ptr,
			    t_subblock_data subblock_data,
			    int ***num_uses_of_sblk_opin)
{

/* Allocates and loads the block_pin_to_tnode and snode_block_pin_to_tnode *
 * structures, and computes num_tnodes.                                    */

    int iblk, isub, ipin, num_subblocks, opin, clk_pin;
    int curr_tnode;		/* Running tnode counter; its final value is the total. */
    int ****snode_block_pin_to_tnode, **block_pin_to_tnode;
    int *num_subblocks_per_block;
    t_type_ptr type;
    t_subblock **subblock_inf;
    boolean has_inputs;

    num_subblocks_per_block = subblock_data.num_subblocks_per_block;
    subblock_inf = subblock_data.subblock_inf;

    block_pin_to_tnode = (int **)my_malloc(num_blocks * sizeof(int *));

    snode_block_pin_to_tnode =
	(int ****)my_malloc(num_blocks * sizeof(int ***));

    curr_tnode = 0;

    for(iblk = 0; iblk < num_blocks; iblk++)
	{
	    type = block[iblk].type;
	    block_pin_to_tnode[iblk] =
		(int *)my_malloc(type->num_pins * sizeof(int));

	    /* First do the block mapping: every used (non-OPEN) block pin gets *
	     * one tnode; unused pins map to OPEN.                              */

	    for(ipin = 0; ipin < block[iblk].type->num_pins; ipin++)
		{
		    if(block[iblk].nets[ipin] == OPEN)
			{
			    block_pin_to_tnode[iblk][ipin] = OPEN;
			}
		    else
			{
			    block_pin_to_tnode[iblk][ipin] = curr_tnode;
			    curr_tnode++;
			}
		}

	    /* Now do the subblock mapping. */

	    num_subblocks = num_subblocks_per_block[iblk];
	    snode_block_pin_to_tnode[iblk] =
		(int ***)alloc_matrix(0, num_subblocks - 1, 0,
				      NUM_SUB_PIN_TYPES - 1, sizeof(int *));
	    /* [0..max_subblocks_for_type - 1][0..SUB_NUM_PIN_TYPES - 1] */

	    for(isub = 0; isub < num_subblocks; isub++)
		{
		    /* Allocate space for each type of subblock pin */
		    snode_block_pin_to_tnode[iblk][isub][SUB_INPUT] =
			(int *)my_malloc(type->max_subblock_inputs *
					 sizeof(int));
		    snode_block_pin_to_tnode[iblk][isub][SUB_OUTPUT] =
			(int *)my_malloc(type->max_subblock_outputs *
					 sizeof(int));
		    snode_block_pin_to_tnode[iblk][isub][SUB_CLOCK] =
			(int *)my_malloc(sizeof(int));

		    /* Pin ordering: inputs, outputs, clock. */

		    has_inputs = FALSE;
		    for(ipin = 0; ipin < type->max_subblock_inputs; ipin++)
			{
			    if(subblock_inf[iblk][isub].inputs[ipin] != OPEN)
				{
				    has_inputs = TRUE;
				    snode_block_pin_to_tnode[iblk][isub]
					[SUB_INPUT][ipin] = curr_tnode;
				    curr_tnode++;
				    if(type == IO_TYPE)
					curr_tnode++;	/* Output pad needs additional dummy sink node */
				}
			    else
				{
				    snode_block_pin_to_tnode[iblk][isub]
					[SUB_INPUT][ipin] = OPEN;
				}
			}

		    /* subblock output */
		    /* If the subblock opin is unused the subblock is empty and we *
		     * shouldn't count it.                                         */
		    for(opin = 0; opin < type->max_subblock_outputs; opin++)
			{
			    if(num_uses_of_sblk_opin[iblk][isub][opin] != 0)
				{
				    snode_block_pin_to_tnode[iblk][isub]
					[SUB_OUTPUT][opin] = curr_tnode;
				    if(type == IO_TYPE)
					curr_tnode += 2;	/* Input pad needs a dummy source node */
				    else if(has_inputs)	/* Regular sblk */
					curr_tnode++;
				    else	/* Constant generator. Make room for dummy input */
					curr_tnode += 2;
				}
			    else
				{
				    snode_block_pin_to_tnode[iblk][isub]
					[SUB_OUTPUT][opin] = OPEN;
				}
			}

		    clk_pin = 0;
		    if(subblock_inf[iblk][isub].clock != OPEN)
			{
			    /* If this is a sequential block, we have two more pins per used output: #1: the *
			     * clock input (connects to the subblock output node) and #2: the                *
			     * sequential sink (which the subblock LUT inputs will connect to).              */
			    snode_block_pin_to_tnode[iblk][isub][SUB_CLOCK]
				[clk_pin] = curr_tnode;
			    for(opin = 0; opin < type->max_subblock_outputs;
				opin++)
				{
				    if(subblock_inf[iblk][isub].
				       outputs[opin] != OPEN)
					curr_tnode += 2;
				}
			}
		    else
			{
			    snode_block_pin_to_tnode[iblk][isub][SUB_CLOCK]
				[clk_pin] = OPEN;
			}
		}
	}			/* End for all blocks */

    *snode_block_pin_to_tnode_ptr = snode_block_pin_to_tnode;
    *block_pin_to_tnode_ptr = block_pin_to_tnode;
    return (curr_tnode);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -