rr_graph_area.c

来自「用于学术研究的FPGA布局布线软件VPR」· C语言代码 · 共 763 行 · 第 1/2 页
763 行
#include <math.h>#include "util.h"#include "vpr_types.h"#include <assert.h>#include "globals.h"#include "rr_graph_util.h"#include "rr_graph_area.h"/************************ Subroutines local to this module *******************/static void count_bidir_routing_transistors(int num_switch,					    float R_minW_nmos,					    float R_minW_pmos);static void count_unidir_routing_transistors(t_segment_inf * segment_inf,					     float R_minW_nmos,					     float R_minW_pmos);static float get_cblock_trans(int *num_inputs_to_cblock,			      int max_inputs_to_cblock,			      float trans_cblock_to_lblock_buf,			      float trans_sram_bit);static float *alloc_and_load_unsharable_switch_trans(int num_switch,						     float trans_sram_bit,						     float R_minW_nmos);static float *alloc_and_load_sharable_switch_trans(int num_switch,						   float trans_sram_bit,						   float R_minW_nmos,						   float R_minW_pmos);static float trans_per_buf(float Rbuf,			   float R_minW_nmos,			   float R_minW_pmos);static float trans_per_mux(int num_inputs,			   float trans_sram_bit,			   float pass_trans_area);static float trans_per_R(float Rtrans,			 float R_minW_trans);/*************************** Subroutine definitions **************************/voidcount_routing_transistors(enum e_directionality directionality,			  int num_switch,			  t_segment_inf * segment_inf,			  float R_minW_nmos,			  float R_minW_pmos){/* Counts how many transistors are needed to implement the FPGA routing      * * resources.  Call this only when an rr_graph exists.  It does not count    * * the transistors used in logic blocks, but it counts the transistors in    * * the input connection block multiplexers and in the output pin drivers and * * pass transistors.  NB:  this routine assumes pass transistors always      * * generate two edges (one forward, one backward) between two nodes.         * * Physically, this is what happens -- make sure your rr_graph does it.      
* *                                                                           * * I assume a minimum width transistor takes 1 unit of area.  A double-width * * transistor takes the twice the diffusion width, but the same spacing, so  * * I assume it takes 1.5x the area of a minimum-width transitor.             */    if(directionality == BI_DIRECTIONAL)	{	    count_bidir_routing_transistors(num_switch, R_minW_nmos,					    R_minW_pmos);	}    else	{	    assert(directionality == UNI_DIRECTIONAL);	    count_unidir_routing_transistors(segment_inf, R_minW_nmos,					     R_minW_pmos);	}}voidcount_bidir_routing_transistors(int num_switch,				float R_minW_nmos,				float R_minW_pmos){/* Tri-state buffers are designed as a buffer followed by a pass transistor. * * I make Rbuffer = Rpass_transitor = 1/2 Rtri-state_buffer.                 * * I make the pull-up and pull-down sides of the buffer the same strength -- * * i.e. I make the p transistor R_minW_pmos / R_minW_nmos wider than the n   * * transistor.                                                               * *                                                                           * * I generate two area numbers in this routine:  ntrans_sharing and          * * ntrans_no_sharing.  ntrans_sharing exactly reflects what the timing       * * analyzer, etc. works with -- each switch is a completely self contained   * * pass transistor or tri-state buffer.  In the case of tri-state buffers    * * this is rather pessimisitic.  The inverter chain part of the buffer (as   * * opposed to the pass transistor + SRAM output part) can be shared by       * * several switches in the same location.  Obviously all the switches from   * * an OPIN can share one buffer.  Also, CHANX and CHANY switches at the same * * spot (i,j) on a single segment can share a buffer.  For a more realistic  * * area number I assume all buffered switches from a node that are at the    * * *same (i,j) location* can share one buffer.  
Only the lowest resistance   * * (largest) buffer is implemented.  In practice, you might want to build    * * something that is 1.5x or 2x the largest buffer, so this may be a bit     * * optimistic (but I still think it's pretty reasonable).                    */    int *num_inputs_to_cblock;	/* [0..num_rr_nodes-1], but all entries not    */    /* corresponding to IPINs will be 0.           */    boolean *cblock_counted;	/* [0..max(nx,ny)] -- 0th element unused. */    float *shared_buffer_trans;	/* [0..max_nx,ny)] */    float *unsharable_switch_trans, *sharable_switch_trans;	/* [0..num_switch-1] */    t_rr_type from_rr_type, to_rr_type;    int from_node, to_node, iedge, num_edges, maxlen;    int iswitch, i, j, iseg, max_inputs_to_cblock;    float input_cblock_trans, shared_opin_buffer_trans;    const float trans_sram_bit = 6.;/* Two variables below are the accumulator variables that add up all the    * * transistors in the routing.  Make doubles so that they don't stop        * * incrementing once adding a switch makes a change of less than 1 part in  * * 10^7 to the total.  If this still isn't good enough (adding 1 part in    * * 10^15 will still be thrown away), compute the transistor count in        * * "chunks", by adding up inodes 1 to 1000, 1001 to 2000 and then summing   * * the partial sums together.                                               */    double ntrans_sharing, ntrans_no_sharing;/* Buffers from the routing to the ipin cblock inputs, and from the ipin    * * cblock outputs to the logic block, respectively.  Assume minimum size n  * * transistors, and ptransistors sized to make the pull-up R = pull-down R. */    float trans_track_to_cblock_buf;    float trans_cblock_to_lblock_buf;    ntrans_sharing = 0.;    ntrans_no_sharing = 0.;    max_inputs_to_cblock = 0;/* Assume the two buffers below are 4x minimum drive strength (enough to * * drive a fanout of up to 16 pretty nicely -- should cover a reasonable *  * wiring C plus the fanout.             
                                */    trans_track_to_cblock_buf =	trans_per_buf(R_minW_nmos / 4., R_minW_nmos, R_minW_pmos);    trans_cblock_to_lblock_buf =	trans_per_buf(R_minW_nmos / 4., R_minW_nmos, R_minW_pmos);    num_inputs_to_cblock = (int *)my_calloc(num_rr_nodes, sizeof(int));    maxlen = max(nx, ny) + 1;    cblock_counted = (boolean *) my_calloc(maxlen, sizeof(boolean));    shared_buffer_trans = (float *)my_calloc(maxlen, sizeof(float));    unsharable_switch_trans =	alloc_and_load_unsharable_switch_trans(num_switch, trans_sram_bit,					       R_minW_nmos);    sharable_switch_trans =	alloc_and_load_sharable_switch_trans(num_switch, trans_sram_bit,					     R_minW_nmos, R_minW_pmos);    for(from_node = 0; from_node < num_rr_nodes; from_node++)	{	    from_rr_type = rr_node[from_node].type;	    switch (from_rr_type)		{		case CHANX:		case CHANY:		    num_edges = rr_node[from_node].num_edges;		    for(iedge = 0; iedge < num_edges; iedge++)			{			    to_node = rr_node[from_node].edges[iedge];			    to_rr_type = rr_node[to_node].type;			    switch (to_rr_type)				{				case CHANX:				case CHANY:				    iswitch =					rr_node[from_node].switches[iedge];				    if(switch_inf[iswitch].buffered)					{					    iseg =						seg_index_of_sblock(from_node,								    to_node);					    shared_buffer_trans[iseg] =						max(shared_buffer_trans[iseg],						    sharable_switch_trans						    [iswitch]);					    ntrans_no_sharing +=						unsharable_switch_trans						[iswitch] +						sharable_switch_trans						[iswitch];					    ntrans_sharing +=						unsharable_switch_trans						[iswitch];					}				    else if(from_node < to_node)					{					    /* Pass transistor shared by two edges -- only count once.  *					     * Also, no part of a pass transistor is sharable.          
*/					    ntrans_no_sharing +=						unsharable_switch_trans						[iswitch];					    ntrans_sharing +=						unsharable_switch_trans						[iswitch];					}				    break;				case IPIN:				    num_inputs_to_cblock[to_node]++;				    max_inputs_to_cblock =					max(max_inputs_to_cblock,					    num_inputs_to_cblock[to_node]);				    iseg =					seg_index_of_cblock(from_rr_type,							    to_node);				    if(cblock_counted[iseg] == FALSE)					{					    cblock_counted[iseg] = TRUE;					    ntrans_sharing +=						trans_track_to_cblock_buf;					    ntrans_no_sharing +=						trans_track_to_cblock_buf;					}				    break;				default:				    printf					("Error in count_routing_transistors:  Unexpected \n"					 "connection from node %d (type %d) to node %d (type %d).\n",					 from_node, from_rr_type, to_node,					 to_rr_type);				    exit(1);				    break;				}	/* End switch on to_rr_type. */			}	/* End for each edge. */		    /* Now add in the shared buffer transistors, and reset some flags. */		    if(from_rr_type == CHANX)			{			    for(i = rr_node[from_node].xlow - 1;				i <= rr_node[from_node].xhigh; i++)				{				    ntrans_sharing += shared_buffer_trans[i];				    shared_buffer_trans[i] = 0.;				}			    for(i = rr_node[from_node].xlow;				i <= rr_node[from_node].xhigh; i++)				cblock_counted[i] = FALSE;			}		    else			{	/* CHANY */			    for(j = rr_node[from_node].ylow - 1;				j <= rr_node[from_node].yhigh; j++)				{				    ntrans_sharing += shared_buffer_trans[j];				    shared_buffer_trans[j] = 0.;				}			    for(j = rr_node[from_node].ylow;				j <= rr_node[from_node].yhigh; j++)				cblock_counted[j] = FALSE;			}		    break;		case OPIN:		    num_edges = rr_node[from_node].num_edges;		    shared_opin_buffer_trans = 0.;		    for(iedge = 0; iedge < num_edges; iedge++)			{			    iswitch = rr_node[from_node].switches[iedge];			    ntrans_no_sharing +=				unsharable_switch_trans[iswitch] +				sharable_switch_trans[iswitch];			    ntrans_sharing +=				
unsharable_switch_trans[iswitch];			    shared_opin_buffer_trans =				max(shared_opin_buffer_trans,				    sharable_switch_trans[iswitch]);			}		    ntrans_sharing += shared_opin_buffer_trans;		    break;		default:		    break;		}		/* End switch on from_rr_type */	}			/* End for all nodes */    free(cblock_counted);    free(shared_buffer_trans);    free(unsharable_switch_trans);    free(sharable_switch_trans);/* Now add in the input connection block transistors. */    input_cblock_trans = get_cblock_trans(num_inputs_to_cblock,					  max_inputs_to_cblock,					  trans_cblock_to_lblock_buf,					  trans_sram_bit);    free(num_inputs_to_cblock);    ntrans_sharing += input_cblock_trans;    ntrans_no_sharing += input_cblock_trans;    printf("\nRouting area (in minimum width transistor areas):\n");    printf	("Assuming no buffer sharing (pessimistic). Total: %#g  Per logic tile: "	 "%#g\n", ntrans_no_sharing, ntrans_no_sharing / (float)(nx * ny));    printf	("Assuming buffer sharing (slightly optimistic). Total: %#g  Per logic tile: "	 "%#g\n\n", ntrans_sharing, ntrans_sharing / (float)(nx * ny));}voidcount_unidir_routing_transistors(t_segment_inf * segment_inf,				 float R_minW_nmos,				 float R_minW_pmos){    boolean *cblock_counted;	/* [0..max(nx,ny)] -- 0th element unused. */    int *num_inputs_to_cblock;	/* [0..num_rr_nodes-1], but all entries not    */    /* corresponding to IPINs will be 0.           */    t_rr_type from_rr_type, to_rr_type;    int i, j, iseg, from_node, to_node, iedge, num_edges, maxlen;    int max_inputs_to_cblock, cost_index, seg_type, switch_type;    float input_cblock_trans;    const float trans_sram_bit = 6.;/* Two variables below are the accumulator variables that add up all the    * * transistors in the routing.  Make doubles so that they don't stop        * * incrementing once adding a switch makes a change of less than 1 part in  * * 10^7 to the total.  
If this still isn't good enough (adding 1 part in    * * 10^15 will still be thrown away), compute the transistor count in        * * "chunks", by adding up inodes 1 to 1000, 1001 to 2000 and then summing   * * the partial sums together.                                               */    double ntrans;/* Buffers from the routing to the ipin cblock inputs, and from the ipin    * * cblock outputs to the logic block, respectively.  Assume minimum size n  * * transistors, and ptransistors sized to make the pull-up R = pull-down R. */    float trans_track_to_cblock_buf;    float trans_cblock_to_lblock_buf;    max_inputs_to_cblock = 0;
rr_graph_area.c - 源码说明

本页面展示了「用于学术研究的FPGA布局布线软件VPR」中的 rr_graph_area.c 源码文件，采用 C语言编程语言编写，共 763 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与FPGA相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码：Ctrl + C
搜索代码：Ctrl + F
全屏模式：F11
增大字号：Ctrl + =
减小字号：Ctrl + -
显示快捷键：?