📄 rr_graph_area.c

📁 用c＋＋写的用于FPGA设计中布图布线的工具源码
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
#include <math.h>#include "util.h"#include "vpr_types.h"#include <assert.h>#include "globals.h"#include "rr_graph_util.h"#include "rr_graph_area.h"/************************ Subroutines local to this module *******************/static float get_cblock_trans (int *num_inputs_to_cblock, int           max_inputs_to_cblock, float trans_cblock_to_lblock_buf,           float trans_sram_bit); static float *alloc_and_load_unsharable_switch_trans (int num_switch,         float trans_sram_bit, float R_minW_nmos); static float *alloc_and_load_sharable_switch_trans (int num_switch,         float trans_sram_bit, float R_minW_nmos, float R_minW_pmos);static float trans_per_buf (float Rbuf, float R_minW_nmos, float R_minW_pmos); static float trans_per_mux (int num_inputs, float trans_sram_bit); static float trans_per_R (float Rtrans, float R_minW_trans); /*************************** Subroutine definitions **************************/void count_routing_transistors (int num_switch, float R_minW_nmos,            float R_minW_pmos) {/* Counts how many transistors are needed to implement the FPGA routing      * * resources.  Call this only when an rr_graph exists.  It does not count    * * the transistors used in logic blocks, but it counts the transistors in    * * the input connection block multiplexers and in the output pin drivers and * * pass transistors.  NB:  this routine assumes pass transistors always      * * generate two edges (one forward, one backward) between two nodes.         * * Physically, this is what happens -- make sure your rr_graph does it.      * *                                                                           * * I assume a minimum width transistor takes 1 unit of area.  A double-width * * transistor takes the twice the diffusion width, but the same spacing, so  * * I assume it takes 1.5x the area of a minimum-width transitor.  I always   * * design tri-state buffers as a buffer followed by a pass transistor.       
* * I make Rbuffer = Rpass_transistor = 1/2 Rtri-state_buffer.                * * I make the pull-up and pull-down sides of the buffer the same strength -- * * i.e. I make the p transistor R_minW_pmos / R_minW_nmos wider than the n   * * transistor.                                                               * *                                                                           * * I generate two area numbers in this routine:  ntrans_sharing and          * * ntrans_no_sharing.  ntrans_sharing exactly reflects what the timing       * * analyzer, etc. works with -- each switch is a completely self-contained   * * pass transistor or tri-state buffer.  In the case of tri-state buffers    * * this is rather pessimistic.  The inverter chain part of the buffer (as    * * opposed to the pass transistor + SRAM output part) can be shared by       * * several switches in the same location.  Obviously all the switches from   * * an OPIN can share one buffer.  Also, CHANX and CHANY switches at the same * * spot (i,j) on a single segment can share a buffer.  For a more realistic  * * area number I assume all buffered switches from a node that are at the    * * *same (i,j) location* can share one buffer.  Only the lowest resistance   * * (largest) buffer is implemented.  In practice, you might want to build    * * something that is 1.5x or 2x the largest buffer, so this may be a bit     * * optimistic (but I still think it's pretty reasonable).                    */ int *num_inputs_to_cblock;  /* [0..num_rr_nodes-1], but all entries not    */                             /* corresponding to IPINs will be 0.           */ boolean *cblock_counted;          /* [0..max(nx,ny)] -- 0th element unused. 
*/ float *shared_buffer_trans;       /* [0..max_nx,ny)] */ float *unsharable_switch_trans, *sharable_switch_trans; /* [0..num_switch-1] */ t_rr_type from_rr_type, to_rr_type; int from_node, to_node, iedge, num_edges, maxlen; int iswitch, i, j, iseg, max_inputs_to_cblock; float input_cblock_trans, shared_opin_buffer_trans; const float trans_sram_bit = 6.;/* Two variables below are the accumulator variables that add up all the    * * transistors in the routing.  Make doubles so that they don't stop        * * incrementing once adding a switch makes a change of less than 1 part in  * * 10^7 to the total.  If this still isn't good enough (adding 1 part in    * * 10^15 will still be thrown away), compute the transistor count in        * * "chunks", by adding up inodes 1 to 1000, 1001 to 2000 and then summing   * * the partial sums together.                                               */ double ntrans_sharing, ntrans_no_sharing;/* Buffers from the routing to the ipin cblock inputs, and from the ipin    * * cblock outputs to the logic block, respectively.  Assume minimum size n  * * transistors, and ptransistors sized to make the pull-up R = pull-down R. */ float trans_track_to_cblock_buf;  float trans_cblock_to_lblock_buf; ntrans_sharing = 0.; ntrans_no_sharing = 0.; max_inputs_to_cblock = 0;/* Assume the two buffers below are 4x minimum drive strength (enough to * * drive a fanout of up to 16 pretty nicely -- should cover a reasonable *  * wiring C plus the fanout.                                             */ trans_track_to_cblock_buf = trans_per_buf (R_minW_nmos/4., R_minW_nmos,                                             R_minW_pmos); trans_cblock_to_lblock_buf = trans_per_buf (R_minW_nmos/4., R_minW_nmos,                                            R_minW_pmos);/* trans_track_to_cblock_buf = 1. + trans_per_R (R_minW_nmos, R_minW_pmos); trans_cblock_to_lblock_buf = 1. 
+ trans_per_R (R_minW_nmos, R_minW_pmos); */  num_inputs_to_cblock = (int *) my_calloc (num_rr_nodes, sizeof (int)); maxlen = max (nx, ny) + 1; cblock_counted = (boolean *) my_calloc (maxlen, sizeof (boolean)); shared_buffer_trans = (float *) my_calloc (maxlen, sizeof (float)); unsharable_switch_trans = alloc_and_load_unsharable_switch_trans (num_switch,                           trans_sram_bit, R_minW_nmos); sharable_switch_trans = alloc_and_load_sharable_switch_trans (num_switch,                          trans_sram_bit, R_minW_nmos, R_minW_pmos); for (from_node=0;from_node<num_rr_nodes;from_node++) {    from_rr_type = rr_node[from_node].type;        switch (from_rr_type) {    case CHANX: case CHANY:       num_edges = rr_node[from_node].num_edges;       for (iedge=0;iedge<num_edges;iedge++) {                  to_node = rr_node[from_node].edges[iedge];          to_rr_type = rr_node[to_node].type;                    switch (to_rr_type) {            case CHANX: case CHANY:             iswitch = rr_node[from_node].switches[iedge];             if (switch_inf[iswitch].buffered) {                iseg = seg_index_of_sblock (from_node, to_node);                shared_buffer_trans[iseg] = max (shared_buffer_trans[iseg],                                         sharable_switch_trans[iswitch]);                ntrans_no_sharing += unsharable_switch_trans[iswitch] +                                     sharable_switch_trans[iswitch];                ntrans_sharing += unsharable_switch_trans[iswitch];             }             else if (from_node < to_node) {               /* Pass transistor shared by two edges -- only count once.  *              * Also, no part of a pass transistor is sharable.          
*/                ntrans_no_sharing += unsharable_switch_trans[iswitch];                ntrans_sharing += unsharable_switch_trans[iswitch];             }             break;          case IPIN:             num_inputs_to_cblock[to_node]++;             max_inputs_to_cblock = max (max_inputs_to_cblock,                                          num_inputs_to_cblock[to_node]);             iseg = seg_index_of_cblock (from_rr_type, to_node);             if (cblock_counted[iseg] == FALSE) {                cblock_counted[iseg] = TRUE;                ntrans_sharing += trans_track_to_cblock_buf;                ntrans_no_sharing += trans_track_to_cblock_buf;             }             break;          default:             printf ("Error in count_routing_transistors:  Unexpected \n"                  "connection from node %d (type %d) to node %d (type %d).\n",                  from_node, from_rr_type, to_node, to_rr_type);             exit (1);             break;          }   /* End switch on to_rr_type. */                 }   /* End for each edge. */      /* Now add in the shared buffer transistors, and reset some flags. 
*/       if (from_rr_type == CHANX) {          for (i=rr_node[from_node].xlow-1;i<=rr_node[from_node].xhigh;i++) {             ntrans_sharing += shared_buffer_trans[i];             shared_buffer_trans[i] = 0.;          }          for (i=rr_node[from_node].xlow;i<=rr_node[from_node].xhigh;i++)              cblock_counted[i] = FALSE;       }       else {  /* CHANY */          for (j=rr_node[from_node].ylow-1;j<=rr_node[from_node].yhigh;j++) {             ntrans_sharing += shared_buffer_trans[j];             shared_buffer_trans[j] = 0.;          }          for (j=rr_node[from_node].ylow;j<=rr_node[from_node].yhigh;j++)              cblock_counted[j] = FALSE;       }       break;    case OPIN:       num_edges = rr_node[from_node].num_edges;       shared_opin_buffer_trans = 0.;       for (iedge=0;iedge<num_edges;iedge++) {          iswitch = rr_node[from_node].switches[iedge];
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -