📄 rr_graph_area.c
字号:
#include <math.h>#include "util.h"#include "vpr_types.h"#include <assert.h>#include "globals.h"#include "rr_graph_util.h"#include "rr_graph_area.h"/************************ Subroutines local to this module *******************/static void count_bidir_routing_transistors(int num_switch, float R_minW_nmos, float R_minW_pmos);static void count_unidir_routing_transistors(t_segment_inf * segment_inf, float R_minW_nmos, float R_minW_pmos);static float get_cblock_trans(int *num_inputs_to_cblock, int max_inputs_to_cblock, float trans_cblock_to_lblock_buf, float trans_sram_bit);static float *alloc_and_load_unsharable_switch_trans(int num_switch, float trans_sram_bit, float R_minW_nmos);static float *alloc_and_load_sharable_switch_trans(int num_switch, float trans_sram_bit, float R_minW_nmos, float R_minW_pmos);static float trans_per_buf(float Rbuf, float R_minW_nmos, float R_minW_pmos);static float trans_per_mux(int num_inputs, float trans_sram_bit, float pass_trans_area);static float trans_per_R(float Rtrans, float R_minW_trans);/*************************** Subroutine definitions **************************/voidcount_routing_transistors(enum e_directionality directionality, int num_switch, t_segment_inf * segment_inf, float R_minW_nmos, float R_minW_pmos){/* Counts how many transistors are needed to implement the FPGA routing * * resources. Call this only when an rr_graph exists. It does not count * * the transistors used in logic blocks, but it counts the transistors in * * the input connection block multiplexers and in the output pin drivers and * * pass transistors. NB: this routine assumes pass transistors always * * generate two edges (one forward, one backward) between two nodes. * * Physically, this is what happens -- make sure your rr_graph does it. * * * * I assume a minimum width transistor takes 1 unit of area. A double-width * * transistor takes the twice the diffusion width, but the same spacing, so * * I assume it takes 1.5x the area of a minimum-width transitor. */ if(directionality == BI_DIRECTIONAL) { count_bidir_routing_transistors(num_switch, R_minW_nmos, R_minW_pmos); } else { assert(directionality == UNI_DIRECTIONAL); count_unidir_routing_transistors(segment_inf, R_minW_nmos, R_minW_pmos); }}voidcount_bidir_routing_transistors(int num_switch, float R_minW_nmos, float R_minW_pmos){/* Tri-state buffers are designed as a buffer followed by a pass transistor. * * I make Rbuffer = Rpass_transitor = 1/2 Rtri-state_buffer. * * I make the pull-up and pull-down sides of the buffer the same strength -- * * i.e. I make the p transistor R_minW_pmos / R_minW_nmos wider than the n * * transistor. * * * * I generate two area numbers in this routine: ntrans_sharing and * * ntrans_no_sharing. ntrans_sharing exactly reflects what the timing * * analyzer, etc. works with -- each switch is a completely self contained * * pass transistor or tri-state buffer. In the case of tri-state buffers * * this is rather pessimisitic. The inverter chain part of the buffer (as * * opposed to the pass transistor + SRAM output part) can be shared by * * several switches in the same location. Obviously all the switches from * * an OPIN can share one buffer. Also, CHANX and CHANY switches at the same * * spot (i,j) on a single segment can share a buffer. For a more realistic * * area number I assume all buffered switches from a node that are at the * * *same (i,j) location* can share one buffer. Only the lowest resistance * * (largest) buffer is implemented. In practice, you might want to build * * something that is 1.5x or 2x the largest buffer, so this may be a bit * * optimistic (but I still think it's pretty reasonable). */ int *num_inputs_to_cblock; /* [0..num_rr_nodes-1], but all entries not */ /* corresponding to IPINs will be 0. */ boolean *cblock_counted; /* [0..max(nx,ny)] -- 0th element unused. */ float *shared_buffer_trans; /* [0..max_nx,ny)] */ float *unsharable_switch_trans, *sharable_switch_trans; /* [0..num_switch-1] */ t_rr_type from_rr_type, to_rr_type; int from_node, to_node, iedge, num_edges, maxlen; int iswitch, i, j, iseg, max_inputs_to_cblock; float input_cblock_trans, shared_opin_buffer_trans; const float trans_sram_bit = 6.;/* Two variables below are the accumulator variables that add up all the * * transistors in the routing. Make doubles so that they don't stop * * incrementing once adding a switch makes a change of less than 1 part in * * 10^7 to the total. If this still isn't good enough (adding 1 part in * * 10^15 will still be thrown away), compute the transistor count in * * "chunks", by adding up inodes 1 to 1000, 1001 to 2000 and then summing * * the partial sums together. */ double ntrans_sharing, ntrans_no_sharing;/* Buffers from the routing to the ipin cblock inputs, and from the ipin * * cblock outputs to the logic block, respectively. Assume minimum size n * * transistors, and ptransistors sized to make the pull-up R = pull-down R. */ float trans_track_to_cblock_buf; float trans_cblock_to_lblock_buf; ntrans_sharing = 0.; ntrans_no_sharing = 0.; max_inputs_to_cblock = 0;/* Assume the two buffers below are 4x minimum drive strength (enough to * * drive a fanout of up to 16 pretty nicely -- should cover a reasonable * * wiring C plus the fanout. */ trans_track_to_cblock_buf = trans_per_buf(R_minW_nmos / 4., R_minW_nmos, R_minW_pmos); trans_cblock_to_lblock_buf = trans_per_buf(R_minW_nmos / 4., R_minW_nmos, R_minW_pmos); num_inputs_to_cblock = (int *)my_calloc(num_rr_nodes, sizeof(int)); maxlen = max(nx, ny) + 1; cblock_counted = (boolean *) my_calloc(maxlen, sizeof(boolean)); shared_buffer_trans = (float *)my_calloc(maxlen, sizeof(float)); unsharable_switch_trans = alloc_and_load_unsharable_switch_trans(num_switch, trans_sram_bit, R_minW_nmos); sharable_switch_trans = alloc_and_load_sharable_switch_trans(num_switch, trans_sram_bit, R_minW_nmos, R_minW_pmos); for(from_node = 0; from_node < num_rr_nodes; from_node++) { from_rr_type = rr_node[from_node].type; switch (from_rr_type) { case CHANX: case CHANY: num_edges = rr_node[from_node].num_edges; for(iedge = 0; iedge < num_edges; iedge++) { to_node = rr_node[from_node].edges[iedge]; to_rr_type = rr_node[to_node].type; switch (to_rr_type) { case CHANX: case CHANY: iswitch = rr_node[from_node].switches[iedge]; if(switch_inf[iswitch].buffered) { iseg = seg_index_of_sblock(from_node, to_node); shared_buffer_trans[iseg] = max(shared_buffer_trans[iseg], sharable_switch_trans [iswitch]); ntrans_no_sharing += unsharable_switch_trans [iswitch] + sharable_switch_trans [iswitch]; ntrans_sharing += unsharable_switch_trans [iswitch]; } else if(from_node < to_node) { /* Pass transistor shared by two edges -- only count once. * * Also, no part of a pass transistor is sharable. */ ntrans_no_sharing += unsharable_switch_trans [iswitch]; ntrans_sharing += unsharable_switch_trans [iswitch]; } break; case IPIN: num_inputs_to_cblock[to_node]++; max_inputs_to_cblock = max(max_inputs_to_cblock, num_inputs_to_cblock[to_node]); iseg = seg_index_of_cblock(from_rr_type, to_node); if(cblock_counted[iseg] == FALSE) { cblock_counted[iseg] = TRUE; ntrans_sharing += trans_track_to_cblock_buf; ntrans_no_sharing += trans_track_to_cblock_buf; } break; default: printf ("Error in count_routing_transistors: Unexpected \n" "connection from node %d (type %d) to node %d (type %d).\n", from_node, from_rr_type, to_node, to_rr_type); exit(1); break; } /* End switch on to_rr_type. */ } /* End for each edge. */ /* Now add in the shared buffer transistors, and reset some flags. */ if(from_rr_type == CHANX) { for(i = rr_node[from_node].xlow - 1; i <= rr_node[from_node].xhigh; i++) { ntrans_sharing += shared_buffer_trans[i]; shared_buffer_trans[i] = 0.; } for(i = rr_node[from_node].xlow; i <= rr_node[from_node].xhigh; i++) cblock_counted[i] = FALSE; } else { /* CHANY */ for(j = rr_node[from_node].ylow - 1; j <= rr_node[from_node].yhigh; j++) { ntrans_sharing += shared_buffer_trans[j]; shared_buffer_trans[j] = 0.; } for(j = rr_node[from_node].ylow; j <= rr_node[from_node].yhigh; j++) cblock_counted[j] = FALSE; } break; case OPIN: num_edges = rr_node[from_node].num_edges; shared_opin_buffer_trans = 0.; for(iedge = 0; iedge < num_edges; iedge++) { iswitch = rr_node[from_node].switches[iedge]; ntrans_no_sharing += unsharable_switch_trans[iswitch] + sharable_switch_trans[iswitch]; ntrans_sharing += unsharable_switch_trans[iswitch]; shared_opin_buffer_trans = max(shared_opin_buffer_trans, sharable_switch_trans[iswitch]); } ntrans_sharing += shared_opin_buffer_trans; break; default: break; } /* End switch on from_rr_type */ } /* End for all nodes */ free(cblock_counted); free(shared_buffer_trans); free(unsharable_switch_trans); free(sharable_switch_trans);/* Now add in the input connection block transistors. */ input_cblock_trans = get_cblock_trans(num_inputs_to_cblock, max_inputs_to_cblock, trans_cblock_to_lblock_buf, trans_sram_bit); free(num_inputs_to_cblock); ntrans_sharing += input_cblock_trans; ntrans_no_sharing += input_cblock_trans; printf("\nRouting area (in minimum width transistor areas):\n"); printf ("Assuming no buffer sharing (pessimistic). Total: %#g Per logic tile: " "%#g\n", ntrans_no_sharing, ntrans_no_sharing / (float)(nx * ny)); printf ("Assuming buffer sharing (slightly optimistic). Total: %#g Per logic tile: " "%#g\n\n", ntrans_sharing, ntrans_sharing / (float)(nx * ny));}voidcount_unidir_routing_transistors(t_segment_inf * segment_inf, float R_minW_nmos, float R_minW_pmos){ boolean *cblock_counted; /* [0..max(nx,ny)] -- 0th element unused. */ int *num_inputs_to_cblock; /* [0..num_rr_nodes-1], but all entries not */ /* corresponding to IPINs will be 0. */ t_rr_type from_rr_type, to_rr_type; int i, j, iseg, from_node, to_node, iedge, num_edges, maxlen; int max_inputs_to_cblock, cost_index, seg_type, switch_type; float input_cblock_trans; const float trans_sram_bit = 6.;/* Two variables below are the accumulator variables that add up all the * * transistors in the routing. Make doubles so that they don't stop * * incrementing once adding a switch makes a change of less than 1 part in * * 10^7 to the total. If this still isn't good enough (adding 1 part in * * 10^15 will still be thrown away), compute the transistor count in * * "chunks", by adding up inodes 1 to 1000, 1001 to 2000 and then summing * * the partial sums together. */ double ntrans;/* Buffers from the routing to the ipin cblock inputs, and from the ipin * * cblock outputs to the logic block, respectively. Assume minimum size n * * transistors, and ptransistors sized to make the pull-up R = pull-down R. */ float trans_track_to_cblock_buf; float trans_cblock_to_lblock_buf; max_inputs_to_cblock = 0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -