📄 rr_graph_area.c
字号:
/* Assume the two buffers below are 4x minimum drive strength (enough to * * drive a fanout of up to 16 pretty nicely -- should cover a reasonable * * wiring C plus the fanout. */ trans_track_to_cblock_buf = trans_per_buf(R_minW_nmos / 4., R_minW_nmos, R_minW_pmos); trans_cblock_to_lblock_buf = trans_per_buf(R_minW_nmos / 4., R_minW_nmos, R_minW_pmos); num_inputs_to_cblock = (int *)my_calloc(num_rr_nodes, sizeof(int)); maxlen = max(nx, ny) + 1; cblock_counted = (boolean *) my_calloc(maxlen, sizeof(boolean)); ntrans = 0; for(from_node = 0; from_node < num_rr_nodes; from_node++) { from_rr_type = rr_node[from_node].type; switch (from_rr_type) { case CHANX: case CHANY: num_edges = rr_node[from_node].num_edges; cost_index = rr_node[from_node].cost_index; seg_type = rr_indexed_data[cost_index].seg_index; switch_type = segment_inf[seg_type].wire_switch; assert(segment_inf[seg_type].wire_switch == segment_inf[seg_type].opin_switch); assert(switch_inf[switch_type].mux_trans_size >= 1); /* can't be smaller than min sized transistor */ /* Each wire segment begins with a multipexer followed by a driver for unidirectional */ /* Add up area of multiplexer */ ntrans += trans_per_mux(rr_node[from_node].num_wire_drivers + rr_node[from_node].num_opin_drivers, trans_sram_bit, switch_inf[switch_type].mux_trans_size); /* Add up area of buffer */ if(switch_inf[switch_type].buf_size == 0) { ntrans += trans_per_buf(switch_inf[switch_type].R, R_minW_nmos, R_minW_pmos); } else { ntrans += switch_inf[switch_type].buf_size; } for(iedge = 0; iedge < num_edges; iedge++) { to_node = rr_node[from_node].edges[iedge]; to_rr_type = rr_node[to_node].type; switch (to_rr_type) { case CHANX: case CHANY: break; case IPIN: num_inputs_to_cblock[to_node]++; max_inputs_to_cblock = max(max_inputs_to_cblock, num_inputs_to_cblock[to_node]); iseg = seg_index_of_cblock(from_rr_type, to_node); if(cblock_counted[iseg] == FALSE) { cblock_counted[iseg] = TRUE; ntrans += trans_track_to_cblock_buf; } break; default: printf ("Error in count_routing_transistors: Unexpected \n" "connection from node %d (type %d) to node %d (type %d).\n", from_node, from_rr_type, to_node, to_rr_type); exit(1); break; } /* End switch on to_rr_type. */ } /* End for each edge. */ /* Reset some flags */ if(from_rr_type == CHANX) { for(i = rr_node[from_node].xlow; i <= rr_node[from_node].xhigh; i++) cblock_counted[i] = FALSE; } else { /* CHANY */ for(j = rr_node[from_node].ylow; j <= rr_node[from_node].yhigh; j++) cblock_counted[j] = FALSE; } break; case OPIN: break; default: break; } /* End switch on from_rr_type */ } /* End for all nodes */ /* Now add in the input connection block transistors. */ input_cblock_trans = get_cblock_trans(num_inputs_to_cblock, max_inputs_to_cblock, trans_cblock_to_lblock_buf, trans_sram_bit); free(cblock_counted); free(num_inputs_to_cblock); ntrans += input_cblock_trans; printf("\nRouting area (in minimum width transistor areas):\n"); printf("Total Routing Area: %#g Per logic tile: %#g\n", ntrans, ntrans / (float)(nx * ny));}static floatget_cblock_trans(int *num_inputs_to_cblock, int max_inputs_to_cblock, float trans_cblock_to_lblock_buf, float trans_sram_bit){/* Computes the transistors in the input connection block multiplexers and * * the buffers from connection block outputs to the logic block input pins. * * For speed, I precompute the number of transistors in the multiplexers of * * interest. */ float *trans_per_cblock; /* [0..max_inputs_to_cblock] */ float trans_count; int i, num_inputs; trans_per_cblock = (float *)my_malloc((max_inputs_to_cblock + 1) * sizeof(float)); trans_per_cblock[0] = 0.; /* i.e., not an IPIN or no inputs *//* With one or more inputs, add the mux and output buffer. I add the output * * buffer even when the number of inputs = 1 (i.e. no mux) because I assume * * I need the drivability just for metal capacitance. */ for(i = 1; i <= max_inputs_to_cblock; i++) trans_per_cblock[i] = trans_per_mux(i, trans_sram_bit, ipin_mux_trans_size) + trans_cblock_to_lblock_buf; trans_count = 0.; for(i = 0; i < num_rr_nodes; i++) { num_inputs = num_inputs_to_cblock[i]; trans_count += trans_per_cblock[num_inputs]; } free(trans_per_cblock); return (trans_count);}static float *alloc_and_load_unsharable_switch_trans(int num_switch, float trans_sram_bit, float R_minW_nmos){/* Loads up an array that says how many transistors are needed to implement * * the unsharable portion of each switch type. The SRAM bit of a switch and * * the pass transistor (forming either the entire switch or the output part * * of a tri-state buffer) are both unsharable. */ float *unsharable_switch_trans, Rpass; int i; unsharable_switch_trans = (float *)my_malloc(num_switch * sizeof(float)); for(i = 0; i < num_switch; i++) { if(switch_inf[i].buffered == FALSE) { Rpass = switch_inf[i].R; } else { /* Buffer. Set Rpass = Rbuf = 1/2 Rtotal. */ Rpass = switch_inf[i].R / 2.; } unsharable_switch_trans[i] = trans_per_R(Rpass, R_minW_nmos) + trans_sram_bit; } return (unsharable_switch_trans);}static float *alloc_and_load_sharable_switch_trans(int num_switch, float trans_sram_bit, float R_minW_nmos, float R_minW_pmos){/* Loads up an array that says how many transistor are needed to implement * * the sharable portion of each switch type. The SRAM bit of a switch and * * the pass transistor (forming either the entire switch or the output part * * of a tri-state buffer) are both unsharable. Only the buffer part of a * * buffer switch is sharable. */ float *sharable_switch_trans, Rbuf; int i; sharable_switch_trans = (float *)my_malloc(num_switch * sizeof(float)); for(i = 0; i < num_switch; i++) { if(switch_inf[i].buffered == FALSE) { sharable_switch_trans[i] = 0.; } else { /* Buffer. Set Rbuf = Rpass = 1/2 Rtotal. */ Rbuf = switch_inf[i].R / 2.; sharable_switch_trans[i] = trans_per_buf(Rbuf, R_minW_nmos, R_minW_pmos); } } return (sharable_switch_trans);}static floattrans_per_buf(float Rbuf, float R_minW_nmos, float R_minW_pmos){/* Returns the number of minimum width transistor area equivalents needed to * * implement this buffer. Assumes a stage ratio of 4, and equal strength * * pull-up and pull-down paths. */ int num_stage, istage; float trans_count, stage_ratio, Rstage; if(Rbuf > 0.6 * R_minW_nmos || Rbuf <= 0.) { /* Use a single-stage buffer */ trans_count = trans_per_R(Rbuf, R_minW_nmos) + trans_per_R(Rbuf, R_minW_pmos); } else { /* Use a multi-stage buffer */ /* Target stage ratio = 4. 1 minimum width buffer, then num_stage bigger * * ones. */ num_stage = nint(log10(R_minW_nmos / Rbuf) / log10(4.)); num_stage = max(num_stage, 1); stage_ratio = pow(R_minW_nmos / Rbuf, 1. / (float)num_stage); Rstage = R_minW_nmos; trans_count = 0.; for(istage = 0; istage <= num_stage; istage++) { trans_count += trans_per_R(Rstage, R_minW_nmos) + trans_per_R(Rstage, R_minW_pmos); Rstage /= stage_ratio; } } return (trans_count);}static floattrans_per_mux(int num_inputs, float trans_sram_bit, float pass_trans_area){/* Returns the number of transistors needed to build a pass transistor mux. * * DOES NOT include input buffers or any output buffer. * * Attempts to select smart multiplexer size depending on number of inputs * * For multiplexers with inputs 4 or less, one level is used, more has two * * levels. */ float ntrans, sram_trans, pass_trans; int num_second_stage_trans; if(num_inputs <= 1) { return (0); } else if(num_inputs == 2) { pass_trans = 2 * pass_trans_area; sram_trans = 1 * trans_sram_bit; } else if(num_inputs <= 4) { /* One-hot encoding */ pass_trans = num_inputs * pass_trans_area; sram_trans = num_inputs * trans_sram_bit; } else { /* This is a large multiplexer so design it using a two-level multiplexer * * + 0.00001 is to make sure exact square roots two don't get rounded down * * to one lower level. */ num_second_stage_trans = floor(sqrt(num_inputs) + 0.00001); pass_trans = (num_inputs + num_second_stage_trans) * pass_trans_area; sram_trans = (ceil((float)num_inputs / num_second_stage_trans - 0.00001) + num_second_stage_trans) * trans_sram_bit; if(num_second_stage_trans == 2) { /* Can use one-bit instead of a two-bit one-hot encoding for the second stage */ /* Eliminates one sram bit counted earlier */ sram_trans -= 1 * trans_sram_bit; } } ntrans = pass_trans + sram_trans; return (ntrans);}static floattrans_per_R(float Rtrans, float R_minW_trans){/* Returns the number of minimum width transistor area equivalents needed * * to make a transistor with Rtrans, given that the resistance of a minimum * * width transistor of this type is R_minW_trans. */ float trans_area; if(Rtrans <= 0.) /* Assume resistances are nonsense -- use min. width */ return (1.); if(Rtrans >= R_minW_trans) return (1.);/* Area = minimum width area (1) + 0.5 for each additional unit of width. * * The 50% factor takes into account the "overlapping" that occurs in * * horizontally-paralleled transistors, and the need for only one spacing, * * not two (i.e. two min W transistors need two spaces; a 2W transistor * * needs only 1). */ trans_area = 0.5 * R_minW_trans / Rtrans + 0.5; return (trans_area);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -