📄 cluster.c
字号:
connection_driven); if (num_pins_of_net_in_cluster[inet] == 1) { (*clocks_used)++; } else if (num_pins_of_net_in_cluster[inet] == 2 && net_output_in_cluster[inet]) { (*clocks_used)++; }/* Note: unlike inputs, I'm currently assuming there is no internal * * connection in a cluster from LUT outputs to clock inputs. */ } update_total_gain(alpha, timing_driven, connection_driven);}/*****************************************/static boolean inputs_and_clocks_feasible (int iblk, int inputs_avail, int clocks_avail, int lut_size, boolean *is_clock) {/* Checks if adding iblk to the currently open cluster would violate * * the cluster input and clock pin limitations. Returns TRUE if * * iblk can be added to the cluster, FALSE otherwise. */ int ipin, inet, output_net;/* Output first. */ output_net = block[iblk].nets[0]; if (num_pins_of_net_in_cluster[output_net] != 0 && !is_clock[output_net]) inputs_avail++;/* Inputs. */ for (ipin=1;ipin<=lut_size;ipin++) { inet = block[iblk].nets[ipin]; if (inet != OPEN) { if (num_pins_of_net_in_cluster[inet] == 0 && inet != output_net) inputs_avail--; } } if (clocks_feasible (iblk, inputs_avail, clocks_avail, lut_size, is_clock) == FALSE) return (FALSE); if (inputs_avail >= 0) return (TRUE); else return (FALSE);}/*****************************************/static int get_highest_gain_block (int inputs_avail, int clocks_avail, int lut_size, boolean *is_clock, boolean (*is_feasible) ( int iblk, int inputs_avail, int clocks_avail, int lut_size, boolean *is_clock), enum e_gain_type gain_mode) {/* This routine finds the block with the highest gain that is * * not currently in a cluster and satisfies the feasibility * * function passed in as is_feasible. If there are no feasible * * blocks it returns NO_CLUSTER. */ int best_block, i, iblk; int best_hillgain; float best_gain; best_gain = (float)NOT_VALID + 1.0; best_hillgain=NOT_VALID+1; best_block = NO_CLUSTER;/* Divide into two cases for speed only. *//* Typical case: not too many blocks have been marked. */ if (num_marked_blocks < num_blocks / MARKED_FRAC) { for (i=0;i<num_marked_blocks;i++) { iblk = marked_blocks[i]; if (cluster_of_block[iblk] == NO_CLUSTER) { if (gain_mode != HILL_CLIMBING){ if (gain[iblk] > best_gain) { if (is_feasible (iblk, inputs_avail, clocks_avail, lut_size, is_clock)) { best_gain = gain[iblk]; best_block = iblk; } } } else{ /*hill climbing*/ if (hillgain[iblk] > best_hillgain) { if (is_feasible (iblk, inputs_avail, clocks_avail, lut_size, is_clock)) { best_hillgain = hillgain[iblk]; best_block = iblk; } } } } } } else { /* Some high fanout nets marked lots of blocks. */ for (iblk=first_clusterable_block;iblk<num_blocks;iblk++) { if (cluster_of_block[iblk] == NO_CLUSTER) { if (gain_mode != HILL_CLIMBING){ if (gain[iblk] > best_gain) { if (is_feasible (iblk, inputs_avail, clocks_avail, lut_size, is_clock)) { best_gain = gain[iblk]; best_block = iblk; } } } else{ /*hill climbing*/ if (hillgain[iblk] > best_hillgain) { if (is_feasible (iblk, inputs_avail, clocks_avail, lut_size, is_clock)) { best_hillgain = hillgain[iblk]; best_block = iblk; } } } } } } return (best_block);}/*****************************************/static int get_lut_for_cluster (int inputs_avail, int clocks_avail, int cluster_size, int lut_size, int luts_used, boolean *is_clock, boolean allow_unrelated_clustering) {/* Finds the LUT with the the greatest gain that satisifies the * * input, clock and capacity constraints of a cluster that are * * passed in. If no suitable block is found it returns NO_CLUSTER. */ int best_block; if (luts_used == cluster_size) return (NO_CLUSTER); best_block = get_highest_gain_block (inputs_avail, clocks_avail, lut_size, is_clock, inputs_and_clocks_feasible, NOT_HILL_CLIMBING);/* If no blocks have any gain to the current cluster, the code above * * will not find anything. However, another block with no inputs in * * common with the cluster may still be inserted into the cluster. */ if (allow_unrelated_clustering) if (best_block == NO_CLUSTER) best_block = get_free_block_with_most_ext_inputs (lut_size, inputs_avail, clocks_avail); return (best_block);}/*****************************************/static int get_free_block_with_fewest_ext_inputs (int lut_size, int clocks_avail) {/* Used by the hill climbing code. Finds the LUT that has the * * fewest used inputs and is compatible with the cluster clocks. * * Returns NO_CLUSTER if no suitable block is found. */ int iblk, ext_inps; for (ext_inps=0;ext_inps<=lut_size;ext_inps++) { iblk = get_block_by_num_ext_inputs (ext_inps, lut_size, clocks_avail, LEAVE_CLUSTERED); if (iblk != NO_CLUSTER) return (iblk); } return (NO_CLUSTER);}/*****************************************/static int get_most_feasible_block (int inputs_avail, int clocks_avail, int lut_size) {/* Finds the block that is closest to being feasible for the hill * * climbing step. If no block can ever become feasible (all blocks * * already clustered or have incompatible clocking) the routine * * returns NO_CLUSTER. Otherwise, it returns the best block to add * * to the cluster and passes back the new number of inputs available * * and clocks available through the pointers. */ int best_connected_block, lowest_ext_input_block; best_connected_block = get_highest_gain_block (inputs_avail, clocks_avail, lut_size, NULL, clocks_feasible, HILL_CLIMBING);/* It could be that there are no clock feasible blocks with any * * connection to the current cluster, or that there are blocks * * that aren't connected to the cluster that have so few inputs * * they are the most feasible blocks. Hence code below. */ lowest_ext_input_block = get_free_block_with_fewest_ext_inputs ( lut_size, clocks_avail); if (best_connected_block == NO_CLUSTER) return (lowest_ext_input_block); else if (lowest_ext_input_block == NO_CLUSTER) return (best_connected_block); else { if (hillgain[best_connected_block] >= -num_ext_inputs( lowest_ext_input_block, lut_size)) return (best_connected_block); else return (lowest_ext_input_block); }}/*****************************************/static void hill_climbing_add_to_cluster (int new_blk, int cluster_index, int lut_size, boolean *is_clock, int *clocks_avail, boolean timing_driven, boolean connection_driven) {/* Adds new_blk to the currently open cluster. New_blk will be clock * * feasible but may not be input-feasible. */ int ipin, inet; cluster_of_block[new_blk] = cluster_index; inet = block[new_blk].nets[0]; /* Output pin first. */ if (!is_clock[inet]) mark_and_update_partial_gain (inet, GAIN, lut_size, new_blk, 0, is_clock, timing_driven, connection_driven); else mark_and_update_partial_gain (inet, NO_GAIN, lut_size, new_blk, 0, is_clock, timing_driven, connection_driven); net_output_in_cluster[inet] = TRUE; for (ipin=1;ipin<=lut_size;ipin++) { /* LUT input pins. */ inet = block[new_blk].nets[ipin]; if (inet != OPEN) mark_and_update_partial_gain (inet, GAIN, lut_size, new_blk, ipin, is_clock, timing_driven, connection_driven); }/* Note: The code below ONLY WORKS when nets that go to clock inputs * * NEVER go to the input of a LUT. It doesn't really make electrical * * sense for that to happen, and I check this in the check_clocks * * function. Don't disable that sanity check. */ inet = block[new_blk].nets[lut_size+1]; /* Clock input pin. */ if (inet != OPEN) { mark_and_update_partial_gain (inet, NO_GAIN, lut_size, new_blk, lut_size+1, is_clock, timing_driven, connection_driven); if (num_pins_of_net_in_cluster[inet] == 1) { (*clocks_avail)--; } else if (num_pins_of_net_in_cluster[inet] == 2 && net_output_in_cluster[inet]) { (*clocks_avail)--; }/* Note: unlike inputs, I'm currently assuming there is no internal * * connection in a cluster from LUT outputs to clock inputs. */ }}/*****************************************/static int hill_climbing (int initial_inputs_avail, int clocks_avail, int cluster_size, int lut_size, int initial_luts_used, int cluster_index, boolean *is_clock, boolean global_clocks, int *next_empty_location_in_cluster, int *block_in_cluster, boolean timing_driven, boolean connection_driven) {/* This routine is called when we find that no more luts can be packed * * into the open cluster. If this is because the cluster is full, * * we immediately exit since there's nothing to do. If there is still * * room in the cluster, however, we may be able to pack more luts in. * * This routine then looks for LUTs which meet the clocking * * constraints of the open cluster which will produce the smallest * * increase in the number of inputs used. Because adding a LUT can * * *reduce* the number of inputs required by a cluster (when all * * inputs are already in the cluster, and the output of the lut * * already is an input to the cluster), an infeasible cluster could * * become feasible after more luts are added to it. Hence the hill * * climbing name. Note also that the attraction function is different * * in this function than in the first pass clustering routine -- here * * we're just looking for luts that will fit, not ones with the most * * pins shared. The routine keeps track after each move of whether or * * not the resulting cluster is feasible. Once the cluster is full * * the cluster state is rolled back to its last feasible configuration.* * If this routine didn't accomplish anything that configuration will * * be the cluster config. when this routine was called. *//* I have to keep track of the key cluster information after each * * block is added so that I can later "roll back" the cluster to * * the last feasible configuration. *//*returns how many blocks added to the cluster*/ static int *inputs_avail = NULL; /* 0..cluster_size */ static int *block_added = NULL; /* Which block was added? */ int luts_used, iblk, i; int num_blocks_added; num_blocks_added = 0;/* Only allocate once. Ugly hack, but what can I do? Can't free the * * memory easily when it's done this way either. */ if (inputs_avail == NULL) { inputs_avail = (int *) my_malloc (cluster_size * sizeof(int)); block_added = (int *) my_malloc (cluster_size * sizeof(int)); } if (initial_luts_used == cluster_size) return num_blocks_added; luts_used = initial_luts_used; inputs_avail[luts_used-1] = initial_inputs_avail; while (luts_used != cluster_size) { iblk = get_most_feasible_block (inputs_avail[luts_used-1], clocks_avail, lut_size); if (iblk == NO_CLUSTER) break; luts_used++; inputs_avail[luts_used-1] = inputs_avail[luts_used-2] + hillgain[iblk]; block_added[luts_used-1] = iblk; num_blocks_added++; block_in_cluster[*next_empty_location_in_cluster]=iblk; (*next_empty_location_in_cluster)++; hill_climbing_add_to_cluster (iblk, cluster_index, lut_size, is_clock, &clocks_avail, timing_driven, connection_driven);/* Early exit check. Each LUT added can reduce the number of inputs * * required by at most one, so check if it's hopeless. */ if (inputs_avail[luts_used-1] + cluster_size - luts_used < 0) break; }/* Now roll back to the last feasible clustering. *//* NB: A future enhancement would be to try replicating blocks. When * * a clustering with i blocks isn't feasible below, try finding a * * block which if replicated would reduce the inputs used by one and * * replace the i'th block with it and so on. */ for (i=luts_used-1;i>=initial_luts_used;i--) { if (inputs_avail[i] >= 0){ /* Found feasible solution! */ break; }/* Infeasible. Back up one block. */ (*next_empty_location_in_cluster)--; num_blocks_added--; iblk = block_added[i]; cluster_of_block[iblk] = NO_CLUSTER; block_in_cluster[*next_empty_location_in_cluster]=NO_CLUSTER; } return num_blocks_added;}/*****************************************/static void alloc_and_load_cluster_info (int ***cluster_contents_ptr, int **cluster_occupancy_ptr, int num_clusters, int cluster_size) {/* Allocates and loads the cluster_contents and cluster_occupancy * * arrays. Also checks that all logic blocks have been assigned to * * a legal cluster, no clusters have more than cluster_size logic * * blocks, and that no pads have been put into clusters. */ int **cluster_contents; /* [0..num_clusters-1][0..cluster_size-1] */ int *cluster_occupancy; /* [0..num_clusters-1] */ int iblk, icluster; cluster_contents = (int **) alloc_matrix (0,num_clusters-1,0,cluster_size-1, sizeof(int)); cluster_occupancy = (int *) my_calloc (num_clusters, sizeof (int)); for (iblk=0;iblk<num_blocks;iblk++) { if (block[iblk].type == LUT || block[iblk].type == LATCH || block[iblk].type == LUT_AND_LATCH) { icluster = cluster_of_block[iblk]; if (icluster < 0 || icluster > num_clusters-1) { printf("Error in alloc_and_load_cluster_info: " "cluster number (%d) for block\n"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -