📄 place.c
字号:
} /* placement is done - find mst of all nets. * creating mst for each net; this gives me an ordering of sinks * by which I will direct search (A*) for. */ if(*mst) { for(inet = 0; inet < num_nets; inet++) { assert((*mst)[inet]); free((*mst)[inet]); } free(*mst); } *mst = (t_mst_edge **) my_malloc(sizeof(t_mst_edge *) * num_nets); for(inet = 0; inet < num_nets; inet++) { (*mst)[inet] = get_mst_of_net(inet); } free(x_lookup);}static intcount_connections(){ /*only count non-global connections */ int count, inet; count = 0; for(inet = 0; inet < num_nets; inet++) { if(net[inet].is_global) continue; count += net[inet].num_sinks; } return (count);}static voidcompute_net_pin_index_values(){ /*computes net_pin_index array, this array allows us to quickly */ /*find what pin on the net a block pin corresponds to */ int inet, netpin, blk, iblk, ipin; t_type_ptr type; /*initialize values to OPEN */ for(iblk = 0; iblk < num_blocks; iblk++) { type = block[iblk].type; for(ipin = 0; ipin < type->num_pins; ipin++) { net_pin_index[iblk][ipin] = OPEN; } } for(inet = 0; inet < num_nets; inet++) { if(net[inet].is_global) continue; for(netpin = 0; netpin <= net[inet].num_sinks; netpin++) { blk = net[inet].node_block[netpin]; net_pin_index[blk][net[inet].node_block_pin[netpin]] = netpin; } }}static doubleget_std_dev(int n, double sum_x_squared, double av_x){ /* Returns the standard deviation of data set x. There are n sample points, * * sum_x_squared is the summation over n of x^2 and av_x is the average x. * * All operations are done in double precision, since round off error can be * * a problem in the initial temp. std_dev calculation for big circuits. */ double std_dev; if(n <= 1) std_dev = 0.; else std_dev = (sum_x_squared - n * av_x * av_x) / (double)(n - 1); if(std_dev > 0.) /* Very small variances sometimes round negative */ std_dev = sqrt(std_dev); else std_dev = 0.; return (std_dev);}static voidupdate_rlim(float *rlim, float success_rat){ /* Update the range limited to keep acceptance prob. near 0.44. Use * * a floating point rlim to allow gradual transitions at low temps. */ float upper_lim; *rlim = (*rlim) * (1. - 0.44 + success_rat); upper_lim = max(nx, ny); *rlim = min(*rlim, upper_lim); *rlim = max(*rlim, 1.);}/* Update the temperature according to the annealing schedule selected. */static voidupdate_t(float *t, float std_dev, float rlim, float success_rat, struct s_annealing_sched annealing_sched){ /* float fac; */ if(annealing_sched.type == USER_SCHED) { *t = annealing_sched.alpha_t * (*t); } /* Old standard deviation based stuff is below. This bogs down horribly * for big circuits (alu4 and especially bigkey_mod). */ /* #define LAMBDA .7 */ /* ------------------------------------ */#if 0 else if(std_dev == 0.) { *t = 0.; } else { fac = exp(-LAMBDA * (*t) / std_dev); fac = max(0.5, fac); *t = (*t) * fac; }#endif /* ------------------------------------- */ else { /* AUTO_SCHED */ if(success_rat > 0.96) { *t = (*t) * 0.5; } else if(success_rat > 0.8) { *t = (*t) * 0.9; } else if(success_rat > 0.15 || rlim > 1.) { *t = (*t) * 0.95; } else { *t = (*t) * 0.8; } }}static intexit_crit(float t, float cost, struct s_annealing_sched annealing_sched){ /* Return 1 when the exit criterion is met. */ if(annealing_sched.type == USER_SCHED) { if(t < annealing_sched.exit_t) { return (1); } else { return (0); } } /* Automatic annealing schedule */ if(t < 0.005 * cost / num_nets) { return (1); } else { return (0); }}static floatstarting_t(float *cost_ptr, float *bb_cost_ptr, float *timing_cost_ptr, int place_cost_type, float **old_region_occ_x, float **old_region_occ_y, int num_regions, boolean fixed_pins, struct s_annealing_sched annealing_sched, int max_moves, float rlim, enum e_place_algorithm place_algorithm, float timing_tradeoff, float inverse_prev_bb_cost, float inverse_prev_timing_cost, float *delay_cost_ptr){ /* Finds the starting temperature (hot condition). */ int i, num_accepted, move_lim; double std_dev, av, sum_of_squares; /* Double important to avoid round off */ int *x_lookup; x_lookup = (int *)my_malloc(nx * sizeof(int)); if(annealing_sched.type == USER_SCHED) return (annealing_sched.init_t); move_lim = min(max_moves, num_blocks); num_accepted = 0; av = 0.; sum_of_squares = 0.; /* Try one move per block. Set t high so essentially all accepted. */ for(i = 0; i < move_lim; i++) { if(try_swap(1.e30, cost_ptr, bb_cost_ptr, timing_cost_ptr, rlim, place_cost_type, old_region_occ_x, old_region_occ_y, num_regions, fixed_pins, place_algorithm, timing_tradeoff, inverse_prev_bb_cost, inverse_prev_timing_cost, delay_cost_ptr, x_lookup) == 1) { num_accepted++; av += *cost_ptr; sum_of_squares += *cost_ptr * (*cost_ptr); } } if(num_accepted != 0) av /= num_accepted; else av = 0.; std_dev = get_std_dev(num_accepted, sum_of_squares, av);#ifdef DEBUG if(num_accepted != move_lim) { printf ("Warning: Starting t: %d of %d configurations accepted.\n", num_accepted, move_lim); }#endif#ifdef VERBOSE printf("std_dev: %g, average cost: %g, starting temp: %g\n", std_dev, av, 20. * std_dev);#endif free(x_lookup); /* Set the initial temperature to 20 times the standard of deviation */ /* so that the initial temperature adjusts according to the circuit */ return (20. * std_dev);}static inttry_swap(float t, float *cost, float *bb_cost, float *timing_cost, float rlim, int place_cost_type, float **old_region_occ_x, float **old_region_occ_y, int num_regions, boolean fixed_pins, enum e_place_algorithm place_algorithm, float timing_tradeoff, float inverse_prev_bb_cost, float inverse_prev_timing_cost, float *delay_cost, int *x_lookup){ /* Picks some block and moves it to another spot. If this spot is * * occupied, switch the blocks. Assess the change in cost function * * and accept or reject the move. If rejected, return 0. If * * accepted return 1. Pass back the new value of the cost function. * * rlim is the range limiter. */ int b_from, x_to, y_to, z_to, x_from, y_from, z_from, b_to; int i, k, inet, keep_switch, num_of_pins, max_pins_per_fb; int num_nets_affected, bb_index; float delta_c, bb_delta_c, timing_delta_c, delay_delta_c, newcost; static struct s_bb *bb_coord_new = NULL; static struct s_bb *bb_edge_new = NULL; static int *nets_to_update = NULL, *net_block_moved = NULL; max_pins_per_fb = 0; for(i = 0; i < num_types; i++) { max_pins_per_fb = max(max_pins_per_fb, type_descriptors[i].num_pins); } /* Allocate the local bb_coordinate storage, etc. only once. */ if(bb_coord_new == NULL) { bb_coord_new = (struct s_bb *)my_malloc(2 * max_pins_per_fb * sizeof(struct s_bb)); bb_edge_new = (struct s_bb *)my_malloc(2 * max_pins_per_fb * sizeof(struct s_bb)); nets_to_update = (int *)my_malloc(2 * max_pins_per_fb * sizeof(int)); net_block_moved = (int *)my_malloc(2 * max_pins_per_fb * sizeof(int)); } delay_delta_c = 0.0; b_from = my_irand(num_blocks - 1); /* If the pins are fixed we never move them from their initial * * random locations. The code below could be made more efficient * * by using the fact that pins appear first in the block list, * * but this shouldn't cause any significant slowdown and won't be * * broken if I ever change the parser so that the pins aren't * * necessarily at the start of the block list. */ if(fixed_pins == TRUE) { while(block[b_from].type == IO_TYPE) { b_from = my_irand(num_blocks - 1); } } x_from = block[b_from].x; y_from = block[b_from].y; z_from = block[b_from].z; if(!find_to (x_from, y_from, block[b_from].type, rlim, x_lookup, &x_to, &y_to)) return FALSE; /* Make the switch in order to make computing the new bounding * * box simpler. If the cost increase is too high, switch them * * back. (block data structures switched, clbs not switched * * until success of move is determined.) */ z_to = 0; if(grid[x_to][y_to].type->capacity > 1) { z_to = my_irand(grid[x_to][y_to].type->capacity - 1); } if(grid[x_to][y_to].blocks[z_to] == EMPTY) { /* Moving to an empty location */ b_to = EMPTY; block[b_from].x = x_to; block[b_from].y = y_to; block[b_from].z = z_to; } else { /* Swapping two blocks */ b_to = grid[x_to][y_to].blocks[z_to]; block[b_to].x = x_from; block[b_to].y = y_from; block[b_to].z = z_from; block[b_from].x = x_to; block[b_from].y = y_to; block[b_from].z = z_to; } /* Now update the cost function. May have to do major optimizations * * here later. */ /* I'm using negative values of temp_net_cost as a flag, so DO NOT * * use cost functions that can go negative. */ delta_c = 0; /* Change in cost due to this swap. */ bb_delta_c = 0; timing_delta_c = 0; num_of_pins = block[b_from].type->num_pins; num_nets_affected = find_affected_nets(nets_to_update, net_block_moved, b_from, b_to, num_of_pins); if(place_cost_type == NONLINEAR_CONG) { save_region_occ(old_region_occ_x, old_region_occ_y, num_regions); } bb_index = 0; /* Index of new bounding box. */ for(k = 0; k < num_nets_affected; k++) { inet = nets_to_update[k]; /* If we swapped two blocks connected to the same net, its bounding box * * doesn't change. */ if(net_block_moved[k] == FROM_AND_TO) continue; if(net[inet].num_sinks < SMALL_NET) { get_non_updateable_bb(inet, &bb_coord_new[bb_index]); } else { if(net_block_moved[k] == FROM) update_bb(inet, &bb_coord_new[bb_index], &bb_edge_new[bb_index], x_from, y_from, x_to, y_to); else update_bb(inet, &bb_coord_new[bb_index], &bb_edge_new[bb_index], x_to, y_to, x_from, y_from); } if(place_cost_type != NONLINEAR_CONG) { temp_net_cost[inet] = get_net_cost(inet, &bb_coord_new[bb_index]); bb_delta_c += temp_net_cost[inet] - net_cost[inet]; } else { /* Rip up, then replace with new bb. */ update_region_occ(inet, &bb_coords[inet], -1, num_regions); update_region_occ(inet, &bb_coord_new[bb_index], 1, num_regions); } bb_index++; } if(place_cost_type == NONLINEAR_CONG) { newcost = nonlinear_cong_cost(num_regions); bb_delta_c = newcost - *bb_cost; } if(place_algorithm == NET_TIMING_DRIVEN_PLACE ||
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -