📄 place.c
字号:
/*#include <stdlib.h> */#include <stdio.h>#include <math.h>#include <assert.h>#include "util.h"#include "vpr_types.h"#include "globals.h"#include "mst.h"#include "place.h"#include "read_place.h"#include "draw.h"#include "place_and_route.h"#include "net_delay.h"#include "path_delay.h"#include "timing_place_lookup.h"#include "timing_place.h"#include "place_stats.h"/************** Types and defines local to place.c ***************************/#define SMALL_NET 4 /* Cut off for incremental bounding box updates. *//* 4 is fastest -- I checked. *//* For comp_cost. NORMAL means use the method that generates updateable * * bounding boxes for speed. CHECK means compute all bounding boxes from * * scratch using a very simple routine to allow checks of the other * * costs. */enum cost_methods{ NORMAL, CHECK };#define FROM 0 /* What block connected to a net has moved? */#define TO 1#define FROM_AND_TO 2#define ERROR_TOL .001#define MAX_MOVES_BEFORE_RECOMPUTE 100000/********************** Variables local to place.c ***************************//* [0..num_nets-1] 0 if net never connects to the same block more than * * once, otherwise it gives the number of duplicate connections. */static int *duplicate_pins;/* [0..num_nets-1][0..num_unique_blocks-1] Contains a list of blocks with * * no duplicated blocks for ONLY those nets that had duplicates. */static int **unique_pin_list;/* Cost of a net, and a temporary cost of a net used during move assessment. */static float *net_cost = NULL, *temp_net_cost = NULL; /* [0..num_nets-1] *//* [0..num_nets-1][1..num_pins-1]. What is the value of the timing *//* driven portion of the cost function. These arrays will be set to *//* (criticality * delay) for each point to point connection. */static float **point_to_point_timing_cost = NULL;static float **temp_point_to_point_timing_cost = NULL;/* [0..num_nets-1][1..num_pins-1]. What is the value of the delay *//* for each connection in the circuit */static float **point_to_point_delay_cost = NULL;static float **temp_point_to_point_delay_cost = NULL;/* [0..num_blocks-1][0..pins_per_clb-1]. Indicates which pin on the net *//* this block corresponds to, this is only required during timing-driven *//* placement. It is used to allow us to update individual connections on *//* each net */static int **net_pin_index = NULL;/* [0..num_nets-1]. Store the bounding box coordinates and the number of * * blocks on each of a net's bounding box (to allow efficient updates), * * respectively. */static struct s_bb *bb_coords = NULL, *bb_num_on_edges = NULL;/* Stores the maximum and expected occupancies, plus the cost, of each * * region in the placement. Used only by the NONLINEAR_CONG cost * * function. [0..num_region-1][0..num_region-1]. Place_region_x and * * y give the situation for the x and y directed channels, respectively. */static struct s_place_region **place_region_x, **place_region_y;/* Used only with nonlinear congestion. [0..num_regions]. */static float *place_region_bounds_x, *place_region_bounds_y;/* The arrays below are used to precompute the inverse of the average * * number of tracks per channel between [subhigh] and [sublow]. Access * * them as chan?_place_cost_fac[subhigh][sublow]. They are used to * * speed up the computation of the cost function that takes the length * * of the net bounding box in each dimension, divided by the average * * number of tracks in that direction; for other cost functions they * * will never be used. */static float **chanx_place_cost_fac, **chany_place_cost_fac;/* Expected crossing counts for nets with different #'s of pins. From * * ICCAD 94 pp. 690 - 695 (with linear interpolation applied by me). */static const float cross_count[50] = { /* [0..49] */ 1.0, 1.0, 1.0, 1.0828, 1.1536, 1.2206, 1.2823, 1.3385, 1.3991, 1.4493, 1.4974, 1.5455, 1.5937, 1.6418, 1.6899, 1.7304, 1.7709, 1.8114, 1.8519, 1.8924, 1.9288, 1.9652, 2.0015, 2.0379, 2.0743, 2.1061, 2.1379, 2.1698, 2.2016, 2.2334, 2.2646, 2.2958, 2.3271, 2.3583, 2.3895, 2.4187, 2.4479, 2.4772, 2.5064, 2.5356, 2.5610, 2.5864, 2.6117, 2.6371, 2.6625, 2.6887, 2.7148, 2.7410, 2.7671, 2.7933};/********************* Static subroutines local to place.c *******************/static void alloc_and_load_unique_pin_list(void);static void free_unique_pin_list(void);static void alloc_place_regions(int num_regions);static void load_place_regions(int num_regions);static void free_place_regions(int num_regions);static void alloc_and_load_placement_structs(int place_cost_type, int num_regions, float place_cost_exp, float ***old_region_occ_x, float ***old_region_occ_y, struct s_placer_opts placer_opts);static void free_placement_structs(int place_cost_type, int num_regions, float **old_region_occ_x, float **old_region_occ_y, struct s_placer_opts placer_opts);static void alloc_and_load_for_fast_cost_update(float place_cost_exp);static void initial_placement(enum e_pad_loc_type pad_loc_type, char *pad_loc_file);static float comp_bb_cost(int method, int place_cost_type, int num_regions);static int try_swap(float t, float *cost, float *bb_cost, float *timing_cost, float rlim, int place_cost_type, float **old_region_occ_x, float **old_region_occ_y, int num_regions, boolean fixed_pins, enum e_place_algorithm place_algorithm, float timing_tradeoff, float inverse_prev_bb_cost, float inverse_prev_timing_cost, float *delay_cost, int *x_lookup);static void check_place(float bb_cost, float timing_cost, int place_cost_type, int num_regions, enum e_place_algorithm place_algorithm, float delay_cost);static float starting_t(float *cost_ptr, float *bb_cost_ptr, float *timing_cost_ptr, int place_cost_type, float **old_region_occ_x, float **old_region_occ_y, int num_regions, boolean fixed_pins, struct s_annealing_sched annealing_sched, int max_moves, float rlim, enum e_place_algorithm place_algorithm, float timing_tradeoff, float inverse_prev_bb_cost, float inverse_prev_timing_cost, float *delay_cost_ptr);static void update_t(float *t, float std_dev, float rlim, float success_rat, struct s_annealing_sched annealing_sched);static void update_rlim(float *rlim, float success_rat);static int exit_crit(float t, float cost, struct s_annealing_sched annealing_sched);static int count_connections(void);static void compute_net_pin_index_values(void);static double get_std_dev(int n, double sum_x_squared, double av_x);static void free_fast_cost_update_structs(void);static float recompute_bb_cost(int place_cost_type, int num_regions);static float comp_td_point_to_point_delay(int inet, int ipin);static void update_td_cost(int b_from, int b_to, int num_of_pins);static void comp_delta_td_cost(int b_from, int b_to, int num_of_pins, float *delta_timing, float *delta_delay);static void comp_td_costs(float *timing_cost, float *connection_delay_sum);static int assess_swap(float delta_c, float t);static boolean find_to(int x_from, int y_from, t_type_ptr type, float rlim, int *x_lookup, int *x_to, int *y_to);static void get_non_updateable_bb(int inet, struct s_bb *bb_coord_new);static void update_bb(int inet, struct s_bb *bb_coord_new, struct s_bb *bb_edge_new, int xold, int yold, int xnew, int ynew);static int find_affected_nets(int *nets_to_update, int *net_block_moved, int b_from, int b_to, int num_of_pins);static float get_net_cost(int inet, struct s_bb *bb_ptr);static float nonlinear_cong_cost(int num_regions);static void update_region_occ(int inet, struct s_bb *coords, int add_or_sub, int num_regions);static void save_region_occ(float **old_region_occ_x, float **old_region_occ_y, int num_regions);static void restore_region_occ(float **old_region_occ_x, float **old_region_occ_y, int num_regions);static void get_bb_from_scratch(int inet, struct s_bb *coords, struct s_bb *num_on_edges);static double get_net_wirelength_estimate(int inet, struct s_bb *bbptr);/*****************************************************************************//* RESEARCH TODO: Bounding Box and rlim need to be redone for heterogeneous to prevent a QoR penalty */voidtry_place(struct s_placer_opts placer_opts, struct s_annealing_sched annealing_sched, t_chan_width_dist chan_width_dist, struct s_router_opts router_opts, struct s_det_routing_arch det_routing_arch, t_segment_inf * segment_inf, t_timing_inf timing_inf, t_subblock_data * subblock_data_ptr, t_mst_edge *** mst){ /* Does almost all the work of placing a circuit. Width_fac gives the * * width of the widest channel. Place_cost_exp says what exponent the * * width should be taken to when calculating costs. This allows a * * greater bias for anisotropic architectures. Place_cost_type * * determines which cost function is used. num_regions is used only * * the place_cost_type is NONLINEAR_CONG. */ int tot_iter, inner_iter, success_sum; int move_lim, moves_since_cost_recompute, width_fac; float t, success_rat, rlim, d_max, est_crit; float cost, timing_cost, bb_cost, new_bb_cost, new_timing_cost; float delay_cost, new_delay_cost, place_delay_value; float inverse_prev_bb_cost, inverse_prev_timing_cost; float oldt; double av_cost, av_bb_cost, av_timing_cost, av_delay_cost, sum_of_squares, std_dev; float **old_region_occ_x, **old_region_occ_y; char msg[BUFSIZE]; boolean fixed_pins; /* Can pads move or not? */ int num_connections; int inet, ipin, outer_crit_iter_count, inner_crit_iter_count, inner_recompute_limit; float **net_slack, **net_delay; float crit_exponent; float first_rlim, final_rlim, inverse_delta_rlim; float **remember_net_delay_original_ptr; /*used to free net_delay if it is re-assigned */ int *x_lookup; /* Used to quickly determine valid swap columns */ /* Allocated here because it goes into timing critical code where each memory allocation is expensive */ x_lookup = my_malloc(nx * sizeof(int)); remember_net_delay_original_ptr = NULL; /*prevents compiler warning */ if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE || placer_opts.enable_timing_computations) { /*do this before the initial placement to avoid messing up the initial placement */ alloc_lookups_and_criticalities(chan_width_dist, router_opts, det_routing_arch, segment_inf, timing_inf, *subblock_data_ptr, &net_delay, &net_slack); remember_net_delay_original_ptr = net_delay; /*#define PRINT_LOWER_BOUND */#ifdef PRINT_LOWER_BOUND /*print the crit_path, assuming delay between blocks that are* *block_dist apart*/ if(placer_opts.block_dist <= nx) place_delay_value = delta_clb_to_clb[placer_opts.block_dist][0]; else if(placer_opts.block_dist <= ny) place_delay_value = delta_clb_to_clb[0][placer_opts.block_dist]; else place_delay_value = delta_clb_to_clb[nx][ny]; printf("\nLower bound assuming delay of %g\n", place_delay_value); load_constant_net_delay(net_delay, place_delay_value); load_timing_graph_net_delays(net_delay); d_max = load_net_slack(net_slack, 0);#ifdef CREATE_ECHO_FILES print_critical_path("Placement_Lower_Bound.echo"); print_sink_delays("Placement_Lower_Bound_Sink_Delays.echo");#endif /* CREATE_ECHO_FILES */ /*also print sink delays assuming 0 delay between blocks, * this tells us how much logic delay is on each path */ load_constant_net_delay(net_delay, 0); load_timing_graph_net_delays(net_delay); d_max = load_net_slack(net_slack, 0);#ifdef CREATE_ECHO_FILES print_sink_delays("Placement_Logic_Sink_Delays.echo");#endif /* CREATE_ECHO_FILES */#endif } width_fac = placer_opts.place_chan_width; if(placer_opts.pad_loc_type == FREE) fixed_pins = FALSE; else fixed_pins = TRUE; init_chan(width_fac, chan_width_dist); alloc_and_load_placement_structs(placer_opts.place_cost_type, placer_opts.num_regions, placer_opts.place_cost_exp, &old_region_occ_x, &old_region_occ_y, placer_opts); initial_placement(placer_opts.pad_loc_type, placer_opts.pad_loc_file); init_draw_coords((float)width_fac); /* Storing the number of pins on each type of block makes the swap routine * * slightly more efficient. */ /* Gets initial cost and loads bounding boxes. */ if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE || placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { bb_cost = comp_bb_cost(NORMAL, placer_opts.place_cost_type, placer_opts.num_regions); crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */ compute_net_pin_index_values(); num_connections = count_connections(); printf ("\nThere are %d point to point connections in this circuit\n\n", num_connections); if(placer_opts.place_algorithm == NET_TIMING_DRIVEN_PLACE) { for(inet = 0; inet < num_nets; inet++) for(ipin = 1; ipin <= net[inet].num_sinks; ipin++) timing_place_crit[inet][ipin] = 0; /*dummy crit values */ comp_td_costs(&timing_cost, &delay_cost); /*first pass gets delay_cost, which is used * in criticality computations in the next call * to comp_td_costs. */ place_delay_value = delay_cost / num_connections; /*used for computing criticalities */ load_constant_net_delay(net_delay, place_delay_value); } else place_delay_value = 0; if(placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { net_delay = point_to_point_delay_cost; /*this keeps net_delay up to date with * * *the same values that the placer is using * * *point_to_point_delay_cost is computed each* * *time that comp_td_costs is called, and is * * *also updated after any swap is accepted */ } load_timing_graph_net_delays(net_delay); d_max = load_net_slack(net_slack, 0); load_criticalities(placer_opts, net_slack, d_max, crit_exponent); outer_crit_iter_count = 1; /*now we can properly compute costs */ comp_td_costs(&timing_cost, &delay_cost); /*also puts proper values into point_to_point_delay_cost */ inverse_prev_timing_cost = 1 / timing_cost; inverse_prev_bb_cost = 1 / bb_cost; cost = 1; /*our new cost function uses normalized values of */ /*bb_cost and timing_cost, the value of cost will be reset */ /*to 1 at each temperature when *_TIMING_DRIVEN_PLACE is true */ } else { /*BOUNDING_BOX_PLACE */ cost = bb_cost = comp_bb_cost(NORMAL, placer_opts.place_cost_type,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -