cg_print.c

来自「基于4个mips核的noc设计」· C语言 代码 · 共 1,268 行 · 第 1/3 页

C
1,268
字号
		  ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s     "		  : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s     ",		  "", "", "", "", arc->count, "");	  print_name (child);	  printf ("\n");	}      else	{	  /* Regular child of parent.  */	  printf (bsd_style_output		  ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu     "		  : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu     ",		  "", "",		  arc->time / hz, arc->child_time / hz,		  arc->count, child->cg.cyc.head->ncalls);	  print_name (child);	  printf ("\n");	}    }}static voidDEFUN (print_line, (np), Sym * np){  char buf[BUFSIZ];  sprintf (buf, "[%d]", np->cg.index);  printf (bsd_style_output	  ? "%-6.6s %5.1f %7.2f %11.2f"	  : "%-6.6s %5.1f %7.2f %7.2f", buf,	  100 * (np->cg.prop.self + np->cg.prop.child) / print_time,	  np->cg.prop.self / hz, np->cg.prop.child / hz);  if ((np->ncalls + np->cg.self_calls) != 0)    {      printf (" %7lu", np->ncalls);      if (np->cg.self_calls != 0)	  printf ("+%-7lu ", np->cg.self_calls);      else	  printf (" %7.7s ", "");    }  else    {      printf (" %7.7s %7.7s ", "", "");    }  print_name (np);  printf ("\n");}/* Print dynamic call graph.  */voidDEFUN (cg_print, (timesortsym), Sym ** timesortsym){  unsigned int index;  Sym *parent;  if (print_descriptions && bsd_style_output)    bsd_callg_blurb (stdout);  print_header ();  for (index = 0; index < symtab.len + num_cycles; ++index)    {      parent = timesortsym[index];      if ((ignore_zeros && parent->ncalls == 0	   && parent->cg.self_calls == 0 && parent->cg.prop.self == 0	   && parent->cg.prop.child == 0)	  || !parent->cg.print_flag	  || (line_granularity && ! parent->is_func))	continue;      if (!parent->name && parent->cg.cyc.num != 0)	{	  /* Cycle header.  
*/	  print_cycle (parent);	  print_members (parent);	}      else	{	  print_parents (parent);	  print_line (parent);	  print_children (parent);	}      if (bsd_style_output)	printf ("\n");      printf ("-----------------------------------------------\n");      if (bsd_style_output)	printf ("\n");    }  free (timesortsym);  if (print_descriptions && !bsd_style_output)    fsf_callg_blurb (stdout);}static intDEFUN (cmp_name, (left, right), const PTR left AND const PTR right){  const Sym **npp1 = (const Sym **) left;  const Sym **npp2 = (const Sym **) right;  return strcmp ((*npp1)->name, (*npp2)->name);}voidDEFUN_VOID (cg_print_index){  unsigned int index;  unsigned int nnames, todo, i, j;  int col, starting_col;  Sym **name_sorted_syms, *sym;  const char *filename;  char buf[20];  int column_width = (output_width - 1) / 3;	/* Don't write in last col!  */  /* Now, sort regular function name     alphabetically to create an index.  */  name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));  for (index = 0, nnames = 0; index < symtab.len; index++)    {      if (ignore_zeros && symtab.base[index].ncalls == 0	  && symtab.base[index].hist.time == 0)	continue;      name_sorted_syms[nnames++] = &symtab.base[index];    }  qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);  for (index = 1, todo = nnames; index <= num_cycles; index++)    name_sorted_syms[todo++] = &cycle_header[index];  printf ("\f\n");  printf (_("Index by function name\n\n"));  index = (todo + 2) / 3;  for (i = 0; i < index; i++)    {      col = 0;      starting_col = 0;      for (j = i; j < todo; j += index)	{	  sym = name_sorted_syms[j];	  if (sym->cg.print_flag)	    sprintf (buf, "[%d]", sym->cg.index);	  else	    sprintf (buf, "(%d)", sym->cg.index);	  if (j < nnames)	    {	      if (bsd_style_output)		{		  printf ("%6.6s %-19.19s", buf, sym->name);		}	      else		{		  col += strlen (buf);		  for (; col < starting_col + 5; ++col)		    putchar (' ');		  printf (" %s ", 
buf);		  col += print_name_only (sym);		  if (!line_granularity && sym->is_static && sym->file)		    {		      filename = sym->file->name;		      if (!print_path)			{			  filename = strrchr (filename, '/');			  if (filename)			    ++filename;			  else			    filename = sym->file->name;			}		      printf (" (%s)", filename);		      col += strlen (filename) + 3;		    }		}	    }	  else	    {	      if (bsd_style_output)		{		  printf ("%6.6s ", buf);		  sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);		  printf ("%-19.19s", buf);		}	      else		{		  col += strlen (buf);		  for (; col < starting_col + 5; ++col)		    putchar (' ');		  printf (" %s ", buf);		  sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);		  printf ("%s", buf);		  col += strlen (buf);		}	    }	  starting_col += column_width;	}      printf ("\n");    }  free (name_sorted_syms);}/* Compare two arcs based on their usage counts.   We want to sort in descending order.  */static intDEFUN (cmp_arc_count, (left, right), const PTR left AND const PTR right){  const Arc **npp1 = (const Arc **) left;  const Arc **npp2 = (const Arc **) right;  if ((*npp1)->count > (*npp2)->count)    return -1;  else if ((*npp1)->count < (*npp2)->count)    return 1;  else    return 0;}/* Compare two funtions based on their usage counts.   We want to sort in descending order.  */static intDEFUN (cmp_fun_nuses, (left, right), const PTR left AND const PTR right){  const Sym **npp1 = (const Sym **) left;  const Sym **npp2 = (const Sym **) right;  if ((*npp1)->nuses > (*npp2)->nuses)    return -1;  else if ((*npp1)->nuses < (*npp2)->nuses)    return 1;  else    return 0;}/* Print a suggested function ordering based on the profiling data.   We perform 4 major steps when ordering functions:	* Group unused functions together and place them at the	end of the function order.	* Search the highest use arcs (those which account for 90% of	the total arc count) for functions which have several parents.	
	Group those with the most call sites together (currently the
	top 1.25% which have at least five different call sites).

	These are emitted at the start of the function order.

	* Use a greedy placement algorithm to place functions which
	occur in the top 99% of the arcs in the profile.  Some provisions
	are made to handle high usage arcs where the parent and/or
	child has already been placed.

	* Run the same greedy placement algorithm on the remaining
	arcs to place the leftover functions.

   The various "magic numbers" should (one day) be tuneable by command
   line options.  They were arrived at by benchmarking a few applications
   with various values to see which values produced better overall function
   orderings.

   Of course, profiling errors, machine limitations (PA long calls), and
   poor cutoff values for the placement algorithm may limit the usefulness
   of the resulting function order.  Improvements would be greatly appreciated.

   Suggestions:

	* Place the functions with many callers near the middle of the
	list to reduce long calls.

	* Propagate arc usage changes as functions are placed.  I.e., if
	func1 and func2 are placed together, arcs to/from those functions
	to the same parent/child should be combined, then re-sort the
	arcs to choose the next one.

	* Implement some global positioning algorithm to place the
	chains made by the greedy local positioning algorithm.  Probably
	by examining arcs which haven't been placed yet to tie two
	chains together.

	* Take a function's size and time into account in the algorithm;
	size in particular is important on the PA (long calls).  Placing
	many small functions onto their own page may be wise.

	* Use better profiling information; many published algorithms
	are based on call sequences through time, rather than just
	arc counts.

	* Procedure cloning could improve performance when a small number
	of arcs account for most of the calls to a particular function.

* Use relocation information to avoid moving unused functions	completely out of the code stream; this would avoid severe lossage	when the profile data bears little resemblance to actual runs.	* Propagation of arc usages should also improve .o link line	ordering which shares the same arc placement algorithm with	the function ordering code (in fact it is a degenerate case	of function ordering).  */voidDEFUN_VOID (cg_print_function_ordering){  unsigned long index, used, unused, scratch_index;  unsigned long  unplaced_arc_count, high_arc_count, scratch_arc_count;#ifdef __GNUC__  unsigned long long total_arcs, tmp_arcs_count;#else  unsigned long total_arcs, tmp_arcs_count;#endif  Sym **unused_syms, **used_syms, **scratch_syms;  Arc **unplaced_arcs, **high_arcs, **scratch_arcs;  index = 0;  used = 0;  unused = 0;  scratch_index = 0;  unplaced_arc_count = 0;  high_arc_count = 0;  scratch_arc_count = 0;  /* First group all the unused functions together.  */  unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));  used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));  scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));  high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));  scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));  unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));  /* Walk through all the functions; mark those which are never     called as placed (we'll emit them as a group later).  */  for (index = 0, used = 0, unused = 0; index < symtab.len; index++)    {      if (symtab.base[index].ncalls == 0)	{	  /* Filter out gprof generated names.  
*/	  if (strcmp (symtab.base[index].name, "<locore>")	      && strcmp (symtab.base[index].name, "<hicore>"))	    {	      unused_syms[unused++] = &symtab.base[index];	      symtab.base[index].has_been_placed = 1;	    }	}      else	{	  used_syms[used++] = &symtab.base[index];	  symtab.base[index].has_been_placed = 0;	  symtab.base[index].next = 0;	  symtab.base[index].prev = 0;	  symtab.base[index].nuses = 0;	}    }  /* Sort the arcs from most used to least used.  */  qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);  /* Compute the total arc count.  Also mark arcs as unplaced.     Note we don't compensate for overflow if that happens!     Overflow is much less likely when this file is compiled     with GCC as it can double-wide integers via long long.  */  total_arcs = 0;  for (index = 0; index < numarcs; index++)    {      total_arcs += arcs[index]->count;      arcs[index]->has_been_placed = 0;    }  /* We want to pull out those functions which are referenced     by many highly used arcs and emit them as a group.  This     could probably use some tuning.  */  tmp_arcs_count = 0;  for (index = 0; index < numarcs; index++)    {      tmp_arcs_count += arcs[index]->count;      /* Count how many times each parent and child are used up	 to our threshhold of arcs (90%).  */      if ((double)tmp_arcs_count / (double)total_arcs > 0.90)	break;      arcs[index]->child->nuses++;    }  /* Now sort a temporary symbol table based on the number of     times each function was used in the highest used arcs.  */  memcpy (scratch_syms, used_syms, used * sizeof (Sym *));  qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);  /* Now pick out those symbols we're going to emit as     a group.  We take up to 1.25% of the used symbols.  */  for (index = 0; index < used / 80; index++)    {      Sym *sym = scratch_syms[index];

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?