cg_print.c
来自「基于4个mips核的noc设计」· C语言 代码 · 共 1,268 行 · 第 1/3 页
C
1,268 行
? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s " : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ", "", "", "", "", arc->count, ""); print_name (child); printf ("\n"); } else { /* Regular child of parent. */ printf (bsd_style_output ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu " : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ", "", "", arc->time / hz, arc->child_time / hz, arc->count, child->cg.cyc.head->ncalls); print_name (child); printf ("\n"); } }}static voidDEFUN (print_line, (np), Sym * np){ char buf[BUFSIZ]; sprintf (buf, "[%d]", np->cg.index); printf (bsd_style_output ? "%-6.6s %5.1f %7.2f %11.2f" : "%-6.6s %5.1f %7.2f %7.2f", buf, 100 * (np->cg.prop.self + np->cg.prop.child) / print_time, np->cg.prop.self / hz, np->cg.prop.child / hz); if ((np->ncalls + np->cg.self_calls) != 0) { printf (" %7lu", np->ncalls); if (np->cg.self_calls != 0) printf ("+%-7lu ", np->cg.self_calls); else printf (" %7.7s ", ""); } else { printf (" %7.7s %7.7s ", "", ""); } print_name (np); printf ("\n");}/* Print dynamic call graph. */voidDEFUN (cg_print, (timesortsym), Sym ** timesortsym){ unsigned int index; Sym *parent; if (print_descriptions && bsd_style_output) bsd_callg_blurb (stdout); print_header (); for (index = 0; index < symtab.len + num_cycles; ++index) { parent = timesortsym[index]; if ((ignore_zeros && parent->ncalls == 0 && parent->cg.self_calls == 0 && parent->cg.prop.self == 0 && parent->cg.prop.child == 0) || !parent->cg.print_flag || (line_granularity && ! parent->is_func)) continue; if (!parent->name && parent->cg.cyc.num != 0) { /* Cycle header. */ print_cycle (parent); print_members (parent); } else { print_parents (parent); print_line (parent); print_children (parent); } if (bsd_style_output) printf ("\n"); printf ("-----------------------------------------------\n"); if (bsd_style_output) printf ("\n"); } free (timesortsym); if (print_descriptions && !bsd_style_output) fsf_callg_blurb (stdout);}static intDEFUN (cmp_name, (left, right), const PTR left AND const PTR right){ const Sym **npp1 = (const Sym **) left; const Sym **npp2 = (const Sym **) right; return strcmp ((*npp1)->name, (*npp2)->name);}voidDEFUN_VOID (cg_print_index){ unsigned int index; unsigned int nnames, todo, i, j; int col, starting_col; Sym **name_sorted_syms, *sym; const char *filename; char buf[20]; int column_width = (output_width - 1) / 3; /* Don't write in last col! */ /* Now, sort regular function name alphabetically to create an index. */ name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *)); for (index = 0, nnames = 0; index < symtab.len; index++) { if (ignore_zeros && symtab.base[index].ncalls == 0 && symtab.base[index].hist.time == 0) continue; name_sorted_syms[nnames++] = &symtab.base[index]; } qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name); for (index = 1, todo = nnames; index <= num_cycles; index++) name_sorted_syms[todo++] = &cycle_header[index]; printf ("\f\n"); printf (_("Index by function name\n\n")); index = (todo + 2) / 3; for (i = 0; i < index; i++) { col = 0; starting_col = 0; for (j = i; j < todo; j += index) { sym = name_sorted_syms[j]; if (sym->cg.print_flag) sprintf (buf, "[%d]", sym->cg.index); else sprintf (buf, "(%d)", sym->cg.index); if (j < nnames) { if (bsd_style_output) { printf ("%6.6s %-19.19s", buf, sym->name); } else { col += strlen (buf); for (; col < starting_col + 5; ++col) putchar (' '); printf (" %s ", buf); col += print_name_only (sym); if (!line_granularity && sym->is_static && sym->file) { filename = sym->file->name; if (!print_path) { filename = strrchr (filename, '/'); if (filename) ++filename; else filename = sym->file->name; } printf (" (%s)", filename); col += strlen (filename) + 3; } } } else { if (bsd_style_output) { printf ("%6.6s ", buf); sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num); printf ("%-19.19s", buf); } else { col += strlen (buf); for (; col < starting_col + 5; ++col) putchar (' '); printf (" %s ", buf); sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num); printf ("%s", buf); col += strlen (buf); } } starting_col += column_width; } printf ("\n"); } free (name_sorted_syms);}/* Compare two arcs based on their usage counts. We want to sort in descending order. */static intDEFUN (cmp_arc_count, (left, right), const PTR left AND const PTR right){ const Arc **npp1 = (const Arc **) left; const Arc **npp2 = (const Arc **) right; if ((*npp1)->count > (*npp2)->count) return -1; else if ((*npp1)->count < (*npp2)->count) return 1; else return 0;}/* Compare two funtions based on their usage counts. We want to sort in descending order. */static intDEFUN (cmp_fun_nuses, (left, right), const PTR left AND const PTR right){ const Sym **npp1 = (const Sym **) left; const Sym **npp2 = (const Sym **) right; if ((*npp1)->nuses > (*npp2)->nuses) return -1; else if ((*npp1)->nuses < (*npp2)->nuses) return 1; else return 0;}/* Print a suggested function ordering based on the profiling data. We perform 4 major steps when ordering functions: * Group unused functions together and place them at the end of the function order. * Search the highest use arcs (those which account for 90% of the total arc count) for functions which have several parents. Group those with the most call sites together (currently the top 1.25% which have at least five different call sites). These are emitted at the start of the function order. * Use a greedy placement algorithm to place functions which occur in the top 99% of the arcs in the profile. Some provisions are made to handle high usage arcs where the parent and/or child has already been placed. * Run the same greedy placement algorithm on the remaining arcs to place the leftover functions. The various "magic numbers" should (one day) be tuneable by command line options. They were arrived at by benchmarking a few applications with various values to see which values produced better overall function orderings. Of course, profiling errors, machine limitations (PA long calls), and poor cutoff values for the placement algorithm may limit the usefullness of the resulting function order. Improvements would be greatly appreciated. Suggestions: * Place the functions with many callers near the middle of the list to reduce long calls. * Propagate arc usage changes as functions are placed. Ie if func1 and func2 are placed together, arcs to/from those arcs to the same parent/child should be combined, then resort the arcs to choose the next one. * Implement some global positioning algorithm to place the chains made by the greedy local positioning algorithm. Probably by examining arcs which haven't been placed yet to tie two chains together. * Take a function's size and time into account in the algorithm; size in particular is important on the PA (long calls). Placing many small functions onto their own page may be wise. * Use better profiling information; many published algorithms are based on call sequences through time, rather than just arc counts. * Prodecure cloning could improve performance when a small number of arcs account for most of the calls to a particular function. * Use relocation information to avoid moving unused functions completely out of the code stream; this would avoid severe lossage when the profile data bears little resemblance to actual runs. * Propagation of arc usages should also improve .o link line ordering which shares the same arc placement algorithm with the function ordering code (in fact it is a degenerate case of function ordering). */voidDEFUN_VOID (cg_print_function_ordering){ unsigned long index, used, unused, scratch_index; unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count;#ifdef __GNUC__ unsigned long long total_arcs, tmp_arcs_count;#else unsigned long total_arcs, tmp_arcs_count;#endif Sym **unused_syms, **used_syms, **scratch_syms; Arc **unplaced_arcs, **high_arcs, **scratch_arcs; index = 0; used = 0; unused = 0; scratch_index = 0; unplaced_arc_count = 0; high_arc_count = 0; scratch_arc_count = 0; /* First group all the unused functions together. */ unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *)); /* Walk through all the functions; mark those which are never called as placed (we'll emit them as a group later). */ for (index = 0, used = 0, unused = 0; index < symtab.len; index++) { if (symtab.base[index].ncalls == 0) { /* Filter out gprof generated names. */ if (strcmp (symtab.base[index].name, "<locore>") && strcmp (symtab.base[index].name, "<hicore>")) { unused_syms[unused++] = &symtab.base[index]; symtab.base[index].has_been_placed = 1; } } else { used_syms[used++] = &symtab.base[index]; symtab.base[index].has_been_placed = 0; symtab.base[index].next = 0; symtab.base[index].prev = 0; symtab.base[index].nuses = 0; } } /* Sort the arcs from most used to least used. */ qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count); /* Compute the total arc count. Also mark arcs as unplaced. Note we don't compensate for overflow if that happens! Overflow is much less likely when this file is compiled with GCC as it can double-wide integers via long long. */ total_arcs = 0; for (index = 0; index < numarcs; index++) { total_arcs += arcs[index]->count; arcs[index]->has_been_placed = 0; } /* We want to pull out those functions which are referenced by many highly used arcs and emit them as a group. This could probably use some tuning. */ tmp_arcs_count = 0; for (index = 0; index < numarcs; index++) { tmp_arcs_count += arcs[index]->count; /* Count how many times each parent and child are used up to our threshhold of arcs (90%). */ if ((double)tmp_arcs_count / (double)total_arcs > 0.90) break; arcs[index]->child->nuses++; } /* Now sort a temporary symbol table based on the number of times each function was used in the highest used arcs. */ memcpy (scratch_syms, used_syms, used * sizeof (Sym *)); qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses); /* Now pick out those symbols we're going to emit as a group. We take up to 1.25% of the used symbols. */ for (index = 0; index < used / 80; index++) { Sym *sym = scratch_syms[index];
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?