📄 profiler.c
字号:
/* * Contribution of Mtve. */#include <fcntl.h>#include <unistd.h>#include <stdio.h>#include <errno.h>#include <string.h>#define THREADED 1#if THREADED#include <pthread.h>#endif/* * Call to profiling routine .mcount is automatically inserted by gcc -p. * * However, standard .mcount from (g)libc is not working well for me * with optimized (-O3 -fomit-frame-pointer) threaded code, * at least because it doesn't save all registers. * * So here is another square wheel. It works only on IA32 (i386). * * Theory: * .mcount is called like this * * 08048479 <some_func>: * 8048679: 55 push %ebp # can be * 804847a: 89 e5 movl %esp,%ebp # absent * 804847c: 83 ec 1c subl $28,%esp * 804847f: 55 pushl %ebp * 8048480: 57 pushl %edi * 8048481: 56 pushl %esi * 8048482: 53 pushl %ebx * 8048483: e8 94 fe ff ff call .mcount * 08048488 <some_func_x> * * So in the entrance of .mcount we have in stack * * %esp -> some_func_x (dword), where mcount should return * saved registers (4 dwords in example) * stack frame (28 bytes in example) * some_func_callee (dword) * * We will: * - check if the code of some_func matches this pattern * - find some_func address and depth of stack * - modify stack by replacing some_func_callee address to ours * - collect statistic *//* better to be a prime number */#define FUNCSMAX 32749#define CSTACKSIZE 256#if THREADED/* better to be a prime number */#define THREADSMAX 37#else#define THREADSMAX 1#endifstatic struct { int addr; int enters; int exits; int aways; int rets; long long timetotal; long long timeoutside;} arr[FUNCSMAX + 1];static struct {#if THREADED pthread_t tid;#endif int depth; int ret[CSTACKSIZE]; int func[CSTACKSIZE];} cstack[THREADSMAX];#define core() (*(char *)0 = 0)#if __GNUC__ > 2#define NOPROF __attribute__ ((no_instrument_function))/* forward declaration of all functions */static inline long long curtime() NOPROF;static inline int findaddr() NOPROF;static inline int findthread() NOPROF;static void stat_enter() NOPROF;static void stat_exit() NOPROF;static void stat_away() NOPROF;static void stat_ret() NOPROF;void profiler__asm_enter_stub() NOPROF;void profiler__asm_exit_stub() NOPROF;void profiler__c_enter() NOPROF;void profiler__c_exit() NOPROF;void profiler_savestat() NOPROF;#else#warning be sure to compile profiler.c WITHOUT -p flag#endifstatic inline long long curtime(void){ long long t; asm volatile(".byte 15;.byte 49" : "=A"(t)); /* RDTSC */ return t;}static inline int findaddr(int addr){ int i,j; i = j = addr % FUNCSMAX; do { if (arr[i].addr == addr) { return i; } else if (arr[i].addr == 0) { arr[i].addr = addr; return i; } i = (i+1) % FUNCSMAX; } while (i != j); core(); /* increase FUNCSMAX */ return(FUNCSMAX);}static inline int findthread(void){#if THREADED int i,j; pthread_t k = pthread_self(); i = j = (int)k % THREADSMAX; do { if (cstack[i].tid == k) { return i; } else if (cstack[i].tid == 0) { cstack[i].tid = k; return i; } i = (i+1) % THREADSMAX; } while (i != j); core(); /* increase THREADSMAX */#endif return(0);}static void stat_enter(int slot){ arr[slot].enters++; arr[slot].timetotal -= curtime();}static void stat_exit(int slot){ arr[slot].exits++; arr[slot].timetotal += curtime();}static void stat_away(int slot){ arr[slot].aways++; arr[slot].timeoutside -= curtime();}static void stat_ret(int slot){ arr[slot].rets++; arr[slot].timeoutside += curtime();}void profiler__asm_enter(void);void profiler__asm_exit(void);#define A __asm__/* * that't really weird but compatible with both gcc2 and gcc3 * * things i don't want to care of * - what size on stack pusha/pops use * - what current function framing is */void profiler__asm_enter_stub(void){ A(" .globl .mcount "); A(" .globl profiler__asm_enter "); A("profiler__asm_enter: "); A(".mcount: "); A(" pushl %eax "); /* save %eax */ A(" movl %esp,%eax "); /* %eax = old %esp - 4 */ A(" pusha "); /* save all registers */ A(" push %eax "); /* push parameter to stack */ A(" call profiler__c_enter "); /* call c routine */ A(" pop %eax "); /* clear parameter from stack */ A(" popa "); /* restore all registers */ A(" pop %eax "); /* restore %eax */ A(" ret "); /* return */}void profiler__asm_exit_stub(void){ A("profiler__asm_exit: "); A(" pushl $0xdeadbeaf "); /* placeholder to return address */ A(" pushl %eax "); /* save %eax */ A(" movl %esp,%eax "); /* %eax = addr of placeholder - 4 */ A(" pusha "); /* save all registers */ A(" pushl %eax "); /* push parameter to stack */ A(" call profiler__c_exit "); /* call C routine */ A(" popl %eax "); /* clear parameter from stack */ A(" popa "); /* restore all registers */ A(" popl %eax "); /* restore %eax */ A(" ret "); /* return */}void profiler__c_enter(int *sp_1){ unsigned char *pc; int stdepth = 2, i, thr, slot, gcc2 = 1; if (sizeof(int) != 4) core(); /* sizeof int != 4 */ if (sizeof(long long) != 8) core(); /* sizeof long long != 8 */ if (sizeof(void *) != 4) core(); /* sizeof pointer != 4 */ pc = (char *)(sp_1[1]); pc -= 5; if (*pc != 0xe8) /* call <relative> */ core(); /* called not by 0xe8 */ if ((int)pc + 5 + *(int *)(pc+1) != (int)profiler__asm_enter) core(); /* call points not to .mcount */ if (pc[-1] == 0x53) /* push %ebx */ pc--, stdepth++; if (pc[-1] == 0x56) /* push %esi */ pc--, stdepth++; if (pc[-1] == 0x57) /* push %edi */ pc--, stdepth++; if (pc[-1] == 0x55) /* push %ebp */ pc--, stdepth++; if (pc[-6]==0x81 && pc[-5]==0xec && pc[-2]==0 && pc[-1]==0) { /* sub <dword>,%esp */ stdepth += *(int *)(pc - 4)/4; pc -= 6; } else if (pc[-3]==0x83 && pc[-2]==0xec && pc[-1]%4==0) { /* sub <byte>,%esp */ stdepth += pc[-1]/4; pc -= 3; } else gcc2 = 0; while (pc[-1] >= 0x50 && pc[-1] <= 0x57) /* push %e[reg] */ pc--, stdepth++; /* "pushl %ebp; movl %esp,%ebp;" */ if (pc[-3]==0x55 && pc[-2]==0x89 && pc[-1]==0xe5) { stdepth++; pc -= 3; } else if(!gcc2) core(); /* unknown prologue, examine x/10i pc-10 */ /* * Now we know that it's standard prologue, so we modify the stack */ thr = findthread(); slot = findaddr((int)pc); i = cstack[thr].depth++; if(i >= CSTACKSIZE) core(); /* call stack overflow */ cstack[thr].func[i] = slot; cstack[thr].ret[i] = sp_1[stdepth]; sp_1[stdepth] = (int)profiler__asm_exit; if (i > 0) stat_away(cstack[thr].func[i - 1]); stat_enter(slot);}void profiler__c_exit(int *sp){ int i, thr; thr = findthread(); i = --cstack[thr].depth; if (i < 0) core(); /* call stack underflow */ sp[1] = cstack[thr].ret[i]; stat_exit(cstack[thr].func[i]); if (i > 0) stat_ret(cstack[thr].func[i - 1]);#if THREADED else cstack[thr].tid = 0; /* free this stack */#endif}#ifndef PROFILE_FILE#error define PROFILE_FILE where to save statistic#endifstatic void mywrite(int fd,char *str){ int len, i; for (len = strlen(str); len > 0; str += i, len -= i) if ((i = write(fd,str,len)) < 0) return;}void profiler_savestat(void){ int i, fd; char buf[1024]; fd = open(PROFILE_FILE,O_CREAT | O_TRUNC | O_WRONLY,0666); if (fd < 0) { mywrite(2,"open " PROFILE_FILE " failed - "); mywrite(2,strerror(errno)); mywrite(2,"\n"); return; } snprintf(buf,sizeof(buf),"\nProfiling statistic %s at time %lld:\n" "\n%8s %10s %10s %10s %10s %20s %20s\n",PROFILE_FILE,curtime(), "Function","Enters","Exits","Aways","Returns", "Cycles_Total","Cycles_Inside"); mywrite(fd,buf); for (i = 0; i < FUNCSMAX; i++) if (arr[i].addr) { snprintf(buf,sizeof(buf),"%08x %10d %10d %10d %10d %20lld %20lld\n", arr[i].addr,arr[i].enters,arr[i].exits, arr[i].aways,arr[i].rets, arr[i].timetotal + (arr[i].enters-arr[i].exits) * curtime(), arr[i].timetotal - arr[i].timeoutside + curtime() * (arr[i].enters-arr[i].exits-arr[i].aways+arr[i].rets)); mywrite(fd,buf); } close(fd);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -