📄 fbt.c
字号:
/* * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only. * See the file usr/src/LICENSING.NOTICE in this distribution or * http://www.opensolaris.org/license/ for details. */#pragma ident "@(#)fbt.c 1.11 04/12/18 SMI"#include <sys/modctl.h>#include <sys/dtrace.h>#include <sys/kobj.h>#include <sys/stat.h>#include <sys/ddi.h>#include <sys/sunddi.h>#include <sys/conf.h>#define FBT_PUSHL_EBP 0x55#define FBT_MOVL_ESP_EBP0_V0 0x8b#define FBT_MOVL_ESP_EBP1_V0 0xec#define FBT_MOVL_ESP_EBP0_V1 0x89#define FBT_MOVL_ESP_EBP1_V1 0xe5#define FBT_REX_RSP_RBP 0x48#define FBT_POPL_EBP 0x5d#define FBT_RET 0xc3#define FBT_RET_IMM16 0xc2#define FBT_LEAVE 0xc9#ifdef __amd64#define FBT_PATCHVAL 0xcc#else#define FBT_PATCHVAL 0xf0#endif#define FBT_ENTRY "entry"#define FBT_RETURN "return"#define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)#define FBT_PROBETAB_SIZE 0x8000 /* 32k entries -- 128K total */typedef struct fbt_probe { struct fbt_probe *fbtp_hashnext; uint8_t *fbtp_patchpoint; int8_t fbtp_rval; uint8_t fbtp_patchval; uint8_t fbtp_savedval; uintptr_t fbtp_roffset; dtrace_id_t fbtp_id; char *fbtp_name; struct modctl *fbtp_ctl; int fbtp_loadcnt; int fbtp_symndx; int fbtp_primary; struct fbt_probe *fbtp_next;} fbt_probe_t;static dev_info_t *fbt_devi;static dtrace_provider_id_t fbt_id;static fbt_probe_t **fbt_probetab;static int fbt_probetab_size;static int fbt_probetab_mask;static int fbt_verbose = 0;static intfbt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t rval){ uintptr_t stack0, stack1, stack2, stack3, stack4; fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)]; for (; fbt != NULL; fbt = fbt->fbtp_hashnext) { if ((uintptr_t)fbt->fbtp_patchpoint == addr) { if (fbt->fbtp_roffset == 0) { /* * When accessing the arguments on the stack, * we must protect against accessing beyond * the stack. We can safely set NOFAULT here * -- we know that interrupts are already * disabled. */ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); CPU->cpu_dtrace_caller = stack[0]; stack0 = stack[1]; stack1 = stack[2]; stack2 = stack[3]; stack3 = stack[4]; stack4 = stack[5]; DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); dtrace_probe(fbt->fbtp_id, stack0, stack1, stack2, stack3, stack4); CPU->cpu_dtrace_caller = NULL; } else {#ifdef __amd64 /* * On amd64, we instrument the ret, not the * leave. We therefore need to set the caller * to assure that the top frame of a stack() * action is correct. */ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); CPU->cpu_dtrace_caller = stack[0]; DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);#endif dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, rval, 0, 0, 0); CPU->cpu_dtrace_caller = NULL; } return (fbt->fbtp_rval); } } return (0);}/*ARGSUSED*/static voidfbt_provide_module(void *arg, struct modctl *ctl){ struct module *mp = ctl->mod_mp; char *str = mp->strings; int nsyms = mp->nsyms; Shdr *symhdr = mp->symhdr; char *modname = ctl->mod_modname; char *name; fbt_probe_t *fbt, *retfbt; size_t symsize; int i, size; /* * Employees of dtrace and their families are ineligible. Void * where prohibited. */ if (strcmp(modname, "dtrace") == 0) return; if (ctl->mod_requisites != NULL) { struct modctl_list *list; list = (struct modctl_list *)ctl->mod_requisites; for (; list != NULL; list = list->modl_next) { if (strcmp(list->modl_modp->mod_modname, "dtrace") == 0) return; } } /* * KMDB is ineligible for instrumentation -- it may execute in * any context, including probe context. */ if (strcmp(modname, "kmdbmod") == 0) return; if (str == NULL || symhdr == NULL || symhdr->sh_addr == NULL) { /* * If this module doesn't (yet) have its string or symbol * table allocated, clear out. */ return; } symsize = symhdr->sh_entsize; if (mp->fbt_nentries) { /* * This module has some FBT entries allocated; we're afraid * to screw with it. */ return; } for (i = 1; i < nsyms; i++) { uint8_t *instr, *limit; Sym *sym = (Sym *)(symhdr->sh_addr + i * symsize); if (ELF_ST_TYPE(sym->st_info) != STT_FUNC) continue; /* * Weak symbols are not candidates. This could be made to * work (where weak functions and their underlying function * appear as two disjoint probes), but it's not simple. */ if (ELF_ST_BIND(sym->st_info) == STB_WEAK) continue; name = str + sym->st_name; if (strstr(name, "dtrace_") == name && strstr(name, "dtrace_safe_") != name) { /* * Anything beginning with "dtrace_" may be called * from probe context unless it explitly indicates * that it won't be called from probe context by * using the prefix "dtrace_safe_". */ continue; } if (strstr(name, "kdi_") == name) { /* * Anything beginning with "kdi_" is a part of the * kernel debugger interface and may be called in * arbitrary context -- including probe context. */ continue; } /* * Due to 4524008, _init and _fini may have a bloated st_size. * While this bug was fixed quite some time ago, old drivers * may be lurking. We need to develop a better solution to * this problem, such that correct _init and _fini functions * (the vast majority) may be correctly traced. One solution * may be to scan through the entire symbol table to see if * any symbol overlaps with _init. If none does, set a bit in * the module structure that this module has correct _init and * _fini sizes. This will cause some pain the first time a * module is scanned, but at least it would be O(N) instead of * O(N log N)... */ if (strcmp(name, "_init") == 0) continue; if (strcmp(name, "_fini") == 0) continue; /* * In order to be eligible, the function must begin with the * following sequence: * * pushl %esp * movl %esp, %ebp * * Note that there are two variants of encodings that generate * the movl; we must check for both. For 64-bit, we would * normally insist that a function begin with the following * sequence: * * pushq %rbp * movq %rsp, %rbp * * However, the compiler for 64-bit often splits these two * instructions -- and the first instruction in the function * is often not the pushq. As a result, on 64-bit we look * for any "pushq %rbp" in the function and we instrument * this with a breakpoint instruction. */ instr = (uint8_t *)sym->st_value; limit = (uint8_t *)(sym->st_value + sym->st_size);#ifdef __amd64 while (instr < limit) { if (*instr == FBT_PUSHL_EBP) break; if ((size = dtrace_instr_size(instr)) <= 0) break; instr += size; } if (instr >= limit || *instr != FBT_PUSHL_EBP) { /* * We either don't save the frame pointer in this * function, or we ran into some disassembly * screw-up. Either way, we bail. */ continue; }#else if (instr[0] != FBT_PUSHL_EBP) continue; if (!(instr[1] == FBT_MOVL_ESP_EBP0_V0 && instr[2] == FBT_MOVL_ESP_EBP1_V0) && !(instr[1] == FBT_MOVL_ESP_EBP0_V1 && instr[2] == FBT_MOVL_ESP_EBP1_V1)) continue;#endif fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP); fbt->fbtp_name = name; fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, name, FBT_ENTRY, 3, fbt); fbt->fbtp_patchpoint = instr; fbt->fbtp_ctl = ctl; fbt->fbtp_loadcnt = ctl->mod_loadcnt; fbt->fbtp_rval = DTRACE_INVOP_PUSHL_EBP; fbt->fbtp_savedval = *instr; fbt->fbtp_patchval = FBT_PATCHVAL; fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)]; fbt->fbtp_symndx = i; fbt_probetab[FBT_ADDR2NDX(instr)] = fbt; mp->fbt_nentries++; retfbt = NULL;again: if (instr >= limit) continue; /* * If this disassembly fails, then we've likely walked off into * a jump table or some other unsuitable area. Bail out of the * disassembly now. */ if ((size = dtrace_instr_size(instr)) <= 0) continue;#ifdef __amd64 /* * We only instrument "ret" on amd64 -- we don't yet instrument * ret imm16, largely because the compiler doesn't seem to * (yet) emit them in the kernel... */ if (*instr != FBT_RET) { instr += size; goto again; }#else if (!(size == 1 && (*instr == FBT_POPL_EBP || *instr == FBT_LEAVE) && (*(instr + 1) == FBT_RET || *(instr + 1) == FBT_RET_IMM16))) { instr += size; goto again; }#endif /* * We have a winner! */ fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP); fbt->fbtp_name = name; if (retfbt == NULL) { fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, name, FBT_RETURN, 3, fbt); } else { retfbt->fbtp_next = fbt; fbt->fbtp_id = retfbt->fbtp_id; } retfbt = fbt; fbt->fbtp_patchpoint = instr; fbt->fbtp_ctl = ctl; fbt->fbtp_loadcnt = ctl->mod_loadcnt;#ifndef __amd64 if (*instr == FBT_POPL_EBP) { fbt->fbtp_rval = DTRACE_INVOP_POPL_EBP; } else { ASSERT(*instr == FBT_LEAVE); fbt->fbtp_rval = DTRACE_INVOP_LEAVE; } fbt->fbtp_roffset = (uintptr_t)(instr - (uint8_t *)sym->st_value) + 1;#else ASSERT(*instr == FBT_RET); fbt->fbtp_rval = DTRACE_INVOP_RET; fbt->fbtp_roffset = (uintptr_t)(instr - (uint8_t *)sym->st_value);#endif fbt->fbtp_savedval = *instr; fbt->fbtp_patchval = FBT_PATCHVAL; fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)]; fbt->fbtp_symndx = i; fbt_probetab[FBT_ADDR2NDX(instr)] = fbt; mp->fbt_nentries++; instr += size; goto again; }}/*ARGSUSED*/static voidfbt_destroy(void *arg, dtrace_id_t id, void *parg){ fbt_probe_t *fbt = parg, *next, *hash, *last; struct modctl *ctl = fbt->fbtp_ctl;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -