📄 dtrace.c
Font size:
* First, check to see if the address is in scratch space... */

	/*
	 * NOTE(review): this is the tail of an address-validation routine
	 * whose opening (signature; declarations of mstate, vstate, addr,
	 * sz, a, s, i) lies before this excerpt -- the code below is
	 * unchanged from the original.
	 *
	 * The `addr - a < s` tests rely on unsigned wraparound: if addr < a,
	 * the subtraction wraps to a huge value and the comparison fails, so
	 * a single compare validates the lower and upper bound of the range
	 * start at once.
	 */
	a = mstate->dtms_scratch_base;
	s = mstate->dtms_scratch_size;

	if (addr - a < s && addr + sz <= a + s)
		return (1);

	/*
	 * Now check the clause-local variables for this CPU...
	 */
	if (vstate->dtvs_locals != NULL) {
		a = (uintptr_t)vstate->dtvs_locals[CPU->cpu_id];
		s = vstate->dtvs_nlocals * sizeof (vstate->dtvs_locals[0][0]);

		if (addr - a < s && addr + sz <= a + s)
			return (1);
	}

	/*
	 * And any dynamic variables...  (This includes both the thread-local
	 * variables and any global dynamically-allocated variables.)
	 */
	a = (uintptr_t)vstate->dtvs_dynvars.dtds_base;
	s = vstate->dtvs_dynvars.dtds_size;

	if (addr - a < s && addr + sz <= a + s)
		return (1);

	/*
	 * Finally, search the statically-allocated global variables.  This is
	 * the most time-consuming check, so we perform it last.
	 */
	for (i = 0; i < vstate->dtvs_nglobals; i++) {
		dtrace_globvar_t *glob = vstate->dtvs_globals[i];

		if (glob == NULL)
			continue;

		/*
		 * Only by-reference globals own a backing data buffer that a
		 * store could legitimately target.
		 */
		if (glob->dtgv_var.dtdv_type.dtdt_flags & DIF_TF_BYREF) {
			a = glob->dtgv_data;
			s = glob->dtgv_var.dtdv_type.dtdt_size;

			if (addr - a < s && addr + sz <= a + s)
				return (1);
		}
	}

	return (0);
}

/*
 * Compare two strings using safe loads.  Either pointer may be NULL, in
 * which case it is treated as an empty string.  The per-CPU fault flag is
 * consulted each iteration so the loop terminates if a safe load faults.
 *
 * Returns 0 if equal, otherwise the (uint8_t) difference of the first
 * mismatching characters, as with strcmp().
 */
static int
dtrace_strcmp(char *s1, char *s2)
{
	uint8_t c1, c2;
	volatile uint16_t *flags;

	if (s1 == s2)
		return (0);

	/* Per-CPU DTrace fault flags for the executing CPU. */
	flags = (volatile uint16_t *)
	    &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

	do {
		if (s1 == NULL) {
			c1 = '\0';
		} else {
			c1 = dtrace_load8((uintptr_t)s1++);
		}

		if (s2 == NULL) {
			c2 = '\0';
		} else {
			c2 = dtrace_load8((uintptr_t)s2++);
		}

		if (c1 != c2)
			return (c1 - c2);
	} while (c1 != '\0' && !(*flags & CPU_DTRACE_FAULT));

	return (0);
}

/*
 * Compute strlen(s) for a string using safe memory accesses.  The additional
 * len parameter is used to specify a maximum length to ensure completion.
*/static size_tdtrace_strlen(const char *s, size_t lim){ uint_t len; for (len = 0; len != lim; len++) { if (dtrace_load8((uintptr_t)s++) == '\0') break; } return (len);}/* * Check if an address falls within a toxic region. */static intdtrace_istoxic(uintptr_t kaddr, size_t size){ uintptr_t taddr, tsize; int i; for (i = 0; i < dtrace_toxranges; i++) { taddr = dtrace_toxrange[i].dtt_base; tsize = dtrace_toxrange[i].dtt_limit - taddr; if (kaddr - taddr < tsize) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[CPU->cpu_id].cpuc_dtrace_illval = kaddr; return (1); } if (taddr - kaddr < size) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[CPU->cpu_id].cpuc_dtrace_illval = taddr; return (1); } } return (0);}/* * Copy src to dst using safe memory accesses. The src is assumed to be unsafe * memory specified by the DIF program. The dst is assumed to be safe memory * that we can store to directly because it is managed by DTrace. As with * standard bcopy, overlapping copies are handled properly. */static voiddtrace_bcopy(const void *src, void *dst, size_t len){ if (len != 0) { uint8_t *s1 = dst; const uint8_t *s2 = src; if (s1 <= s2) { do { *s1++ = dtrace_load8((uintptr_t)s2++); } while (--len != 0); } else { s2 += len; s1 += len; do { *--s1 = dtrace_load8((uintptr_t)--s2); } while (--len != 0); } }}/* * Copy src to dst using safe memory accesses, up to either the specified * length, or the point that a nul byte is encountered. The src is assumed to * be unsafe memory specified by the DIF program. The dst is assumed to be * safe memory that we can store to directly because it is managed by DTrace. * Unlike dtrace_bcopy(), overlapping regions are not handled. */static voiddtrace_strcpy(const void *src, void *dst, size_t len){ if (len != 0) { uint8_t *s1 = dst, c; const uint8_t *s2 = src; do { *s1++ = c = dtrace_load8((uintptr_t)s2++); } while (--len != 0 && c != '\0'); }}/* * Copy src to dst, deriving the size and type from the specified (BYREF) * variable type. 
The src is assumed to be unsafe memory specified by the DIF * program. The dst is assumed to be DTrace variable memory that is of the * specified type; we assume that we can store to directly. */static voiddtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type){ ASSERT(type->dtdt_flags & DIF_TF_BYREF); if (type->dtdt_kind == DIF_TYPE_STRING) { dtrace_strcpy(src, dst, type->dtdt_size); } else { dtrace_bcopy(src, dst, type->dtdt_size); }}/* * Compare s1 to s2 using safe memory accesses. The s1 data is assumed to be * unsafe memory specified by the DIF program. The s2 data is assumed to be * safe memory that we can access directly because it is managed by DTrace. */static intdtrace_bcmp(const void *s1, const void *s2, size_t len){ volatile uint16_t *flags; flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags; if (s1 == s2) return (0); if (s1 == NULL || s2 == NULL) return (1); if (s1 != s2 && len != 0) { const uint8_t *ps1 = s1; const uint8_t *ps2 = s2; do { if (dtrace_load8((uintptr_t)ps1++) != *ps2++) return (1); } while (--len != 0 && !(*flags & CPU_DTRACE_FAULT)); } return (0);}/* * Zero the specified region using a simple byte-by-byte loop. Note that this * is for safe DTrace-managed memory only. */static voiddtrace_bzero(void *dst, size_t len){ uchar_t *cp; for (cp = dst; len != 0; len--) *cp++ = 0;}/* * This privilege checks should be used by actions and subroutines to * verify the credentials of the process that enabled the invoking ECB. 
*/
/*
 * Common credential check: succeed only when the consumer's uid/gid match
 * all of the current credential's effective, real and saved IDs and the
 * current process is not flagged SNOCD.  On failure, raise the per-CPU
 * CPU_DTRACE_UPRIV flag and return 0.
 */
static int
dtrace_priv_proc_common(dtrace_state_t *state)
{
	uid_t uid = state->dts_cred.dcr_uid;
	gid_t gid = state->dts_cred.dcr_gid;
	cred_t *cr;
	proc_t *proc;

	if ((cr = CRED()) != NULL &&
	    uid == cr->cr_uid &&
	    uid == cr->cr_ruid &&
	    uid == cr->cr_suid &&
	    gid == cr->cr_gid &&
	    gid == cr->cr_rgid &&
	    gid == cr->cr_sgid &&
	    (proc = ttoproc(curthread)) != NULL &&
	    !(proc->p_flag & SNOCD))
		return (1);

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}

/*
 * Destructive process actions: allowed outright if the consumer holds
 * DTRACE_CRA_PROC_DESTRUCTIVE; otherwise fall back to the common
 * credential comparison above.
 */
static int
dtrace_priv_proc_destructive(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_DESTRUCTIVE)
		return (1);

	return (dtrace_priv_proc_common(state));
}

/*
 * Process-control actions: allowed outright with DTRACE_CRA_PROC_CONTROL;
 * otherwise fall back to the common credential comparison.
 */
static int
dtrace_priv_proc_control(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL)
		return (1);

	return (dtrace_priv_proc_common(state));
}

/*
 * Basic process visibility: requires DTRACE_CRA_PROC; on failure raise
 * CPU_DTRACE_UPRIV (no credential fallback here).
 */
static int
dtrace_priv_proc(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_PROC)
		return (1);

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}

/*
 * Kernel visibility: requires DTRACE_CRA_KERNEL; on failure raise
 * CPU_DTRACE_KPRIV.
 */
static int
dtrace_priv_kernel(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL)
		return (1);

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;

	return (0);
}

/*
 * Destructive kernel actions: requires DTRACE_CRA_KERNEL_DESTRUCTIVE; on
 * failure raise CPU_DTRACE_KPRIV.
 */
static int
dtrace_priv_kernel_destructive(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE)
		return (1);

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;

	return (0);
}

/*
 * Note: not called from probe context.  This function is called
 * asynchronously (and at a regular interval) from outside of probe context
 * to clean the dirty dynamic variable lists on all CPUs.  Dynamic variable
 * cleaning is explained in detail in <sys/dtrace_impl.h>.
 *
 * The protocol below is deliberately ordered:  dirty lists are first moved
 * to per-CPU rinsing lists via compare-and-swap, a cross-call sync then
 * guarantees no CPU still holds a reference into them, and only then do the
 * rinsing lists become clean lists.  Do not reorder these steps.
 */
void
dtrace_dynvar_clean(dtrace_dstate_t *dstate)
{
	dtrace_dynvar_t *dirty;
	dtrace_dstate_percpu_t *dcpu;
	int i, work = 0;

	for (i = 0; i < NCPU; i++) {
		dcpu = &dstate->dtds_percpu[i];

		ASSERT(dcpu->dtdsc_rinsing == NULL);

		/*
		 * If the dirty list is NULL, there is no dirty work to do.
		 */
		if (dcpu->dtdsc_dirty == NULL)
			continue;

		/*
		 * If the clean list is non-NULL, then we're not going to do
		 * any work for this CPU -- it means that there has not been
		 * a dtrace_dynvar() allocation on this CPU (or from this CPU)
		 * since the last time we cleaned house.
		 */
		if (dcpu->dtdsc_clean != NULL)
			continue;

		work = 1;

		/*
		 * Atomically move the dirty list aside.
		 */
		do {
			dirty = dcpu->dtdsc_dirty;

			/*
			 * Before we zap the dirty list, set the rinsing list.
			 * (This allows for a potential assertion in
			 * dtrace_dynvar():  if a free dynamic variable appears
			 * on a hash chain, either the dirty list or the
			 * rinsing list for some CPU must be non-NULL.)
			 */
			dcpu->dtdsc_rinsing = dirty;
			dtrace_membar_producer();
		} while (dtrace_casptr(&dcpu->dtdsc_dirty,
		    dirty, NULL) != dirty);
	}

	if (!work) {
		/*
		 * We have no work to do; we can simply return.
		 */
		return;
	}

	dtrace_sync();

	for (i = 0; i < NCPU; i++) {
		dcpu = &dstate->dtds_percpu[i];

		if (dcpu->dtdsc_rinsing == NULL)
			continue;

		/*
		 * We are now guaranteed that no hash chain contains a pointer
		 * into this dirty list; we can make it clean.
		 */
		ASSERT(dcpu->dtdsc_clean == NULL);
		dcpu->dtdsc_clean = dcpu->dtdsc_rinsing;
		dcpu->dtdsc_rinsing = NULL;
	}

	/*
	 * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make
	 * sure that all CPUs have seen all of the dtdsc_clean pointers.
	 * This prevents a race whereby a CPU incorrectly decides that
	 * the state should be something other than DTRACE_DSTATE_CLEAN
	 * after dtrace_dynvar_clean() has completed.
	 */
	dtrace_sync();

	dstate->dtds_state = DTRACE_DSTATE_CLEAN;
}

/*
 * Depending on the value of the op parameter, this function looks-up,
 * allocates or deallocates an arbitrarily-keyed dynamic variable.  If an
 * allocation is requested, this function will return a pointer to a
 * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no
 * variable can be allocated.  If NULL is returned, the appropriate counter
 * will be incremented.
*/dtrace_dynvar_t *dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys, dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op){ uint64_t hashval = 1; dtrace_dynhash_t *hash = dstate->dtds_hash; dtrace_dynvar_t *free, *new_free, *next, *dvar, *start, *prev = NULL; processorid_t me = CPU->cpu_id, cpu = me; dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[me]; size_t bucket, ksize; size_t chunksize = dstate->dtds_chunksize; uintptr_t kdata, lock, nstate; uint_t i; ASSERT(nkeys != 0); /* * Hash the key. As with aggregations, we use Jenkins' "One-at-a-time" * algorithm. For the by-value portions, we perform the algorithm in * 16-bit chunks (as opposed to 8-bit chunks). This speeds things up a * bit, and seems to have only a minute effect on distribution. For * the by-reference data, we perform "One-at-a-time" iterating (safely) * over each referenced byte. It's painful to do this, but it's much * better than pathological hash distribution. The efficacy of the * hashing algorithm (and a comparison with other algorithms) may be * found by running the ::dtrace_dynstat MDB dcmd. */ for (i = 0; i < nkeys; i++) { if (key[i].dttk_size == 0) { uint64_t val = key[i].dttk_value; hashval += (val >> 48) & 0xffff; hashval += (hashval << 10); hashval ^= (hashval >> 6); hashval += (val >> 32) & 0xffff; hashval += (hashval << 10); hashval ^= (hashval >> 6); hashval += (val >> 16) & 0xffff; hashval += (hashval << 10); hashval ^= (hashval >> 6); hashval += val & 0xffff; hashval += (hashval << 10); hashval ^= (hashval >> 6); } else { /* * This is incredibly painful, but it beats the hell * out of the alternative. */ uint64_t j, size = key[i].dttk_size;
⌨️ Keyboard shortcuts
Copy code
Ctrl + C
Search code
Ctrl + F
Full-screen mode
F11
Toggle theme
Ctrl + Shift + D
Show shortcuts
?
Increase font size
Ctrl + =
Decrease font size
Ctrl + -