📄 dtrace_impl.h

📁 Sun Solaris 10 中的 DTrace 组件的源代码。请参看: http://www.sun.com/software/solaris/observability.jsp
💻 H
📖 第 1 页 / 共 4 页
字号:
12 3 4 下一页
/* * Copyright 2005 Sun Microsystems, Inc.  All rights reserved. * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only. * See the file usr/src/LICENSING.NOTICE in this distribution or * http://www.opensolaris.org/license/ for details. */#ifndef _SYS_DTRACE_IMPL_H#define	_SYS_DTRACE_IMPL_H#pragma ident	"@(#)dtrace_impl.h	1.10	04/11/22 SMI"#ifdef	__cplusplusextern "C" {#endif/* * DTrace Dynamic Tracing Software: Kernel Implementation Interfaces * * Note: The contents of this file are private to the implementation of the * Solaris system and DTrace subsystem and are subject to change at any time * without notice.  Applications and drivers using these interfaces will fail * to run on future releases.  These interfaces should not be used for any * purpose except those expressly outlined in dtrace(7D) and libdtrace(3LIB). * Please refer to the "Solaris Dynamic Tracing Guide" for more information. */#include <sys/dtrace.h>/* * DTrace Implementation Constants and Typedefs */#define	DTRACE_MAXPROPLEN		128#define	DTRACE_DYNVAR_CHUNKSIZE		256struct dtrace_probe;struct dtrace_ecb;struct dtrace_predicate;struct dtrace_action;struct dtrace_provider;struct dtrace_state;typedef struct dtrace_probe dtrace_probe_t;typedef struct dtrace_ecb dtrace_ecb_t;typedef struct dtrace_predicate dtrace_predicate_t;typedef struct dtrace_action dtrace_action_t;typedef struct dtrace_provider dtrace_provider_t;typedef struct dtrace_meta dtrace_meta_t;typedef struct dtrace_state dtrace_state_t;typedef uint32_t dtrace_optid_t;typedef uint32_t dtrace_specid_t;/* * DTrace Probes * * The probe is the fundamental unit of the DTrace architecture.  Probes are * created by DTrace providers, and managed by the DTrace framework.  A probe * is identified by a unique <provider, module, function, name> tuple, and has * a unique probe identifier assigned to it.  
(Some probes are not associated
 * with a specific point in text; these are called _unanchored probes_ and have
 * no module or function associated with them.)  Probes are represented as a
 * dtrace_probe structure.  To allow quick lookups based on each element of the
 * probe tuple, probes are hashed by each of provider, module, function and
 * name.  (If a lookup is performed based on a regular expression, a
 * dtrace_probekey is prepared, and a linear search is performed.) Each probe
 * is additionally pointed to by a linear array indexed by its identifier.  The
 * identifier is the provider's mechanism for indicating to the DTrace
 * framework that a probe has fired:  the identifier is passed as the first
 * argument to dtrace_probe(), where it is then mapped into the corresponding
 * dtrace_probe structure.  From the dtrace_probe structure, dtrace_probe() can
 * iterate over the probe's list of enabling control blocks; see "DTrace
 * Enabling Control Blocks", below.)
 */
struct dtrace_probe {
	dtrace_id_t dtpr_id;			/* probe identifier */
	dtrace_ecb_t *dtpr_ecb;			/* ECB list; see below */
	dtrace_ecb_t *dtpr_ecb_last;		/* last ECB in list */
	void *dtpr_arg;				/* provider argument */
	dtrace_cacheid_t dtpr_predcache;	/* predicate cache ID */
	int dtpr_aframes;			/* artificial frames */
	dtrace_provider_t *dtpr_provider;	/* pointer to provider */
	char *dtpr_mod;				/* probe's module name */
	char *dtpr_func;			/* probe's function name */
	char *dtpr_name;			/* probe's name */
	/*
	 * Hash chain links.  This structure carries next/previous links for
	 * the module, function and name hashes only; it has no per-provider
	 * chain links.
	 */
	dtrace_probe_t *dtpr_nextmod;		/* next in module hash */
	dtrace_probe_t *dtpr_prevmod;		/* previous in module hash */
	dtrace_probe_t *dtpr_nextfunc;		/* next in function hash */
	dtrace_probe_t *dtpr_prevfunc;		/* previous in function hash */
	dtrace_probe_t *dtpr_nextname;		/* next in name hash */
	dtrace_probe_t *dtpr_prevname;		/* previous in name hash */
};

/*
 * Type of the per-element matching functions stored in a dtrace_probekey.
 * NOTE(review): the roles of the two string arguments and the int argument
 * are not visible in this header -- confirm against the implementation.
 */
typedef int dtrace_probekey_f(const char *, const char *, int);

/*
 * Probe lookup key.  For each element of the probe tuple (provider, module,
 * function, name) the key holds the string to match and the function used to
 * match it; it also holds a probe identifier to match.
 */
typedef struct dtrace_probekey {
	const char *dtpk_prov;			/* provider name to match */
	dtrace_probekey_f *dtpk_pmatch;		/* provider matching function */
	const char *dtpk_mod;			/* module name to match */
	dtrace_probekey_f *dtpk_mmatch;		/* module matching function */
	const char *dtpk_func;			/* func name to match */
	dtrace_probekey_f *dtpk_fmatch;		/* func matching function */
	const char *dtpk_name;			/* name to match */
	dtrace_probekey_f *dtpk_nmatch;		/* name matching function */
	dtrace_id_t dtpk_id;			/* identifier to match */
} dtrace_probekey_t;

/*
 * A single bucket of a dtrace_hash:  a chain of probes and its length, linked
 * to the next bucket on the same hash chain.
 */
typedef struct dtrace_hashbucket {
	struct dtrace_hashbucket *dthb_next;	/* next on hash chain */
	dtrace_probe_t *dthb_chain;		/* chain of probes */
	int dthb_len;				/* number of probes here */
} dtrace_hashbucket_t;

/*
 * Hash table of probes.  The three offset members record where, within a
 * dtrace_probe, the next link, the previous link and the hashed string are
 * found.  NOTE(review): presumably this is what lets one hash implementation
 * serve the module, function and name hashes -- confirm against the users of
 * this structure.
 */
typedef struct dtrace_hash {
	dtrace_hashbucket_t **dth_tab;		/* hash table */
	int dth_size;				/* size of hash table */
	int dth_mask;				/* mask to index into table */
	int dth_nbuckets;			/* total number of buckets */
	uintptr_t dth_nextoffs;			/* offset of next in probe */
	uintptr_t dth_prevoffs;			/* offset of prev in probe */
	uintptr_t dth_stroffs;			/* offset of str in probe */
} dtrace_hash_t;

/*
 * DTrace Enabling Control Blocks
 *
 * When a provider wishes to fire a probe, it calls into dtrace_probe(),
 * passing the probe identifier as the first argument.  As described above,
 * dtrace_probe() maps the identifier into a pointer to a dtrace_probe_t
 * structure.  This structure contains information about the probe, and a
 * pointer to the list of Enabling Control Blocks (ECBs).  Each ECB points to
 * DTrace consumer state, and contains an optional predicate, and a list of
 * actions.  (Shown schematically below.)  The ECB abstraction allows a single
 * probe to be multiplexed across disjoint consumers, or across disjoint
 * enablings of a single probe within one consumer.
*
 *   Enabling Control Block
 *        dtrace_ecb_t
 * +------------------------+
 * | dtrace_epid_t ---------+--------------> Enabled Probe ID (EPID)
 * | dtrace_state_t * ------+--------------> State associated with this ECB
 * | dtrace_predicate_t * --+---------+
 * | dtrace_action_t * -----+----+    |
 * | dtrace_ecb_t * ---+    |    |    |       Predicate (if any)
 * +-------------------+----+    |    |       dtrace_predicate_t
 *                     |         |    +---> +--------------------+
 *                     |         |          | dtrace_difo_t * ---+----> DIFO
 *                     |         |          +--------------------+
 *                     |         |
 *            Next ECB |         |           Action
 *            (if any) |         |       dtrace_action_t
 *                     :         +--> +-------------------+
 *                     :              | dtrace_actkind_t -+------> kind
 *                     v              | dtrace_difo_t * --+------> DIFO (if any)
 *                                    | dtrace_recdesc_t -+------> record descr.
 *                                    | dtrace_action_t * +------+
 *                                    +-------------------+      |
 *                                                               | Next action
 *                               +-------------------------------+  (if any)
 *                               |
 *                               |           Action
 *                               |       dtrace_action_t
 *                               +--> +-------------------+
 *                                    | dtrace_actkind_t -+------> kind
 *                                    | dtrace_difo_t * --+------> DIFO (if any)
 *                                    | dtrace_action_t * +------+
 *                                    +-------------------+      |
 *                                                               | Next action
 *                               +-------------------------------+  (if any)
 *                               |
 *                               :
 *                               v
 *
 *
 * dtrace_probe() iterates over the ECB list.  If the ECB needs less space
 * than is available in the principal buffer, the ECB is processed:  if the
 * predicate is non-NULL, the DIF object is executed.  If the result is
 * non-zero, the action list is processed, with each action being executed
 * accordingly.  When the action list has been completely executed, processing
 * advances to the next ECB.  The ECB abstraction allows disjoint consumers to
 * multiplex on single probes.
*/
/*
 * Enabling Control Block:  one enabling of a probe.  It refers back to its
 * probe and to the consumer state, holds an optional predicate and the head
 * and tail of its action list, and links to the next ECB on the same probe.
 */
struct dtrace_ecb {
	dtrace_epid_t dte_epid;			/* enabled probe ID */
	uint32_t dte_alignment;			/* required alignment */
	size_t dte_needed;			/* bytes needed */
	size_t dte_size;			/* total size of payload */
	dtrace_predicate_t *dte_predicate;	/* predicate, if any */
	dtrace_action_t *dte_action;		/* actions, if any */
	dtrace_ecb_t *dte_next;			/* next ECB on probe */
	dtrace_state_t *dte_state;		/* pointer to state */
	uint32_t dte_cond;			/* security condition */
	dtrace_probe_t *dte_probe;		/* pointer to probe */
	dtrace_action_t *dte_action_last;	/* last action on ECB */
	uint64_t dte_uarg;			/* library argument */
};

/*
 * Predicate:  a DIF object together with a cache identifier and a reference
 * count.  NOTE(review): the reference count suggests a predicate may be
 * shared by several ECBs -- confirm against the implementation.
 */
struct dtrace_predicate {
	dtrace_difo_t *dtp_difo;		/* DIF object */
	dtrace_cacheid_t dtp_cacheid;		/* cache identifier */
	int dtp_refcnt;				/* reference count */
};

/*
 * Action:  one element of an ECB's action list.  Actions are doubly linked
 * through dta_prev and dta_next, and each carries its kind, an optional DIFO
 * and the description of the record it produces.
 */
struct dtrace_action {
	dtrace_actkind_t dta_kind;		/* kind of action */
	uint16_t dta_intuple;			/* boolean:  in aggregation */
	uint32_t dta_refcnt;			/* reference count */
	dtrace_difo_t *dta_difo;		/* pointer to DIFO */
	dtrace_recdesc_t dta_rec;		/* record description */
	dtrace_action_t *dta_prev;		/* previous action */
	dtrace_action_t *dta_next;		/* next action */
};

/*
 * Aggregation:  an action extended with aggregation state.  The embedded
 * dtrace_action_t must remain the first member.  NOTE(review): presumably so
 * that a pointer to the aggregation can be used as a pointer to its action --
 * confirm against the implementation.
 */
typedef struct dtrace_aggregation {
	dtrace_action_t dtag_action;		/* action; must be first */
	dtrace_aggid_t dtag_id;			/* identifier */
	dtrace_ecb_t *dtag_ecb;			/* corresponding ECB */
	dtrace_action_t *dtag_first;		/* first action in tuple */
	uint32_t dtag_base;			/* base of aggregation */
	uint64_t dtag_initial;			/* initial value */
	/*
	 * Aggregating function.  NOTE(review): the roles of the two arguments
	 * are not visible in this header -- confirm against the implementation.
	 */
	void (*dtag_aggregate)(uint64_t *, uint64_t);
} dtrace_aggregation_t;

/*
 * DTrace Buffers
 *
 * Principal buffers, aggregation buffers, and speculative buffers are all
 * managed with the dtrace_buffer structure.  By default, this structure
 * includes twin data buffers -- dtb_tomax and dtb_xamot -- that serve as the
 * active and passive buffers, respectively.  For speculative buffers,
 * dtb_xamot will be NULL; for "ring" and "fill" buffers, dtb_xamot will point
 * to a scratch buffer.
* For all buffer types, the dtrace_buffer structure is
 * always allocated on a per-CPU basis; a single dtrace_buffer structure is
 * never shared among CPUs.  (That is, there is never true sharing of the
 * dtrace_buffer structure; to prevent false sharing of the structure, it must
 * always be aligned to the coherence granularity -- generally 64 bytes.)
 *
 * One of the critical design decisions of DTrace is that a given ECB always
 * stores the same quantity and type of data.  This is done to assure that the
 * only metadata required for an ECB's traced data is the EPID.  That is, from
 * the EPID, the consumer can determine the data layout.  (The data buffer
 * layout is shown schematically below.)  By assuring that one can determine
 * data layout from the EPID, the metadata stream can be separated from the
 * data stream -- simplifying the data stream enormously.
 *
 *      base of data buffer --->  +------+--------------------+------+
 *                                | EPID | data               | EPID |
 *                                +------+--------+------+----+------+
 *                                | data          | EPID | data      |
 *                                +---------------+------+-----------+
 *                                | data, cont.                      |
 *                                +------+--------------------+------+
 *                                | EPID | data               |      |
 *                                +------+--------------------+      |
 *                                |                ||                |
 *                                |                ||                |
 *                                |                \/                |
 *                                :                                  :
 *                                .                                  .
 *                                .                                  .
 *                                .                                  .
 *                                :                                  :
 *                                |                                  |
 *     limit of data buffer --->  +----------------------------------+
 *
 * When evaluating an ECB, dtrace_probe() determines if the ECB's needs of the
 * principal buffer (both scratch and payload) exceed the available space.  If
 * the ECB's needs exceed available space (and if the principal buffer policy
 * is the default "switch" policy), the ECB is dropped, the buffer's drop count
 * is incremented, and processing advances to the next ECB.  If the ECB's needs
 * can be met with the available space, the ECB is processed, but the offset in
 * the principal buffer is only advanced if the ECB completes processing
 * without error.
 *
 * When a buffer is to be switched (either because the buffer is the principal
 * buffer with a "switch" policy or because it is an aggregation buffer), a
 * cross call is issued to the CPU associated with the buffer.  In the cross
 * call context, interrupts are disabled, and the active and the inactive
 * buffers are atomically switched.  This involves switching the data pointers,
 * copying the various state fields (offset, drops, errors, etc.) into their
 * inactive equivalents, and clearing the state fields.  Because interrupts are
 * disabled during this procedure, the switch is guaranteed to appear atomic to
 * dtrace_probe().
 *
 * DTrace Ring Buffering
 *
 * To process a ring buffer correctly, one must know the oldest valid record.
 * Processing starts at the oldest record in the buffer and continues until
 * the end of the buffer is reached.  Processing then resumes starting with
 * the record stored at offset 0 in the buffer, and continues until the
 * youngest record is processed.  If trace records are of a fixed length,
 * determining the oldest record is trivial:
 *
 *   - If the ring buffer has not wrapped, the oldest record is the record
 *     stored at offset 0.
 *
 *   - If the ring buffer has wrapped, the oldest record is the record stored
 *     at the current offset.
 *
 * With variable-length records, however, just knowing the current offset
 * doesn't suffice for determining the oldest valid record:  assuming that one
 * allows for arbitrary data, one has no way of searching forward from the
 * current offset to find the oldest valid record.  (That is, one has no way
 * of separating data from metadata.) It would be possible to simply refuse to
 * process any data in the ring buffer between the current offset and the
 * limit, but this leaves (potentially) an enormous amount of otherwise valid
 * data unprocessed.
 *
 * To effect ring buffering, we track two offsets in the buffer:  the current
12 3 4 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -