📄 dtrace_impl.h
字号:
/* * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only. * See the file usr/src/LICENSING.NOTICE in this distribution or * http://www.opensolaris.org/license/ for details. */#ifndef _SYS_DTRACE_IMPL_H#define _SYS_DTRACE_IMPL_H#pragma ident "@(#)dtrace_impl.h 1.10 04/11/22 SMI"#ifdef __cplusplusextern "C" {#endif/* * DTrace Dynamic Tracing Software: Kernel Implementation Interfaces * * Note: The contents of this file are private to the implementation of the * Solaris system and DTrace subsystem and are subject to change at any time * without notice. Applications and drivers using these interfaces will fail * to run on future releases. These interfaces should not be used for any * purpose except those expressly outlined in dtrace(7D) and libdtrace(3LIB). * Please refer to the "Solaris Dynamic Tracing Guide" for more information. */#include <sys/dtrace.h>/* * DTrace Implementation Constants and Typedefs */#define DTRACE_MAXPROPLEN 128#define DTRACE_DYNVAR_CHUNKSIZE 256struct dtrace_probe;struct dtrace_ecb;struct dtrace_predicate;struct dtrace_action;struct dtrace_provider;struct dtrace_state;typedef struct dtrace_probe dtrace_probe_t;typedef struct dtrace_ecb dtrace_ecb_t;typedef struct dtrace_predicate dtrace_predicate_t;typedef struct dtrace_action dtrace_action_t;typedef struct dtrace_provider dtrace_provider_t;typedef struct dtrace_meta dtrace_meta_t;typedef struct dtrace_state dtrace_state_t;typedef uint32_t dtrace_optid_t;typedef uint32_t dtrace_specid_t;/* * DTrace Probes * * The probe is the fundamental unit of the DTrace architecture. Probes are * created by DTrace providers, and managed by the DTrace framework. A probe * is identified by a unique <provider, module, function, name> tuple, and has * a unique probe identifier assigned to it. 
(Some probes are not associated * with a specific point in text; these are called _unanchored probes_ and have * no module or function associated with them.) Probes are represented as a * dtrace_probe structure. To allow quick lookups based on each element of the * probe tuple, probes are hashed by each of provider, module, function and * name. (If a lookup is performed based on a regular expression, a * dtrace_probekey is prepared, and a linear search is performed.) Each probe * is additionally pointed to by a linear array indexed by its identifier. The * identifier is the provider's mechanism for indicating to the DTrace * framework that a probe has fired: the identifier is passed as the first * argument to dtrace_probe(), where it is then mapped into the corresponding * dtrace_probe structure. From the dtrace_probe structure, dtrace_probe() can * iterate over the probe's list of enabling control blocks; see "DTrace * Enabling Control Blocks", below.) */struct dtrace_probe { dtrace_id_t dtpr_id; /* probe identifier */ dtrace_ecb_t *dtpr_ecb; /* ECB list; see below */ dtrace_ecb_t *dtpr_ecb_last; /* last ECB in list */ void *dtpr_arg; /* provider argument */ dtrace_cacheid_t dtpr_predcache; /* predicate cache ID */ int dtpr_aframes; /* artificial frames */ dtrace_provider_t *dtpr_provider; /* pointer to provider */ char *dtpr_mod; /* probe's module name */ char *dtpr_func; /* probe's function name */ char *dtpr_name; /* probe's name */ dtrace_probe_t *dtpr_nextmod; /* next in module hash */ dtrace_probe_t *dtpr_prevmod; /* previous in module hash */ dtrace_probe_t *dtpr_nextfunc; /* next in function hash */ dtrace_probe_t *dtpr_prevfunc; /* previous in function hash */ dtrace_probe_t *dtpr_nextname; /* next in name hash */ dtrace_probe_t *dtpr_prevname; /* previous in name hash */};typedef int dtrace_probekey_f(const char *, const char *, int);typedef struct dtrace_probekey { const char *dtpk_prov; /* provider name to match */ dtrace_probekey_f *dtpk_pmatch; /* 
provider matching function */ const char *dtpk_mod; /* module name to match */ dtrace_probekey_f *dtpk_mmatch; /* module matching function */ const char *dtpk_func; /* func name to match */ dtrace_probekey_f *dtpk_fmatch; /* func matching function */ const char *dtpk_name; /* name to match */ dtrace_probekey_f *dtpk_nmatch; /* name matching function */ dtrace_id_t dtpk_id; /* identifier to match */} dtrace_probekey_t;typedef struct dtrace_hashbucket { struct dtrace_hashbucket *dthb_next; /* next on hash chain */ dtrace_probe_t *dthb_chain; /* chain of probes */ int dthb_len; /* number of probes here */} dtrace_hashbucket_t;typedef struct dtrace_hash { dtrace_hashbucket_t **dth_tab; /* hash table */ int dth_size; /* size of hash table */ int dth_mask; /* mask to index into table */ int dth_nbuckets; /* total number of buckets */ uintptr_t dth_nextoffs; /* offset of next in probe */ uintptr_t dth_prevoffs; /* offset of prev in probe */ uintptr_t dth_stroffs; /* offset of str in probe */} dtrace_hash_t;/* * DTrace Enabling Control Blocks * * When a provider wishes to fire a probe, it calls into dtrace_probe(), * passing the probe identifier as the first argument. As described above, * dtrace_probe() maps the identifier into a pointer to a dtrace_probe_t * structure. This structure contains information about the probe, and a * pointer to the list of Enabling Control Blocks (ECBs). Each ECB points to * DTrace consumer state, and contains an optional predicate, and a list of * actions. (Shown schematically below.) The ECB abstraction allows a single * probe to be multiplexed across disjoint consumers, or across disjoint * enablings of a single probe within one consumer. 
 *
 *   Enabling Control Block
 *        dtrace_ecb_t
 * +------------------------+
 * | dtrace_epid_t ---------+--------------> Enabled Probe ID (EPID)
 * | dtrace_state_t * ------+--------------> State associated with this ECB
 * | dtrace_predicate_t * --+---------+
 * | dtrace_action_t * -----+----+    |
 * | dtrace_ecb_t * ---+    |    |    |       Predicate (if any)
 * +-------------------+----+    |    |       dtrace_predicate_t
 *                     |         |    +---> +--------------------+
 *                     |         |          | dtrace_difo_t * ---+----> DIFO
 *                     |         |          +--------------------+
 *                     |         |
 *            Next ECB |         |           Action
 *            (if any) |         |       dtrace_action_t
 *                     :         +--> +-------------------+
 *                     :              | dtrace_actkind_t -+------> kind
 *                     v              | dtrace_difo_t * --+------> DIFO (if any)
 *                                    | dtrace_recdesc_t -+------> record descr.
 *                                    | dtrace_action_t * +------+
 *                                    +-------------------+      |
 *                                                               | Next action
 *                               +-------------------------------+  (if any)
 *                               |
 *                               |           Action
 *                               |       dtrace_action_t
 *                               +--> +-------------------+
 *                                    | dtrace_actkind_t -+------> kind
 *                                    | dtrace_difo_t * --+------> DIFO (if any)
 *                                    | dtrace_action_t * +------+
 *                                    +-------------------+      |
 *                                                               | Next action
 *                               +-------------------------------+  (if any)
 *                               |
 *                               :
 *                               v
 *
 *
 * dtrace_probe() iterates over the ECB list. If the ECB needs less space
 * than is available in the principal buffer, the ECB is processed: if the
 * predicate is non-NULL, the DIF object is executed. If the result is
 * non-zero, the action list is processed, with each action being executed
 * accordingly. When the action list has been completely executed, processing
 * advances to the next ECB. The ECB abstraction allows disjoint consumers to
 * multiplex on single probes.
*/struct dtrace_ecb { dtrace_epid_t dte_epid; /* enabled probe ID */ uint32_t dte_alignment; /* required alignment */ size_t dte_needed; /* bytes needed */ size_t dte_size; /* total size of payload */ dtrace_predicate_t *dte_predicate; /* predicate, if any */ dtrace_action_t *dte_action; /* actions, if any */ dtrace_ecb_t *dte_next; /* next ECB on probe */ dtrace_state_t *dte_state; /* pointer to state */ uint32_t dte_cond; /* security condition */ dtrace_probe_t *dte_probe; /* pointer to probe */ dtrace_action_t *dte_action_last; /* last action on ECB */ uint64_t dte_uarg; /* library argument */};struct dtrace_predicate { dtrace_difo_t *dtp_difo; /* DIF object */ dtrace_cacheid_t dtp_cacheid; /* cache identifier */ int dtp_refcnt; /* reference count */};struct dtrace_action { dtrace_actkind_t dta_kind; /* kind of action */ uint16_t dta_intuple; /* boolean: in aggregation */ uint32_t dta_refcnt; /* reference count */ dtrace_difo_t *dta_difo; /* pointer to DIFO */ dtrace_recdesc_t dta_rec; /* record description */ dtrace_action_t *dta_prev; /* previous action */ dtrace_action_t *dta_next; /* next action */};typedef struct dtrace_aggregation { dtrace_action_t dtag_action; /* action; must be first */ dtrace_aggid_t dtag_id; /* identifier */ dtrace_ecb_t *dtag_ecb; /* corresponding ECB */ dtrace_action_t *dtag_first; /* first action in tuple */ uint32_t dtag_base; /* base of aggregation */ uint64_t dtag_initial; /* initial value */ void (*dtag_aggregate)(uint64_t *, uint64_t);} dtrace_aggregation_t;/* * DTrace Buffers * * Principal buffers, aggregation buffers, and speculative buffers are all * managed with the dtrace_buffer structure. By default, this structure * includes twin data buffers -- dtb_tomax and dtb_xamot -- that serve as the * active and passive buffers, respectively. For speculative buffers, * dtb_xamot will be NULL; for "ring" and "fill" buffers, dtb_xamot will point * to a scratch buffer. 
For all buffer types, the dtrace_buffer structure is * always allocated on a per-CPU basis; a single dtrace_buffer structure is * never shared among CPUs. (That is, there is never true sharing of the * dtrace_buffer structure; to prevent false sharing of the structure, it must * always be aligned to the coherence granularity -- generally 64 bytes.) * * One of the critical design decisions of DTrace is that a given ECB always * stores the same quantity and type of data. This is done to assure that the * only metadata required for an ECB's traced data is the EPID. That is, from * the EPID, the consumer can determine the data layout. (The data buffer * layout is shown schematically below.) By assuring that one can determine * data layout from the EPID, the metadata stream can be separated from the * data stream -- simplifying the data stream enormously. * * base of data buffer ---> +------+--------------------+------+ * | EPID | data | EPID | * +------+--------+------+----+------+ * | data | EPID | data | * +---------------+------+-----------+ * | data, cont. | * +------+--------------------+------+ * | EPID | data | | * +------+--------------------+ | * | || | * | || | * | \/ | * : : * . . * . . * . . * : : * | | * limit of data buffer ---> +----------------------------------+ * * When evaluating an ECB, dtrace_probe() determines if the ECB's needs of the * principal buffer (both scratch and payload) exceed the available space. If * the ECB's needs exceed available space (and if the principal buffer policy * is the default "switch" policy), the ECB is dropped, the buffer's drop count * is incremented, and processing advances to the next ECB. If the ECB's needs * can be met with the available space, the ECB is processed, but the offset in * the principal buffer is only advanced if the ECB completes processing * without error. 
* * When a buffer is to be switched (either because the buffer is the principal * buffer with a "switch" policy or because it is an aggregation buffer), a * cross call is issued to the CPU associated with the buffer. In the cross * call context, interrupts are disabled, and the active and the inactive * buffers are atomically switched. This involves switching the data pointers, * copying the various state fields (offset, drops, errors, etc.) into their * inactive equivalents, and clearing the state fields. Because interrupts are * disabled during this procedure, the switch is guaranteed to appear atomic to * dtrace_probe(). * * DTrace Ring Buffering * * To process a ring buffer correctly, one must know the oldest valid record. * Processing starts at the oldest record in the buffer and continues until * the end of the buffer is reached. Processing then resumes starting with * the record stored at offset 0 in the buffer, and continues until the * youngest record is processed. If trace records are of a fixed-length, * determining the oldest record is trivial: * * - If the ring buffer has not wrapped, the oldest record is the record * stored at offset 0. * * - If the ring buffer has wrapped, the oldest record is the record stored * at the current offset. * * With variable length records, however, just knowing the current offset * doesn't suffice for determining the oldest valid record: assuming that one * allows for arbitrary data, one has no way of searching forward from the * current offset to find the oldest valid record. (That is, one has no way * of separating data from metadata.) It would be possible to simply refuse to * process any data in the ring buffer between the current offset and the * limit, but this leaves (potentially) an enormous amount of otherwise valid * data unprocessed. * * To effect ring buffering, we track two offsets in the buffer: the current
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -