dt_lex.l

来自「Sun Solaris 10 中的 DTrace 组件的源代码。请参看: htt」· L 代码 · 共 794 行 · 第 1/2 页

L
794
字号
#else
			bcopy(s, ((char *)&yylval.l_int) +
			    sizeof (yylval.l_int) - nbytes, nbytes);
#endif
			return (DT_TOK_INT);
		}

<S0>"/*"	|
<S2>"/*"	{
			/*
			 * Remember the state we came from so that the S1
			 * rule for the closing delimiter can return to it.
			 */
			yypcb->pcb_cstate = (YYSTATE);
			BEGIN(S1);
		}

<S0>{RGX_INTERP} |
<S2>{RGX_INTERP}	; /* discard any #! lines */

<S0>{RGX_CTL}	|
<S2>{RGX_CTL}	{
			/*
			 * Start of a control line: save the current state and
			 * collect the rest of the line in S3.  No pragma node
			 * list may be pending at this point.
			 */
			assert(yypragma == NULL);
			yypcb->pcb_cstate = (YYSTATE);
			BEGIN(S3);
		}

<S0>"/"		{
			int c, tok;

			/*
			 * The use of "/" as the predicate delimiter and as the
			 * integer division symbol requires special lookahead
			 * to avoid a shift/reduce conflict in the D grammar.
			 * We look ahead to the next non-whitespace character.
			 * If we encounter EOF, ";", "{", or "/", then this "/"
			 * closes the predicate and we return DT_TOK_EPRED.
			 * If we encounter anything else, it's DT_TOK_DIV.
			 */
			while ((c = input()) != 0) {
				if (strchr("\f\n\r\t\v ", c) == NULL)
					break;
			}

			if (c == 0 || c == ';' || c == '{' || c == '/') {
				if (yypcb->pcb_parens != 0) {
					yyerror("closing ) expected in "
					    "predicate before /\n");
				}
				if (yypcb->pcb_brackets != 0) {
					yyerror("closing ] expected in "
					    "predicate before /\n");
				}
				tok = DT_TOK_EPRED;
			} else
				tok = DT_TOK_DIV;

			/*
			 * Push the lookahead character back (this includes the
			 * 0 that input() returns at EOF) so that the next
			 * token is scanned starting from it.
			 */
			unput(c);
			return (tok);
		}

<S0>"("		{
			/* track nesting depth for the checks made at "/" */
			yypcb->pcb_parens++;
			return (DT_TOK_LPAR);
		}

<S0>")"		{
			if (--yypcb->pcb_parens < 0)
				yyerror("extra ) in input stream\n");
			return (DT_TOK_RPAR);
		}

<S0>"["		{
			/* track nesting depth for the checks made at "/" */
			yypcb->pcb_brackets++;
			return (DT_TOK_LBRAC);
		}

<S0>"]"		{
			if (--yypcb->pcb_brackets < 0)
				yyerror("extra ] in input stream\n");
			return (DT_TOK_RBRAC);
		}

<S0>"{"		|
<S2>"{"		{
			yypcb->pcb_braces++;
			return ('{');
		}

<S0>"}"		{
			if (--yypcb->pcb_braces < 0)
				yyerror("extra } in input stream\n");
			return ('}');
		}

<S0>"|"		return (DT_TOK_BOR);
<S0>"^"		return (DT_TOK_XOR);
<S0>"&"		return (DT_TOK_BAND);
<S0>"&&"	return (DT_TOK_LAND);
<S0>"^^"	return (DT_TOK_LXOR);
<S0>"||"	return (DT_TOK_LOR);
<S0>"=="	return (DT_TOK_EQU);
<S0>"!="	return (DT_TOK_NEQ);
<S0>"<"		return (DT_TOK_LT);
<S0>"<="	return (DT_TOK_LE);
<S0>">"		return (DT_TOK_GT);
<S0>">="	return (DT_TOK_GE);
<S0>"<<"	return (DT_TOK_LSH);
<S0>">>"	return (DT_TOK_RSH);
<S0>"+"		return (DT_TOK_ADD);
<S0>"-"		return (DT_TOK_SUB);
<S0>"*"		return (DT_TOK_MUL);
<S0>"%"		return (DT_TOK_MOD);
<S0>"~"		return (DT_TOK_BNEG);
<S0>"!"		return (DT_TOK_LNEG);
<S0>"?"		return (DT_TOK_QUESTION);
<S0>":"		return (DT_TOK_COLON);
<S0>"."		return (DT_TOK_DOT);
<S0>"->"	return (DT_TOK_PTR);
<S0>"="		return (DT_TOK_ASGN);
<S0>"+="	return (DT_TOK_ADD_EQ);
<S0>"-="	return (DT_TOK_SUB_EQ);
<S0>"*="	return (DT_TOK_MUL_EQ);
<S0>"/="	return (DT_TOK_DIV_EQ);
<S0>"%="	return (DT_TOK_MOD_EQ);
<S0>"&="	return (DT_TOK_AND_EQ);
<S0>"^="	return (DT_TOK_XOR_EQ);
<S0>"|="	return (DT_TOK_OR_EQ);
<S0>"<<="	return (DT_TOK_LSH_EQ);
<S0>">>="	return (DT_TOK_RSH_EQ);
<S0>"++"	return (DT_TOK_ADDADD);
<S0>"--"	return (DT_TOK_SUBSUB);
<S0>"..."	return (DT_TOK_ELLIPSIS);
<S0>","		return (DT_TOK_COMMA);
<S0>";"		return (';');
<S0>{RGX_WS}	; /* discard */
<S0>"\\"\n	; /* discard */
<S0>.		yyerror("syntax error near \"%c\"\n", yytext[0]);

<S1>"*/"	BEGIN(yypcb->pcb_cstate);
<S1>.|\n	; /* discard */

<S2>{RGX_PSPEC}	{
			/*
			 * S2 has an ambiguity because RGX_PSPEC includes '*'
			 * as a glob character and '*' is also an operator
			 * token in S0 (DT_TOK_MUL).
			 * Since lex always matches the longest token, this
			 * rule can be matched by an input string like "int*",
			 * which could begin a global variable declaration such
			 * as "int*x;" or could begin a RGX_PSPEC with globbing
			 * such as "int* { trace(timestamp); }".  If C_PSPEC is
			 * not set, we must resolve the ambiguity in favor of
			 * the type and perform lexer pushback if the fragment
			 * before '*' or entire fragment matches a type name.
			 * If C_PSPEC is set, we always return a PSPEC token.
			 * If C_PSPEC is off, the user can avoid ambiguity by
			 * including a ':' delimiter in the specifier, which
			 * they should be doing anyway to specify the provider.
			 */
			if (!(yypcb->pcb_cflags & DTRACE_C_PSPEC) &&
			    strchr(yytext, ':') == NULL) {

				char *p = strchr(yytext, '*');
				char *q = yytext + yyleng - 1;

				if (p != NULL && p > yytext)
					*p = '\0'; /* prune yytext */

				if (dt_type_lookup(yytext, NULL) == 0) {
					yylval.l_str = strdup(yytext);

					if (yylval.l_str == NULL) {
						longjmp(yypcb->pcb_jmpbuf,
						    EDT_NOMEM);
					}

					/*
					 * Restore the '*' and push everything
					 * from the end of the match back to
					 * it, last character first, so that it
					 * is rescanned after the type name.
					 */
					if (p != NULL && p > yytext) {
						for (*p = '*'; q >= p; q--)
							unput(*q);
					}

					yybegin(YYS_EXPR);
					return (DT_TOK_TNAME);
				}

				if (p != NULL && p > yytext)
					*p = '*'; /* restore yytext */
			}

			if ((yylval.l_str = strdup(yytext)) == NULL)
				longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);

			return (DT_TOK_PSPEC);
		}

<S2>"/"		return (DT_TOK_DIV);
<S2>","		return (DT_TOK_COMMA);

<S2>{RGX_WS}	; /* discard */
<S2>.		yyerror("syntax error near \"%c\"\n", yytext[0]);

<S3>\n		{
			/*
			 * End of the control line: hand the collected node
			 * list to dt_pragma(), reset it, and resume the state
			 * that was active when the control line began.
			 */
			dt_pragma(yypragma);
			yypragma = NULL;
			BEGIN(yypcb->pcb_cstate);
		}

<S3>[\f\t\v ]+	; /* discard */

<S3>[^\f\n\t\v "]+ {
			dt_node_t *dnp;

			if ((yylval.l_str = strdup(yytext)) == NULL)
				longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);

			/*
			 * We want to call dt_node_ident() here, but we can't
			 * because it will expand inlined identifiers, which we
			 * don't want to do from #pragma context in order to
			 * support pragmas that apply to the ident itself.  We
			 * call dt_node_string() and then reset dn_op instead.
			 */
			dnp = dt_node_string(yylval.l_str);
			dnp->dn_kind = DT_NODE_IDENT;
			dnp->dn_op = DT_TOK_IDENT;
			yypragma = dt_node_link(yypragma, dnp);
		}

<S3>.		yyerror("syntax error near \"%c\"\n", yytext[0]);

%%

/*
 * yybegin provides a wrapper for use from C code around the lex BEGIN() macro.
 * We use two main states for lexing because probe descriptions use a syntax
 * that is incompatible with the normal D tokens (e.g. names can contain "-").
 * yybegin also handles the job of switching between two lists of dt_nodes
 * as we allocate persistent definitions, like inlines, and transient nodes
 * that will be freed once we are done parsing the current program file.
 *
 * YYS_CLAUSE selects lex state S2; YYS_DEFINE and YYS_EXPR both select S0.
 * Entering YYS_DEFINE parks pcb_list in pcb_hold and starts an empty list;
 * leaving YYS_DEFINE puts the held list back.  Any other state value is
 * reported through xyerror() as an internal error.
 */
void
yybegin(yystate_t state)
{
#ifdef	YYDEBUG
	yydebug = _dtrace_debug;
#endif
	if (yypcb->pcb_yystate == state)
		return; /* nothing to do if we're in the state already */

	/* leaving YYS_DEFINE: restore the node list that was set aside */
	if (yypcb->pcb_yystate == YYS_DEFINE) {
		yypcb->pcb_list = yypcb->pcb_hold;
		yypcb->pcb_hold = NULL;
	}

	switch (state) {
	case YYS_CLAUSE:
		BEGIN(S2);
		break;
	case YYS_DEFINE:
		/* entering YYS_DEFINE: set the current node list aside */
		assert(yypcb->pcb_hold == NULL);
		yypcb->pcb_hold = yypcb->pcb_list;
		yypcb->pcb_list = NULL;
		/*FALLTHRU*/
	case YYS_EXPR:
		BEGIN(S0);
		break;
	default:
		xyerror(D_UNKNOWN, "internal error -- bad yystate %d\n", state);
	}

	yypcb->pcb_yystate = state;
}

/*
 * Reset the lexer globals for a new parse: install 'pcb' as the current
 * control block, restart line numbering at 1, clear any pending pragma node
 * list, and empty the pushback buffer.
 */
void
yyinit(dt_pcb_t *pcb)
{
	yypcb = pcb;
	yylineno = 1;
	yypragma = NULL;
	yysptr = yysbuf;
}

/*
 * Given a lexeme 's' (typically yytext), set yylval and return an appropriate
 * token to the parser indicating either an identifier or a typedef name.
 * User-defined global variables always take precedence over types, but we do
 * use some heuristics because D programs can look at an ever-changing set of
 * kernel types and also can implicitly instantiate variables by assignment,
 * unlike in C.  The code here is ordered carefully as lookups are not cheap.
 *
 * The lexeme is duplicated with strdup() into yylval.l_str; if that fails we
 * longjmp() to pcb_jmpbuf with EDT_NOMEM.  The return value is DT_TOK_IDENT
 * or DT_TOK_TNAME.  Any characters consumed while peeking ahead are pushed
 * back with unput() before returning.
 */
static int
id_or_type(const char *s)
{
	dtrace_hdl_t *dtp = yypcb->pcb_hdl;
	int c0, c1, ttok = DT_TOK_TNAME;
	dt_ident_t *idp;

	if ((s = yylval.l_str = strdup(s)) == NULL)
		longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);

	/*
	 * If the lexeme is a global variable or likely identifier or *not* a
	 * type_name, then it is an identifier token.
	 */
	if (dt_idhash_lookup(dtp->dt_globals, s) != NULL ||
	    dt_idhash_lookup(yypcb->pcb_idents, s) != NULL ||
	    dt_type_lookup(s, NULL) != 0)
		return (DT_TOK_IDENT);

	/*
	 * If the lexeme is a type name and we are not in a program clause,
	 * then always interpret it as a type and return DT_TOK_TNAME.
	 */
	if ((YYSTATE) != S0)
		return (DT_TOK_TNAME);

	/*
	 * If the lexeme matches a type name but is in a program clause, then
	 * it could be a type or it could be an undefined variable.  Peek at
	 * the next token to decide.  If we see ++, --, [, or =, we know there
	 * might be an assignment that is trying to create a global variable,
	 * so we optimistically return DT_TOK_IDENT.  There is no harm in being
	 * wrong: a type_name followed by ++, --, [, or = is a syntax error.
	 */
	while ((c0 = input()) != 0) {
		if (strchr("\f\n\r\t\v ", c0) == NULL)
			break;
	}

	switch (c0) {
	case '+':
	case '-':
		/* only the doubled forms "++" and "--" count */
		if ((c1 = input()) == c0)
			ttok = DT_TOK_IDENT;
		unput(c1);
		break;

	case '=':
		/* a lone "=" counts; "==" is a comparison and does not */
		if ((c1 = input()) != c0)
			ttok = DT_TOK_IDENT;
		unput(c1);
		break;
	case '[':
		ttok = DT_TOK_IDENT;
		break;
	}

	/*
	 * Record the name in pcb_idents so that the lookup at the top of this
	 * function returns DT_TOK_IDENT for it directly from now on.
	 */
	if (ttok == DT_TOK_IDENT) {
		idp = dt_idhash_insert(yypcb->pcb_idents, s, DT_IDENT_SCALAR, 0,
		    0, _dtrace_defattr, 0, &dt_idops_thaw, NULL, dtp->dt_gen);

		if (idp == NULL)
			longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
	}

	unput(c0);
	return (ttok);
}

/*
 * Return the next input character, or 0 at end-of-file.  Characters are taken
 * first from the pushback buffer (yysbuf), then from pcb_fileptr if one is
 * set, and otherwise from the string described by pcb_string, pcb_strlen and
 * pcb_strptr.  yylineno is incremented for each newline returned.
 *
 * At end-of-file an error is reported through yyerror() if we are still
 * inside a comment (S1) or a control line (S3), and we longjmp() to
 * pcb_jmpbuf with EDT_FIO if the input file has its error indicator set.
 */
static int
input(void)
{
	int c;

	/*
	 * Bytes read from the string are converted through unsigned char, the
	 * same way fgetc() delivers bytes from a file.  If the string is held
	 * as plain char (its declaration is not in this file) and char is
	 * signed, a byte with the high bit set would otherwise sign-extend to
	 * a negative int: 0xff would compare equal to EOF and end the input
	 * early, and other values would be passed on to strchr() and to the
	 * error messages as negative numbers.
	 */
	if (yysptr > yysbuf)
		c = *--yysptr;
	else if (yypcb->pcb_fileptr != NULL)
		c = fgetc(yypcb->pcb_fileptr);
	else if (yypcb->pcb_strptr < yypcb->pcb_string + yypcb->pcb_strlen)
		c = (unsigned char)*yypcb->pcb_strptr++;
	else
		c = EOF;

	if (c == '\n')
		yylineno++;

	if (c != EOF)
		return (c);

	if ((YYSTATE) == S1)
		yyerror("end-of-file encountered before matching */\n");

	if ((YYSTATE) == S3)
		yyerror("end-of-file encountered before end of control line\n");

	if (yypcb->pcb_fileptr != NULL && ferror(yypcb->pcb_fileptr))
		longjmp(yypcb->pcb_jmpbuf, EDT_FIO);

	return (0); /* EOF */
}

/*
 * Push character 'c' back onto the pushback buffer so that the next call to
 * input() returns it, undoing the line count if it is a newline, and record
 * it in yytchar.
 *
 * NOTE(review): there is no check here against the capacity of yysbuf, which
 * is declared outside this file; confirm that callers cannot overrun it.
 */
static void
unput(int c)
{
	if (c == '\n')
		yylineno--;

	*yysptr++ = c;
	yytchar = c;
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?