dt_lex.l
来自「Sun Solaris 10 中的 DTrace 组件的源代码。请参看: htt」· L 代码 · 共 794 行 · 第 1/2 页
L
794 行
#else
			bcopy(s, ((char *)&yylval.l_int) +
			    sizeof (yylval.l_int) - nbytes, nbytes);
#endif
			return (DT_TOK_INT);
		}

<S0>"/*" |
<S2>"/*"	{
			/*
			 * Remember the current start state so that the S1
			 * rule for the closing comment delimiter can return
			 * to it.
			 */
			yypcb->pcb_cstate = (YYSTATE);
			BEGIN(S1);
		}

<S0>{RGX_INTERP} |
<S2>{RGX_INTERP} ;	/* discard any #! lines */

<S0>{RGX_CTL} |
<S2>{RGX_CTL}	{
			/*
			 * Remember the current start state so that the S3
			 * newline rule can return to it once the control
			 * line has been consumed.
			 */
			assert(yypragma == NULL);
			yypcb->pcb_cstate = (YYSTATE);
			BEGIN(S3);
		}

<S0>"/"		{
			int c, tok;

			/*
			 * The use of "/" as the predicate delimiter and as the
			 * integer division symbol requires special lookahead
			 * to avoid a shift/reduce conflict in the D grammar.
			 * We look ahead to the next non-whitespace character.
			 * If we encounter EOF, ";", "{", or "/", then this "/"
			 * closes the predicate and we return DT_TOK_EPRED.
			 * If we encounter anything else, it's DT_TOK_DIV.
			 */
			while ((c = input()) != 0) {
				if (strchr("\f\n\r\t\v ", c) == NULL)
					break;
			}

			if (c == 0 || c == ';' || c == '{' || c == '/') {
				if (yypcb->pcb_parens != 0) {
					yyerror("closing ) expected in "
					    "predicate before /\n");
				}
				if (yypcb->pcb_brackets != 0) {
					yyerror("closing ] expected in "
					    "predicate before /\n");
				}
				tok = DT_TOK_EPRED;
			} else
				tok = DT_TOK_DIV;

			/* The lookahead character is not part of this token. */
			unput(c);
			return (tok);
		}

<S0>"("		{
			yypcb->pcb_parens++;
			return (DT_TOK_LPAR);
		}

<S0>")"		{
			if (--yypcb->pcb_parens < 0)
				yyerror("extra ) in input stream\n");
			return (DT_TOK_RPAR);
		}

<S0>"["		{
			yypcb->pcb_brackets++;
			return (DT_TOK_LBRAC);
		}

<S0>"]"		{
			if (--yypcb->pcb_brackets < 0)
				yyerror("extra ] in input stream\n");
			return (DT_TOK_RBRAC);
		}

<S0>"{" |
<S2>"{"		{
			yypcb->pcb_braces++;
			return ('{');
		}

<S0>"}"		{
			if (--yypcb->pcb_braces < 0)
				yyerror("extra } in input stream\n");
			return ('}');
		}

<S0>"|"		return (DT_TOK_BOR);
<S0>"^"		return (DT_TOK_XOR);
<S0>"&"		return (DT_TOK_BAND);
<S0>"&&"	return (DT_TOK_LAND);
<S0>"^^"	return (DT_TOK_LXOR);
<S0>"||"	return (DT_TOK_LOR);
<S0>"=="	return (DT_TOK_EQU);
<S0>"!="	return (DT_TOK_NEQ);
<S0>"<"		return (DT_TOK_LT);
<S0>"<="	return (DT_TOK_LE);
<S0>">"		return (DT_TOK_GT);
<S0>">="	return (DT_TOK_GE);
<S0>"<<"	return (DT_TOK_LSH);
<S0>">>"	return (DT_TOK_RSH);
<S0>"+"		return (DT_TOK_ADD);
<S0>"-"		return (DT_TOK_SUB);
<S0>"*"		return (DT_TOK_MUL);
<S0>"%"		return (DT_TOK_MOD);
<S0>"~"		return (DT_TOK_BNEG);
<S0>"!"		return (DT_TOK_LNEG);
<S0>"?"		return (DT_TOK_QUESTION);
<S0>":"		return (DT_TOK_COLON);
<S0>"."		return (DT_TOK_DOT);
<S0>"->"	return (DT_TOK_PTR);
<S0>"="		return (DT_TOK_ASGN);
<S0>"+="	return (DT_TOK_ADD_EQ);
<S0>"-="	return (DT_TOK_SUB_EQ);
<S0>"*="	return (DT_TOK_MUL_EQ);
<S0>"/="	return (DT_TOK_DIV_EQ);
<S0>"%="	return (DT_TOK_MOD_EQ);
<S0>"&="	return (DT_TOK_AND_EQ);
<S0>"^="	return (DT_TOK_XOR_EQ);
<S0>"|="	return (DT_TOK_OR_EQ);
<S0>"<<="	return (DT_TOK_LSH_EQ);
<S0>">>="	return (DT_TOK_RSH_EQ);
<S0>"++"	return (DT_TOK_ADDADD);
<S0>"--"	return (DT_TOK_SUBSUB);
<S0>"..."	return (DT_TOK_ELLIPSIS);
<S0>","		return (DT_TOK_COMMA);
<S0>";"		return (';');
<S0>{RGX_WS}	; /* discard */
<S0>"\\"\n	; /* discard */
<S0>.		yyerror("syntax error near \"%c\"\n", yytext[0]);

<S1>"*/"	BEGIN(yypcb->pcb_cstate);
<S1>.|\n	; /* discard */

<S2>{RGX_PSPEC}	{
			/*
			 * S2 has an ambiguity because RGX_PSPEC includes '*'
			 * as a glob character and '*' also can be DT_TOK_STAR.
			 * Since lex always matches the longest token, this
			 * rule can be matched by an input string like "int*",
			 * which could begin a global variable declaration such
			 * as "int*x;" or could begin a RGX_PSPEC with globbing
			 * such as "int* { trace(timestamp); }".  If C_PSPEC is
			 * not set, we must resolve the ambiguity in favor of
			 * the type and perform lexer pushback if the fragment
			 * before '*' or entire fragment matches a type name.
			 * If C_PSPEC is set, we always return a PSPEC token.
			 * If C_PSPEC is off, the user can avoid ambiguity by
			 * including a ':' delimiter in the specifier, which
			 * they should be doing anyway to specify the provider.
			 */
			if (!(yypcb->pcb_cflags & DTRACE_C_PSPEC) &&
			    strchr(yytext, ':') == NULL) {

				char *p = strchr(yytext, '*');
				char *q = yytext + yyleng - 1;

				if (p != NULL && p > yytext)
					*p = '\0'; /* prune yytext */

				if (dt_type_lookup(yytext, NULL) == 0) {
					yylval.l_str = strdup(yytext);

					if (yylval.l_str == NULL) {
						longjmp(yypcb->pcb_jmpbuf,
						    EDT_NOMEM);
					}

					/*
					 * Restore the '*' and push everything
					 * from the end of yytext back to the
					 * '*' onto the input, last character
					 * first.
					 */
					if (p != NULL && p > yytext) {
						for (*p = '*'; q >= p; q--)
							unput(*q);
					}

					yybegin(YYS_EXPR);
					return (DT_TOK_TNAME);
				}

				if (p != NULL && p > yytext)
					*p = '*'; /* restore yytext */
			}

			if ((yylval.l_str = strdup(yytext)) == NULL)
				longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);

			return (DT_TOK_PSPEC);
		}

<S2>"/"		return (DT_TOK_DIV);
<S2>","		return (DT_TOK_COMMA);

<S2>{RGX_WS}	; /* discard */
<S2>.		yyerror("syntax error near \"%c\"\n", yytext[0]);

<S3>\n		{
			dt_pragma(yypragma);
			yypragma = NULL;
			BEGIN(yypcb->pcb_cstate);
		}

<S3>[\f\t\v ]+	; /* discard */

<S3>[^\f\n\t\v "]+ {
			dt_node_t *dnp;

			if ((yylval.l_str = strdup(yytext)) == NULL)
				longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);

			/*
			 * We want to call dt_node_ident() here, but we can't
			 * because it will expand inlined identifiers, which we
			 * don't want to do from #pragma context in order to
			 * support pragmas that apply to the ident itself.  We
			 * call dt_node_string() and then reset dn_op instead.
			 */
			dnp = dt_node_string(yylval.l_str);
			dnp->dn_kind = DT_NODE_IDENT;
			dnp->dn_op = DT_TOK_IDENT;
			yypragma = dt_node_link(yypragma, dnp);
		}

<S3>.		yyerror("syntax error near \"%c\"\n", yytext[0]);

%%

/*
 * yybegin provides a wrapper for use from C code around the lex BEGIN() macro.
 * We use two main states for lexing because probe descriptions use a syntax
 * that is incompatible with the normal D tokens (e.g. names can contain "-").
 * yybegin also handles the job of switching between two lists of dt_nodes
 * as we allocate persistent definitions, like inlines, and transient nodes
 * that will be freed once we are done parsing the current program file.
 */
void
yybegin(yystate_t state)
{
#ifdef YYDEBUG
	yydebug = _dtrace_debug;
#endif
	if (yypcb->pcb_yystate == state)
		return; /* nothing to do if we're in the state already */

	/* Leaving YYS_DEFINE: put back the node list that was set aside. */
	if (yypcb->pcb_yystate == YYS_DEFINE) {
		yypcb->pcb_list = yypcb->pcb_hold;
		yypcb->pcb_hold = NULL;
	}

	switch (state) {
	case YYS_CLAUSE:
		BEGIN(S2);
		break;
	case YYS_DEFINE:
		/* Entering YYS_DEFINE: set the current node list aside. */
		assert(yypcb->pcb_hold == NULL);
		yypcb->pcb_hold = yypcb->pcb_list;
		yypcb->pcb_list = NULL;
		/*FALLTHRU*/
	case YYS_EXPR:
		BEGIN(S0);
		break;
	default:
		xyerror(D_UNKNOWN, "internal error -- bad yystate %d\n", state);
	}

	yypcb->pcb_yystate = state;
}

/*
 * Attach the lexer to the specified pcb and reset the lexer's global state:
 * the line counter restarts at 1, any pending pragma list is dropped, and the
 * pushback pointer is rewound to the start of yysbuf.
 */
void
yyinit(dt_pcb_t *pcb)
{
	yypcb = pcb;
	yylineno = 1;
	yypragma = NULL;
	yysptr = yysbuf;
}

/*
 * Given a lexeme 's' (typically yytext), set yylval and return an appropriate
 * token to the parser indicating either an identifier or a typedef name.
 * User-defined global variables always take precedence over types, but we do
 * use some heuristics because D programs can look at an ever-changing set of
 * kernel types and also can implicitly instantiate variables by assignment,
 * unlike in C.  The code here is ordered carefully as lookups are not cheap.
 */
static int
id_or_type(const char *s)
{
	dtrace_hdl_t *dtp = yypcb->pcb_hdl;
	int c0, c1, ttok = DT_TOK_TNAME;
	dt_ident_t *idp;

	if ((s = yylval.l_str = strdup(s)) == NULL)
		longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);

	/*
	 * If the lexeme is a global variable or likely identifier or *not* a
	 * type_name, then it is an identifier token.
	 */
	if (dt_idhash_lookup(dtp->dt_globals, s) != NULL ||
	    dt_idhash_lookup(yypcb->pcb_idents, s) != NULL ||
	    dt_type_lookup(s, NULL) != 0)
		return (DT_TOK_IDENT);

	/*
	 * If the lexeme is a type name and we are not in a program clause,
	 * then always interpret it as a type and return DT_TOK_TNAME.
	 */
	if ((YYSTATE) != S0)
		return (DT_TOK_TNAME);

	/*
	 * If the lexeme matches a type name but is in a program clause, then
	 * it could be a type or it could be an undefined variable.  Peek at
	 * the next token to decide.  If we see ++, --, [, or =, we know there
	 * might be an assignment that is trying to create a global variable,
	 * so we optimistically return DT_TOK_IDENT.  There is no harm in being
	 * wrong: a type_name followed by ++, --, [, or = is a syntax error.
	 */
	while ((c0 = input()) != 0) {
		if (strchr("\f\n\r\t\v ", c0) == NULL)
			break;
	}

	switch (c0) {
	case '+':
	case '-':
		/* Only a doubled character (++ or --) selects an identifier. */
		if ((c1 = input()) == c0)
			ttok = DT_TOK_IDENT;
		unput(c1);
		break;

	case '=':
		/* A lone '=' selects an identifier; "==" does not. */
		if ((c1 = input()) != c0)
			ttok = DT_TOK_IDENT;
		unput(c1);
		break;
	case '[':
		ttok = DT_TOK_IDENT;
		break;
	}

	if (ttok == DT_TOK_IDENT) {
		idp = dt_idhash_insert(yypcb->pcb_idents, s, DT_IDENT_SCALAR, 0,
		    0, _dtrace_defattr, 0, &dt_idops_thaw, NULL, dtp->dt_gen);

		if (idp == NULL)
			longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
	}

	unput(c0);
	return (ttok);
}

/*
 * Return the next input character, or 0 at end-of-file.  Characters are taken
 * from the first available source in this order: the pushback buffer yysbuf,
 * the pcb's file pointer, and the pcb's in-memory string.  yylineno is
 * advanced whenever a newline is returned.
 *
 * At end-of-file, yyerror() is called if the lexer is still inside a comment
 * (S1) or a control line (S3), and if the file stream has its error indicator
 * set we longjmp to pcb_jmpbuf with EDT_FIO.
 */
static int
input(void)
{
	int c;

	if (yysptr > yysbuf)
		/*
		 * NOTE(review): characters re-read from the pushback buffer
		 * are not converted through unsigned char here because the
		 * element type of yysbuf is not visible in this file; confirm
		 * whether bytes with the high bit set survive a pushback.
		 */
		c = *--yysptr;
	else if (yypcb->pcb_fileptr != NULL)
		c = fgetc(yypcb->pcb_fileptr);
	else if (yypcb->pcb_strptr < yypcb->pcb_string + yypcb->pcb_strlen)
		/*
		 * Convert through unsigned char so that string input yields
		 * values in the same non-negative range that fgetc() does.
		 * Without the conversion, if the string is addressed as plain
		 * char and char is signed, a byte with the high bit set would
		 * be returned as a negative value and the byte 0xFF would
		 * compare equal to EOF, ending the input prematurely.
		 */
		c = (unsigned char)*yypcb->pcb_strptr++;
	else
		c = EOF;

	if (c == '\n')
		yylineno++;

	if (c != EOF)
		return (c);

	if ((YYSTATE) == S1)
		yyerror("end-of-file encountered before matching */\n");

	if ((YYSTATE) == S3)
		yyerror("end-of-file encountered before end of control line\n");

	if (yypcb->pcb_fileptr != NULL && ferror(yypcb->pcb_fileptr))
		longjmp(yypcb->pcb_jmpbuf, EDT_FIO);

	return (0); /* EOF */
}

/*
 * Push the character 'c' back onto the pushback buffer so that the next call
 * to input() returns it.  Pushing back a newline undoes the line count
 * increment that input() performed when the newline was read.  The character
 * is also recorded in yytchar.
 */
static void
unput(int c)
{
	if (c == '\n')
		yylineno--;

	*yysptr++ = c;
	yytchar = c;
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?