📄 lex.c
字号:
/*
 * lexical analysis and source input
 */

#include "sh.h"
#include <ctype.h>

/* Structure to keep track of the lexing state and the various pieces of info
 * needed for each particular state.
 *
 * yylex() keeps an array of these as a stack: entering a nested construct
 * pushes an entry (PUSH_STATE), leaving it pops one (POP_STATE).  Element 0
 * of the array is not a real state: yylex() sets its ls_state to -1 and uses
 * its ls_info.base member as a link to another block of states (NULL for the
 * first block).
 *
 * The ls_* macros defined inside the union are shorthands, so that
 * statep->ls_scsparen expands to statep->ls_info.u_scsparen, and so on.
 */
typedef struct lex_state Lex_state;
struct lex_state {
	int ls_state;		/* lexer state of this stack entry */
	union {
		/* $(...) */
		struct scsparen_info {
			/* count open parenthesis: set to 1 when $( is seen;
			 * the state is popped when it drops to 0
			 */
			int nparen;
			/* quoting sub-state used while scanning $(...):
			 * 0 normal, 1 backslash in normal mode,
			 * 2 double quotes, 3 backslash in double quotes,
			 * 4 single quotes
			 */
			int csstate;	/* XXX remove */
#define ls_scsparen ls_info.u_scsparen
		} u_scsparen;

		/* $((...)) */
		struct sasparen_info {
			/* count open parenthesis: set to 2 when $(( is seen */
			int nparen;
			/* marks start of $(( in output str (an Xsavepos()
			 * value); used to turn the word into a $(...)
			 * command substitution when the parentheses turn
			 * out to be mismatched
			 */
			int start;
#define ls_sasparen ls_info.u_sasparen
		} u_sasparen;

		/* ((...)) */
		struct sletparen_info {
			/* count open parenthesis: starts at 0 and counts
			 * the ( nested inside the ((...))
			 */
			int nparen;
#define ls_sletparen ls_info.u_sletparen
		} u_sletparen;

		/* `...` */
		struct sbquote_info {
			/* true if in double quotes: "`...`" (only set when
			 * not in posix mode)
			 */
			int indquotes;
#define ls_sbquote ls_info.u_sbquote
		} u_sbquote;

		Lex_state *base; /* used to point to next state block */
	} ls_info;
};

/* Bounds of the state block currently in use.  yylex() initialises base to
 * the first element of its states array and end to one past the last;
 * PUSH_STATE compares the stack pointer against end and POP_STATE compares
 * it against base to detect when a block boundary is crossed.
 */
typedef struct State_info State_info;
struct State_info {
	Lex_state	*base;
	Lex_state	*end;
};

/* Prototypes for the file-local helpers.
 * NOTE(review): ARGS() is not defined in this file - presumably a sh.h macro
 * that hides the prototype argument list from pre-ANSI compilers; confirm.
 */
static void	readhere ARGS((struct ioword *iop));
static int	getsc__ ARGS((void));
static void	getsc_line ARGS((Source *s));
static int	getsc_bn ARGS((void));
static char	*get_brace_var ARGS((XString *wsp, char *wp));
static int	arraysub ARGS((char **strp));
static const char *ungetsc ARGS((int c));
static void	gethere ARGS((void));
static Lex_state *push_state_ ARGS((State_info *si, Lex_state *old_end));
static Lex_state *pop_state_ ARGS((State_info *si, Lex_state *old_end));

/* When non-zero, getsc() always takes the slow path through getsc_bn().
 * yylex() resets it to 0 each time it starts a token.
 */
static int backslash_skip;
/* Counter that yylex() raises while reading a comment or single-quoted text
 * and lowers again afterwards; reset to 0 at the start of each token.
 * NOTE(review): presumably tells getsc_bn() to leave backslash-newline
 * sequences alone - confirm against getsc_bn().
 */
static int ignore_backslash_newline;

/* optimized getsc_bn(): if the next input character is neither NUL nor a
 * backslash and backslash_skip is zero, return it and advance source->str;
 * otherwise let getsc_bn() deal with it.
 */
#define getsc()		(*source->str != '\0' && *source->str != '\\' \
			 && !backslash_skip ? *source->str++ : getsc_bn())
/* optimized getsc__(): return the next input character and advance
 * source->str unless it is NUL, in which case getsc__() is called.
 */
#define	getsc_()	((*source->str != '\0') ? 
*source->str++ : getsc__())#define STATE_BSIZE 32#define PUSH_STATE(s) do { \ if (++statep == state_info.end) \ statep = push_state_(&state_info, statep); \ state = statep->ls_state = (s); \ } while (0)#define POP_STATE() do { \ if (--statep == state_info.base) \ statep = pop_state_(&state_info, statep); \ state = statep->ls_state; \ } while (0)/* * Lexical analyzer * * tokens are not regular expressions, they are LL(1). * for example, "${var:-${PWD}}", and "$(size $(whence ksh))". * hence the state stack. */intyylex(cf) int cf;{ Lex_state states[STATE_BSIZE], *statep; State_info state_info; register int c, state; XString ws; /* expandable output word */ register char *wp; /* output word pointer */ char *sp, *dp; int c2; Again: states[0].ls_state = -1; states[0].ls_info.base = (Lex_state *) 0; statep = &states[1]; state_info.base = states; state_info.end = &states[STATE_BSIZE]; Xinit(ws, wp, 64, ATEMP); backslash_skip = 0; ignore_backslash_newline = 0; if (cf&ONEWORD) state = SWORD;#ifdef KSH else if (cf&LETEXPR) { *wp++ = OQUOTE; /* enclose arguments in (double) quotes */ state = SLETPAREN; statep->ls_sletparen.nparen = 0; }#endif /* KSH */ else { /* normal lexing */ state = (cf & HEREDELIM) ? 
SHEREDELIM : SBASE; while ((c = getsc()) == ' ' || c == '\t') ; if (c == '#') { ignore_backslash_newline++; while ((c = getsc()) != '\0' && c != '\n') ; ignore_backslash_newline--; } ungetsc(c); } if (source->flags & SF_ALIAS) { /* trailing ' ' in alias definition */ source->flags &= ~SF_ALIAS; /* In POSIX mode, a trailing space only counts if we are * parsing a simple command */ if (!Flag(FPOSIX) || (cf & CMDWORD)) cf |= ALIAS; } /* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */ statep->ls_state = state; /* collect non-special or quoted characters to form word */ while (!((c = getsc()) == 0 || ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) { Xcheck(ws, wp); switch (state) { case SBASE: if (c == '[' && (cf & (VARASN|ARRAYVAR))) { *wp = EOS; /* temporary */ if (is_wdvarname(Xstring(ws, wp), FALSE)) { char *p, *tmp; if (arraysub(&tmp)) { *wp++ = CHAR; *wp++ = c; for (p = tmp; *p; ) { Xcheck(ws, wp); *wp++ = CHAR; *wp++ = *p++; } afree(tmp, ATEMP); break; } else { Source *s; s = pushs(SREREAD, source->areap); s->start = s->str = s->u.freeme = tmp; s->next = source; source = s; } } *wp++ = CHAR; *wp++ = c; break; } /* fall through.. */ Sbase1: /* includes *(...|...) pattern (*+?@!) */#ifdef KSH if (c == '*' || c == '@' || c == '+' || c == '?' || c == '!') { c2 = getsc(); if (c2 == '(' /*)*/ ) { *wp++ = OPAT; *wp++ = c; PUSH_STATE(SPATTERN); break; } ungetsc(c2); }#endif /* KSH */ /* fall through.. */ Sbase2: /* doesn't include *(...|...) pattern (*+?@!) 
*/ switch (c) { case '\\': c = getsc();#ifdef OS2 if (isalnum(c)) { *wp++ = CHAR, *wp++ = '\\'; *wp++ = CHAR, *wp++ = c; } else #endif if (c) /* trailing \ is lost */ *wp++ = QCHAR, *wp++ = c; break; case '\'': *wp++ = OQUOTE; ignore_backslash_newline++; PUSH_STATE(SSQUOTE); break; case '"': *wp++ = OQUOTE; PUSH_STATE(SDQUOTE); break; default: goto Subst; } break; Subst: switch (c) { case '\\': c = getsc(); switch (c) { case '"': case '\\': case '$': case '`': *wp++ = QCHAR, *wp++ = c; break; default: Xcheck(ws, wp); if (c) { /* trailing \ is lost */ *wp++ = CHAR, *wp++ = '\\'; *wp++ = CHAR, *wp++ = c; } break; } break; case '$': c = getsc(); if (c == '(') /*)*/ { c = getsc(); if (c == '(') /*)*/ { PUSH_STATE(SASPAREN); statep->ls_sasparen.nparen = 2; statep->ls_sasparen.start = Xsavepos(ws, wp); *wp++ = EXPRSUB; } else { ungetsc(c); PUSH_STATE(SCSPAREN); statep->ls_scsparen.nparen = 1; statep->ls_scsparen.csstate = 0; *wp++ = COMSUB; } } else if (c == '{') /*}*/ { *wp++ = OSUBST; *wp++ = '{'; /*}*/ wp = get_brace_var(&ws, wp); c = getsc(); /* allow :# and :% (ksh88 compat) */ if (c == ':') { *wp++ = CHAR, *wp++ = c; c = getsc(); } /* If this is a trim operation, * treat (,|,) specially in STBRACE. */ if (c == '#' || c == '%') { ungetsc(c); PUSH_STATE(STBRACE); } else { ungetsc(c); PUSH_STATE(SBRACE); } } else if (ctype(c, C_ALPHA)) { *wp++ = OSUBST; *wp++ = 'X'; do { Xcheck(ws, wp); *wp++ = c; c = getsc(); } while (ctype(c, C_ALPHA|C_DIGIT)); *wp++ = '\0'; *wp++ = CSUBST; *wp++ = 'X'; ungetsc(c); } else if (ctype(c, C_DIGIT|C_VAR1)) { Xcheck(ws, wp); *wp++ = OSUBST; *wp++ = 'X'; *wp++ = c; *wp++ = '\0'; *wp++ = CSUBST; *wp++ = 'X'; } else { *wp++ = CHAR, *wp++ = '$'; ungetsc(c); } break; case '`': PUSH_STATE(SBQUOTE); *wp++ = COMSUB; /* Need to know if we are inside double quotes * since sh/at&t-ksh translate the \" to " in * "`..\"..`". 
* This is not done in posix mode (section * 3.2.3, Double Quotes: "The backquote shall * retain its special meaning introducing the * other form of command substitution (see * 3.6.3). The portion of the quoted string * from the initial backquote and the * characters up to the next backquote that * is not preceded by a backslash (having * escape characters removed) defines that * command whose output replaces `...` when * the word is expanded." * Section 3.6.3, Command Substitution: * "Within the backquoted style of command * substitution, backslash shall retain its * literal meaning, except when followed by * $ ` \."). */ statep->ls_sbquote.indquotes = 0; if (!Flag(FPOSIX)) { Lex_state *s = statep; Lex_state *base = state_info.base; while (1) { for (; s != base; s--) { if (s->ls_state == SDQUOTE) { statep->ls_sbquote.indquotes = 1; break; } } if (s != base) break; if (!(s = s->ls_info.base)) break; base = s-- - STATE_BSIZE; } } break; default: *wp++ = CHAR, *wp++ = c; } break; case SSQUOTE: if (c == '\'') { POP_STATE(); *wp++ = CQUOTE; ignore_backslash_newline--; } else *wp++ = QCHAR, *wp++ = c; break; case SDQUOTE: if (c == '"') { POP_STATE(); *wp++ = CQUOTE; } else goto Subst; break; case SCSPAREN: /* $( .. ) */ /* todo: deal with $(...) quoting properly * kludge to partly fake quoting inside $(..): doesn't * really work because nested $(..) or ${..} inside * double quotes aren't dealt with. 
*/ switch (statep->ls_scsparen.csstate) { case 0: /* normal */ switch (c) { case '(': statep->ls_scsparen.nparen++; break; case ')': statep->ls_scsparen.nparen--; break; case '\\': statep->ls_scsparen.csstate = 1; break; case '"': statep->ls_scsparen.csstate = 2; break; case '\'': statep->ls_scsparen.csstate = 4; ignore_backslash_newline++; break; } break; case 1: /* backslash in normal mode */ case 3: /* backslash in double quotes */ --statep->ls_scsparen.csstate; break; case 2: /* double quotes */ if (c == '"') statep->ls_scsparen.csstate = 0; else if (c == '\\') statep->ls_scsparen.csstate = 3; break; case 4: /* single quotes */ if (c == '\'') { statep->ls_scsparen.csstate = 0; ignore_backslash_newline--; } break; } if (statep->ls_scsparen.nparen == 0) { POP_STATE(); *wp++ = 0; /* end of COMSUB */ } else *wp++ = c; break; case SASPAREN: /* $(( .. )) */ /* todo: deal with $((...); (...)) properly */ /* XXX should nest using existing state machine * (embed "..", $(...), etc.) */ if (c == '(') statep->ls_sasparen.nparen++; else if (c == ')') { statep->ls_sasparen.nparen--; if (statep->ls_sasparen.nparen == 1) { /*(*/ if ((c2 = getsc()) == ')') { POP_STATE(); *wp++ = 0; /* end of EXPRSUB */ break; } else { char *s; ungetsc(c2); /* mismatched parenthesis - * assume we were really * parsing a $(..) 
expression */ s = Xrestpos(ws, wp, statep->ls_sasparen.start); memmove(s + 1, s, wp - s); *s++ = COMSUB; *s = '('; /*)*/ wp++; statep->ls_scsparen.nparen = 1; statep->ls_scsparen.csstate = 0; state = statep->ls_state = SCSPAREN; } } } *wp++ = c; break; case SBRACE: /*{*/ if (c == '}') { POP_STATE(); *wp++ = CSUBST; *wp++ = /*{*/ '}'; } else goto Sbase1; break; case STBRACE: /* Same as SBRACE, except (,|,) treated specially */ /*{*/ if (c == '}') { POP_STATE(); *wp++ = CSUBST; *wp++ = /*{*/ '}'; } else if (c == '|') { *wp++ = SPAT; } else if (c == '(') { *wp++ = OPAT; *wp++ = ' '; /* simile for @ */ PUSH_STATE(SPATTERN); } else goto Sbase1; break; case SBQUOTE: if (c == '`') { *wp++ = 0; POP_STATE(); } else if (c == '\\') { switch (c = getsc()) { case '\\': case '$': case '`': *wp++ = c; break; case '"': if (statep->ls_sbquote.indquotes) { *wp++ = c; break; } /* fall through.. */ default: if (c) { /* trailing \ is lost */ *wp++ = '\\'; *wp++ = c; } break; } } else *wp++ = c; break; case SWORD: /* ONEWORD */ goto Subst;#ifdef KSH case SLETPAREN: /* LETEXPR: (( ... )) */ /*(*/ if (c == ')') { if (statep->ls_sletparen.nparen > 0) --statep->ls_sletparen.nparen; /*(*/ else if ((c2 = getsc()) == ')') { c = 0; *wp++ = CQUOTE; goto Done; } else ungetsc(c2); } else if (c == '(') /* parenthesis inside quotes and backslashes * are lost, but at&t ksh doesn't count them * either */ ++statep->ls_sletparen.nparen; goto Sbase2;#endif /* KSH */ case SHEREDELIM: /* <<,<<- delimiter */ /* XXX chuck this state (and the next) - use * the existing states ($ and \`..` should be * stripped of their specialness after the * fact). 
*/ /* here delimiters need a special case since * $ and `..` are not to be treated specially */ if (c == '\\') { c = getsc(); if (c) { /* trailing \ is lost */ *wp++ = QCHAR; *wp++ = c; } } else if (c == '\'') { PUSH_STATE(SSQUOTE); *wp++ = OQUOTE; ignore_backslash_newline++; } else if (c == '"') { state = statep->ls_state = SHEREDQUOTE; *wp++ = OQUOTE; } else { *wp++ = CHAR; *wp++ = c; } break; case SHEREDQUOTE: /* " in <<,<<- delimiter */ if (c == '"') { *wp++ = CQUOTE; state = statep->ls_state = SHEREDELIM; } else { if (c == '\\') { switch (c = getsc()) { case '\\': case '"': case '$': case '`': break; default: if (c) { /* trailing \ lost */ *wp++ = CHAR; *wp++ = '\\'; } break; } } *wp++ = CHAR; *wp++ = c; } break; case SPATTERN: /* in *(...|...) pattern (*+?@!) */ if ( /*(*/ c == ')') { *wp++ = CPAT; POP_STATE(); } else if (c == '|') { *wp++ = SPAT; } else if (c == '(') { *wp++ = OPAT; *wp++ = ' '; /* simile for @ */ PUSH_STATE(SPATTERN); } else goto Sbase1; break; } }Done: Xcheck(ws, wp); if (statep != &states[1]) /* XXX figure out what is missing */ yyerror("no closing quote\n"); /* This done to avoid tests for SHEREDELIM wherever SBASE tested */ if (state == SHEREDELIM) state = SBASE; dp = Xstring(ws, wp); if ((c == '<' || c == '>') && state == SBASE && ((c2 = Xlength(ws, wp)) == 0 || (c2 == 2 && dp[0] == CHAR && digit(dp[1])))) { struct ioword *iop = (struct ioword *) alloc(sizeof(*iop), ATEMP); if (c2 == 2) iop->unit = dp[1] - '0'; else iop->unit = c == '>'; /* 0 for <, 1 for > */ c2 = getsc(); /* <<, >>, <> are ok, >< is not */ if (c == c2 || (c == '<' && c2 == '>')) { iop->flag = c == c2 ? (c == '>' ? IOCAT : IOHERE) : IORDWR; if (iop->flag == IOHERE) if ((c2 = getsc()) == '-') iop->flag |= IOSKIP; else ungetsc(c2); } else if (c2 == '&') iop->flag = IODUP | (c == '<' ? IORDUP : 0); else { iop->flag = c == '>' ? 
IOWRITE : IOREAD; if (c == '>' && c2 == '|') iop->flag |= IOCLOB; else ungetsc(c2); } iop->name = (char *) 0; iop->delim = (char *) 0; iop->heredoc = (char *) 0; Xfree(ws, wp); /* free word */ yylval.iop = iop; return REDIR; } if (wp == dp && state == SBASE) { Xfree(ws, wp); /* free word */ /* no word, process LEX1 character */ switch (c) { default: return c; case '|': case '&': case ';': if ((c2 = getsc()) == c) c = (c == ';') ? BREAK : (c == '|') ? LOGOR : (c == '&') ? LOGAND : YYERRCODE;#ifdef KSH else if (c == '|' && c2 == '&') c = COPROC;#endif /* KSH */ else ungetsc(c2); return c; case '\n': gethere(); if (cf & CONTIN) goto Again;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -