📄 lex.c

📁 一个开放源代码的 AT&T Korn Shell 的复制品，支持大多数 ksh88 的特性。
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*
 * lexical analysis and source input
 */

#include "sh.h"
#include <ctype.h>

/*
 * One entry of the lexer's state stack: the state code plus whatever
 * bookkeeping that particular state needs.  Only the union member that
 * matches ls_state is meaningful.
 */
typedef struct lex_state Lex_state;
struct lex_state {
	int ls_state;			/* state code for this entry */
	union {
		/* $(...) */
		struct scsparen_info {
			int nparen;	/* count open parenthesis */
			int csstate;	/* quoting sub-state inside $(..);
					 * XXX remove */
		} u_scsparen;

		/* $((...)) */
		struct sasparen_info {
			int nparen;	/* count open parenthesis */
			int start;	/* marks start of $(( in output str */
		} u_sasparen;

		/* ((...)) */
		struct sletparen_info {
			int nparen;	/* count open parenthesis */
		} u_sletparen;

		/* `...` */
		struct sbquote_info {
			int indquotes;	/* true if in double quotes: "`...`" */
		} u_sbquote;

		/* used to point to next state block */
		Lex_state *base;
	} ls_info;
};

/* Shorthand for reaching the per-state members through the union. */
#define ls_scsparen	ls_info.u_scsparen
#define ls_sasparen	ls_info.u_sasparen
#define ls_sletparen	ls_info.u_sletparen
#define ls_sbquote	ls_info.u_sbquote

/*
 * Bounds of the state block currently in use: yylex() sets base to the
 * first slot of the block and end to one past its last slot.
 */
typedef struct State_info State_info;
struct State_info {
	Lex_state	*base;
	Lex_state	*end;
};

/* character input */
static int	getsc__ ARGS((void));
static int	getsc_bn ARGS((void));
static void	getsc_line ARGS((Source *s));
static const char *ungetsc ARGS((int c));

/* here documents */
static void	gethere ARGS((void));
static void	readhere ARGS((struct ioword *iop));

/* word-level helpers */
static int	arraysub ARGS((char **strp));
static char	*get_brace_var ARGS((XString *wsp, char *wp));

/* state stack overflow/underflow handlers used by PUSH_STATE/POP_STATE */
static Lex_state *push_state_ ARGS((State_info *si, Lex_state *old_end));
static Lex_state *pop_state_ ARGS((State_info *si, Lex_state *old_end));

/* non-zero forces getsc() off its fast path and into getsc_bn() */
static int backslash_skip;
/* raised by yylex() while reading a comment or single-quoted text */
static int ignore_backslash_newline;

/*
 * optimized getsc_bn(): take the next character straight from the source
 * buffer unless it is NUL or a backslash, or backslash_skip is set; in
 * those cases let getsc_bn() deal with it.
 */
#define getsc()		(*source->str != '\0' && *source->str != '\\' \
			 && !backslash_skip ? *source->str++ : getsc_bn())

/* optimized getsc__() */
#define	getsc_()	((*source->str != '\0') ? 
*source->str++ : getsc__())#define STATE_BSIZE	32#define PUSH_STATE(s)	do { \			    if (++statep == state_info.end) \				statep = push_state_(&state_info, statep); \			    state = statep->ls_state = (s); \			} while (0)#define POP_STATE()	do { \			    if (--statep == state_info.base) \				statep = pop_state_(&state_info, statep); \			    state = statep->ls_state; \			} while (0)/* * Lexical analyzer * * tokens are not regular expressions, they are LL(1). * for example, "${var:-${PWD}}", and "$(size $(whence ksh))". * hence the state stack. */intyylex(cf)	int cf;{	Lex_state states[STATE_BSIZE], *statep;	State_info state_info;	register int c, state;	XString ws;		/* expandable output word */	register char *wp;	/* output word pointer */	char *sp, *dp;	int c2;  Again:	states[0].ls_state = -1;	states[0].ls_info.base = (Lex_state *) 0;	statep = &states[1];	state_info.base = states;	state_info.end = &states[STATE_BSIZE];	Xinit(ws, wp, 64, ATEMP);	backslash_skip = 0;	ignore_backslash_newline = 0;	if (cf&ONEWORD)		state = SWORD;#ifdef KSH	else if (cf&LETEXPR) {		*wp++ = OQUOTE;	 /* enclose arguments in (double) quotes */		state = SLETPAREN;			statep->ls_sletparen.nparen = 0;	}#endif /* KSH */	else {		/* normal lexing */		state = (cf & HEREDELIM) ? 
SHEREDELIM : SBASE;		while ((c = getsc()) == ' ' || c == '\t')			;		if (c == '#') {			ignore_backslash_newline++;			while ((c = getsc()) != '\0' && c != '\n')				;			ignore_backslash_newline--;		}		ungetsc(c);	}	if (source->flags & SF_ALIAS) {	/* trailing ' ' in alias definition */		source->flags &= ~SF_ALIAS;		/* In POSIX mode, a trailing space only counts if we are		 * parsing a simple command		 */		if (!Flag(FPOSIX) || (cf & CMDWORD))			cf |= ALIAS;	}	/* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */	statep->ls_state = state;	/* collect non-special or quoted characters to form word */	while (!((c = getsc()) == 0		 || ((state == SBASE || state == SHEREDELIM)		     && ctype(c, C_LEX1))))	{		Xcheck(ws, wp);		switch (state) {		  case SBASE:			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {				*wp = EOS; /* temporary */				if (is_wdvarname(Xstring(ws, wp), FALSE))				{					char *p, *tmp;					if (arraysub(&tmp)) {						*wp++ = CHAR;						*wp++ = c;						for (p = tmp; *p; ) {							Xcheck(ws, wp);							*wp++ = CHAR;							*wp++ = *p++;						}						afree(tmp, ATEMP);						break;					} else {						Source *s;						s = pushs(SREREAD,							  source->areap);						s->start = s->str							= s->u.freeme = tmp;						s->next = source;						source = s;					}				}				*wp++ = CHAR;				*wp++ = c;				break;			}			/* fall through.. */		  Sbase1:	/* includes *(...|...) pattern (*+?@!) */#ifdef KSH			if (c == '*' || c == '@' || c == '+' || c == '?'			    || c == '!')			{				c2 = getsc();				if (c2 == '(' /*)*/ ) {					*wp++ = OPAT;					*wp++ = c;					PUSH_STATE(SPATTERN);					break;				}				ungetsc(c2);			}#endif /* KSH */			/* fall through.. */		  Sbase2:	/* doesn't include *(...|...) pattern (*+?@!) 
*/			switch (c) {			  case '\\':				c = getsc();#ifdef OS2				if (isalnum(c)) {					*wp++ = CHAR, *wp++ = '\\';					*wp++ = CHAR, *wp++ = c;				} else #endif				if (c) /* trailing \ is lost */					*wp++ = QCHAR, *wp++ = c;				break;			  case '\'':				*wp++ = OQUOTE;				ignore_backslash_newline++;				PUSH_STATE(SSQUOTE);				break;			  case '"':				*wp++ = OQUOTE;				PUSH_STATE(SDQUOTE);				break;			  default:				goto Subst;			}			break;		  Subst:			switch (c) {			  case '\\':				c = getsc();				switch (c) {				  case '"': case '\\':				  case '$': case '`':					*wp++ = QCHAR, *wp++ = c;					break;				  default:					Xcheck(ws, wp);					if (c) { /* trailing \ is lost */						*wp++ = CHAR, *wp++ = '\\';						*wp++ = CHAR, *wp++ = c;					}					break;				}				break;			  case '$':				c = getsc();				if (c == '(') /*)*/ {					c = getsc();					if (c == '(') /*)*/ {						PUSH_STATE(SASPAREN);						statep->ls_sasparen.nparen = 2;						statep->ls_sasparen.start =							Xsavepos(ws, wp);						*wp++ = EXPRSUB;					} else {						ungetsc(c);						PUSH_STATE(SCSPAREN);						statep->ls_scsparen.nparen = 1;						statep->ls_scsparen.csstate = 0;						*wp++ = COMSUB;					}				} else if (c == '{') /*}*/ {					*wp++ = OSUBST;					*wp++ = '{'; /*}*/					wp = get_brace_var(&ws, wp);					c = getsc();					/* allow :# and :% (ksh88 compat) */					if (c == ':') {						*wp++ = CHAR, *wp++ = c;						c = getsc();					}					/* If this is a trim operation,					 * treat (,|,) specially in STBRACE.					 
*/					if (c == '#' || c == '%') {						ungetsc(c);						PUSH_STATE(STBRACE);					} else {						ungetsc(c);						PUSH_STATE(SBRACE);					}				} else if (ctype(c, C_ALPHA)) {					*wp++ = OSUBST;					*wp++ = 'X';					do {						Xcheck(ws, wp);						*wp++ = c;						c = getsc();					} while (ctype(c, C_ALPHA|C_DIGIT));					*wp++ = '\0';					*wp++ = CSUBST;					*wp++ = 'X';					ungetsc(c);				} else if (ctype(c, C_DIGIT|C_VAR1)) {					Xcheck(ws, wp);					*wp++ = OSUBST;					*wp++ = 'X';					*wp++ = c;					*wp++ = '\0';					*wp++ = CSUBST;					*wp++ = 'X';				} else {					*wp++ = CHAR, *wp++ = '$';					ungetsc(c);				}				break;			  case '`':				PUSH_STATE(SBQUOTE);				*wp++ = COMSUB;				/* Need to know if we are inside double quotes				 * since sh/at&t-ksh translate the \" to " in				 * "`..\"..`".				 * This is not done in posix mode (section				 * 3.2.3, Double Quotes: "The backquote shall				 * retain its special meaning introducing the				 * other form of command substitution (see				 * 3.6.3). The portion of the quoted string				 * from the initial backquote and the				 * characters up to the next backquote that				 * is not preceded by a backslash (having				 * escape characters removed) defines that				 * command whose output replaces `...` when				 * the word is expanded."				 * Section 3.6.3, Command Substitution:				 * "Within the backquoted style of command				 * substitution, backslash shall retain its				 * literal meaning, except when followed by				 * $ ` \.").				 
*/				statep->ls_sbquote.indquotes = 0;				if (!Flag(FPOSIX)) {					Lex_state *s = statep;					Lex_state *base = state_info.base;					while (1) {						for (; s != base; s--) {							if (s->ls_state == SDQUOTE) {								statep->ls_sbquote.indquotes = 1;								break;							}						}						if (s != base)							break;						if (!(s = s->ls_info.base))							break;						base = s-- - STATE_BSIZE;					}				}				break;			  default:				*wp++ = CHAR, *wp++ = c;			}			break;		  case SSQUOTE:			if (c == '\'') {				POP_STATE();				*wp++ = CQUOTE;				ignore_backslash_newline--;			} else				*wp++ = QCHAR, *wp++ = c;			break;		  case SDQUOTE:			if (c == '"') {				POP_STATE();				*wp++ = CQUOTE;			} else				goto Subst;			break;		  case SCSPAREN: /* $( .. ) */			/* todo: deal with $(...) quoting properly			 * kludge to partly fake quoting inside $(..): doesn't			 * really work because nested $(..) or ${..} inside			 * double quotes aren't dealt with.			 */			switch (statep->ls_scsparen.csstate) {			  case 0: /* normal */				switch (c) {				  case '(':					statep->ls_scsparen.nparen++;					break;				  case ')':					statep->ls_scsparen.nparen--;					break;				  case '\\':					statep->ls_scsparen.csstate = 1;					break;				  case '"':					statep->ls_scsparen.csstate = 2;					break;				  case '\'':					statep->ls_scsparen.csstate = 4;					ignore_backslash_newline++;					break;				}				break;			  case 1: /* backslash in normal mode */			  case 3: /* backslash in double quotes */				--statep->ls_scsparen.csstate;				break;			  case 2: /* double quotes */				if (c == '"')					statep->ls_scsparen.csstate = 0;				else if (c == '\\')					statep->ls_scsparen.csstate = 3;				break;			  case 4: /* single quotes */				if (c == '\'') {					statep->ls_scsparen.csstate = 0;					ignore_backslash_newline--;				}				break;			}			if (statep->ls_scsparen.nparen == 0) {				POP_STATE();				*wp++ = 0; /* end of COMSUB */			} else				*wp++ = c;			break;		  case SASPAREN: /* $(( .. 
)) */			/* todo: deal with $((...); (...)) properly */			/* XXX should nest using existing state machine			 *     (embed "..", $(...), etc.) */			if (c == '(')				statep->ls_sasparen.nparen++;			else if (c == ')') {				statep->ls_sasparen.nparen--;				if (statep->ls_sasparen.nparen == 1) {					/*(*/					if ((c2 = getsc()) == ')') {						POP_STATE();						*wp++ = 0; /* end of EXPRSUB */						break;					} else {						char *s;						ungetsc(c2);						/* mismatched parenthesis -						 * assume we were really						 * parsing a $(..) expression						 */						s = Xrestpos(ws, wp,						     statep->ls_sasparen.start);						memmove(s + 1, s, wp - s);						*s++ = COMSUB;						*s = '('; /*)*/						wp++;						statep->ls_scsparen.nparen = 1;						statep->ls_scsparen.csstate = 0;						state = statep->ls_state							= SCSPAREN;											}				}			}			*wp++ = c;			break;		  case SBRACE:			/*{*/			if (c == '}') {				POP_STATE();				*wp++ = CSUBST;				*wp++ = /*{*/ '}';			} else				goto Sbase1;			break;		  case STBRACE:			/* Same as SBRACE, except (,|,) treated specially */			/*{*/			if (c == '}') {				POP_STATE();				*wp++ = CSUBST;				*wp++ = /*{*/ '}';			} else if (c == '|') {				*wp++ = SPAT;			} else if (c == '(') {				*wp++ = OPAT;				*wp++ = ' ';	/* simile for @ */				PUSH_STATE(SPATTERN);			} else				goto Sbase1;			break;		  case SBQUOTE:			if (c == '`') {				*wp++ = 0;				POP_STATE();			} else if (c == '\\') {				switch (c = getsc()) {				  case '\\':				  case '$': case '`':					*wp++ = c;					break;				  case '"':					if (statep->ls_sbquote.indquotes) {						*wp++ = c;						break;					}					/* fall through.. */				  default:					if (c) { /* trailing \ is lost */						*wp++ = '\\';						*wp++ = c;					}					break;				}			} else				*wp++ = c;			break;		  case SWORD:	/* ONEWORD */			goto Subst;#ifdef KSH		  case SLETPAREN:	/* LETEXPR: (( ... 
)) */			/*(*/			if (c == ')') {				if (statep->ls_sletparen.nparen > 0)				    --statep->ls_sletparen.nparen;				/*(*/				else if ((c2 = getsc()) == ')') {					c = 0;					*wp++ = CQUOTE;					goto Done;				} else					ungetsc(c2);			} else if (c == '(')				/* parenthesis inside quotes and backslashes				 * are lost, but at&t ksh doesn't count them				 * either				 */				++statep->ls_sletparen.nparen;			goto Sbase2;#endif /* KSH */		  case SHEREDELIM:	/* <<,<<- delimiter */			/* XXX chuck this state (and the next) - use			 * the existing states ($ and \`..` should be			 * stripped of their specialness after the			 * fact).			 */			/* here delimiters need a special case since			 * $ and `..` are not to be treated specially			 */			if (c == '\\') {				c = getsc();				if (c) { /* trailing \ is lost */					*wp++ = QCHAR;					*wp++ = c;				}			} else if (c == '\'') {				PUSH_STATE(SSQUOTE);				*wp++ = OQUOTE;				ignore_backslash_newline++;			} else if (c == '"') {				state = statep->ls_state = SHEREDQUOTE;				*wp++ = OQUOTE;			} else {				*wp++ = CHAR;				*wp++ = c;			}			break;		  case SHEREDQUOTE:	/* " in <<,<<- delimiter */			if (c == '"') {				*wp++ = CQUOTE;				state = statep->ls_state = SHEREDELIM;			} else {				if (c == '\\') {					switch (c = getsc()) {					  case '\\': case '"':					  case '$': case '`':						break;					  default:						if (c) { /* trailing \ lost */							*wp++ = CHAR;							*wp++ = '\\';						}						break;					}				}				*wp++ = CHAR;				*wp++ = c;			}			break;		  case SPATTERN:	/* in *(...|...) pattern (*+?@!) 
*/			if ( /*(*/ c == ')') {				*wp++ = CPAT;				POP_STATE();			} else if (c == '|') {				*wp++ = SPAT;			} else if (c == '(') {				*wp++ = OPAT;				*wp++ = ' ';	/* simile for @ */				PUSH_STATE(SPATTERN);			} else				goto Sbase1;			break;		}	}Done:	Xcheck(ws, wp);	if (statep != &states[1])		/* XXX figure out what is missing */		yyerror("no closing quote\n");	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */	if (state == SHEREDELIM)		state = SBASE;	dp = Xstring(ws, wp);	if ((c == '<' || c == '>') && state == SBASE	    && ((c2 = Xlength(ws, wp)) == 0	        || (c2 == 2 && dp[0] == CHAR && digit(dp[1]))))	{		struct ioword *iop =				(struct ioword *) alloc(sizeof(*iop), ATEMP);		if (c2 == 2)			iop->unit = dp[1] - '0';		else			iop->unit = c == '>'; /* 0 for <, 1 for > */		c2 = getsc();		/* <<, >>, <> are ok, >< is not */		if (c == c2 || (c == '<' && c2 == '>')) {			iop->flag = c == c2 ?				  (c == '>' ? IOCAT : IOHERE) : IORDWR;			if (iop->flag == IOHERE)				if ((c2 = getsc()) == '-')					iop->flag |= IOSKIP;				else					ungetsc(c2);		} else if (c2 == '&')			iop->flag = IODUP | (c == '<' ? IORDUP : 0);		else {			iop->flag = c == '>' ? IOWRITE : IOREAD;			if (c == '>' && c2 == '|')				iop->flag |= IOCLOB;			else				ungetsc(c2);		}		iop->name = (char *) 0;		iop->delim = (char *) 0;		iop->heredoc = (char *) 0;		Xfree(ws, wp);	/* free word */		yylval.iop = iop;		return REDIR;	}	if (wp == dp && state == SBASE) {		Xfree(ws, wp);	/* free word */		/* no word, process LEX1 character */		switch (c) {		  default:			return c;		  case '|':		  case '&':		  case ';':			if ((c2 = getsc()) == c)				c = (c == ';') ? BREAK :				    (c == '|') ? LOGOR :				    (c == '&') ? LOGAND :				    YYERRCODE;#ifdef KSH			else if (c == '|' && c2 == '&')				c = COPROC;#endif /* KSH */			else				ungetsc(c2);			return c;		  case '\n':			gethere();			if (cf & CONTIN)				goto Again;
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -