📄 regc_lex.c
字号:
NOTE(REG_UBOUNDS); INTOCON(L_EBND); RET('{'); } assert(NOTREACHED); break; case CHR('('): /* parenthesis, or advanced extension */ if ((v->cflags & REG_ADVF) && NEXT1('?')) { NOTE(REG_UNONPOSIX); v->now++; switch (*v->now++) { case CHR(':'): /* non-capturing paren */ RETV('(', 0); break; case CHR('#'): /* comment */ while (!ATEOS() && *v->now != CHR(')')) v->now++; if (!ATEOS()) v->now++; assert(v->nexttype == v->lasttype); return next(v); break; case CHR('='): /* positive lookahead */ NOTE(REG_ULOOKAHEAD); RETV(LACON, 1); break; case CHR('!'): /* negative lookahead */ NOTE(REG_ULOOKAHEAD); RETV(LACON, 0); break; default: FAILW(REG_BADRPT); break; } assert(NOTREACHED); } if (v->cflags & REG_NOSUB) RETV('(', 0); /* all parens non-capturing */ else RETV('(', 1); break; case CHR(')'): if (LASTTYPE('(')) NOTE(REG_UUNSPEC); RETV(')', c); break; case CHR('['): /* easy except for [[:<:]] and [[:>:]] */ if (HAVE(6) && *(v->now + 0) == CHR('[') && *(v->now + 1) == CHR(':') && (*(v->now + 2) == CHR('<') || *(v->now + 2) == CHR('>')) && *(v->now + 3) == CHR(':') && *(v->now + 4) == CHR(']') && *(v->now + 5) == CHR(']')) { c = *(v->now + 2); v->now += 6; NOTE(REG_UNONPOSIX); RET((c == CHR('<')) ? '<' : '>'); } INTOCON(L_BRACK); if (NEXT1('^')) { v->now++; RETV('[', 0); } RETV('[', 1); break; case CHR('.'): RET('.'); break; case CHR('^'): RET('^'); break; case CHR('$'): RET('$'); break; case CHR('\\'): /* mostly punt backslashes to code below */ if (ATEOS()) FAILW(REG_EESCAPE); break; default: /* ordinary character */ RETV(PLAIN, c); break; } /* ERE/ARE backslash handling; backslash already eaten */ assert(!ATEOS()); if (!(v->cflags & REG_ADVF)) { /* only AREs have non-trivial escapes */ if (iscalnum(*v->now)) { NOTE(REG_UBSALNUM); NOTE(REG_UUNSPEC); } RETV(PLAIN, *v->now++); } (DISCARD) lexescape(v); if (ISERR()) FAILW(REG_EESCAPE); if (v->nexttype == CCLASS) { /* fudge at lexical level */ switch (v->nextvalue) { case 'd': lexnest(v, backd, ENDOF(backd)); break; case 'D': lexnest(v, backD, ENDOF(backD)); break; case 's': lexnest(v, backs, ENDOF(backs)); break; case 'S': lexnest(v, backS, ENDOF(backS)); break; case 'w': lexnest(v, backw, ENDOF(backw)); break; case 'W': lexnest(v, backW, ENDOF(backW)); break; default: assert(NOTREACHED); FAILW(REG_ASSERT); break; } /* lexnest done, back up and try again */ v->nexttype = v->lasttype; return next(v); } /* otherwise, lexescape has already done the work */ return !ISERR();}/* * lexescape - parse an ARE backslash escape (backslash already eaten) * Note slightly nonstandard use of the CCLASS type code. */static int /* not actually used, but convenient for RETV */lexescape(struct vars * v){ chr c; static chr alert[] = { CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t') }; static chr esc[] = { CHR('E'), CHR('S'), CHR('C') }; chr *save; assert(v->cflags & REG_ADVF); assert(!ATEOS()); c = *v->now++; if (!iscalnum(c)) RETV(PLAIN, c); NOTE(REG_UNONPOSIX); switch (c) { case CHR('a'): RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007'))); break; case CHR('A'): RETV(SBEGIN, 0); break; case CHR('b'): RETV(PLAIN, CHR('\b')); break; case CHR('B'): RETV(PLAIN, CHR('\\')); break; case CHR('c'): NOTE(REG_UUNPORT); if (ATEOS()) FAILW(REG_EESCAPE); RETV(PLAIN, (chr) (*v->now++ & 037)); break; case CHR('d'): NOTE(REG_ULOCALE); RETV(CCLASS, 'd'); break; case CHR('D'): NOTE(REG_ULOCALE); RETV(CCLASS, 'D'); break; case CHR('e'): NOTE(REG_UUNPORT); RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033'))); break; case CHR('f'): RETV(PLAIN, CHR('\f')); break; case CHR('m'): RET('<'); break; case CHR('M'): RET('>'); break; case CHR('n'): RETV(PLAIN, CHR('\n')); break; case CHR('r'): RETV(PLAIN, CHR('\r')); break; case CHR('s'): NOTE(REG_ULOCALE); RETV(CCLASS, 's'); break; case CHR('S'): NOTE(REG_ULOCALE); RETV(CCLASS, 'S'); break; case CHR('t'): RETV(PLAIN, CHR('\t')); break; case CHR('u'): c = lexdigits(v, 16, 4, 4); if (ISERR()) FAILW(REG_EESCAPE); RETV(PLAIN, c); break; case CHR('U'): c = lexdigits(v, 16, 8, 8); if (ISERR()) FAILW(REG_EESCAPE); RETV(PLAIN, c); break; case CHR('v'): RETV(PLAIN, CHR('\v')); break; case CHR('w'): NOTE(REG_ULOCALE); RETV(CCLASS, 'w'); break; case CHR('W'): NOTE(REG_ULOCALE); RETV(CCLASS, 'W'); break; case CHR('x'): NOTE(REG_UUNPORT); c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */ if (ISERR()) FAILW(REG_EESCAPE); RETV(PLAIN, c); break; case CHR('y'): NOTE(REG_ULOCALE); RETV(WBDRY, 0); break; case CHR('Y'): NOTE(REG_ULOCALE); RETV(NWBDRY, 0); break; case CHR('Z'): RETV(SEND, 0); break; case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'): case CHR('9'): save = v->now; v->now--; /* put first digit back */ c = lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */ if (ISERR()) FAILW(REG_EESCAPE); /* ugly heuristic (first test is "exactly 1 digit?") */ if (v->now - save == 0 || ((int) c > 0 && (int) c <= v->nsubexp)) { NOTE(REG_UBACKREF); RETV(BACKREF, (chr) c); } /* oops, doesn't look like it's a backref after all... */ v->now = save; /* and fall through into octal number */ case CHR('0'): NOTE(REG_UUNPORT); v->now--; /* put first digit back */ c = lexdigits(v, 8, 1, 3); if (ISERR()) FAILW(REG_EESCAPE); RETV(PLAIN, c); break; default: assert(iscalpha(c)); FAILW(REG_EESCAPE); /* unknown alphabetic escape */ break; } assert(NOTREACHED);}/* * lexdigits - slurp up digits and return chr value */static chr /* chr value; errors signalled via ERR */lexdigits(struct vars * v, int base, int minlen, int maxlen){ uchr n; /* unsigned to avoid overflow misbehavior */ int len; chr c; int d; const uchr ub = (uchr) base; n = 0; for (len = 0; len < maxlen && !ATEOS(); len++) { c = *v->now++; switch (c) { case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'): case CHR('9'): d = DIGITVAL(c); break; case CHR('a'): case CHR('A'): d = 10; break; case CHR('b'): case CHR('B'): d = 11; break; case CHR('c'): case CHR('C'): d = 12; break; case CHR('d'): case CHR('D'): d = 13; break; case CHR('e'): case CHR('E'): d = 14; break; case CHR('f'): case CHR('F'): d = 15; break; default: v->now--; /* oops, not a digit at all */ d = -1; break; } if (d >= base) { /* not a plausible digit */ v->now--; d = -1; } if (d < 0) break; /* NOTE BREAK OUT */ n = n * ub + (uchr) d; } if (len < minlen) ERR(REG_EESCAPE); return (chr) n;}/* * brenext - get next BRE token * * This is much like EREs except for all the stupid backslashes and the * context-dependency of some things. */static int /* 1 normal, 0 failure */brenext(struct vars * v, chr pc){ chr c = (chr) pc; switch (c) { case CHR('*'): if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^')) RETV(PLAIN, c); RET('*'); break; case CHR('['): if (HAVE(6) && *(v->now + 0) == CHR('[') && *(v->now + 1) == CHR(':') && (*(v->now + 2) == CHR('<') || *(v->now + 2) == CHR('>')) && *(v->now + 3) == CHR(':') && *(v->now + 4) == CHR(']') && *(v->now + 5) == CHR(']')) { c = *(v->now + 2); v->now += 6; NOTE(REG_UNONPOSIX); RET((c == CHR('<')) ? '<' : '>'); } INTOCON(L_BRACK); if (NEXT1('^')) { v->now++; RETV('[', 0); } RETV('[', 1); break; case CHR('.'): RET('.'); break; case CHR('^'): if (LASTTYPE(EMPTY)) RET('^'); if (LASTTYPE('(')) { NOTE(REG_UUNSPEC); RET('^'); } RETV(PLAIN, c); break; case CHR('$'): if (v->cflags & REG_EXPANDED) skip(v); if (ATEOS()) RET('$'); if (NEXT2('\\', ')')) { NOTE(REG_UUNSPEC); RET('$'); } RETV(PLAIN, c); break; case CHR('\\'): break; /* see below */ default: RETV(PLAIN, c); break; } assert(c == CHR('\\')); if (ATEOS()) FAILW(REG_EESCAPE); c = *v->now++; switch (c) { case CHR('{'): INTOCON(L_BBND); NOTE(REG_UBOUNDS); RET('{'); break; case CHR('('): RETV('(', 1); break; case CHR(')'): RETV(')', c); break; case CHR('<'): NOTE(REG_UNONPOSIX); RET('<'); break; case CHR('>'): NOTE(REG_UNONPOSIX); RET('>'); break; case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'): case CHR('9'): NOTE(REG_UBACKREF); RETV(BACKREF, (chr) DIGITVAL(c)); break; default: if (iscalnum(c)) { NOTE(REG_UBSALNUM); NOTE(REG_UUNSPEC); } RETV(PLAIN, c); break; } assert(NOTREACHED); return 0;}/* * skip - skip white space and comments in expanded form */static voidskip(struct vars * v){ chr *start = v->now; assert(v->cflags & REG_EXPANDED); for (;;) { while (!ATEOS() && iscspace(*v->now)) v->now++; if (ATEOS() || *v->now != CHR('#')) break; /* NOTE BREAK OUT */ assert(NEXT1('#')); while (!ATEOS() && *v->now != CHR('\n')) v->now++; /* leave the newline to be picked up by the iscspace loop */ } if (v->now != start) NOTE(REG_UNONPOSIX);}/* * newline - return the chr for a newline * * This helps confine use of CHR to this source file. */static chrnewline(void){ return CHR('\n');}/* * chrnamed - return the chr known by a given (chr string) name * * The code is a bit clumsy, but this routine gets only such specialized * use that it hardly matters. */static chrchrnamed(struct vars * v, chr *startp, /* start of name */ chr *endp, /* just past end of name */ chr lastresort) /* what to return if name lookup fails */{ celt c; int errsave; int e; struct cvec *cv; errsave = v->err; v->err = 0; c = element(v, startp, endp); e = v->err; v->err = errsave; if (e != 0) return (chr) lastresort; cv = range(v, c, c, 0); if (cv->nchrs == 0) return (chr) lastresort; return cv->chrs[0];}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -