regc_lex.c
来自「A*算法 A*算法 A*算法 A*算法A*算法A*算法」· C语言 代码 · 共 1,062 行 · 第 1/2 页
C
1,062 行
NOTE(REG_UUNSPEC);
RETV(PLAIN, c);
} else {
NOTE(REG_UBOUNDS);
INTOCON(L_EBND);
RET('{');
}
assert(NOTREACHED);
break;
case CHR('('): /* parenthesis, or advanced extension */
if ((v->cflags®_ADVF) && NEXT1('?')) {
NOTE(REG_UNONPOSIX);
v->now++;
switch (*v->now++) {
case CHR(':'): /* non-capturing paren */
RETV('(', 0);
break;
case CHR('#'): /* comment */
while (!ATEOS() && *v->now != CHR(')'))
v->now++;
if (!ATEOS())
v->now++;
assert(v->nexttype == v->lasttype);
return next(v);
break;
case CHR('='): /* positive lookahead */
NOTE(REG_ULOOKAHEAD);
RETV(LACON, 1);
break;
case CHR('!'): /* negative lookahead */
NOTE(REG_ULOOKAHEAD);
RETV(LACON, 0);
break;
default:
FAILW(REG_BADRPT);
break;
}
assert(NOTREACHED);
}
if (v->cflags®_NOSUB)
RETV('(', 0); /* all parens non-capturing */
else
RETV('(', 1);
break;
case CHR(')'):
if (LASTTYPE('(')) {
NOTE(REG_UUNSPEC);
}
RETV(')', c);
break;
case CHR('['): /* easy except for [[:<:]] and [[:>:]] */
if (HAVE(6) && *(v->now+0) == CHR('[') &&
*(v->now+1) == CHR(':') &&
(*(v->now+2) == CHR('<') ||
*(v->now+2) == CHR('>')) &&
*(v->now+3) == CHR(':') &&
*(v->now+4) == CHR(']') &&
*(v->now+5) == CHR(']')) {
c = *(v->now+2);
v->now += 6;
NOTE(REG_UNONPOSIX);
RET((c == CHR('<')) ? '<' : '>');
}
INTOCON(L_BRACK);
if (NEXT1('^')) {
v->now++;
RETV('[', 0);
}
RETV('[', 1);
break;
case CHR('.'):
RET('.');
break;
case CHR('^'):
RET('^');
break;
case CHR('$'):
RET('$');
break;
case CHR('\\'): /* mostly punt backslashes to code below */
if (ATEOS())
FAILW(REG_EESCAPE);
break;
default: /* ordinary character */
RETV(PLAIN, c);
break;
}
/* ERE/ARE backslash handling; backslash already eaten */
assert(!ATEOS());
if (!(v->cflags®_ADVF)) { /* only AREs have non-trivial escapes */
if (iscalnum(*v->now)) {
NOTE(REG_UBSALNUM);
NOTE(REG_UUNSPEC);
}
RETV(PLAIN, *v->now++);
}
(DISCARD)lexescape(v);
if (ISERR())
FAILW(REG_EESCAPE);
if (v->nexttype == CCLASS) { /* fudge at lexical level */
switch (v->nextvalue) {
case 'd': lexnest(v, backd, ENDOF(backd)); break;
case 'D': lexnest(v, backD, ENDOF(backD)); break;
case 's': lexnest(v, backs, ENDOF(backs)); break;
case 'S': lexnest(v, backS, ENDOF(backS)); break;
case 'w': lexnest(v, backw, ENDOF(backw)); break;
case 'W': lexnest(v, backW, ENDOF(backW)); break;
default:
assert(NOTREACHED);
FAILW(REG_ASSERT);
break;
}
/* lexnest done, back up and try again */
v->nexttype = v->lasttype;
return next(v);
}
/* otherwise, lexescape has already done the work */
return !ISERR();
}
/*
- lexescape - parse an ARE backslash escape (backslash already eaten)
* Note slightly nonstandard use of the CCLASS type code.
^ static int lexescape(struct vars *);
*/
static int /* not actually used, but convenient for RETV */
lexescape(v)
struct vars *v;
{
chr c;
static chr alert[] = {
CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t')
};
static chr esc[] = {
CHR('E'), CHR('S'), CHR('C')
};
chr *save;
assert(v->cflags®_ADVF);
assert(!ATEOS());
c = *v->now++;
if (!iscalnum(c))
RETV(PLAIN, c);
NOTE(REG_UNONPOSIX);
switch (c) {
case CHR('a'):
RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007')));
break;
case CHR('A'):
RETV(SBEGIN, 0);
break;
case CHR('b'):
RETV(PLAIN, CHR('\b'));
break;
case CHR('B'):
RETV(PLAIN, CHR('\\'));
break;
case CHR('c'):
NOTE(REG_UUNPORT);
if (ATEOS())
FAILW(REG_EESCAPE);
RETV(PLAIN, (chr)(*v->now++ & 037));
break;
case CHR('d'):
NOTE(REG_ULOCALE);
RETV(CCLASS, 'd');
break;
case CHR('D'):
NOTE(REG_ULOCALE);
RETV(CCLASS, 'D');
break;
case CHR('e'):
NOTE(REG_UUNPORT);
RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033')));
break;
case CHR('f'):
RETV(PLAIN, CHR('\f'));
break;
case CHR('m'):
RET('<');
break;
case CHR('M'):
RET('>');
break;
case CHR('n'):
RETV(PLAIN, CHR('\n'));
break;
case CHR('r'):
RETV(PLAIN, CHR('\r'));
break;
case CHR('s'):
NOTE(REG_ULOCALE);
RETV(CCLASS, 's');
break;
case CHR('S'):
NOTE(REG_ULOCALE);
RETV(CCLASS, 'S');
break;
case CHR('t'):
RETV(PLAIN, CHR('\t'));
break;
case CHR('u'):
c = lexdigits(v, 16, 4, 4);
if (ISERR())
FAILW(REG_EESCAPE);
RETV(PLAIN, c);
break;
case CHR('U'):
c = lexdigits(v, 16, 8, 8);
if (ISERR())
FAILW(REG_EESCAPE);
RETV(PLAIN, c);
break;
case CHR('v'):
RETV(PLAIN, CHR('\v'));
break;
case CHR('w'):
NOTE(REG_ULOCALE);
RETV(CCLASS, 'w');
break;
case CHR('W'):
NOTE(REG_ULOCALE);
RETV(CCLASS, 'W');
break;
case CHR('x'):
NOTE(REG_UUNPORT);
c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */
if (ISERR())
FAILW(REG_EESCAPE);
RETV(PLAIN, c);
break;
case CHR('y'):
NOTE(REG_ULOCALE);
RETV(WBDRY, 0);
break;
case CHR('Y'):
NOTE(REG_ULOCALE);
RETV(NWBDRY, 0);
break;
case CHR('Z'):
RETV(SEND, 0);
break;
case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
case CHR('9'):
save = v->now;
v->now--; /* put first digit back */
c = lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */
if (ISERR())
FAILW(REG_EESCAPE);
/* ugly heuristic (first test is "exactly 1 digit?") */
if (v->now - save == 0 || (int)c <= v->nsubexp) {
NOTE(REG_UBACKREF);
RETV(BACKREF, (chr)c);
}
/* oops, doesn't look like it's a backref after all... */
v->now = save;
/* and fall through into octal number */
case CHR('0'):
NOTE(REG_UUNPORT);
v->now--; /* put first digit back */
c = lexdigits(v, 8, 1, 3);
if (ISERR())
FAILW(REG_EESCAPE);
RETV(PLAIN, c);
break;
default:
assert(iscalpha(c));
FAILW(REG_EESCAPE); /* unknown alphabetic escape */
break;
}
assert(NOTREACHED);
}
/*
- lexdigits - slurp up digits and return chr value
^ static chr lexdigits(struct vars *, int, int, int);
*/
static chr /* chr value; errors signalled via ERR */
lexdigits(v, base, minlen, maxlen)
struct vars *v;
int base;
int minlen;
int maxlen;
{
uchr n; /* unsigned to avoid overflow misbehavior */
int len;
chr c;
int d;
CONST uchr ub = (uchr) base;
n = 0;
for (len = 0; len < maxlen && !ATEOS(); len++) {
c = *v->now++;
switch (c) {
case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
case CHR('8'): case CHR('9'):
d = DIGITVAL(c);
break;
case CHR('a'): case CHR('A'): d = 10; break;
case CHR('b'): case CHR('B'): d = 11; break;
case CHR('c'): case CHR('C'): d = 12; break;
case CHR('d'): case CHR('D'): d = 13; break;
case CHR('e'): case CHR('E'): d = 14; break;
case CHR('f'): case CHR('F'): d = 15; break;
default:
v->now--; /* oops, not a digit at all */
d = -1;
break;
}
if (d >= base) { /* not a plausible digit */
v->now--;
d = -1;
}
if (d < 0)
break; /* NOTE BREAK OUT */
n = n*ub + (uchr)d;
}
if (len < minlen)
ERR(REG_EESCAPE);
return (chr)n;
}
/*
- brenext - get next BRE token
* This is much like EREs except for all the stupid backslashes and the
* context-dependency of some things.
^ static int brenext(struct vars *, pchr);
*/
static int /* 1 normal, 0 failure */
brenext(v, pc)
struct vars *v;
pchr pc;
{
chr c = (chr)pc;
switch (c) {
case CHR('*'):
if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^'))
RETV(PLAIN, c);
RET('*');
break;
case CHR('['):
if (HAVE(6) && *(v->now+0) == CHR('[') &&
*(v->now+1) == CHR(':') &&
(*(v->now+2) == CHR('<') ||
*(v->now+2) == CHR('>')) &&
*(v->now+3) == CHR(':') &&
*(v->now+4) == CHR(']') &&
*(v->now+5) == CHR(']')) {
c = *(v->now+2);
v->now += 6;
NOTE(REG_UNONPOSIX);
RET((c == CHR('<')) ? '<' : '>');
}
INTOCON(L_BRACK);
if (NEXT1('^')) {
v->now++;
RETV('[', 0);
}
RETV('[', 1);
break;
case CHR('.'):
RET('.');
break;
case CHR('^'):
if (LASTTYPE(EMPTY))
RET('^');
if (LASTTYPE('(')) {
NOTE(REG_UUNSPEC);
RET('^');
}
RETV(PLAIN, c);
break;
case CHR('$'):
if (v->cflags®_EXPANDED)
skip(v);
if (ATEOS())
RET('$');
if (NEXT2('\\', ')')) {
NOTE(REG_UUNSPEC);
RET('$');
}
RETV(PLAIN, c);
break;
case CHR('\\'):
break; /* see below */
default:
RETV(PLAIN, c);
break;
}
assert(c == CHR('\\'));
if (ATEOS())
FAILW(REG_EESCAPE);
c = *v->now++;
switch (c) {
case CHR('{'):
INTOCON(L_BBND);
NOTE(REG_UBOUNDS);
RET('{');
break;
case CHR('('):
RETV('(', 1);
break;
case CHR(')'):
RETV(')', c);
break;
case CHR('<'):
NOTE(REG_UNONPOSIX);
RET('<');
break;
case CHR('>'):
NOTE(REG_UNONPOSIX);
RET('>');
break;
case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
case CHR('9'):
NOTE(REG_UBACKREF);
RETV(BACKREF, (chr)DIGITVAL(c));
break;
default:
if (iscalnum(c)) {
NOTE(REG_UBSALNUM);
NOTE(REG_UUNSPEC);
}
RETV(PLAIN, c);
break;
}
assert(NOTREACHED);
}
/*
- skip - skip white space and comments in expanded form
^ static VOID skip(struct vars *);
*/
static VOID
skip(v)
struct vars *v;
{
chr *start = v->now;
assert(v->cflags®_EXPANDED);
for (;;) {
while (!ATEOS() && iscspace(*v->now))
v->now++;
if (ATEOS() || *v->now != CHR('#'))
break; /* NOTE BREAK OUT */
assert(NEXT1('#'));
while (!ATEOS() && *v->now != CHR('\n'))
v->now++;
/* leave the newline to be picked up by the iscspace loop */
}
if (v->now != start)
NOTE(REG_UNONPOSIX);
}
/*
- newline - return the chr for a newline
* This helps confine use of CHR to this source file.
^ static chr newline(NOPARMS);
*/
static chr
newline()
{
return CHR('\n');
}
/*
 - ch - return the chr sequence for regc_locale.c's fake collating element ch
 * Keeps use of CHR inside this source file.  The sequence is 'c', 'h'
 * and a terminating zero chr; the caller is expected to know its length.
 * Only compiled when REG_DEBUG is defined.
 ^ #ifdef REG_DEBUG
 ^ static chr *ch(NOPARMS);
 ^ #endif
 */
#ifdef REG_DEBUG
static chr *
ch()
{
    static chr name[] = { CHR('c'), CHR('h'), CHR('\0') };

    return &name[0];
}
#endif
/*
 - chrnamed - return the chr known by a given (chr string) name
 * The code is a bit clumsy, but this routine gets only such specialized
 * use that it hardly matters.
 *
 * Looks the name up with element() while v->err is temporarily cleared,
 * so that a failed lookup does not disturb the error state the caller
 * had; the saved value is restored before anything else happens.  If the
 * lookup fails, or the cvec obtained from range() is missing or holds no
 * chrs, lastresort is returned instead.
 ^ static chr chrnamed(struct vars *, chr *, chr *, pchr);
 */
static chr
chrnamed(v, startp, endp, lastresort)
struct vars *v;
chr *startp;                    /* start of name */
chr *endp;                      /* just past end of name */
pchr lastresort;                /* what to return if name lookup fails */
{
    celt c;
    int errsave;
    int e;
    struct cvec *cv;

    /* run the lookup with a clean error slot, then put the old one back */
    errsave = v->err;
    v->err = 0;
    c = element(v, startp, endp);
    e = v->err;
    v->err = errsave;

    if (e != 0)
        return (chr)lastresort;

    cv = range(v, c, c, 0);
    /*
     * NOTE(review): range() is defined elsewhere; guard against a null
     * result in case it can fail (e.g. on allocation failure) rather
     * than dereferencing it blindly.
     */
    if (!cv || cv->nchrs == 0)
        return (chr)lastresort;
    return cv->chrs[0];
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?