📄 regcomp.cpp
字号:
break;
case BACKSL|')': /* should not get here -- must be user */
case BACKSL|'}':
SETERROR(REG_EPAREN);
break;
case BACKSL|'1':
case BACKSL|'2':
case BACKSL|'3':
case BACKSL|'4':
case BACKSL|'5':
case BACKSL|'6':
case BACKSL|'7':
case BACKSL|'8':
case BACKSL|'9':
i = (c&~BACKSL) - '0';
assert(i < NPAREN);
if (p->pend[i] != 0) {
assert(i <= p->g->nsub);
EMIT(OBACK_, i);
assert(p->pbegin[i] != 0);
assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
assert(OP(p->strip[p->pend[i]]) == ORPAREN);
(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
EMIT(O_BACK, i);
} else
SETERROR(REG_ESUBREG);
p->g->backrefs = 1;
break;
case '*':
REQUIRE(starordinary, REG_BADRPT);
/* FALLTHROUGH */
default:
ordinary(p, (char)c); /* takes off BACKSL, if any */
break;
}
if (EAT('*')) { /* implemented as +? */
/* this case does not require the (y|) trick, noKLUDGE */
INSERT(OPLUS_, pos);
ASTERN(O_PLUS, pos);
INSERT(OQUEST_, pos);
ASTERN(O_QUEST, pos);
} else if (EATTWO('\\', '{')) {
count = p_count(p);
if (EAT(',')) {
if (MORE() && isdigit(PEEK())) {
count2 = p_count(p);
REQUIRE(count <= count2, REG_BADBR);
} else /* single number with comma */
count2 = INFINITY;
} else /* just a single number */
count2 = count;
repeat(p, pos, count, count2);
if (!EATTWO('\\', '}')) { /* error heuristics */
while (MORE() && !SEETWO('\\', '}'))
NEXT();
REQUIRE(MORE(), REG_EBRACE);
SETERROR(REG_BADBR);
}
} else if (c == (unsigned char)'$') /* $ (but not \$) ends it */
return(1);
return(0);
}
/*
- p_count - parse a repetition count
== static int p_count(register struct parse *p);
*/
static int p_count(register struct parse *p)
{
register int count = 0;
register int ndigits = 0;
while (MORE() && isdigit(PEEK()) && count <= DUPMAX) {
count = count*10 + (GETNEXT() - '0');
ndigits++;
}
REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
return(count);
}
/*
- p_bracket - parse a bracketed character list
== static void p_bracket(register struct parse *p);
*
* Note a significant property of this code: if the allocset() did SETERROR,
* no set operations are done.
*/
static void p_bracket(register struct parse *p)
{
register cset *cs = allocset(p);
register int invert = 0;
/* Dept of Truly Sickening Special-Case Kludges */
if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
EMIT(OBOW, 0);
NEXTn(6);
return;
}
if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
EMIT(OEOW, 0);
NEXTn(6);
return;
}
if (EAT('^'))
invert++; /* make note to invert set at end */
if (EAT(']'))
CHadd(cs, ']');
else if (EAT('-'))
CHadd(cs, '-');
while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
p_b_term(p, cs);
if (EAT('-'))
CHadd(cs, '-');
MUSTEAT(']', REG_EBRACK);
if (p->error != 0) /* don't mess things up further */
return;
if (p->g->cflags®_ICASE) {
register int i;
register int ci;
for (i = p->g->csetsize - 1; i >= 0; i--)
if (CHIN(cs, i) && isalpha(i)) {
ci = othercase(i);
if (ci != i)
CHadd(cs, ci);
}
if (cs->multis != NULL)
mccase(p, cs);
}
if (invert) {
register int i;
for (i = p->g->csetsize - 1; i >= 0; i--)
if (CHIN(cs, i))
CHsub(cs, i);
else
CHadd(cs, i);
if (p->g->cflags®_NEWLINE)
CHsub(cs, '\n');
if (cs->multis != NULL)
mcinvert(p, cs);
}
assert(cs->multis == NULL); /* xxx */
if (nch(p, cs) == 1) { /* optimize singleton sets */
ordinary(p, firstch(p, cs));
freeset(p, cs);
} else
EMIT(OANYOF, freezeset(p, cs));
}
/*
- p_b_term - parse one term of a bracketed character list
== static void p_b_term(register struct parse *p, register cset *cs);
*/
static void p_b_term(register struct parse *p, register cset *cs)
{
register char c;
register char start, finish;
register int i;
/* classify what we've got */
switch ((MORE()) ? PEEK() : '\0') {
case '[':
c = (MORE2()) ? PEEK2() : '\0';
break;
case '-':
SETERROR(REG_ERANGE);
return; /* NOTE RETURN */
break;
default:
c = '\0';
break;
}
switch (c) {
case ':': /* character class */
NEXT2();
REQUIRE(MORE(), REG_EBRACK);
c = PEEK();
REQUIRE(c != '-' && c != ']', REG_ECTYPE);
p_b_cclass(p, cs);
REQUIRE(MORE(), REG_EBRACK);
REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
break;
case '=': /* equivalence class */
NEXT2();
REQUIRE(MORE(), REG_EBRACK);
c = PEEK();
REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
p_b_eclass(p, cs);
REQUIRE(MORE(), REG_EBRACK);
REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
break;
default: /* symbol, ordinary character, or range */
/* xxx revision needed for multichar stuff */
start = p_b_symbol(p);
if (SEE('-') && MORE2() && PEEK2() != ']') {
/* range */
NEXT();
if (EAT('-'))
finish = '-';
else
finish = p_b_symbol(p);
} else
finish = start;
/* xxx what about signed chars here... */
REQUIRE(start <= finish, REG_ERANGE);
for (i = start; i <= finish; i++)
CHadd(cs, i);
break;
}
}
/*
- p_b_cclass - parse a character-class name and deal with it
== static void p_b_cclass(register struct parse *p, register cset *cs);
*/
static void p_b_cclass(register struct parse *p, register cset *cs)
{
register char *sp = p->next;
register struct cclass *cp;
register size_t len;
register char *u;
register char c;
while (MORE() && isalpha(PEEK()))
NEXT();
len = p->next - sp;
for (cp = cclasses; cp->name != NULL; cp++)
if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
break;
if (cp->name == NULL) {
/* oops, didn't find it */
SETERROR(REG_ECTYPE);
return;
}
u = cp->chars;
while ((c = *u++) != '\0')
CHadd(cs, c);
for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
MCadd(p, cs, u);
}
/*
- p_b_eclass - parse an equivalence-class name and deal with it
== static void p_b_eclass(register struct parse *p, register cset *cs);
*
* This implementation is incomplete. xxx
*/
static void p_b_eclass(register struct parse *p, register cset *cs)
{
register char c;
c = p_b_coll_elem(p, '=');
CHadd(cs, c);
}
/*
- p_b_symbol - parse a character or [..]ed multicharacter collating symbol
== static char p_b_symbol(register struct parse *p);
*/
static char p_b_symbol(register struct parse *p)
{
register char value;
REQUIRE(MORE(), REG_EBRACK);
if (!EATTWO('[', '.'))
return(GETNEXT());
/* collating symbol */
value = p_b_coll_elem(p, '.');
REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
return(value);
}
/*
- p_b_coll_elem - parse a collating-element name and look it up
== static char p_b_coll_elem(register struct parse *p, int endc);
*/
static char p_b_coll_elem(register struct parse *p, int endc)
{
register char *sp = p->next;
register struct cname *cp;
register int len;
while (MORE() && !SEETWO(endc, ']'))
NEXT();
if (!MORE()) {
SETERROR(REG_EBRACK);
return(0);
}
len = p->next - sp;
for (cp = cnames; cp->name != NULL; cp++)
if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
return(cp->code); /* known name */
if (len == 1)
return(*sp); /* single character */
SETERROR(REG_ECOLLATE); /* neither */
return(0);
}
/*
- othercase - return the case counterpart of an alphabetic
== static char othercase(int ch);
*/
static char othercase(int ch)
{
assert(isalpha(ch));
if (isupper(ch))
return(tolower(ch));
else if (islower(ch))
return(toupper(ch));
else /* peculiar, but could happen */
return(ch);
}
/*
- bothcases - emit a dualcase version of a two-case character
== static void bothcases(register struct parse *p, int ch);
*
* Boy, is this implementation ever a kludge...
*/
static void bothcases(register struct parse *p, int ch)
{
register char *oldnext = p->next;
register char *oldend = p->end;
char bracket[3];
assert(othercase(ch) != ch); /* p_bracket() would recurse */
p->next = bracket;
p->end = bracket+2;
bracket[0] = ch;
bracket[1] = ']';
bracket[2] = '\0';
p_bracket(p);
assert(p->next == bracket+2);
p->next = oldnext;
p->end = oldend;
}
/*
- ordinary - emit an ordinary character
== static void ordinary(register struct parse *p, register int ch);
*/
static void ordinary(register struct parse *p, register int ch)
{
register cat_t *cap = p->g->categories;
if ((p->g->cflags®_ICASE) && isalpha(ch) && othercase(ch) != ch)
bothcases(p, ch);
else {
EMIT(OCHAR, (unsigned char)ch);
if (cap[ch] == 0)
cap[ch] = p->g->ncategories++;
}
}
/*
- nonnewline - emit REG_NEWLINE version of OANY
== static void nonnewline(register struct parse *p);
*
* Boy, is this implementation ever a kludge...
*/
static void nonnewline(register struct parse *p)
{
register char *oldnext = p->next;
register char *oldend = p->end;
char bracket[4];
p->next = bracket;
p->end = bracket+3;
bracket[0] = '^';
bracket[1] = '\n';
bracket[2] = ']';
bracket[3] = '\0';
p_bracket(p);
assert(p->next == bracket+3);
p->next = oldnext;
p->end = oldend;
}
/*
- repeat - generate code for a bounded repetition, recursively if needed
== static void repeat(register struct parse *p, sopno start, int from, int to);
*/
static void repeat(register struct parse *p, sopno start, int from, int to)
{
register sopno finish = HERE();
# define N 2
# define INF 3
# define REP(f, t) ((f)*8 + (t))
# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
register sopno copy;
if (p->error != 0) /* head off possible runaway recursion */
return;
assert(from <= to);
switch (REP(MAP(from), MAP(to))) {
case REP(0, 0): /* must be user doing this */
DROP(finish-start); /* drop the operand */
break;
case REP(0, 1): /* as x{1,1}? */
case REP(0, N): /* as x{1,n}? */
case REP(0, INF): /* as x{1,}? */
/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
INSERT(OCH_, start); /* offset is wrong... */
repeat(p, start+1, 1, to);
ASTERN(OOR1, start);
AHEAD(start); /* ... fix it */
EMIT(OOR2, 0);
AHEAD(THERE());
ASTERN(O_CH, THERETHERE());
break;
case REP(1, 1): /* trivial case */
/* done */
break;
case REP(1, N): /* as x?x{1,n-1} */
/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
INSERT(OCH_, start);
ASTERN(OOR1, start);
AHEAD(start);
EMIT(OOR2, 0); /* offset very wrong... */
AHEAD(THERE()); /* ...so fix it */
ASTERN(O_CH, THERETHERE());
copy = dupl(p, start+1, finish+1);
assert(copy == finish+4);
repeat(p, copy, 1, to-1);
break;
case REP(1, INF): /* as x+ */
INSERT(OPLUS_, start);
ASTERN(O_PLUS, start);
break;
case REP(N, N): /* as xx{m-1,n-1} */
copy = dupl(p, start, finish);
repeat(p, copy, from-1, to-1);
break;
case REP(N, INF): /* as xx{n-1,INF} */
copy = dupl(p, start, finish);
repeat(p, copy, from-1, to);
break;
default: /* "can't happen" */
SETERROR(REG_ASSERT); /* just in case */
break;
}
}
/*
- seterr - set an error condition
== static int seterr(register struct parse *p, int e);
*/
static int seterr(register struct parse *p, int e)
{
if (p->error == 0) /* keep earliest error condition */
p->error = e;
p->next = nuls; /* try to bring things to a halt */
p->end = nuls;
return(0); /* make the return value well-defined */
}
/*
- allocset - allocate a set of characters for []
== static cset *allocset(register struct parse *p);
*/
static cset * allocset(register struct parse *p)
{
register int no = p->g->ncsets++;
register size_t nc;
register size_t nbytes;
register cset *cs;
register size_t css = (size_t)p->g->csetsize;
register int i;
if (no >= p->ncsalloc) { /* need another column of space */
p->ncsalloc += CHAR_BIT;
nc = p->ncsalloc;
assert(nc % CHAR_BIT == 0);
nbytes = nc / CHAR_BIT * css;
if (p->g->sets == NULL)
p->g->sets = (cset *)malloc(nc * sizeof(cset));
else
p->g->sets = (cset *)realloc((char *)p->g->sets,
nc * sizeof(cset));
if (p->g->setbits == NULL)
p->g->setbits = (uch *)malloc(nbytes);
else {
p->g->setbits = (uch *)realloc((char *)p->g->setbits,
nbytes);
/* xxx this isn't right if setbits is now NULL */
for (i = 0; i < no; i++)
p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
}
if (p->g->sets != NULL && p->g->setbits != NULL)
(void) memset((char *)p->g->setbits + (nbytes - css),
0, css);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -