📄 regcomp.c
字号:
count2 = INFINITY;
} else /* just a single number */
count2 = count;
repeat(p, pos, count, count2);
if (!EATTWO('\\', '}')) { /* error heuristics */
while (MORE() && !SEETWO('\\', '}'))
NEXT();
REQUIRE(MORE(), REG_EBRACE);
SETERROR(REG_BADBR);
}
} else if (c == (unsigned char)'$') /* $ (but not \$) ends it */
return(1);
return(0);
}
/*
- p_count - parse a repetition count
== static int p_count(register struct parse *p);
*/
static int /* the value */
p_count(p)
register struct parse *p;
{
register int count = 0;
register int ndigits = 0;
while (MORE() && isdigit(PEEK()) && count <= DUPMAX) {
count = count*10 + (GETNEXT() - '0');
ndigits++;
}
REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
return(count);
}
/*
- p_bracket - parse a bracketed character list
== static void p_bracket(register struct parse *p);
*
* Note a significant property of this code: if the allocset() did SETERROR,
* no set operations are done.
*/
static void
p_bracket(p)
register struct parse *p;
{
register cset *cs = allocset(p);
register int invert = 0;
/* Dept of Truly Sickening Special-Case Kludges */
if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
EMIT(OBOW, 0);
NEXTn(6);
return;
}
if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
EMIT(OEOW, 0);
NEXTn(6);
return;
}
if (EAT('^'))
invert++; /* make note to invert set at end */
if (EAT(']'))
CHadd(cs, ']');
else if (EAT('-'))
CHadd(cs, '-');
while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
p_b_term(p, cs);
if (EAT('-'))
CHadd(cs, '-');
MUSTEAT(']', REG_EBRACK);
if (p->error != 0) /* don't mess things up further */
return;
if (p->g->cflags®_ICASE) {
register int i;
register int ci;
for (i = p->g->csetsize - 1; i >= 0; i--)
if (CHIN(cs, i) && isalpha(i)) {
ci = othercase(i);
if (ci != i)
CHadd(cs, ci);
}
if (cs->multis != NULL)
mccase(p, cs);
}
if (invert) {
register int i;
for (i = p->g->csetsize - 1; i >= 0; i--)
if (CHIN(cs, i))
CHsub(cs, i);
else
CHadd(cs, i);
if (p->g->cflags®_NEWLINE)
CHsub(cs, '\n');
if (cs->multis != NULL)
mcinvert(p, cs);
}
assert(cs->multis == NULL); /* xxx */
if (nch(p, cs) == 1) { /* optimize singleton sets */
ordinary(p, firstch(p, cs));
freeset(p, cs);
} else
EMIT(OANYOF, freezeset(p, cs));
}
/*
- p_b_term - parse one term of a bracketed character list
== static void p_b_term(register struct parse *p, register cset *cs);
*/
static void
p_b_term(p, cs)
register struct parse *p;
register cset *cs;
{
register char c;
register char start, finish;
register int i;
/* classify what we've got */
switch ((MORE()) ? PEEK() : '\0') {
case '[':
c = (MORE2()) ? PEEK2() : '\0';
break;
case '-':
SETERROR(REG_ERANGE);
return; /* NOTE RETURN */
break;
default:
c = '\0';
break;
}
switch (c) {
case ':': /* character class */
NEXT2();
REQUIRE(MORE(), REG_EBRACK);
c = PEEK();
REQUIRE(c != '-' && c != ']', REG_ECTYPE);
p_b_cclass(p, cs);
REQUIRE(MORE(), REG_EBRACK);
REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
break;
case '=': /* equivalence class */
NEXT2();
REQUIRE(MORE(), REG_EBRACK);
c = PEEK();
REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
p_b_eclass(p, cs);
REQUIRE(MORE(), REG_EBRACK);
REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
break;
default: /* symbol, ordinary character, or range */
/* xxx revision needed for multichar stuff */
start = p_b_symbol(p);
if (SEE('-') && MORE2() && PEEK2() != ']') {
/* range */
NEXT();
if (EAT('-'))
finish = '-';
else
finish = p_b_symbol(p);
} else
finish = start;
/* xxx what about signed chars here... */
REQUIRE(start <= finish, REG_ERANGE);
for (i = start; i <= finish; i++)
CHadd(cs, i);
break;
}
}
/*
- p_b_cclass - parse a character-class name and deal with it
== static void p_b_cclass(register struct parse *p, register cset *cs);
*/
static void
p_b_cclass(p, cs)
register struct parse *p;
register cset *cs;
{
register char *sp = p->next;
register struct cclass *cp;
register size_t len;
register char *u;
register char c;
while (MORE() && isalpha(PEEK()))
NEXT();
len = p->next - sp;
for (cp = cclasses; cp->name != NULL; cp++)
if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
break;
if (cp->name == NULL) {
/* oops, didn't find it */
SETERROR(REG_ECTYPE);
return;
}
u = cp->chars;
while ((c = *u++) != '\0')
CHadd(cs, c);
for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
MCadd(p, cs, u);
}
/*
- p_b_eclass - parse an equivalence-class name and deal with it
== static void p_b_eclass(register struct parse *p, register cset *cs);
*
* This implementation is incomplete. xxx
*/
static void
p_b_eclass(p, cs)
register struct parse *p;
register cset *cs;
{
register char c;
c = p_b_coll_elem(p, '=');
CHadd(cs, c);
}
/*
- p_b_symbol - parse a character or [..]ed multicharacter collating symbol
== static char p_b_symbol(register struct parse *p);
*/
static char /* value of symbol */
p_b_symbol(p)
register struct parse *p;
{
register char value;
REQUIRE(MORE(), REG_EBRACK);
if (!EATTWO('[', '.'))
return(GETNEXT());
/* collating symbol */
value = p_b_coll_elem(p, '.');
REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
return(value);
}
/*
- p_b_coll_elem - parse a collating-element name and look it up
== static char p_b_coll_elem(register struct parse *p, int endc);
*/
static char /* value of collating element */
p_b_coll_elem(p, endc)
register struct parse *p;
int endc; /* name ended by endc,']' */
{
register char *sp = p->next;
register struct cname *cp;
register int len;
while (MORE() && !SEETWO(endc, ']'))
NEXT();
if (!MORE()) {
SETERROR(REG_EBRACK);
return(0);
}
len = p->next - sp;
for (cp = cnames; cp->name != NULL; cp++)
if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
return(cp->code); /* known name */
if (len == 1)
return(*sp); /* single character */
SETERROR(REG_ECOLLATE); /* neither */
return(0);
}
/*
- othercase - return the case counterpart of an alphabetic
== static char othercase(int ch);
*/
static char /* if no counterpart, return ch */
othercase(ch)
int ch;
{
assert(isalpha(ch));
if (isupper(ch))
return(tolower(ch));
else if (islower(ch))
return(toupper(ch));
else /* peculiar, but could happen */
return(ch);
}
/*
- bothcases - emit a dualcase version of a two-case character
== static void bothcases(register struct parse *p, int ch);
*
* Boy, is this implementation ever a kludge...
*/
static void
bothcases(p, ch)
register struct parse *p;
int ch;
{
register char *oldnext = p->next;
register char *oldend = p->end;
char bracket[3];
assert(othercase(ch) != ch); /* p_bracket() would recurse */
p->next = bracket;
p->end = bracket+2;
bracket[0] = ch;
bracket[1] = ']';
bracket[2] = '\0';
p_bracket(p);
assert(p->next == bracket+2);
p->next = oldnext;
p->end = oldend;
}
/*
- ordinary - emit an ordinary character
== static void ordinary(register struct parse *p, register int ch);
*/
static void
ordinary(p, ch)
register struct parse *p;
register int ch;
{
register cat_t *cap = p->g->categories;
if ((p->g->cflags®_ICASE) && isalpha(ch) && othercase(ch) != ch)
bothcases(p, ch);
else {
EMIT(OCHAR, (unsigned char)ch);
if (cap[ch] == 0)
cap[ch] = p->g->ncategories++;
}
}
/*
- nonnewline - emit REG_NEWLINE version of OANY
== static void nonnewline(register struct parse *p);
*
* Boy, is this implementation ever a kludge...
*/
static void
nonnewline(p)
register struct parse *p;
{
register char *oldnext = p->next;
register char *oldend = p->end;
char bracket[4];
p->next = bracket;
p->end = bracket+3;
bracket[0] = '^';
bracket[1] = '\n';
bracket[2] = ']';
bracket[3] = '\0';
p_bracket(p);
assert(p->next == bracket+3);
p->next = oldnext;
p->end = oldend;
}
/*
- repeat - generate code for a bounded repetition, recursively if needed
== static void repeat(register struct parse *p, sopno start, int from, int to);
*/
static void
repeat(p, start, from, to)
register struct parse *p;
sopno start; /* operand from here to end of strip */
int from; /* repeated from this number */
int to; /* to this number of times (maybe INFINITY) */
{
register sopno finish = HERE();
# define N 2
# define INF 3
# define REP(f, t) ((f)*8 + (t))
# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
register sopno copy;
if (p->error != 0) /* head off possible runaway recursion */
return;
assert(from <= to);
switch (REP(MAP(from), MAP(to))) {
case REP(0, 0): /* must be user doing this */
DROP(finish-start); /* drop the operand */
break;
case REP(0, 1): /* as x{1,1}? */
case REP(0, N): /* as x{1,n}? */
case REP(0, INF): /* as x{1,}? */
/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
INSERT(OCH_, start); /* offset is wrong... */
repeat(p, start+1, 1, to);
ASTERN(OOR1, start);
AHEAD(start); /* ... fix it */
EMIT(OOR2, 0);
AHEAD(THERE());
ASTERN(O_CH, THERETHERE());
break;
case REP(1, 1): /* trivial case */
/* done */
break;
case REP(1, N): /* as x?x{1,n-1} */
/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
INSERT(OCH_, start);
ASTERN(OOR1, start);
AHEAD(start);
EMIT(OOR2, 0); /* offset very wrong... */
AHEAD(THERE()); /* ...so fix it */
ASTERN(O_CH, THERETHERE());
copy = dupl(p, start+1, finish+1);
assert(copy == finish+4);
repeat(p, copy, 1, to-1);
break;
case REP(1, INF): /* as x+ */
INSERT(OPLUS_, start);
ASTERN(O_PLUS, start);
break;
case REP(N, N): /* as xx{m-1,n-1} */
copy = dupl(p, start, finish);
repeat(p, copy, from-1, to-1);
break;
case REP(N, INF): /* as xx{n-1,INF} */
copy = dupl(p, start, finish);
repeat(p, copy, from-1, to);
break;
default: /* "can't happen" */
SETERROR(REG_ASSERT); /* just in case */
break;
}
}
/*
- seterr - set an error condition
== static int seterr(register struct parse *p, int e);
*/
static int /* useless but makes type checking happy */
seterr(p, e)
register struct parse *p;
int e;
{
if (p->error == 0) /* keep earliest error condition */
p->error = e;
p->next = nuls; /* try to bring things to a halt */
p->end = nuls;
return(0); /* make the return value well-defined */
}
/*
- allocset - allocate a set of characters for []
== static cset *allocset(register struct parse *p);
*/
static cset *
allocset(p)
register struct parse *p;
{
register int no = p->g->ncsets++;
register size_t nc;
register size_t nbytes;
register cset *cs;
register size_t css = (size_t)p->g->csetsize;
register int i;
if (no >= p->ncsalloc) { /* need another column of space */
p->ncsalloc += CHAR_BIT;
nc = p->ncsalloc;
assert(nc % CHAR_BIT == 0);
nbytes = nc / CHAR_BIT * css;
if (p->g->sets == NULL)
p->g->sets = (cset *)malloc(nc * sizeof(cset));
else
p->g->sets = (cset *)realloc((char *)p->g->sets,
nc * sizeof(cset));
if (p->g->setbits == NULL)
p->g->setbits = (uch *)malloc(nbytes);
else {
p->g->setbits = (uch *)realloc((char *)p->g->setbits,
nbytes);
/* xxx this isn't right if setbits is now NULL */
for (i = 0; i < no; i++)
p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
}
if (p->g->sets != NULL && p->g->setbits != NULL)
(void) memset((char *)p->g->setbits + (nbytes - css),
0, css);
else {
no = 0;
SETERROR(REG_ESPACE);
/* caller's responsibility not to do set ops */
}
}
assert(p->g->sets != NULL); /* xxx */
cs = &p->g->sets[no];
cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
cs->mask = 1 << ((no) % CHAR_BIT);
cs->hash = 0;
cs->smultis = 0;
cs->multis = NULL;
return(cs);
}
/*
- freeset - free a now-unused set
== static void freeset(register struct parse *p, register cset *cs);
*/
static void
freeset(p, cs)
register struct parse *p;
register cset *cs;
{
register int i;
register cset *top = &p->g->sets[p->g->ncsets];
register size_t css = (size_t)p->g->csetsize;
for (i = 0; i < css; i++)
CHsub(cs, i);
if (cs == top-1) /* recover only the easy case */
p->g->ncsets--;
}
/*
- freezeset - final processing on a set of characters
== static int freezeset(register struct parse *p, register cset *cs);
*
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -