📄 regcomp.c
字号:
* The main task here is merging identical sets. This is usually a waste
* of time (although the hash code minimizes the overhead), but can win
* big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash
* is done using addition rather than xor -- all ASCII [aA] sets xor to
* the same value!
*/
static int /* set number */
freezeset(p, cs)
register struct parse *p;
register cset *cs;
{
register uch h = cs->hash;
register int i;
register cset *top = &p->g->sets[p->g->ncsets];
register cset *cs2;
register size_t css = (size_t)p->g->csetsize;
/* look for an earlier one which is the same */
for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
if (cs2->hash == h && cs2 != cs) {
/* maybe */
for (i = 0; i < css; i++)
if (!!CHIN(cs2, i) != !!CHIN(cs, i))
break; /* no */
if (i == css)
break; /* yes */
}
if (cs2 < top) { /* found one */
freeset(p, cs);
cs = cs2;
}
return((int)(cs - p->g->sets));
}
/*
- firstch - return first character in a set (which must have at least one)
== static int firstch(register struct parse *p, register cset *cs);
*/
static int /* character; there is no "none" value */
firstch(p, cs)
register struct parse *p;
register cset *cs;
{
register int i;
register size_t css = (size_t)p->g->csetsize;
for (i = 0; i < css; i++)
if (CHIN(cs, i))
return((char)i);
assert(never);
return(0); /* arbitrary */
}
/*
- nch - number of characters in a set
== static int nch(register struct parse *p, register cset *cs);
*/
static int
nch(p, cs)
register struct parse *p;
register cset *cs;
{
register int i;
register size_t css = (size_t)p->g->csetsize;
register int n = 0;
for (i = 0; i < css; i++)
if (CHIN(cs, i))
n++;
return(n);
}
/*
- mcadd - add a collating element to a cset
== static void mcadd(register struct parse *p, register cset *cs, \
== register char *cp);
*/
static void
mcadd(p, cs, cp)
register struct parse *p;
register cset *cs;
register char *cp;
{
register size_t oldend = cs->smultis;
cs->smultis += strlen(cp) + 1;
if (cs->multis == NULL)
cs->multis = malloc(cs->smultis);
else
cs->multis = realloc(cs->multis, cs->smultis);
if (cs->multis == NULL) {
SETERROR(REG_ESPACE);
return;
}
(void) strcpy(cs->multis + oldend - 1, cp);
cs->multis[cs->smultis - 1] = '\0';
}
/*
- mcsub - subtract a collating element from a cset
== static void mcsub(register cset *cs, register char *cp);
*/
static void
mcsub(cs, cp)
register cset *cs;
register char *cp;
{
register char *fp = mcfind(cs, cp);
register size_t len = strlen(fp);
assert(fp != NULL);
(void) memmove(fp, fp + len + 1,
cs->smultis - (fp + len + 1 - cs->multis));
cs->smultis -= len;
if (cs->smultis == 0) {
free(cs->multis);
cs->multis = NULL;
return;
}
cs->multis = realloc(cs->multis, cs->smultis);
assert(cs->multis != NULL);
}
/*
- mcin - is a collating element in a cset?
== static int mcin(register cset *cs, register char *cp);
*/
static int
mcin(cs, cp)
register cset *cs;
register char *cp;
{
return(mcfind(cs, cp) != NULL);
}
/*
- mcfind - find a collating element in a cset
== static char *mcfind(register cset *cs, register char *cp);
*/
static char *
mcfind(cs, cp)
register cset *cs;
register char *cp;
{
register char *p;
if (cs->multis == NULL)
return(NULL);
for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
if (strcmp(cp, p) == 0)
return(p);
return(NULL);
}
/*
- mcinvert - invert the list of collating elements in a cset
== static void mcinvert(register struct parse *p, register cset *cs);
*
* This would have to know the set of possibilities. Implementation
* is deferred.
*/
static void
mcinvert(p, cs)
register struct parse *p;
register cset *cs;
{
assert(cs->multis == NULL); /* xxx */
}
/*
- mccase - add case counterparts of the list of collating elements in a cset
== static void mccase(register struct parse *p, register cset *cs);
*
* This would have to know the set of possibilities. Implementation
* is deferred.
*/
static void
mccase(p, cs)
register struct parse *p;
register cset *cs;
{
assert(cs->multis == NULL); /* xxx */
}
/*
- isinsets - is this character in any sets?
== static int isinsets(register struct re_guts *g, int c);
*/
static int /* predicate */
isinsets(g, c)
register struct re_guts *g;
int c;
{
register uch *col;
register int i;
register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
register unsigned uc = (unsigned char)c;
for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
if (col[uc] != 0)
return(1);
return(0);
}
/*
- samesets - are these two characters in exactly the same sets?
== static int samesets(register struct re_guts *g, int c1, int c2);
*/
static int /* predicate */
samesets(g, c1, c2)
register struct re_guts *g;
int c1;
int c2;
{
register uch *col;
register int i;
register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
register unsigned uc1 = (unsigned char)c1;
register unsigned uc2 = (unsigned char)c2;
for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
if (col[uc1] != col[uc2])
return(0);
return(1);
}
/*
- categorize - sort out character categories
== static void categorize(struct parse *p, register struct re_guts *g);
*/
static void
categorize(p, g)
struct parse *p;
register struct re_guts *g;
{
register cat_t *cats = g->categories;
register int c;
register int c2;
register cat_t cat;
/* avoid making error situations worse */
if (p->error != 0)
return;
for (c = CHAR_MIN; c <= CHAR_MAX; c++)
if (cats[c] == 0 && isinsets(g, c)) {
cat = g->ncategories++;
cats[c] = cat;
for (c2 = c+1; c2 <= CHAR_MAX; c2++)
if (cats[c2] == 0 && samesets(g, c, c2))
cats[c2] = cat;
}
}
/*
- dupl - emit a duplicate of a bunch of sops
== static sopno dupl(register struct parse *p, sopno start, sopno finish);
*/
static sopno /* start of duplicate */
dupl(p, start, finish)
register struct parse *p;
sopno start; /* from here */
sopno finish; /* to this less one */
{
register sopno ret = HERE();
register sopno len = finish - start;
assert(finish >= start);
if (len == 0)
return(ret);
enlarge(p, p->ssize + len); /* this many unexpected additions */
assert(p->ssize >= p->slen + len);
(void) memcpy((char *)(p->strip + p->slen),
(char *)(p->strip + start), (size_t)len*sizeof(sop));
p->slen += len;
return(ret);
}
/*
- doemit - emit a strip operator
== static void doemit(register struct parse *p, sop op, size_t opnd);
*
* It might seem better to implement this as a macro with a function as
* hard-case backup, but it's just too big and messy unless there are
* some changes to the data structures. Maybe later.
*/
static void
doemit(p, op, opnd)
register struct parse *p;
sop op;
size_t opnd;
{
/* avoid making error situations worse */
if (p->error != 0)
return;
/* deal with oversize operands ("can't happen", more or less) */
assert(opnd < 1<<OPSHIFT);
/* deal with undersized strip */
if (p->slen >= p->ssize)
enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */
assert(p->slen < p->ssize);
/* finally, it's all reduced to the easy case */
p->strip[p->slen++] = SOP(op, opnd);
}
/*
- doinsert - insert a sop into the strip
== static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
*/
static void
doinsert(p, op, opnd, pos)
register struct parse *p;
sop op;
size_t opnd;
sopno pos;
{
register sopno sn;
register sop s;
register int i;
/* avoid making error situations worse */
if (p->error != 0)
return;
sn = HERE();
EMIT(op, opnd); /* do checks, ensure space */
assert(HERE() == sn+1);
s = p->strip[sn];
/* adjust paren pointers */
assert(pos > 0);
for (i = 1; i < NPAREN; i++) {
if (p->pbegin[i] >= pos) {
p->pbegin[i]++;
}
if (p->pend[i] >= pos) {
p->pend[i]++;
}
}
memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
(HERE()-pos-1)*sizeof(sop));
p->strip[pos] = s;
}
/*
- dofwd - complete a forward reference
== static void dofwd(register struct parse *p, sopno pos, sop value);
*/
static void
dofwd(p, pos, value)
register struct parse *p;
register sopno pos;
sop value;
{
/* avoid making error situations worse */
if (p->error != 0)
return;
assert(value < 1<<OPSHIFT);
p->strip[pos] = OP(p->strip[pos]) | value;
}
/*
- enlarge - enlarge the strip
== static void enlarge(register struct parse *p, sopno size);
*/
static void
enlarge(p, size)
register struct parse *p;
register sopno size;
{
register sop *sp;
if (p->ssize >= size)
return;
sp = (sop *)realloc(p->strip, size*sizeof(sop));
if (sp == NULL) {
SETERROR(REG_ESPACE);
return;
}
p->strip = sp;
p->ssize = size;
}
/*
- stripsnug - compact the strip
== static void stripsnug(register struct parse *p, register struct re_guts *g);
*/
static void
stripsnug(p, g)
register struct parse *p;
register struct re_guts *g;
{
g->nstates = p->slen;
g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
if (g->strip == NULL) {
SETERROR(REG_ESPACE);
g->strip = p->strip;
}
}
/*
- findmust - fill in must and mlen with longest mandatory literal string
== static void findmust(register struct parse *p, register struct re_guts *g);
*
* This algorithm could do fancy things like analyzing the operands of |
* for common subsequences. Someday. This code is simple and finds most
* of the interesting cases.
*
* Note that must and mlen got initialized during setup.
*/
static void
findmust(p, g)
struct parse *p;
register struct re_guts *g;
{
register sop *scan;
sop *start;
register sop *newstart;
register sopno newlen;
register sop s;
register char *cp;
register sopno i;
/* avoid making error situations worse */
if (p->error != 0)
return;
/* find the longest OCHAR sequence in strip */
newlen = 0;
scan = g->strip + 1;
do {
s = *scan++;
switch (OP(s)) {
case OCHAR: /* sequence member */
if (newlen == 0) /* new sequence */
newstart = scan - 1;
newlen++;
break;
case OPLUS_: /* things that don't break one */
case OLPAREN:
case ORPAREN:
break;
case OQUEST_: /* things that must be skipped */
case OCH_:
scan--;
do {
scan += OPND(s);
s = *scan;
/* assert() interferes w debug printouts */
if (OP(s) != O_QUEST && OP(s) != O_CH &&
OP(s) != OOR2) {
g->iflags |= BAD;
return;
}
} while (OP(s) != O_QUEST && OP(s) != O_CH);
/* fallthrough */
default: /* things that break a sequence */
if (newlen > g->mlen) { /* ends one */
start = newstart;
g->mlen = newlen;
}
newlen = 0;
break;
}
} while (OP(s) != OEND);
if (g->mlen == 0) /* there isn't one */
return;
/* turn it into a character string */
g->must = malloc((size_t)g->mlen + 1);
if (g->must == NULL) { /* argh; just forget it */
g->mlen = 0;
return;
}
cp = g->must;
scan = start;
for (i = g->mlen; i > 0; i--) {
while (OP(s = *scan++) != OCHAR)
continue;
assert(cp < g->must + g->mlen);
*cp++ = (char)OPND(s);
}
assert(cp == g->must + g->mlen);
*cp++ = '\0'; /* just on general principles */
}
/*
- pluscount - count + nesting
== static sopno pluscount(register struct parse *p, register struct re_guts *g);
*/
static sopno /* nesting depth */
pluscount(p, g)
struct parse *p;
register struct re_guts *g;
{
register sop *scan;
register sop s;
register sopno plusnest = 0;
register sopno maxnest = 0;
if (p->error != 0)
return(0); /* there may not be an OEND */
scan = g->strip + 1;
do {
s = *scan++;
switch (OP(s)) {
case OPLUS_:
plusnest++;
break;
case O_PLUS:
if (plusnest > maxnest)
maxnest = plusnest;
plusnest--;
break;
}
} while (OP(s) != OEND);
if (plusnest != 0)
g->iflags |= BAD;
return(maxnest);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -