📄 regcomp.c
字号:
p_bracket(p); assert(p->next == bracket+2); p->next = oldnext; p->end = oldend;}/* - ordinary - emit an ordinary character == static void ordinary(struct parse *p, int ch); */static voidordinary(p, ch)struct parse *p;int ch;{ cat_t *cap = p->g->categories; if ((p->g->cflags®_ICASE) && isalpha((uch)ch) && othercase(ch) != ch) bothcases(p, ch); else { EMIT(OCHAR, (uch)ch); if (cap[ch] == 0) cap[ch] = p->g->ncategories++; }}/* - nonnewline - emit REG_NEWLINE version of OANY == static void nonnewline(struct parse *p); * * Boy, is this implementation ever a kludge... */static voidnonnewline(p)struct parse *p;{ char *oldnext = p->next; char *oldend = p->end; char bracket[4]; p->next = bracket; p->end = bracket+3; bracket[0] = '^'; bracket[1] = '\n'; bracket[2] = ']'; bracket[3] = '\0'; p_bracket(p); assert(p->next == bracket+3); p->next = oldnext; p->end = oldend;}/* - repeat - generate code for a bounded repetition, recursively if needed == static void repeat(struct parse *p, sopno start, int from, int to); */static voidrepeat(p, start, from, to)struct parse *p;sopno start; /* operand from here to end of strip */int from; /* repeated from this number */int to; /* to this number of times (maybe INFINITY) */{ sopno finish = HERE();# define N 2# define INF 3# define REP(f, t) ((f)*8 + (t))# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) sopno copy; if (p->error != 0) /* head off possible runaway recursion */ return; assert(from <= to); switch (REP(MAP(from), MAP(to))) { case REP(0, 0): /* must be user doing this */ DROP(finish-start); /* drop the operand */ break; case REP(0, 1): /* as x{1,1}? */ case REP(0, N): /* as x{1,n}? */ case REP(0, INF): /* as x{1,}? */ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ INSERT(OCH_, start); /* offset is wrong... */ repeat(p, start+1, 1, to); ASTERN(OOR1, start); AHEAD(start); /* ... fix it */ EMIT(OOR2, 0); AHEAD(THERE()); ASTERN(O_CH, THERETHERE()); break; case REP(1, 1): /* trivial case */ /* done */ break; case REP(1, N): /* as x?x{1,n-1} */ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ INSERT(OCH_, start); ASTERN(OOR1, start); AHEAD(start); EMIT(OOR2, 0); /* offset very wrong... */ AHEAD(THERE()); /* ...so fix it */ ASTERN(O_CH, THERETHERE()); copy = dupl(p, start+1, finish+1); assert(copy == finish+4); repeat(p, copy, 1, to-1); break; case REP(1, INF): /* as x+ */ INSERT(OPLUS_, start); ASTERN(O_PLUS, start); break; case REP(N, N): /* as xx{m-1,n-1} */ copy = dupl(p, start, finish); repeat(p, copy, from-1, to-1); break; case REP(N, INF): /* as xx{n-1,INF} */ copy = dupl(p, start, finish); repeat(p, copy, from-1, to); break; default: /* "can't happen" */ SETERROR(REG_ASSERT); /* just in case */ break; }}/* - seterr - set an error condition == static int seterr(struct parse *p, int e); */static int /* useless but makes type checking happy */seterr(p, e)struct parse *p;int e;{ if (p->error == 0) /* keep earliest error condition */ p->error = e; p->next = nuls; /* try to bring things to a halt */ p->end = nuls; return(0); /* make the return value well-defined */}/* - allocset - allocate a set of characters for [] == static cset *allocset(struct parse *p); */static cset *allocset(p)struct parse *p;{ int no = p->g->ncsets++; size_t nc; size_t nbytes; cset *cs; size_t css = (size_t)p->g->csetsize; int i; if (no >= p->ncsalloc) { /* need another column of space */ p->ncsalloc += CHAR_BIT; nc = p->ncsalloc; assert(nc % CHAR_BIT == 0); nbytes = nc / CHAR_BIT * css; if (p->g->sets == NULL) p->g->sets = (cset *)malloc(nc * sizeof(cset)); else p->g->sets = (cset *)reallocf((char *)p->g->sets, nc * sizeof(cset)); if (p->g->setbits == NULL) p->g->setbits = (uch *)malloc(nbytes); else { p->g->setbits = (uch *)reallocf((char *)p->g->setbits, nbytes); /* xxx this isn't right if setbits is now NULL */ for (i = 0; i < no; i++) p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); } if (p->g->sets != NULL && p->g->setbits != NULL) (void) memset((char *)p->g->setbits + (nbytes - css), 0, css); else { no = 0; SETERROR(REG_ESPACE); /* caller's responsibility not to do set ops */ } } assert(p->g->sets != NULL); /* xxx */ cs = &p->g->sets[no]; cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); cs->mask = 1 << ((no) % CHAR_BIT); cs->hash = 0; cs->smultis = 0; cs->multis = NULL; return(cs);}/* - freeset - free a now-unused set == static void freeset(struct parse *p, cset *cs); */static voidfreeset(p, cs)struct parse *p;cset *cs;{ int i; cset *top = &p->g->sets[p->g->ncsets]; size_t css = (size_t)p->g->csetsize; for (i = 0; i < css; i++) CHsub(cs, i); if (cs == top-1) /* recover only the easy case */ p->g->ncsets--;}/* - freezeset - final processing on a set of characters == static int freezeset(struct parse *p, cset *cs); * * The main task here is merging identical sets. This is usually a waste * of time (although the hash code minimizes the overhead), but can win * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash * is done using addition rather than xor -- all ASCII [aA] sets xor to * the same value! */static int /* set number */freezeset(p, cs)struct parse *p;cset *cs;{ short h = cs->hash; int i; cset *top = &p->g->sets[p->g->ncsets]; cset *cs2; size_t css = (size_t)p->g->csetsize; /* look for an earlier one which is the same */ for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) if (cs2->hash == h && cs2 != cs) { /* maybe */ for (i = 0; i < css; i++) if (!!CHIN(cs2, i) != !!CHIN(cs, i)) break; /* no */ if (i == css) break; /* yes */ } if (cs2 < top) { /* found one */ freeset(p, cs); cs = cs2; } return((int)(cs - p->g->sets));}/* - firstch - return first character in a set (which must have at least one) == static int firstch(struct parse *p, cset *cs); */static int /* character; there is no "none" value */firstch(p, cs)struct parse *p;cset *cs;{ int i; size_t css = (size_t)p->g->csetsize; for (i = 0; i < css; i++) if (CHIN(cs, i)) return((char)i); assert(never); return(0); /* arbitrary */}/* - nch - number of characters in a set == static int nch(struct parse *p, cset *cs); */static intnch(p, cs)struct parse *p;cset *cs;{ int i; size_t css = (size_t)p->g->csetsize; int n = 0; for (i = 0; i < css; i++) if (CHIN(cs, i)) n++; return(n);}/* - mcadd - add a collating element to a cset == static void mcadd(struct parse *p, cset *cs, \ == char *cp); */static voidmcadd(p, cs, cp)struct parse *p;cset *cs;char *cp;{ size_t oldend = cs->smultis; cs->smultis += strlen(cp) + 1; if (cs->multis == NULL) cs->multis = malloc(cs->smultis); else cs->multis = reallocf(cs->multis, cs->smultis); if (cs->multis == NULL) { SETERROR(REG_ESPACE); return; } (void) strcpy(cs->multis + oldend - 1, cp); cs->multis[cs->smultis - 1] = '\0';}#if used/* - mcsub - subtract a collating element from a cset == static void mcsub(cset *cs, char *cp); */static voidmcsub(cs, cp)cset *cs;char *cp;{ char *fp = mcfind(cs, cp); size_t len = strlen(fp); assert(fp != NULL); (void) memmove(fp, fp + len + 1, cs->smultis - (fp + len + 1 - cs->multis)); cs->smultis -= len; if (cs->smultis == 0) { free(cs->multis); cs->multis = NULL; return; } cs->multis = reallocf(cs->multis, cs->smultis); assert(cs->multis != NULL);}/* - mcin - is a collating element in a cset? == static int mcin(cset *cs, char *cp); */static intmcin(cs, cp)cset *cs;char *cp;{ return(mcfind(cs, cp) != NULL);}/* - mcfind - find a collating element in a cset == static char *mcfind(cset *cs, char *cp); */static char *mcfind(cs, cp)cset *cs;char *cp;{ char *p; if (cs->multis == NULL) return(NULL); for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) if (strcmp(cp, p) == 0) return(p); return(NULL);}#endif/* - mcinvert - invert the list of collating elements in a cset == static void mcinvert(struct parse *p, cset *cs); * * This would have to know the set of possibilities. Implementation * is deferred. */static voidmcinvert(p, cs)struct parse *p;cset *cs;{ assert(cs->multis == NULL); /* xxx */}/* - mccase - add case counterparts of the list of collating elements in a cset == static void mccase(struct parse *p, cset *cs); * * This would have to know the set of possibilities. Implementation * is deferred. */static voidmccase(p, cs)struct parse *p;cset *cs;{ assert(cs->multis == NULL); /* xxx */}/* - isinsets - is this character in any sets? == static int isinsets(struct re_guts *g, int c); */static int /* predicate */isinsets(g, c)struct re_guts *g;int c;{ uch *col; int i; int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; unsigned uc = (uch)c; for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) if (col[uc] != 0) return(1); return(0);}/* - samesets - are these two characters in exactly the same sets? == static int samesets(struct re_guts *g, int c1, int c2); */static int /* predicate */samesets(g, c1, c2)struct re_guts *g;int c1;int c2;{ uch *col; int i; int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; unsigned uc1 = (uch)c1; unsigned uc2 = (uch)c2; for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) if (col[uc1] != col[uc2]) return(0); return(1);}/* - categorize - sort out character categories == static void categorize(struct parse *p, struct re_guts *g); */static voidcategorize(p, g)struct parse *p;struct re_guts *g;{ cat_t *cats = g->categories; int c; int c2; cat_t cat; /* avoid making error situations worse */ if (p->error != 0) return; for (c = CHAR_MIN; c <= CHAR_MAX; c++) if (cats[c] == 0 && isinsets(g, c)) { cat = g->ncategories++; cats[c] = cat; for (c2 = c+1; c2 <= CHAR_MAX; c2++) if (cats[c2] == 0 && samesets(g, c, c2)) cats[c2] = cat; }}/* - dupl - emit a duplicate of a bunch of sops == static sopno dupl(struct parse *p, sopno start, sopno finish); */static sopno /* start of duplicate */dupl(p, start, finish)struct parse *p;sopno start; /* from here */sopno finish; /* to this less one */{ sopno ret = HERE(); sopno len = finish - start; assert(finish >= start); if (len == 0) return(ret); enlarge(p, p->ssize + len); /* this many unexpected additions */ assert(p->ssize >= p->slen + len); (void) memcpy((char *)(p->strip + p->slen), (char *)(p->strip + start), (size_t)len*sizeof(sop)); p->slen += len; return(ret);}/* - doemit - emit a strip operator == static void doemit(struct parse *p, sop op, size_t opnd); * * It might seem better to implement this as a macro with a function as * hard-case backup, but it's just too big and messy unless there are * some changes to the data structures. Maybe later. */static voiddoemit(p, op, opnd)struct parse *p;sop op;size_t opnd;{ /* avoid making error situations worse */ if (p->error != 0) return; /* deal with oversize operands ("can't happen", more or less) */ assert(opnd < 1<<OPSHIFT);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -