📄 regcomp.c
字号:
count2 = INFINITY; } else /* just a single number */ count2 = count; repeat(p, pos, count, count2); if (!EATTWO('\\', '}')) { /* error heuristics */ while (MORE() && !SEETWO('\\', '}')) NEXT(); REQUIRE(MORE(), REG_EBRACE); SETERROR(REG_BADBR); } } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */ return(1); return(0);}/* - p_count - parse a repetition count == static int p_count(register struct parse *p); */static int /* the value */p_count(p)register struct parse *p;{ register int count = 0; register int ndigits = 0; while (MORE() && isdigit(PEEK()) && count <= DUPMAX) { count = count*10 + (GETNEXT() - '0'); ndigits++; } REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); return(count);}/* - p_bracket - parse a bracketed character list == static void p_bracket(register struct parse *p); * * Note a significant property of this code: if the allocset() did SETERROR, * no set operations are done. */static voidp_bracket(p)register struct parse *p;{ register cset *cs = allocset(p); register int invert = 0; /* Dept of Truly Sickening Special-Case Kludges */ if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { EMIT(OBOW, 0); NEXTn(6); return; } if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { EMIT(OEOW, 0); NEXTn(6); return; } if (EAT('^')) invert++; /* make note to invert set at end */ if (EAT(']')) CHadd(cs, ']'); else if (EAT('-')) CHadd(cs, '-'); while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) p_b_term(p, cs); if (EAT('-')) CHadd(cs, '-'); MUSTEAT(']', REG_EBRACK); if (p->error != 0) /* don't mess things up further */ return; if (p->g->cflags®_ICASE) { register int i; register int ci; for (i = p->g->csetsize - 1; i >= 0; i--) if (CHIN(cs, i) && isalpha(i)) { ci = othercase(i); if (ci != i) CHadd(cs, ci); } if (cs->multis != NULL) mccase(p, cs); } if (invert) { register int i; for (i = p->g->csetsize - 1; i >= 0; i--) if (CHIN(cs, i)) CHsub(cs, i); else CHadd(cs, i); if (p->g->cflags®_NEWLINE) CHsub(cs, '\n'); if (cs->multis != NULL) mcinvert(p, cs); } assert(cs->multis == NULL); /* xxx */ if (nch(p, cs) == 1) { /* optimize singleton sets */ ordinary(p, firstch(p, cs)); freeset(p, cs); } else EMIT(OANYOF, freezeset(p, cs));}/* - p_b_term - parse one term of a bracketed character list == static void p_b_term(register struct parse *p, register cset *cs); */static voidp_b_term(p, cs)register struct parse *p;register cset *cs;{ register char c; register char start, finish; register int i; /* classify what we've got */ switch ((MORE()) ? PEEK() : '\0') { case '[': c = (MORE2()) ? PEEK2() : '\0'; break; case '-': SETERROR(REG_ERANGE); return; /* NOTE RETURN */ break; default: c = '\0'; break; } switch (c) { case ':': /* character class */ NEXT2(); REQUIRE(MORE(), REG_EBRACK); c = PEEK(); REQUIRE(c != '-' && c != ']', REG_ECTYPE); p_b_cclass(p, cs); REQUIRE(MORE(), REG_EBRACK); REQUIRE(EATTWO(':', ']'), REG_ECTYPE); break; case '=': /* equivalence class */ NEXT2(); REQUIRE(MORE(), REG_EBRACK); c = PEEK(); REQUIRE(c != '-' && c != ']', REG_ECOLLATE); p_b_eclass(p, cs); REQUIRE(MORE(), REG_EBRACK); REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); break; default: /* symbol, ordinary character, or range *//* xxx revision needed for multichar stuff */ start = p_b_symbol(p); if (SEE('-') && MORE2() && PEEK2() != ']') { /* range */ NEXT(); if (EAT('-')) finish = '-'; else finish = p_b_symbol(p); } else finish = start;/* xxx what about signed chars here... */ REQUIRE(start <= finish, REG_ERANGE); for (i = start; i <= finish; i++) CHadd(cs, i); break; }}/* - p_b_cclass - parse a character-class name and deal with it == static void p_b_cclass(register struct parse *p, register cset *cs); */static voidp_b_cclass(p, cs)register struct parse *p;register cset *cs;{ register char *sp = p->next; register struct cclass *cp; register size_t len; register char *u; register char c; while (MORE() && isalpha(PEEK())) NEXT(); len = p->next - sp; for (cp = cclasses; cp->name != NULL; cp++) if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') break; if (cp->name == NULL) { /* oops, didn't find it */ SETERROR(REG_ECTYPE); return; } u = cp->chars; while ((c = *u++) != '\0') CHadd(cs, c); for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) MCadd(p, cs, u);}/* - p_b_eclass - parse an equivalence-class name and deal with it == static void p_b_eclass(register struct parse *p, register cset *cs); * * This implementation is incomplete. xxx */static voidp_b_eclass(p, cs)register struct parse *p;register cset *cs;{ register char c; c = p_b_coll_elem(p, '='); CHadd(cs, c);}/* - p_b_symbol - parse a character or [..]ed multicharacter collating symbol == static char p_b_symbol(register struct parse *p); */static char /* value of symbol */p_b_symbol(p)register struct parse *p;{ register char value; REQUIRE(MORE(), REG_EBRACK); if (!EATTWO('[', '.')) return(GETNEXT()); /* collating symbol */ value = p_b_coll_elem(p, '.'); REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); return(value);}/* - p_b_coll_elem - parse a collating-element name and look it up == static char p_b_coll_elem(register struct parse *p, int endc); */static char /* value of collating element */p_b_coll_elem(p, endc)register struct parse *p;int endc; /* name ended by endc,']' */{ register char *sp = p->next; register struct cname *cp; register int len; while (MORE() && !SEETWO(endc, ']')) NEXT(); if (!MORE()) { SETERROR(REG_EBRACK); return(0); } len = p->next - sp; for (cp = cnames; cp->name != NULL; cp++) if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') return(cp->code); /* known name */ if (len == 1) return(*sp); /* single character */ SETERROR(REG_ECOLLATE); /* neither */ return(0);}/* - othercase - return the case counterpart of an alphabetic == static char othercase(int ch); */static char /* if no counterpart, return ch */othercase(ch)int ch;{ assert(isalpha(ch)); if (isupper(ch)) return(tolower(ch)); else if (islower(ch)) return(toupper(ch)); else /* peculiar, but could happen */ return(ch);}/* - bothcases - emit a dualcase version of a two-case character == static void bothcases(register struct parse *p, int ch); * * Boy, is this implementation ever a kludge... */static voidbothcases(p, ch)register struct parse *p;int ch;{ register char *oldnext = p->next; register char *oldend = p->end; char bracket[3]; assert(othercase(ch) != ch); /* p_bracket() would recurse */ p->next = bracket; p->end = bracket+2; bracket[0] = ch; bracket[1] = ']'; bracket[2] = '\0'; p_bracket(p); assert(p->next == bracket+2); p->next = oldnext; p->end = oldend;}/* - ordinary - emit an ordinary character == static void ordinary(register struct parse *p, register int ch); */static voidordinary(p, ch)register struct parse *p;register int ch;{ register cat_t *cap = p->g->categories; if ((p->g->cflags®_ICASE) && isalpha(ch) && othercase(ch) != ch) bothcases(p, ch); else { EMIT(OCHAR, (unsigned char)ch); if (cap[ch] == 0) cap[ch] = p->g->ncategories++; }}/* - nonnewline - emit REG_NEWLINE version of OANY == static void nonnewline(register struct parse *p); * * Boy, is this implementation ever a kludge... */static voidnonnewline(p)register struct parse *p;{ register char *oldnext = p->next; register char *oldend = p->end; char bracket[4]; p->next = bracket; p->end = bracket+3; bracket[0] = '^'; bracket[1] = '\n'; bracket[2] = ']'; bracket[3] = '\0'; p_bracket(p); assert(p->next == bracket+3); p->next = oldnext; p->end = oldend;}/* - repeat - generate code for a bounded repetition, recursively if needed == static void repeat(register struct parse *p, sopno start, int from, int to); */static voidrepeat(p, start, from, to)register struct parse *p;sopno start; /* operand from here to end of strip */int from; /* repeated from this number */int to; /* to this number of times (maybe INFINITY) */{ register sopno finish = HERE();# define N 2# define INF 3# define REP(f, t) ((f)*8 + (t))# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) register sopno copy; if (p->error != 0) /* head off possible runaway recursion */ return; assert(from <= to); switch (REP(MAP(from), MAP(to))) { case REP(0, 0): /* must be user doing this */ DROP(finish-start); /* drop the operand */ break; case REP(0, 1): /* as x{1,1}? */ case REP(0, N): /* as x{1,n}? */ case REP(0, INF): /* as x{1,}? */ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ INSERT(OCH_, start); /* offset is wrong... */ repeat(p, start+1, 1, to); ASTERN(OOR1, start); AHEAD(start); /* ... fix it */ EMIT(OOR2, 0); AHEAD(THERE()); ASTERN(O_CH, THERETHERE()); break; case REP(1, 1): /* trivial case */ /* done */ break; case REP(1, N): /* as x?x{1,n-1} */ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ INSERT(OCH_, start); ASTERN(OOR1, start); AHEAD(start); EMIT(OOR2, 0); /* offset very wrong... */ AHEAD(THERE()); /* ...so fix it */ ASTERN(O_CH, THERETHERE()); copy = dupl(p, start+1, finish+1); assert(copy == finish+4); repeat(p, copy, 1, to-1); break; case REP(1, INF): /* as x+ */ INSERT(OPLUS_, start); ASTERN(O_PLUS, start); break; case REP(N, N): /* as xx{m-1,n-1} */ copy = dupl(p, start, finish); repeat(p, copy, from-1, to-1); break; case REP(N, INF): /* as xx{n-1,INF} */ copy = dupl(p, start, finish); repeat(p, copy, from-1, to); break; default: /* "can't happen" */ SETERROR(REG_ASSERT); /* just in case */ break; }}/* - seterr - set an error condition == static int seterr(register struct parse *p, int e); */static int /* useless but makes type checking happy */seterr(p, e)register struct parse *p;int e;{ if (p->error == 0) /* keep earliest error condition */ p->error = e; p->next = nuls; /* try to bring things to a halt */ p->end = nuls; return(0); /* make the return value well-defined */}/* - allocset - allocate a set of characters for [] == static cset *allocset(register struct parse *p); */static cset *allocset(p)register struct parse *p;{ register int no = p->g->ncsets++; register size_t nc; register size_t nbytes; register cset *cs; register size_t css = (size_t)p->g->csetsize; register int i; if (no >= p->ncsalloc) { /* need another column of space */ p->ncsalloc += CHAR_BIT; nc = p->ncsalloc; assert(nc % CHAR_BIT == 0); nbytes = nc / CHAR_BIT * css; if (p->g->sets == NULL) p->g->sets = (cset *)malloc(nc * sizeof(cset)); else p->g->sets = (cset *)realloc((char *)p->g->sets, nc * sizeof(cset)); if (p->g->setbits == NULL) p->g->setbits = (uch *)malloc(nbytes); else { p->g->setbits = (uch *)realloc((char *)p->g->setbits, nbytes); /* xxx this isn't right if setbits is now NULL */ for (i = 0; i < no; i++) p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); } if (p->g->sets != NULL && p->g->setbits != NULL) (void) memset((char *)p->g->setbits + (nbytes - css), 0, css); else { no = 0; SETERROR(REG_ESPACE); /* caller's responsibility not to do set ops */ } } assert(p->g->sets != NULL); /* xxx */ cs = &p->g->sets[no]; cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); cs->mask = 1 << ((no) % CHAR_BIT); cs->hash = 0; cs->smultis = 0; cs->multis = NULL; return(cs);}/* - freeset - free a now-unused set == static void freeset(register struct parse *p, register cset *cs); */static voidfreeset(p, cs)register struct parse *p;register cset *cs;{ register size_t i; register cset *top = &p->g->sets[p->g->ncsets]; register size_t css = (size_t)p->g->csetsize; for (i = 0; i < css; i++) CHsub(cs, i); if (cs == top-1) /* recover only the easy case */ p->g->ncsets--;}/* - freezeset - final processing on a set of characters == static int freezeset(register struct parse *p, register cset *cs); *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -