📄 regcomp.c
字号:
default: c = '\0'; break; } switch (c) { case ':': /* character class */ NEXT2(); REQUIRE(MORE(), REG_EBRACK); c = PEEK(); REQUIRE(c != '-' && c != ']', REG_ECTYPE); p_b_cclass(p, cs); REQUIRE(MORE(), REG_EBRACK); REQUIRE(EATTWO(':', ']'), REG_ECTYPE); break; case '=': /* equivalence class */ NEXT2(); REQUIRE(MORE(), REG_EBRACK); c = PEEK(); REQUIRE(c != '-' && c != ']', REG_ECOLLATE); p_b_eclass(p, cs); REQUIRE(MORE(), REG_EBRACK); REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); break; default: /* symbol, ordinary character, or range *//* xxx revision needed for multichar stuff */ start = p_b_symbol(p); if (SEE('-') && MORE2() && PEEK2() != ']') { /* range */ NEXT(); if (EAT('-')) finish = '-'; else finish = p_b_symbol(p); } else finish = start;/* xxx what about signed chars here... */ REQUIRE(start <= finish, REG_ERANGE); for (i = start; i <= finish; i++) CHadd(cs, i); break; }}/* - p_b_cclass - parse a character-class name and deal with it == static void p_b_cclass(register struct parse *p, register cset *cs); */static voidp_b_cclass(p, cs)register struct parse *p;register cset *cs;{ register char *sp = p->next; register struct cclass *cp; register size_t len; register char *u; register char c; while (MORE() && isalpha(PEEK())) NEXT(); len = p->next - sp; for (cp = cclasses; cp->name != NULL; cp++) if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') break; if (cp->name == NULL) { /* oops, didn't find it */ SETERROR(REG_ECTYPE); return; } u = cp->chars; while ((c = *u++) != '\0') CHadd(cs, c); for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) MCadd(p, cs, u);}/* - p_b_eclass - parse an equivalence-class name and deal with it == static void p_b_eclass(register struct parse *p, register cset *cs); * * This implementation is incomplete. xxx */static voidp_b_eclass(p, cs)register struct parse *p;register cset *cs;{ register char c; c = p_b_coll_elem(p, '='); CHadd(cs, c);}/* - p_b_symbol - parse a character or [..]ed multicharacter collating symbol == static char p_b_symbol(register struct parse *p); */static char /* value of symbol */p_b_symbol(p)register struct parse *p;{ register char value; REQUIRE(MORE(), REG_EBRACK); if (!EATTWO('[', '.')) return(GETNEXT()); /* collating symbol */ value = p_b_coll_elem(p, '.'); REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); return(value);}/* - p_b_coll_elem - parse a collating-element name and look it up == static char p_b_coll_elem(register struct parse *p, int endc); */static char /* value of collating element */p_b_coll_elem(p, endc)register struct parse *p;int endc; /* name ended by endc,']' */{ register char *sp = p->next; register struct cname *cp; register int len; /* register char c; XXX jcf: not used */ while (MORE() && !SEETWO(endc, ']')) NEXT(); if (!MORE()) { SETERROR(REG_EBRACK); return(0); } len = p->next - sp; for (cp = cnames; cp->name != NULL; cp++) if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') return(cp->code); /* known name */ if (len == 1) return(*sp); /* single character */ SETERROR(REG_ECOLLATE); /* neither */ return(0);}/* - othercase - return the case counterpart of an alphabetic == static char othercase(int ch); */static char /* if no counterpart, return ch */othercase(ch)int ch;{ assert(isalpha(ch)); if (isupper(ch)) return(tolower(ch)); else if (islower(ch)) return(toupper(ch)); else /* peculiar, but could happen */ return(ch);}/* - bothcases - emit a dualcase version of a two-case character == static void bothcases(register struct parse *p, int ch); * * Boy, is this implementation ever a kludge... */static voidbothcases(p, ch)register struct parse *p;int ch;{ register char *oldnext = p->next; register char *oldend = p->end; char bracket[3]; assert(othercase(ch) != ch); /* p_bracket() would recurse */ p->next = bracket; p->end = bracket+2; bracket[0] = ch; bracket[1] = ']'; bracket[2] = '\0'; p_bracket(p); assert(p->next == bracket+2); p->next = oldnext; p->end = oldend;}/* - ordinary - emit an ordinary character == static void ordinary(register struct parse *p, register int ch); */static voidordinary(p, ch)register struct parse *p;register int ch;{ register cat_t *cap = p->g->categories; if ((p->g->cflags®_ICASE) && isalpha(ch) && othercase(ch) != ch) bothcases(p, ch); else { EMIT(OCHAR, (unsigned char)ch); if (cap[ch] == 0) cap[ch] = p->g->ncategories++; }}/* - nonnewline - emit REG_NEWLINE version of OANY == static void nonnewline(register struct parse *p); * * Boy, is this implementation ever a kludge... */static voidnonnewline(p)register struct parse *p;{ register char *oldnext = p->next; register char *oldend = p->end; char bracket[4]; p->next = bracket; p->end = bracket+3; bracket[0] = '^'; bracket[1] = '\n'; bracket[2] = ']'; bracket[3] = '\0'; p_bracket(p); assert(p->next == bracket+3); p->next = oldnext; p->end = oldend;}/* - repeat - generate code for a bounded repetition, recursively if needed == static void repeat(register struct parse *p, sopno start, int from, int to); */static voidrepeat(p, start, from, to)register struct parse *p;sopno start; /* operand from here to end of strip */int from; /* repeated from this number */int to; /* to this number of times (maybe INFINITY) */{ register sopno finish = HERE();# define N 2# define INF 3# define REP(f, t) ((f)*8 + (t))# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) register sopno copy; if (p->error != 0) /* head off possible runaway recursion */ return; assert(from <= to); switch (REP(MAP(from), MAP(to))) { case REP(0, 0): /* must be user doing this */ DROP(finish-start); /* drop the operand */ break; case REP(0, 1): /* as x{1,1}? */ case REP(0, N): /* as x{1,n}? */ case REP(0, INF): /* as x{1,}? */ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ INSERT(OCH_, start); /* offset is wrong... */ repeat(p, start+1, 1, to); ASTERN(OOR1, start); AHEAD(start); /* ... fix it */ EMIT(OOR2, 0); AHEAD(THERE()); ASTERN(O_CH, THERETHERE()); break; case REP(1, 1): /* trivial case */ /* done */ break; case REP(1, N): /* as x?x{1,n-1} */ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ INSERT(OCH_, start); ASTERN(OOR1, start); AHEAD(start); EMIT(OOR2, 0); /* offset very wrong... */ AHEAD(THERE()); /* ...so fix it */ ASTERN(O_CH, THERETHERE()); copy = dupl(p, start+1, finish+1); assert(copy == finish+4); repeat(p, copy, 1, to-1); break; case REP(1, INF): /* as x+ */ INSERT(OPLUS_, start); ASTERN(O_PLUS, start); break; case REP(N, N): /* as xx{m-1,n-1} */ copy = dupl(p, start, finish); repeat(p, copy, from-1, to-1); break; case REP(N, INF): /* as xx{n-1,INF} */ copy = dupl(p, start, finish); repeat(p, copy, from-1, to); break; default: /* "can't happen" */ SETERROR(REG_ASSERT); /* just in case */ break; }}/* - seterr - set an error condition == static int seterr(register struct parse *p, int e); */static int /* useless but makes type checking happy */seterr(p, e)register struct parse *p;int e;{ if (p->error == 0) /* keep earliest error condition */ p->error = e; p->next = nuls; /* try to bring things to a halt */ p->end = nuls; return(0); /* make the return value well-defined */}/* - allocset - allocate a set of characters for [] == static cset *allocset(register struct parse *p); */static cset *allocset(p)register struct parse *p;{ register int no = p->g->ncsets++; register size_t nc; register size_t nbytes; register cset *cs; register size_t css = (size_t)p->g->csetsize; register int i; if (no >= p->ncsalloc) { /* need another column of space */ p->ncsalloc += CHAR_BIT; nc = p->ncsalloc; assert(nc % CHAR_BIT == 0); nbytes = nc / CHAR_BIT * css; if (p->g->sets == NULL) p->g->sets = (cset *)malloc(nc * sizeof(cset)); else p->g->sets = (cset *)realloc((char *)p->g->sets, nc * sizeof(cset)); if (p->g->setbits == NULL) p->g->setbits = (uch *)malloc(nbytes); else { p->g->setbits = (uch *)realloc((char *)p->g->setbits, nbytes); /* xxx this isn't right if setbits is now NULL */ for (i = 0; i < no; i++) p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); } if (p->g->sets != NULL && p->g->setbits != NULL) (void) memset((char *)p->g->setbits + (nbytes - css), 0, css); else { no = 0; SETERROR(REG_ESPACE); /* caller's responsibility not to do set ops */ } } assert(p->g->sets != NULL); /* xxx */ cs = &p->g->sets[no]; cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); cs->mask = 1 << ((no) % CHAR_BIT); cs->hash = 0; cs->smultis = 0; cs->multis = NULL; return(cs);}/* - freeset - free a now-unused set == static void freeset(register struct parse *p, register cset *cs); */static voidfreeset(p, cs)register struct parse *p;register cset *cs;{ register int i; register cset *top = &p->g->sets[p->g->ncsets]; register size_t css = (size_t)p->g->csetsize; for (i = 0; i < (int) css; i++) CHsub(cs, i); if (cs == top-1) /* recover only the easy case */ p->g->ncsets--;}/* - freezeset - final processing on a set of characters == static int freezeset(register struct parse *p, register cset *cs); * * The main task here is merging identical sets. This is usually a waste * of time (although the hash code minimizes the overhead), but can win * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash * is done using addition rather than xor -- all ASCII [aA] sets xor to * the same value! */static int /* set number */freezeset(p, cs)register struct parse *p;register cset *cs;{ register uch h = cs->hash; register int i; register cset *top = &p->g->sets[p->g->ncsets]; register cset *cs2; register size_t css = (size_t)p->g->csetsize; /* look for an earlier one which is the same */ for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) if (cs2->hash == h && cs2 != cs) { /* maybe */ for (i = 0; i < (int) css; i++) if (!!CHIN(cs2, i) != !!CHIN(cs, i)) break; /* no */ if (i == (int) css) break; /* yes */ } if (cs2 < top) { /* found one */ freeset(p, cs); cs = cs2; } return((int)(cs - p->g->sets));}/* - firstch - return first character in a set (which must have at least one) == static int firstch(register struct parse *p, register cset *cs); */static int /* character; there is no "none" value */firstch(p, cs)register struct parse *p;register cset *cs;{ register int i; register size_t css = (size_t)p->g->csetsize; for (i = 0; i < (int) css; i++) if (CHIN(cs, i)) return((char)i); assert(never); return(0); /* arbitrary */}/* - nch - number of characters in a set == static int nch(register struct parse *p, register cset *cs); */static intnch(p, cs)register struct parse *p;register cset *cs;{ register int i; register size_t css = (size_t)p->g->csetsize; register int n = 0; for (i = 0; i < (int) css; i++) if (CHIN(cs, i))
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -