📄 regcomp.c
字号:
p->pbegin[subno] = HERE(); EMIT(OLPAREN, subno); /* the MORE here is an error heuristic */ if (MORE() && !SEETWO('\\', ')')) p_bre(p, '\\', ')'); if (subno < NPAREN) { p->pend[subno] = HERE(); assert(p->pend[subno] != 0); } EMIT(ORPAREN, subno); REQUIRE(EATTWO('\\', ')'), REG_EPAREN); break; case BACKSL | ')': /* should not get here -- must be user */ case BACKSL | '}': SETERROR(REG_EPAREN); break; case BACKSL | '1': case BACKSL | '2': case BACKSL | '3': case BACKSL | '4': case BACKSL | '5': case BACKSL | '6': case BACKSL | '7': case BACKSL | '8': case BACKSL | '9': i = (c & ~BACKSL) - '0'; assert(i < NPAREN); if (p->pend[i] != 0) { assert(i <= p->g->nsub); EMIT(OBACK_, i); assert(p->pbegin[i] != 0); assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); assert(OP(p->strip[p->pend[i]]) == ORPAREN); dupl(p, p->pbegin[i] + 1, p->pend[i]); EMIT(O_BACK, i); } else SETERROR(REG_ESUBREG); p->g->backrefs = 1; break; case '*': REQUIRE(starordinary, REG_BADRPT); /* FALLTHROUGH */ default: ordinary(p, c & ~BACKSL); break; } if (EAT('*')) { /* implemented as +? */ /* this case does not require the (y|) trick, noKLUDGE */ INSERT(OPLUS_, pos); ASTERN(O_PLUS, pos); INSERT(OQUEST_, pos); ASTERN(O_QUEST, pos); } else if (EATTWO('\\', '{')) { count = p_count(p); if (EAT(',')) { if (MORE() && pg_isdigit(PEEK())) { count2 = p_count(p); REQUIRE(count <= count2, REG_BADBR); } else/* single number with comma */ count2 = INFINITY; } else/* just a single number */ count2 = count; repeat(p, pos, count, count2); if (!EATTWO('\\', '}')) { /* error heuristics */ while (MORE() && !SEETWO('\\', '}')) NEXT(); REQUIRE(MORE(), REG_EBRACE); SETERROR(REG_BADBR); } } else if (c == (unsigned char) '$') /* $ (but not \$) ends it */ return 1; return 0;}/* - p_count - parse a repetition count == static int p_count(struct parse *p); */static int /* the value */p_count(p)struct parse *p;{ int count = 0; int ndigits = 0; while (MORE() && pg_isdigit(PEEK()) && count <= DUPMAX) { count = count * 10 + (GETNEXT() - '0'); ndigits++; } REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); return count;}/* - p_bracket - parse a bracketed character list == static void p_bracket(struct parse *p); * * Note a significant property of this code: if the allocset() did SETERROR, * no set operations are done. */static voidp_bracket(p)struct parse *p;{ cset *cs = allocset(p); int invert = 0;#ifdef MULTIBYTE pg_wchar sp1[] = {'[', ':', '<', ':', ']', ']'}; pg_wchar sp2[] = {'[', ':', '>', ':', ']', ']'};#endif /* Dept of Truly Sickening Special-Case Kludges */#ifdef MULTIBYTE if (p->next + 5 < p->end && pg_wchar_strncmp(p->next, sp1, 6) == 0)#else if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0)#endif { EMIT(OBOW, 0); NEXTn(6); return; }#ifdef MULTIBYTE if (p->next + 5 < p->end && pg_wchar_strncmp(p->next, sp2, 6) == 0)#else if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0)#endif { EMIT(OEOW, 0); NEXTn(6); return; } if (EAT('^')) invert++; /* make note to invert set at end */ if (EAT(']')) CHadd(cs, ']'); else if (EAT('-')) CHadd(cs, '-'); while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) p_b_term(p, cs); if (EAT('-')) CHadd(cs, '-'); MUSTEAT(']', REG_EBRACK); if (p->error != 0) /* don't mess things up further */ return; if (p->g->cflags & REG_ICASE) { int i; int ci; for (i = p->g->csetsize - 1; i >= 0; i--) if (CHIN(cs, i) && pg_isalpha(i)) { ci = othercase(i); if (ci != i) CHadd(cs, ci); } if (cs->multis != NULL) mccase(p, cs); } if (invert) { int i; for (i = p->g->csetsize - 1; i >= 0; i--) if (CHIN(cs, i)) CHsub(cs, i); else CHadd(cs, i); if (p->g->cflags & REG_NEWLINE) CHsub(cs, '\n'); if (cs->multis != NULL) mcinvert(p, cs); } assert(cs->multis == NULL); /* xxx */ if (nch(p, cs) == 1) { /* optimize singleton sets */ ordinary(p, firstch(p, cs)); freeset(p, cs); } else EMIT(OANYOF, freezeset(p, cs));}/* - p_b_term - parse one term of a bracketed character list == static void p_b_term(struct parse *p, cset *cs); */static voidp_b_term(p, cs)struct parse *p;cset *cs;{ pg_wchar c; pg_wchar start, finish; int i; /* classify what we've got */ switch ((MORE()) ? PEEK() : '\0') { case '[': c = (MORE2()) ? PEEK2() : '\0'; break; case '-': SETERROR(REG_ERANGE); return; /* NOTE RETURN */ break; default: c = '\0'; break; } switch (c) { case ':': /* character class */ NEXT2(); REQUIRE(MORE(), REG_EBRACK); c = PEEK(); REQUIRE(c != '-' && c != ']', REG_ECTYPE); p_b_cclass(p, cs); REQUIRE(MORE(), REG_EBRACK); REQUIRE(EATTWO(':', ']'), REG_ECTYPE); break; case '=': /* equivalence class */ NEXT2(); REQUIRE(MORE(), REG_EBRACK); c = PEEK(); REQUIRE(c != '-' && c != ']', REG_ECOLLATE); p_b_eclass(p, cs); REQUIRE(MORE(), REG_EBRACK); REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); break; default: /* symbol, ordinary character, or range *//* xxx revision needed for multichar stuff */ start = p_b_symbol(p); if (SEE('-') && MORE2() && PEEK2() != ']') { /* range */ NEXT(); if (EAT('-')) finish = '-'; else finish = p_b_symbol(p); } else finish = start;/* xxx what about signed chars here... */ REQUIRE(start <= finish, REG_ERANGE);#ifdef MULTIBYTE if (CHlc(start) != CHlc(finish)) SETERROR(REG_ERANGE);#endif for (i = start; i <= finish; i++) CHadd(cs, i); break; }}/* - p_b_cclass - parse a character-class name and deal with it == static void p_b_cclass(struct parse *p, cset *cs); */static voidp_b_cclass(p, cs)struct parse *p;cset *cs;{ pg_wchar *sp = p->next; struct cclass *cp; size_t len; char *u; char c; while (MORE() && pg_isalpha(PEEK())) NEXT(); len = p->next - sp; for (cp = cclasses; cp->name != NULL; cp++)#ifdef MULTIBYTE if (pg_char_and_wchar_strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')#else if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')#endif break; if (cp->name == NULL) { /* oops, didn't find it */ SETERROR(REG_ECTYPE); return; } u = cp->chars; while ((c = *u++) != '\0') CHadd(cs, c); for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) MCadd(p, cs, u);}/* - p_b_eclass - parse an equivalence-class name and deal with it == static void p_b_eclass(struct parse *p, cset *cs); * * This implementation is incomplete. xxx */static voidp_b_eclass(p, cs)struct parse *p;cset *cs;{ char c; c = p_b_coll_elem(p, '='); CHadd(cs, c);}/* - p_b_symbol - parse a character or [..]ed multicharacter collating symbol == static char p_b_symbol(struct parse *p); */static pg_wchar /* value of symbol */p_b_symbol(p)struct parse *p;{ pg_wchar value; REQUIRE(MORE(), REG_EBRACK); if (!EATTWO('[', '.')) return GETNEXT(); /* collating symbol */ value = p_b_coll_elem(p, '.'); REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); return value;}/* - p_b_coll_elem - parse a collating-element name and look it up == static char p_b_coll_elem(struct parse *p, int endc); */static char /* value of collating element */p_b_coll_elem(p, endc)struct parse *p;int endc; /* name ended by endc,']' */{ pg_wchar *sp = p->next; struct cname *cp; int len; while (MORE() && !SEETWO(endc, ']')) NEXT(); if (!MORE()) { SETERROR(REG_EBRACK); return 0; } len = p->next - sp; for (cp = cnames; cp->name != NULL; cp++)#ifdef MULTIBYTE if (pg_char_and_wchar_strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')#else if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')#endif return cp->code; /* known name */ if (len == 1) return *sp; /* single character */ SETERROR(REG_ECOLLATE); /* neither */ return 0;}/* - othercase - return the case counterpart of an alphabetic == static char othercase(int ch); */#ifdef MULTIBYTEstatic unsigned char /* if no counterpart, return ch */#elsestatic char /* if no counterpart, return ch */#endifothercase(ch)int ch;{ assert(pg_isalpha(ch)); if (pg_isupper(ch))#ifdef MULTIBYTE return (unsigned char) tolower(ch);#else return tolower(ch);#endif else if (pg_islower(ch))#ifdef MULTIBYTE return (unsigned char) toupper(ch);#else return toupper(ch);#endif else/* peculiar, but could happen */#ifdef MULTIBYTE return (unsigned char) ch;#else return ch;#endif}/* - bothcases - emit a dualcase version of a two-case character == static void bothcases(struct parse *p, int ch); * * Boy, is this implementation ever a kludge... */static voidbothcases(p, ch)struct parse *p;int ch;{ pg_wchar *oldnext = p->next; pg_wchar *oldend = p->end; pg_wchar bracket[3]; assert(othercase(ch) != ch);/* p_bracket() would recurse */ p->next = bracket; p->end = bracket + 2; bracket[0] = ch; bracket[1] = ']'; bracket[2] = '\0'; p_bracket(p); assert(p->next == bracket + 2); p->next = oldnext; p->end = oldend;}/* - ordinary - emit an ordinary character == static void ordinary(struct parse *p, int ch); */static voidordinary(p, ch)struct parse *p;int ch;{ cat_t *cap = p->g->categories; if ((p->g->cflags & REG_ICASE) && pg_isalpha(ch) && othercase(ch) != ch) bothcases(p, ch); else {#ifdef MULTIBYTE EMIT(OCHAR, (pg_wchar) ch);#else EMIT(OCHAR, (unsigned char) ch);#endif if (ch >= CHAR_MIN && ch <= CHAR_MAX && cap[ch] == 0) cap[ch] = p->g->ncategories++; }}/* - nonnewline - emit REG_NEWLINE version of OANY == static void nonnewline(struct parse *p); * * Boy, is this implementation ever a kludge... */static voidnonnewline(p)struct parse *p;{ pg_wchar *oldnext = p->next; pg_wchar *oldend = p->end; pg_wchar bracket[4]; p->next = bracket; p->end = bracket + 3; bracket[0] = '^'; bracket[1] = '\n'; bracket[2] = ']'; bracket[3] = '\0'; p_bracket(p); assert(p->next == bracket + 3); p->next = oldnext; p->end = oldend;}/* - repeat - generate code for a bounded repetition, recursively if needed == static void repeat(struct parse *p, sopno start, int from, int to); */static voidrepeat(p, start, from, to)struct parse *p;sopno start; /* operand from here to end of strip */int from; /* repeated from this number */int to; /* to this number of times (maybe * INFINITY) */{ sopno finish = HERE();#define N 2#define INF 3#define REP(f, t) ((f)*8 + (t))#define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) sopno copy; if (p->error != 0) /* head off possible runaway recursion */ return; assert(from <= to); switch (REP(MAP(from), MAP(to))) { case REP(0, 0): /* must be user doing this */ DROP(finish - start); /* drop the operand */ break; case REP(0, 1): /* as x{1,1}? */ case REP(0, N): /* as x{1,n}? */ case REP(0, INF): /* as x{1,}? */ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ INSERT(OCH_, start);/* offset is wrong... */ repeat(p, start + 1, 1, to); ASTERN(OOR1, start); AHEAD(start); /* ... fix it */ EMIT(OOR2, 0); AHEAD(THERE()); ASTERN(O_CH, THERETHERE()); break; case REP(1, 1): /* trivial case */ /* done */ break; case REP(1, N): /* as x?x{1,n-1} */ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ INSERT(OCH_, start); ASTERN(OOR1, start); AHEAD(start); EMIT(OOR2, 0); /* offset very wrong... */ AHEAD(THERE()); /* ...so fix it */ ASTERN(O_CH, THERETHERE()); copy = dupl(p, start + 1, finish + 1); assert(copy == finish + 4); repeat(p, copy, 1, to - 1); break; case REP(1, INF): /* as x+ */ INSERT(OPLUS_, start); ASTERN(O_PLUS, start); break; case REP(N, N): /* as xx{m-1,n-1} */ copy = dupl(p, start, finish); repeat(p, copy, from - 1, to - 1); break; case REP(N, INF): /* as xx{n-1,INF} */ copy = dupl(p, start, finish); repeat(p, copy, from - 1, to); break; default: /* "can't happen" */ SETERROR(REG_ASSERT); /* just in case */ break; }}/* - seterr - set an error condition == static int seterr(struct parse *p, int e); */static int /* useless but makes type checking happy */seterr(p, e)struct parse *p;int e;{ if (p->error == 0) /* keep earliest error condition */ p->error = e; p->next = nuls; /* try to bring things to a halt */ p->end = nuls; return 0; /* make the return value well-defined */}/* - allocset - allocate a set of characters for [] == static cset *allocset(struct parse *p); */static cset *allocset(p)struct parse *p;{ int no = p->g->ncsets++; size_t nc; size_t nbytes; cset *cs; size_t css = (size_t) p->g->csetsize; int i; if (no >= p->ncsalloc) { /* need another column of space */ p->ncsalloc += CHAR_BIT; nc = p->ncsalloc; assert(nc % CHAR_BIT == 0); nbytes = nc / CHAR_BIT * css; if (p->g->sets == NULL) p->g->sets = (cset *) malloc(nc * sizeof(cset)); else p->g->sets = (cset *) realloc((char *) p->g->sets, nc * sizeof(cset)); if (p->g->setbits == NULL) p->g->setbits = (uch *) malloc(nbytes); else { p->g->setbits = (uch *) realloc((char *) p->g->setbits, nbytes); /* xxx this isn't right if setbits is now NULL */ for (i = 0; i < no; i++) p->g->sets[i].ptr = p->g->setbits + css * (i / CHAR_BIT); } if (p->g->sets != NULL && p->g->setbits != NULL) memset((char *) p->g->setbits + (nbytes - css), 0, css); else { no = 0; SETERROR(REG_ESPACE);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -