📄 regcomp.c
字号:
p_bre(p, end1, end2)struct parse *p;int end1; /* first terminating character */int end2; /* second terminating character */{ sopno start = HERE(); int first = 1; /* first subexpression? */ int wasdollar = 0; if (EAT('^')) { EMIT(OBOL, 0); p->g->iflags |= USEBOL; p->g->nbol++; } while (MORE() && !SEETWO(end1, end2)) { wasdollar = p_simp_re(p, first); first = 0; } if (wasdollar) { /* oops, that was a trailing anchor */ DROP(1); EMIT(OEOL, 0); p->g->iflags |= USEEOL; p->g->neol++; } (void)REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */}/* - p_simp_re - parse a simple RE, an atom possibly followed by a repetition == static int p_simp_re(struct parse *p, int starordinary); */static int /* was the simple RE an unbackslashed $? */p_simp_re(p, starordinary)struct parse *p;int starordinary; /* is a leading * an ordinary character? */{ int c; int count; int count2; sopno pos; int i; sopno subno;# define BACKSL (1<<CHAR_BIT) pos = HERE(); /* repetion op, if any, covers from here */ assert(MORE()); /* caller should have ensured this */ c = GETNEXT(); if (c == '\\') { (void)REQUIRE(MORE(), REG_EESCAPE); c = BACKSL | GETNEXT(); } switch (c) { case '.': if (p->g->cflags®_NEWLINE) nonnewline(p); else EMIT(OANY, 0); break; case '[': p_bracket(p); break; case BACKSL|'{': SETERROR(REG_BADRPT); break; case BACKSL|'(': p->g->nsub++; subno = p->g->nsub; if (subno < NPAREN) p->pbegin[subno] = HERE(); EMIT(OLPAREN, subno); /* the MORE here is an error heuristic */ if (MORE() && !SEETWO('\\', ')')) p_bre(p, '\\', ')'); if (subno < NPAREN) { p->pend[subno] = HERE(); assert(p->pend[subno] != 0); } EMIT(ORPAREN, subno); (void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN); break; case BACKSL|')': /* should not get here -- must be user */ case BACKSL|'}': SETERROR(REG_EPAREN); break; case BACKSL|'1': case BACKSL|'2': case BACKSL|'3': case BACKSL|'4': case BACKSL|'5': case BACKSL|'6': case BACKSL|'7': case BACKSL|'8': case BACKSL|'9': i = (c&~BACKSL) - '0'; assert(i < NPAREN); if (p->pend[i] != 0) { assert(i <= p->g->nsub); EMIT(OBACK_, i); assert(p->pbegin[i] != 0); assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); assert(OP(p->strip[p->pend[i]]) == ORPAREN); (void) dupl(p, p->pbegin[i]+1, p->pend[i]); EMIT(O_BACK, i); } else SETERROR(REG_ESUBREG); p->g->backrefs = 1; break; case '*': (void)REQUIRE(starordinary, REG_BADRPT); /* FALLTHROUGH */ default: ordinary(p, (char)c); break; } if (EAT('*')) { /* implemented as +? */ /* this case does not require the (y|) trick, noKLUDGE */ INSERT(OPLUS_, pos); ASTERN(O_PLUS, pos); INSERT(OQUEST_, pos); ASTERN(O_QUEST, pos); } else if (EATTWO('\\', '{')) { count = p_count(p); if (EAT(',')) { if (MORE() && isdigit((uch)PEEK())) { count2 = p_count(p); (void)REQUIRE(count <= count2, REG_BADBR); } else /* single number with comma */ count2 = INFINITY; } else /* just a single number */ count2 = count; repeat(p, pos, count, count2); if (!EATTWO('\\', '}')) { /* error heuristics */ while (MORE() && !SEETWO('\\', '}')) NEXT(); (void)REQUIRE(MORE(), REG_EBRACE); SETERROR(REG_BADBR); } } else if (c == '$') /* $ (but not \$) ends it */ return(1); return(0);}/* - p_count - parse a repetition count == static int p_count(struct parse *p); */static int /* the value */p_count(p)struct parse *p;{ int count = 0; int ndigits = 0; while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) { count = count*10 + (GETNEXT() - '0'); ndigits++; } (void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); return(count);}/* - p_bracket - parse a bracketed character list == static void p_bracket(struct parse *p); * * Note a significant property of this code: if the allocset() did SETERROR, * no set operations are done. */static voidp_bracket(p)struct parse *p;{ cset *cs = allocset(p); int invert = 0; /* Dept of Truly Sickening Special-Case Kludges */ if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { EMIT(OBOW, 0); NEXTn(6); return; } if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { EMIT(OEOW, 0); NEXTn(6); return; } if (EAT('^')) invert++; /* make note to invert set at end */ if (EAT(']')) CHadd(cs, ']'); else if (EAT('-')) CHadd(cs, '-'); while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) p_b_term(p, cs); if (EAT('-')) CHadd(cs, '-'); (void)MUSTEAT(']', REG_EBRACK); if (p->error != 0) /* don't mess things up further */ return; if (p->g->cflags®_ICASE) { int i; int ci; for (i = p->g->csetsize - 1; i >= 0; i--) if (CHIN(cs, i) && isalpha(i)) { ci = othercase(i); if (ci != i) CHadd(cs, ci); } if (cs->multis != NULL) mccase(p, cs); } if (invert) { int i; for (i = p->g->csetsize - 1; i >= 0; i--) if (CHIN(cs, i)) CHsub(cs, i); else CHadd(cs, i); if (p->g->cflags®_NEWLINE) CHsub(cs, '\n'); if (cs->multis != NULL) mcinvert(p, cs); } assert(cs->multis == NULL); /* xxx */ if (nch(p, cs) == 1) { /* optimize singleton sets */ ordinary(p, firstch(p, cs)); freeset(p, cs); } else EMIT(OANYOF, freezeset(p, cs));}/* - p_b_term - parse one term of a bracketed character list == static void p_b_term(struct parse *p, cset *cs); */static voidp_b_term(p, cs)struct parse *p;cset *cs;{ char c; char start, finish; int i; /* classify what we've got */ switch ((MORE()) ? PEEK() : '\0') { case '[': c = (MORE2()) ? PEEK2() : '\0'; break; case '-': SETERROR(REG_ERANGE); return; /* NOTE RETURN */ break; default: c = '\0'; break; } switch (c) { case ':': /* character class */ NEXT2(); (void)REQUIRE(MORE(), REG_EBRACK); c = PEEK(); (void)REQUIRE(c != '-' && c != ']', REG_ECTYPE); p_b_cclass(p, cs); (void)REQUIRE(MORE(), REG_EBRACK); (void)REQUIRE(EATTWO(':', ']'), REG_ECTYPE); break; case '=': /* equivalence class */ NEXT2(); (void)REQUIRE(MORE(), REG_EBRACK); c = PEEK(); (void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE); p_b_eclass(p, cs); (void)REQUIRE(MORE(), REG_EBRACK); (void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); break; default: /* symbol, ordinary character, or range *//* xxx revision needed for multichar stuff */ start = p_b_symbol(p); if (SEE('-') && MORE2() && PEEK2() != ']') { /* range */ NEXT(); if (EAT('-')) finish = '-'; else finish = p_b_symbol(p); } else finish = start; if (start == finish) CHadd(cs, start); else { if (__collate_load_error) { (void)REQUIRE((uch)start <= (uch)finish, REG_ERANGE); for (i = (uch)start; i <= (uch)finish; i++) CHadd(cs, i); } else { (void)REQUIRE(__collate_range_cmp(start, finish) <= 0, REG_ERANGE); for (i = CHAR_MIN; i <= CHAR_MAX; i++) { if ( __collate_range_cmp(start, i) <= 0 && __collate_range_cmp(i, finish) <= 0 ) CHadd(cs, i); } } } break; }}/* - p_b_cclass - parse a character-class name and deal with it == static void p_b_cclass(struct parse *p, cset *cs); */static voidp_b_cclass(p, cs)struct parse *p;cset *cs;{ int c; char *sp = p->next; struct cclass *cp; size_t len; while (MORE() && isalpha((uch)PEEK())) NEXT(); len = p->next - sp; for (cp = cclasses; cp->name != NULL; cp++) if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') break; if (cp->name == NULL) { /* oops, didn't find it */ SETERROR(REG_ECTYPE); return; } switch (cp->fidx) { case CALNUM: for (c = CHAR_MIN; c <= CHAR_MAX; c++) if (isalnum((uch)c)) CHadd(cs, c); break; case CALPHA: for (c = CHAR_MIN; c <= CHAR_MAX; c++) if (isalpha((uch)c)) CHadd(cs, c); break; case CBLANK: for (c = CHAR_MIN; c <= CHAR_MAX; c++) if (isblank((uch)c)) CHadd(cs, c); break; case CCNTRL: for (c = CHAR_MIN; c <= CHAR_MAX; c++) if (iscntrl((uch)c)) CHadd(cs, c); break; case CDIGIT: for (c = CHAR_MIN; c <= CHAR_MAX; c++) if (isdigit((uch)c)) CHadd(cs, c); break; case CGRAPH: for (c = CHAR_MIN; c <= CHAR_MAX; c++) if (isgraph((uch)c)) CHadd(cs, c); break; case CLOWER: for (c = CHAR_MIN; c <= CHAR_MAX; c++) if (islower((uch)c)) CHadd(cs, c); break; case CPRINT: for (c = CHAR_MIN; c <= CHAR_MAX; c++) if (isprint((uch)c)) CHadd(cs, c); break; case CPUNCT: for (c = CHAR_MIN; c <= CHAR_MAX; c++) if (ispunct((uch)c)) CHadd(cs, c); break; case CSPACE: for (c = CHAR_MIN; c <= CHAR_MAX; c++) if (isspace((uch)c)) CHadd(cs, c); break; case CUPPER: for (c = CHAR_MIN; c <= CHAR_MAX; c++) if (isupper((uch)c)) CHadd(cs, c); break; case CXDIGIT: for (c = CHAR_MIN; c <= CHAR_MAX; c++) if (isxdigit((uch)c)) CHadd(cs, c); break; }#if 0 for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) MCadd(p, cs, u);#endif}/* - p_b_eclass - parse an equivalence-class name and deal with it == static void p_b_eclass(struct parse *p, cset *cs); * * This implementation is incomplete. xxx */static voidp_b_eclass(p, cs)struct parse *p;cset *cs;{ char c; c = p_b_coll_elem(p, '='); CHadd(cs, c);}/* - p_b_symbol - parse a character or [..]ed multicharacter collating symbol == static char p_b_symbol(struct parse *p); */static char /* value of symbol */p_b_symbol(p)struct parse *p;{ char value; (void)REQUIRE(MORE(), REG_EBRACK); if (!EATTWO('[', '.')) return(GETNEXT()); /* collating symbol */ value = p_b_coll_elem(p, '.'); (void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); return(value);}/* - p_b_coll_elem - parse a collating-element name and look it up == static char p_b_coll_elem(struct parse *p, int endc); */static char /* value of collating element */p_b_coll_elem(p, endc)struct parse *p;int endc; /* name ended by endc,']' */{ char *sp = p->next; struct cname *cp; int len; while (MORE() && !SEETWO(endc, ']')) NEXT(); if (!MORE()) { SETERROR(REG_EBRACK); return(0); } len = p->next - sp; for (cp = cnames; cp->name != NULL; cp++) if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') return(cp->code); /* known name */ if (len == 1) return(*sp); /* single character */ SETERROR(REG_ECOLLATE); /* neither */ return(0);}/* - othercase - return the case counterpart of an alphabetic == static char othercase(int ch); */static char /* if no counterpart, return ch */othercase(ch)int ch;{ ch = (uch)ch; assert(isalpha(ch)); if (isupper(ch)) return(tolower(ch)); else if (islower(ch)) return(toupper(ch)); else /* peculiar, but could happen */ return(ch);}/* - bothcases - emit a dualcase version of a two-case character == static void bothcases(struct parse *p, int ch); * * Boy, is this implementation ever a kludge... */static voidbothcases(p, ch)struct parse *p;int ch;{ char *oldnext = p->next; char *oldend = p->end; char bracket[3]; ch = (uch)ch; assert(othercase(ch) != ch); /* p_bracket() would recurse */ p->next = bracket; p->end = bracket+2; bracket[0] = ch; bracket[1] = ']'; bracket[2] = '\0'; p_bracket(p);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -