📄 regcomp.c
字号:
/* do a bunch of concatenated expressions */ conc = HERE(); while (MORE() && (c = PEEK()) != '|' && c != stop) p_ere_exp(p); REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ if (!EAT('|')) break; /* NOTE BREAK OUT */ if (first) { INSERT(OCH_, conc); /* offset is wrong */ prevfwd = conc; prevback = conc; first = 0; } ASTERN(OOR1, prevback); prevback = THERE(); AHEAD(prevfwd); /* fix previous offset */ prevfwd = HERE(); EMIT(OOR2, 0); /* offset is very wrong */ } if (!first) { /* tail-end fixups */ AHEAD(prevfwd); ASTERN(O_CH, prevback); } assert(!MORE() || SEE(stop));}/* - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op == static void p_ere_exp(register struct parse *p); */static voidp_ere_exp(p)register struct parse *p;{ register char c; register sopno pos; register int count; register int count2; register sopno subno; int wascaret = 0; assert(MORE()); /* caller should have ensured this */ c = GETNEXT(); pos = HERE(); switch (c) { case '(': REQUIRE(MORE(), REG_EPAREN); p->g->nsub++; subno = p->g->nsub; if (subno < NPAREN) p->pbegin[subno] = HERE(); EMIT(OLPAREN, subno); if (!SEE(')')) p_ere(p, ')'); if (subno < NPAREN) { p->pend[subno] = HERE(); assert(p->pend[subno] != 0); } EMIT(ORPAREN, subno); MUSTEAT(')', REG_EPAREN); break;#ifndef POSIX_MISTAKE case ')': /* happens only if no current unmatched ( */ /* * You may ask, why the ifndef? Because I didn't notice * this until slightly too late for 1003.2, and none of the * other 1003.2 regular-expression reviewers noticed it at * all. So an unmatched ) is legal POSIX, at least until * we can get it fixed. */ SETERROR(REG_EPAREN); break;#endif case '^': EMIT(OBOL, 0); p->g->iflags |= USEBOL; p->g->nbol++; wascaret = 1; break; case '$': EMIT(OEOL, 0); p->g->iflags |= USEEOL; p->g->neol++; break; case '|': SETERROR(REG_EMPTY); break; case '*': case '+': case '?': SETERROR(REG_BADRPT); break; case '.': if (p->g->cflags®_NEWLINE) nonnewline(p); else EMIT(OANY, 0); break; case '[': p_bracket(p); break; case '\\': REQUIRE(MORE(), REG_EESCAPE); c = GETNEXT(); ordinary(p, c); break; case '{': /* okay as ordinary except if digit follows */ REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT); /* FALLTHROUGH */ default: ordinary(p, c); break; } if (!MORE()) return; c = PEEK(); /* we call { a repetition if followed by a digit */ if (!( c == '*' || c == '+' || c == '?' || (c == '{' && MORE2() && isdigit(PEEK2())) )) return; /* no repetition, we're done */ NEXT(); REQUIRE(!wascaret, REG_BADRPT); switch (c) { case '*': /* implemented as +? */ /* this case does not require the (y|) trick, noKLUDGE */ INSERT(OPLUS_, pos); ASTERN(O_PLUS, pos); INSERT(OQUEST_, pos); ASTERN(O_QUEST, pos); break; case '+': INSERT(OPLUS_, pos); ASTERN(O_PLUS, pos); break; case '?': /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ INSERT(OCH_, pos); /* offset slightly wrong */ ASTERN(OOR1, pos); /* this one's right */ AHEAD(pos); /* fix the OCH_ */ EMIT(OOR2, 0); /* offset very wrong... */ AHEAD(THERE()); /* ...so fix it */ ASTERN(O_CH, THERETHERE()); break; case '{': count = p_count(p); if (EAT(',')) { if (isdigit(PEEK())) { count2 = p_count(p); REQUIRE(count <= count2, REG_BADBR); } else /* single number with comma */ count2 = INFINITY; } else /* just a single number */ count2 = count; repeat(p, pos, count, count2); if (!EAT('}')) { /* error heuristics */ while (MORE() && PEEK() != '}') NEXT(); REQUIRE(MORE(), REG_EBRACE); SETERROR(REG_BADBR); } break; } if (!MORE()) return; c = PEEK(); if (!( c == '*' || c == '+' || c == '?' || (c == '{' && MORE2() && isdigit(PEEK2())) ) ) return; SETERROR(REG_BADRPT);}/* - p_str - string (no metacharacters) "parser" == static void p_str(register struct parse *p); */static voidp_str(p)register struct parse *p;{ REQUIRE(MORE(), REG_EMPTY); while (MORE()) ordinary(p, GETNEXT());}/* - p_bre - BRE parser top level, anchoring and concatenation == static void p_bre(register struct parse *p, register int end1, \ == register int end2); * Giving end1 as OUT essentially eliminates the end1/end2 check. * * This implementation is a bit of a kludge, in that a trailing $ is first * taken as an ordinary character and then revised to be an anchor. The * only undesirable side effect is that '$' gets included as a character * category in such cases. This is fairly harmless; not worth fixing. * The amount of lookahead needed to avoid this kludge is excessive. */static voidp_bre(p, end1, end2)register struct parse *p;register int end1; /* first terminating character */register int end2; /* second terminating character */{ register sopno start = HERE(); register int first = 1; /* first subexpression? */ register int wasdollar = 0; if (EAT('^')) { EMIT(OBOL, 0); p->g->iflags |= USEBOL; p->g->nbol++; } while (MORE() && !SEETWO(end1, end2)) { wasdollar = p_simp_re(p, first); first = 0; } if (wasdollar) { /* oops, that was a trailing anchor */ DROP(1); EMIT(OEOL, 0); p->g->iflags |= USEEOL; p->g->neol++; } REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */}/* - p_simp_re - parse a simple RE, an atom possibly followed by a repetition == static int p_simp_re(register struct parse *p, int starordinary); */static int /* was the simple RE an unbackslashed $? */p_simp_re(p, starordinary)register struct parse *p;int starordinary; /* is a leading * an ordinary character? */{ register int c; register int count; register int count2; register sopno pos; register int i; register sopno subno;# define BACKSL (1<<CHAR_BIT) pos = HERE(); /* repetion op, if any, covers from here */ assert(MORE()); /* caller should have ensured this */ c = GETNEXT(); if (c == '\\') { REQUIRE(MORE(), REG_EESCAPE); c = BACKSL | (unsigned char)GETNEXT(); } switch (c) { case '.': if (p->g->cflags®_NEWLINE) nonnewline(p); else EMIT(OANY, 0); break; case '[': p_bracket(p); break; case BACKSL|'{': SETERROR(REG_BADRPT); break; case BACKSL|'(': p->g->nsub++; subno = p->g->nsub; if (subno < NPAREN) p->pbegin[subno] = HERE(); EMIT(OLPAREN, subno); /* the MORE here is an error heuristic */ if (MORE() && !SEETWO('\\', ')')) p_bre(p, '\\', ')'); if (subno < NPAREN) { p->pend[subno] = HERE(); assert(p->pend[subno] != 0); } EMIT(ORPAREN, subno); REQUIRE(EATTWO('\\', ')'), REG_EPAREN); break; case BACKSL|')': /* should not get here -- must be user */ case BACKSL|'}': SETERROR(REG_EPAREN); break; case BACKSL|'1': case BACKSL|'2': case BACKSL|'3': case BACKSL|'4': case BACKSL|'5': case BACKSL|'6': case BACKSL|'7': case BACKSL|'8': case BACKSL|'9': i = (c&~BACKSL) - '0'; assert(i < NPAREN); if (p->pend[i] != 0) { assert(i <= p->g->nsub); EMIT(OBACK_, i); assert(p->pbegin[i] != 0); assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); assert(OP(p->strip[p->pend[i]]) == ORPAREN); (void) dupl(p, p->pbegin[i]+1, p->pend[i]); EMIT(O_BACK, i); } else SETERROR(REG_ESUBREG); p->g->backrefs = 1; break; case '*': REQUIRE(starordinary, REG_BADRPT); /* FALLTHROUGH */ default: ordinary(p, c &~ BACKSL); break; } if (EAT('*')) { /* implemented as +? */ /* this case does not require the (y|) trick, noKLUDGE */ INSERT(OPLUS_, pos); ASTERN(O_PLUS, pos); INSERT(OQUEST_, pos); ASTERN(O_QUEST, pos); } else if (EATTWO('\\', '{')) { count = p_count(p); if (EAT(',')) { if (MORE() && isdigit(PEEK())) { count2 = p_count(p); REQUIRE(count <= count2, REG_BADBR); } else /* single number with comma */ count2 = INFINITY; } else /* just a single number */ count2 = count; repeat(p, pos, count, count2); if (!EATTWO('\\', '}')) { /* error heuristics */ while (MORE() && !SEETWO('\\', '}')) NEXT(); REQUIRE(MORE(), REG_EBRACE); SETERROR(REG_BADBR); } } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */ return(1); return(0);}/* - p_count - parse a repetition count == static int p_count(register struct parse *p); */static int /* the value */p_count(p)register struct parse *p;{ register int count = 0; register int ndigits = 0; while (MORE() && isdigit(PEEK()) && count <= DUPMAX) { count = count*10 + (GETNEXT() - '0'); ndigits++; } REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); return(count);}/* - p_bracket - parse a bracketed character list == static void p_bracket(register struct parse *p); * * Note a significant property of this code: if the allocset() did SETERROR, * no set operations are done. */static voidp_bracket(p)register struct parse *p;{ /* register char c; XXX jcf: unused */ register cset *cs = allocset(p); register int invert = 0; /* Dept of Truly Sickening Special-Case Kludges */ if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { EMIT(OBOW, 0); NEXTn(6); return; } if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { EMIT(OEOW, 0); NEXTn(6); return; } if (EAT('^')) invert++; /* make note to invert set at end */ if (EAT(']')) CHadd(cs, ']'); else if (EAT('-')) CHadd(cs, '-'); while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) p_b_term(p, cs); if (EAT('-')) CHadd(cs, '-'); MUSTEAT(']', REG_EBRACK); if (p->error != 0) /* don't mess things up further */ return; if (p->g->cflags®_ICASE) { register int i; register int ci; for (i = p->g->csetsize - 1; i >= 0; i--) if (CHIN(cs, i) && isalpha(i)) { ci = othercase(i); if (ci != i) CHadd(cs, ci); } if (cs->multis != NULL) mccase(p, cs); } if (invert) { register int i; for (i = p->g->csetsize - 1; i >= 0; i--) if (CHIN(cs, i)) CHsub(cs, i); else CHadd(cs, i); if (p->g->cflags®_NEWLINE) CHsub(cs, '\n'); if (cs->multis != NULL) mcinvert(p, cs); } assert(cs->multis == NULL); /* xxx */ if (nch(p, cs) == 1) { /* optimize singleton sets */ ordinary(p, firstch(p, cs)); freeset(p, cs); } else EMIT(OANYOF, freezeset(p, cs));}/* - p_b_term - parse one term of a bracketed character list == static void p_b_term(register struct parse *p, register cset *cs); */static voidp_b_term(p, cs)register struct parse *p;register cset *cs;{ register char c; register char start, finish; register int i; /* classify what we've got */ switch ((MORE()) ? PEEK() : '\0') { case '[': c = (MORE2()) ? PEEK2() : '\0'; break; case '-': SETERROR(REG_ERANGE); return; /* NOTE RETURN */ break;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -