📄 regexec.c
字号:
assert(n > 0); if ((size_t) n >= v->nmatch) return; MDEBUG(("setting %d\n", n)); v->pmatch[n].rm_so = OFF(begin); v->pmatch[n].rm_eo = OFF(end);}/* * dissect - determine subexpression matches (uncomplicated case) */static int /* regexec return code */dissect(struct vars * v, struct subre * t, chr *begin, /* beginning of relevant substring */ chr *end) /* end of same */{ assert(t != NULL); MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end))); switch (t->op) { case '=': /* terminal node */ assert(t->left == NULL && t->right == NULL); return REG_OKAY; /* no action, parent did the work */ break; case '|': /* alternation */ assert(t->left != NULL); return altdissect(v, t, begin, end); break; case 'b': /* back ref -- shouldn't be calling us! */ return REG_ASSERT; break; case '.': /* concatenation */ assert(t->left != NULL && t->right != NULL); return condissect(v, t, begin, end); break; case '(': /* capturing */ assert(t->left != NULL && t->right == NULL); assert(t->subno > 0); subset(v, t, begin, end); return dissect(v, t->left, begin, end); break; default: return REG_ASSERT; break; }}/* * condissect - determine concatenation subexpression matches (uncomplicated) */static int /* regexec return code */condissect(struct vars * v, struct subre * t, chr *begin, /* beginning of relevant substring */ chr *end) /* end of same */{ struct dfa *d; struct dfa *d2; chr *mid; int i; int shorter = (t->left->flags & SHORTER) ? 1 : 0; chr *stop = (shorter) ? end : begin; assert(t->op == '.'); assert(t->left != NULL && t->left->cnfa.nstates > 0); assert(t->right != NULL && t->right->cnfa.nstates > 0); d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); NOERR(); d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2); if (ISERR()) { assert(d2 == NULL); freedfa(d); return v->err; } /* pick a tentative midpoint */ if (shorter) mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL); else mid = longest(v, d, begin, end, (int *) NULL); if (mid == NULL) { freedfa(d); freedfa(d2); return REG_ASSERT; } MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); /* iterate until satisfaction or failure */ while (longest(v, d2, mid, end, (int *) NULL) != end) { /* that midpoint didn't work, find a new one */ if (mid == stop) { /* all possibilities exhausted! */ MDEBUG(("no midpoint!\n")); freedfa(d); freedfa(d2); return REG_ASSERT; } if (shorter) mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL); else mid = longest(v, d, begin, mid - 1, (int *) NULL); if (mid == NULL) { /* failed to find a new one! */ MDEBUG(("failed midpoint!\n")); freedfa(d); freedfa(d2); return REG_ASSERT; } MDEBUG(("new midpoint %ld\n", LOFF(mid))); } /* satisfaction */ MDEBUG(("successful\n")); freedfa(d); freedfa(d2); i = dissect(v, t->left, begin, mid); if (i != REG_OKAY) return i; return dissect(v, t->right, mid, end);}/* * altdissect - determine alternative subexpression matches (uncomplicated) */static int /* regexec return code */altdissect(struct vars * v, struct subre * t, chr *begin, /* beginning of relevant substring */ chr *end) /* end of same */{ struct dfa *d; int i; assert(t != NULL); assert(t->op == '|'); for (i = 0; t != NULL; t = t->right, i++) { MDEBUG(("trying %dth\n", i)); assert(t->left != NULL && t->left->cnfa.nstates > 0); d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); if (ISERR()) return v->err; if (longest(v, d, begin, end, (int *) NULL) == end) { MDEBUG(("success\n")); freedfa(d); return dissect(v, t->left, begin, end); } freedfa(d); } return REG_ASSERT; /* none of them matched?!? */}/* * cdissect - determine subexpression matches (with complications) * The retry memory stores the offset of the trial midpoint from begin, * plus 1 so that 0 uniquely means "clean slate". */static int /* regexec return code */cdissect(struct vars * v, struct subre * t, chr *begin, /* beginning of relevant substring */ chr *end) /* end of same */{ int er; assert(t != NULL); MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op)); switch (t->op) { case '=': /* terminal node */ assert(t->left == NULL && t->right == NULL); return REG_OKAY; /* no action, parent did the work */ break; case '|': /* alternation */ assert(t->left != NULL); return caltdissect(v, t, begin, end); break; case 'b': /* back ref -- shouldn't be calling us! */ assert(t->left == NULL && t->right == NULL); return cbrdissect(v, t, begin, end); break; case '.': /* concatenation */ assert(t->left != NULL && t->right != NULL); return ccondissect(v, t, begin, end); break; case '(': /* capturing */ assert(t->left != NULL && t->right == NULL); assert(t->subno > 0); er = cdissect(v, t->left, begin, end); if (er == REG_OKAY) subset(v, t, begin, end); return er; break; default: return REG_ASSERT; break; }}/* * ccondissect - concatenation subexpression matches (with complications) * The retry memory stores the offset of the trial midpoint from begin, * plus 1 so that 0 uniquely means "clean slate". */static int /* regexec return code */ccondissect(struct vars * v, struct subre * t, chr *begin, /* beginning of relevant substring */ chr *end) /* end of same */{ struct dfa *d; struct dfa *d2; chr *mid; int er; assert(t->op == '.'); assert(t->left != NULL && t->left->cnfa.nstates > 0); assert(t->right != NULL && t->right->cnfa.nstates > 0); if (t->left->flags & SHORTER) /* reverse scan */ return crevdissect(v, t, begin, end); d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); if (ISERR()) return v->err; d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC); if (ISERR()) { freedfa(d); return v->err; } MDEBUG(("cconcat %d\n", t->retry)); /* pick a tentative midpoint */ if (v->mem[t->retry] == 0) { mid = longest(v, d, begin, end, (int *) NULL); if (mid == NULL) { freedfa(d); freedfa(d2); return REG_NOMATCH; } MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); v->mem[t->retry] = (mid - begin) + 1; } else { mid = begin + (v->mem[t->retry] - 1); MDEBUG(("working midpoint %ld\n", LOFF(mid))); } /* iterate until satisfaction or failure */ for (;;) { /* try this midpoint on for size */ er = cdissect(v, t->left, begin, mid); if (er == REG_OKAY && longest(v, d2, mid, end, (int *) NULL) == end && (er = cdissect(v, t->right, mid, end)) == REG_OKAY) break; /* NOTE BREAK OUT */ if (er != REG_OKAY && er != REG_NOMATCH) { freedfa(d); freedfa(d2); return er; } /* that midpoint didn't work, find a new one */ if (mid == begin) { /* all possibilities exhausted */ MDEBUG(("%d no midpoint\n", t->retry)); freedfa(d); freedfa(d2); return REG_NOMATCH; } mid = longest(v, d, begin, mid - 1, (int *) NULL); if (mid == NULL) { /* failed to find a new one */ MDEBUG(("%d failed midpoint\n", t->retry)); freedfa(d); freedfa(d2); return REG_NOMATCH; } MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid))); v->mem[t->retry] = (mid - begin) + 1; zapmem(v, t->left); zapmem(v, t->right); } /* satisfaction */ MDEBUG(("successful\n")); freedfa(d); freedfa(d2); return REG_OKAY;}/* * crevdissect - determine backref shortest-first subexpression matches * The retry memory stores the offset of the trial midpoint from begin, * plus 1 so that 0 uniquely means "clean slate". */static int /* regexec return code */crevdissect(struct vars * v, struct subre * t, chr *begin, /* beginning of relevant substring */ chr *end) /* end of same */{ struct dfa *d; struct dfa *d2; chr *mid; int er; assert(t->op == '.'); assert(t->left != NULL && t->left->cnfa.nstates > 0); assert(t->right != NULL && t->right->cnfa.nstates > 0); assert(t->left->flags & SHORTER); /* concatenation -- need to split the substring between parts */ d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); if (ISERR()) return v->err; d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC); if (ISERR()) { freedfa(d); return v->err; } MDEBUG(("crev %d\n", t->retry)); /* pick a tentative midpoint */ if (v->mem[t->retry] == 0) { mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL); if (mid == NULL) { freedfa(d); freedfa(d2); return REG_NOMATCH; } MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); v->mem[t->retry] = (mid - begin) + 1; } else { mid = begin + (v->mem[t->retry] - 1); MDEBUG(("working midpoint %ld\n", LOFF(mid))); } /* iterate until satisfaction or failure */ for (;;) { /* try this midpoint on for size */ er = cdissect(v, t->left, begin, mid); if (er == REG_OKAY && longest(v, d2, mid, end, (int *) NULL) == end && (er = cdissect(v, t->right, mid, end)) == REG_OKAY) break; /* NOTE BREAK OUT */ if (er != REG_OKAY && er != REG_NOMATCH) { freedfa(d); freedfa(d2); return er; } /* that midpoint didn't work, find a new one */ if (mid == end) { /* all possibilities exhausted */ MDEBUG(("%d no midpoint\n", t->retry)); freedfa(d); freedfa(d2); return REG_NOMATCH; } mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL); if (mid == NULL) { /* failed to find a new one */ MDEBUG(("%d failed midpoint\n", t->retry)); freedfa(d); freedfa(d2); return REG_NOMATCH; } MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid))); v->mem[t->retry] = (mid - begin) + 1; zapmem(v, t->left); zapmem(v, t->right); } /* satisfaction */ MDEBUG(("successful\n")); freedfa(d); freedfa(d2); return REG_OKAY;}/* * cbrdissect - determine backref subexpression matches */static int /* regexec return code */cbrdissect(struct vars * v, struct subre * t, chr *begin, /* beginning of relevant substring */ chr *end) /* end of same */{ int i; int n = t->subno; size_t len; chr *paren; chr *p; chr *stop; int min = t->min; int max = t->max; assert(t != NULL); assert(t->op == 'b'); assert(n >= 0); assert((size_t) n < v->nmatch); MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max)); if (v->pmatch[n].rm_so == -1) return REG_NOMATCH; paren = v->start + v->pmatch[n].rm_so; len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so; /* no room to maneuver -- retries are pointless */ if (v->mem[t->retry]) return REG_NOMATCH; v->mem[t->retry] = 1; /* special-case zero-length string */ if (len == 0) { if (begin == end) return REG_OKAY; return REG_NOMATCH; } /* and too-short string */ assert(end >= begin); if ((size_t) (end - begin) < len) return REG_NOMATCH; stop = end - len; /* count occurrences */ i = 0; for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) { if ((*v->g->compare) (paren, p, len) != 0) break; i++; } MDEBUG(("cbackref found %d\n", i)); /* and sort it out */ if (p != end) /* didn't consume all of it */ return REG_NOMATCH; if (min <= i && (i <= max || max == INFINITY)) return REG_OKAY; return REG_NOMATCH; /* out of range */}/* * caltdissect - determine alternative subexpression matches (w. complications) */static int /* regexec return code */caltdissect(struct vars * v, struct subre * t, chr *begin, /* beginning of relevant substring */ chr *end) /* end of same */{ struct dfa *d; int er;#define UNTRIED 0 /* not yet tried at all */#define TRYING 1 /* top matched, trying submatches */#define TRIED 2 /* top didn't match or submatches exhausted */ if (t == NULL) return REG_NOMATCH; assert(t->op == '|'); if (v->mem[t->retry] == TRIED) return caltdissect(v, t->right, begin, end); MDEBUG(("calt n%d\n", t->retry)); assert(t->left != NULL); if (v->mem[t->retry] == UNTRIED) { d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); if (ISERR()) return v->err; if (longest(v, d, begin, end, (int *) NULL) != end) { freedfa(d); v->mem[t->retry] = TRIED; return caltdissect(v, t->right, begin, end); } freedfa(d); MDEBUG(("calt matched\n")); v->mem[t->retry] = TRYING; } er = cdissect(v, t->left, begin, end); if (er != REG_NOMATCH) return er; v->mem[t->retry] = TRIED; return caltdissect(v, t->right, begin, end);}#include "rege_dfa.c"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -