📄 regcomp.c
字号:
/* deal with undersized strip */ if (p->slen >= p->ssize) enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ assert(p->slen < p->ssize); /* finally, it's all reduced to the easy case */ p->strip[p->slen++] = SOP(op, opnd);}/* - doinsert - insert a sop into the strip == static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos); */static voiddoinsert(p, op, opnd, pos)struct parse *p;sop op;size_t opnd;sopno pos;{ sopno sn; sop s; int i; /* avoid making error situations worse */ if (p->error != 0) return; sn = HERE(); EMIT(op, opnd); /* do checks, ensure space */ assert(HERE() == sn+1); s = p->strip[sn]; /* adjust paren pointers */ assert(pos > 0); for (i = 1; i < NPAREN; i++) { if (p->pbegin[i] >= pos) { p->pbegin[i]++; } if (p->pend[i] >= pos) { p->pend[i]++; } } memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], (HERE()-pos-1)*sizeof(sop)); p->strip[pos] = s;}/* - dofwd - complete a forward reference == static void dofwd(struct parse *p, sopno pos, sop value); */static voiddofwd(p, pos, value)struct parse *p;sopno pos;sop value;{ /* avoid making error situations worse */ if (p->error != 0) return; assert(value < 1<<OPSHIFT); p->strip[pos] = OP(p->strip[pos]) | value;}/* - enlarge - enlarge the strip == static void enlarge(struct parse *p, sopno size); */static voidenlarge(p, size)struct parse *p;sopno size;{ sop *sp; if (p->ssize >= size) return; sp = (sop *)realloc(p->strip, size*sizeof(sop)); if (sp == NULL) { SETERROR(REG_ESPACE); return; } p->strip = sp; p->ssize = size;}/* - stripsnug - compact the strip == static void stripsnug(struct parse *p, struct re_guts *g); */static voidstripsnug(p, g)struct parse *p;struct re_guts *g;{ g->nstates = p->slen; g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop)); if (g->strip == NULL) { SETERROR(REG_ESPACE); g->strip = p->strip; }}/* - findmust - fill in must and mlen with longest mandatory literal string == static void findmust(struct parse *p, struct re_guts *g); * * This algorithm could do fancy things like analyzing the operands of | * for common subsequences. Someday. This code is simple and finds most * of the interesting cases. * * Note that must and mlen got initialized during setup. */static voidfindmust(p, g)struct parse *p;struct re_guts *g;{ sop *scan; sop *start; sop *newstart; sopno newlen; sop s; char *cp; sopno i; int offset; int cs, mccs; /* avoid making error situations worse */ if (p->error != 0) return; /* Find out if we can handle OANYOF or not */ mccs = 0; for (cs = 0; cs < g->ncsets; cs++) if (g->sets[cs].multis != NULL) mccs = 1; /* find the longest OCHAR sequence in strip */ newlen = 0; offset = 0; g->moffset = 0; scan = g->strip + 1; do { s = *scan++; switch (OP(s)) { case OCHAR: /* sequence member */ if (newlen == 0) /* new sequence */ newstart = scan - 1; newlen++; break; case OPLUS_: /* things that don't break one */ case OLPAREN: case ORPAREN: break; case OQUEST_: /* things that must be skipped */ case OCH_: offset = altoffset(scan, offset, mccs); scan--; do { scan += OPND(s); s = *scan; /* assert() interferes w debug printouts */ if (OP(s) != O_QUEST && OP(s) != O_CH && OP(s) != OOR2) { g->iflags |= BAD; return; } } while (OP(s) != O_QUEST && OP(s) != O_CH); /* fallthrough */ case OBOW: /* things that break a sequence */ case OEOW: case OBOL: case OEOL: case O_QUEST: case O_CH: case OEND: if (newlen > g->mlen) { /* ends one */ start = newstart; g->mlen = newlen; if (offset > -1) { g->moffset += offset; offset = newlen; } else g->moffset = offset; } else { if (offset > -1) offset += newlen; } newlen = 0; break; case OANY: if (newlen > g->mlen) { /* ends one */ start = newstart; g->mlen = newlen; if (offset > -1) { g->moffset += offset; offset = newlen; } else g->moffset = offset; } else { if (offset > -1) offset += newlen; } if (offset > -1) offset++; newlen = 0; break; case OANYOF: /* may or may not invalidate offset */ /* First, everything as OANY */ if (newlen > g->mlen) { /* ends one */ start = newstart; g->mlen = newlen; if (offset > -1) { g->moffset += offset; offset = newlen; } else g->moffset = offset; } else { if (offset > -1) offset += newlen; } if (offset > -1) offset++; newlen = 0; /* And, now, if we found out we can't deal with * it, make offset = -1. */ if (mccs) offset = -1; break; default: /* Anything here makes it impossible or too hard * to calculate the offset -- so we give up; * save the last known good offset, in case the * must sequence doesn't occur later. */ if (newlen > g->mlen) { /* ends one */ start = newstart; g->mlen = newlen; if (offset > -1) g->moffset += offset; else g->moffset = offset; } offset = -1; newlen = 0; break; } } while (OP(s) != OEND); if (g->mlen == 0) { /* there isn't one */ g->moffset = -1; return; } /* turn it into a character string */ g->must = malloc((size_t)g->mlen + 1); if (g->must == NULL) { /* argh; just forget it */ g->mlen = 0; g->moffset = -1; return; } cp = g->must; scan = start; for (i = g->mlen; i > 0; i--) { while (OP(s = *scan++) != OCHAR) continue; assert(cp < g->must + g->mlen); *cp++ = (char)OPND(s); } assert(cp == g->must + g->mlen); *cp++ = '\0'; /* just on general principles */}/* - altoffset - choose biggest offset among multiple choices == static int altoffset(sop *scan, int offset, int mccs); * * Compute, recursively if necessary, the largest offset among multiple * re paths. */static intaltoffset(scan, offset, mccs)sop *scan;int offset;int mccs;{ int largest; int try; sop s; /* If we gave up already on offsets, return */ if (offset == -1) return -1; largest = 0; try = 0; s = *scan++; while (OP(s) != O_QUEST && OP(s) != O_CH) { switch (OP(s)) { case OOR1: if (try > largest) largest = try; try = 0; break; case OQUEST_: case OCH_: try = altoffset(scan, try, mccs); if (try == -1) return -1; scan--; do { scan += OPND(s); s = *scan; if (OP(s) != O_QUEST && OP(s) != O_CH && OP(s) != OOR2) return -1; } while (OP(s) != O_QUEST && OP(s) != O_CH); /* We must skip to the next position, or we'll * leave altoffset() too early. */ scan++; break; case OANYOF: if (mccs) return -1; case OCHAR: case OANY: try++; case OBOW: case OEOW: case OLPAREN: case ORPAREN: case OOR2: break; default: try = -1; break; } if (try == -1) return -1; s = *scan++; } if (try > largest) largest = try; return largest+offset;}/* - computejumps - compute char jumps for BM scan == static void computejumps(struct parse *p, struct re_guts *g); * * This algorithm assumes g->must exists and is has size greater than * zero. It's based on the algorithm found on Computer Algorithms by * Sara Baase. * * A char jump is the number of characters one needs to jump based on * the value of the character from the text that was mismatched. */static voidcomputejumps(p, g)struct parse *p;struct re_guts *g;{ int ch; int mindex; /* Avoid making errors worse */ if (p->error != 0) return; g->charjump = (int*) malloc((NC + 1) * sizeof(int)); if (g->charjump == NULL) /* Not a fatal error */ return; /* Adjust for signed chars, if necessary */ g->charjump = &g->charjump[-(CHAR_MIN)]; /* If the character does not exist in the pattern, the jump * is equal to the number of characters in the pattern. */ for (ch = CHAR_MIN; ch < (CHAR_MAX + 1); ch++) g->charjump[ch] = g->mlen; /* If the character does exist, compute the jump that would * take us to the last character in the pattern equal to it * (notice that we match right to left, so that last character * is the first one that would be matched). */ for (mindex = 0; mindex < g->mlen; mindex++) g->charjump[g->must[mindex]] = g->mlen - mindex - 1;}/* - computematchjumps - compute match jumps for BM scan == static void computematchjumps(struct parse *p, struct re_guts *g); * * This algorithm assumes g->must exists and is has size greater than * zero. It's based on the algorithm found on Computer Algorithms by * Sara Baase. * * A match jump is the number of characters one needs to advance based * on the already-matched suffix. * Notice that all values here are minus (g->mlen-1), because of the way * the search algorithm works. */static voidcomputematchjumps(p, g)struct parse *p;struct re_guts *g;{ int mindex; /* General "must" iterator */ int suffix; /* Keeps track of matching suffix */ int ssuffix; /* Keeps track of suffixes' suffix */ int* pmatches; /* pmatches[k] points to the next i * such that i+1...mlen is a substring * of k+1...k+mlen-i-1 */ /* Avoid making errors worse */ if (p->error != 0) return; pmatches = (int*) malloc(g->mlen * sizeof(unsigned int)); if (pmatches == NULL) { g->matchjump = NULL; return; } g->matchjump = (int*) malloc(g->mlen * sizeof(unsigned int)); if (g->matchjump == NULL) /* Not a fatal error */ return; /* Set maximum possible jump for each character in the pattern */ for (mindex = 0; mindex < g->mlen; mindex++) g->matchjump[mindex] = 2*g->mlen - mindex - 1; /* Compute pmatches[] */ for (mindex = g->mlen - 1, suffix = g->mlen; mindex >= 0; mindex--, suffix--) { pmatches[mindex] = suffix; /* If a mismatch is found, interrupting the substring, * compute the matchjump for that position. If no * mismatch is found, then a text substring mismatched * against the suffix will also mismatch against the * substring. */ while (suffix < g->mlen && g->must[mindex] != g->must[suffix]) { g->matchjump[suffix] = MIN(g->matchjump[suffix], g->mlen - mindex - 1); suffix = pmatches[suffix]; } } /* Compute the matchjump up to the last substring found to jump * to the beginning of the largest must pattern prefix matching * it's own suffix. */ for (mindex = 0; mindex <= suffix; mindex++) g->matchjump[mindex] = MIN(g->matchjump[mindex], g->mlen + suffix - mindex); ssuffix = pmatches[suffix]; while (suffix < g->mlen) { while (suffix <= ssuffix && suffix < g->mlen) { g->matchjump[suffix] = MIN(g->matchjump[suffix], g->mlen + ssuffix - suffix); suffix++; } if (suffix < g->mlen) ssuffix = pmatches[ssuffix]; } free(pmatches);}/* - pluscount - count + nesting == static sopno pluscount(struct parse *p, struct re_guts *g); */static sopno /* nesting depth */pluscount(p, g)struct parse *p;struct re_guts *g;{ sop *scan; sop s; sopno plusnest = 0; sopno maxnest = 0; if (p->error != 0) return(0); /* there may not be an OEND */ scan = g->strip + 1; do { s = *scan++; switch (OP(s)) { case OPLUS_: plusnest++; break; case O_PLUS: if (plusnest > maxnest) maxnest = plusnest; plusnest--; break; } } while (OP(s) != OEND); if (plusnest != 0) g->iflags |= BAD; return(maxnest);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -