📄 regex.c
字号:
if (*p == '+') for (sp = mp; lp < sp; lp++) store(*lp); store(END); store(END); sp = mp; while (--mp > lp) *mp = mp[-1]; store(CLO); mp = sp; break; case '\\': /* tags, backrefs .. */ switch(*++p) { case '(': if (tagc < MAXTAG) { tagstk[++tagi] = tagc; store(BOT); store(tagc++); } else badpat("Too many \\(\\) pairs"); break; case ')': if (*sp == BOT) badpat("Null pattern inside \\(\\)"); if (tagi > 0) { store(EOT); store(tagstk[tagi--]); } else badpat("Unmatched \\)"); break; case '<': store(BOW); break; case '>': if (*sp == BOW) badpat("Null pattern inside \\<\\>"); store(EOW); break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': n = *p-'0'; if (tagi > 0 && tagstk[tagi] == n) badpat("Cyclical reference"); if (tagc > n) { store(REF); store(n); } else badpat("Undetermined reference"); break;#ifdef EXTEND case 'b': store(CHR); store('\b'); break; case 'n': store(CHR); store('\n'); break; case 'f': store(CHR); store('\f'); break; case 'r': store(CHR); store('\r'); break; case 't': store(CHR); store('\t'); break;#endif default: store(CHR); store(*p); } break; default : /* an ordinary char */ store(CHR); store(*p); break; } sp = lp; } if (tagi > 0) badpat("Unmatched \\("); store(END); sta = OKP; return 0;}static const char *bol;static const char *bopat[MAXTAG];static const char *eopat[MAXTAG];static const char *pmatch P((const char *, CHAR *));/* * re_exec: * execute nfa to find a match. * * special cases: (nfa[0]) * BOL * Match only once, starting from the * beginning. * CHR * First locate the character without * calling pmatch, and if found, call * pmatch for the remaining string. * END * re_comp failed, poor luser did not * check for it. Fail fast. * * If a match is found, bopat[0] and eopat[0] are set * to the beginning and the end of the matched fragment, * respectively. 
*
 */
int
re_exec(lp)
register const char *lp;
{
	register const char *match_end = 0;	/* end of the match; 0 while none */
	register CHAR *prog = nfa;		/* compiled pattern to run */
	register char first;			/* literal a CHR pattern starts with */
	register int op;			/* first opcode of the pattern */
	register int tag;

	bol = lp;

	/* drop the tag start positions left over from an earlier search */
	for (tag = 0; tag <= 9; tag++)
		bopat[tag] = 0;

	op = *prog;

	/* munged automaton. fail always */
	if (op == END)
		return 0;

	if (op == BOL) {
		/* anchored: one attempt, at the beginning of the line only */
		match_end = pmatch(lp, prog);
	} else {
		if (op == CHR) {
			/*
			 * The pattern opens with an ordinary char: skip
			 * ahead to its first occurrence before running
			 * the matcher; if the string ends first, nothing
			 * can match.
			 */
			first = prog[1];
			while (*lp && *lp != first)
				lp++;
			if (!*lp)
				return 0;
		}

		/*
		 * regular matching all the way: try each remaining
		 * start position in turn. The terminating NUL itself
		 * is never tried as a start position.
		 */
		for (; *lp; lp++) {
			match_end = pmatch(lp, prog);
			if (match_end)
				break;
		}
	}

	if (!match_end)
		return 0;

	/*
	 * NOTE(review): pmatch returns 0 when it bumps internal_error,
	 * so a search that failed because of an internal error has
	 * already returned 0 above; -1 is only reached when a match
	 * was found while internal_error is non-zero.
	 */
	if (internal_error)
		return -1;

	bopat[0] = lp;
	eopat[0] = match_end;
	return 1;
}

/*
 * pmatch:
 *	internal routine for the hard part
 *
 *	This code is mostly snarfed from an early
 *	grep written by David Conroy. The backref and
 *	tag stuff, and various other mods are by oZ.
 *
 *	special cases: (nfa[n], nfa[n+1])
 *		CLO ANY
 *			We KNOW ".*" will match ANYTHING
 *			up to the end of line. Thus, go to
 *			the end of line straight, without
 *			calling pmatch recursively. As in
 *			the other closure cases, the remaining
 *			pattern must be matched by moving
 *			backwards on the string recursively,
 *			to find a match for xy (x is ".*" and
 *			y is the remaining pattern) where
 *			the match satisfies the LONGEST match
 *			for x followed by a match for y.
 *		CLO CHR
 *			We can again scan the string forward
 *			for the single char without recursion,
 *			and at the point of failure, we execute
 *			the remaining nfa recursively, as
 *			described above.
 *
 *	At the end of a successful match, bopat[n] and eopat[n]
 *	are set to the beginning and end of subpatterns matched
 *	by tagged expressions (n = 1 to 9).
 *
 */

/*
 * character classification table for word boundary
 * operators BOW and EOW. the reason for not using
 * ctype macros is that we can let the user add into
 * our own table. see re_modw.
This table is not in * the bitset form, since we may wish to extend it * in the future for other character classifications. * * TRUE for 0-9 A-Z a-z _ */static char chrtyp[MAXCHR] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };#define inascii(x) (0177&(x))#define iswordc(x) chrtyp[inascii(x)]#define isinset(x,y) ((x)[((y)&BLKIND)>>3] & bitarr[(y)&BITIND])/* * skip values for CLO XXX to skip past the closure * */#define ANYSKIP 2 /* [CLO] ANY END ... */#define CHRSKIP 3 /* [CLO] CHR chr END ... */#define CCLSKIP 18 /* [CLO] CCL 16bytes END ... */static const char *pmatch(lp, ap)register const char *lp;register CHAR *ap;{ register int op, c, n; register const char *e; /* extra pointer for CLO */ register const char *bp; /* beginning of subpat.. */ register const char *ep; /* ending of subpat.. */ const char *are; /* to save the line ptr. 
*/ while ((op = *ap++) != END) switch(op) { case CHR: if (*lp++ != *ap++) return 0; break; case ANY: if (!*lp++) return 0; break; case CCL: c = *lp++; if (!isinset(ap,c)) return 0; ap += BITBLK; break; case BOL: if (lp != bol) return 0; break; case EOL: if (*lp) return 0; break; case BOT: bopat[(int)(*ap++)] = lp; break; case EOT: eopat[(int)(*ap++)] = lp; break; case BOW: if ((lp!=bol && iswordc(lp[-1])) || !iswordc(*lp)) return 0; break; case EOW: if (lp==bol || !iswordc(lp[-1]) || iswordc(*lp)) return 0; break; case REF: n = *ap++; bp = bopat[n]; ep = eopat[n]; while (bp < ep) if (*bp++ != *lp++) return 0; break; case CLO: are = lp; switch(*ap) { case ANY: while (*lp) lp++; n = ANYSKIP; break; case CHR: c = *(ap+1); while (*lp && c == *lp) lp++; n = CHRSKIP; break; case CCL: while ((c = *lp) && isinset(ap+1,c)) lp++; n = CCLSKIP; break; default: internal_error++; return 0; } ap += n; while (lp >= are) { if ((e = pmatch(lp, ap))) return e; --lp; } return 0; default: internal_error++; return 0; } return lp;}#endif /* Need regex libraries? Compile to nothing if not. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -