📄 regex.c
字号:
/* Extended regular expression matching and search library. Copyright (C) 1985, 1989 Free Software Foundation, Inc.This program is free software; you can redistribute it and/or modifyit under the terms of the GNU General Public License as published bythe Free Software Foundation; either version 2 of the License, or(at your option) any later version.This program is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See theGNU General Public License for more details.You should have received a copy of the GNU General Public Licensealong with this program; if not, write to the Free SoftwareFoundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* To test, compile with -Dtest. This Dtestable feature turns this into a self-contained program which reads a pattern, describes how it compiles, then reads a string and searches for it. */#ifdef emacs/* The `emacs' switch turns on certain special matching commands that make sense only in emacs. */#include "config.h"#include "lisp.h"#include "buffer.h"#include "syntax.h"#else /* not emacs */#define bcopy(s,d,n) memcpy((d),(s),(n))#define bcmp(s1,s2,n) memcmp((s1),(s2),(n))#define bzero(s,n) memset((s),0,(n))/* Make alloca work the best possible way. */#ifdef __GNUC__#define alloca __builtin_alloca#else#ifdef sparc#include <alloca.h>#endif#endif/* * Define the syntax stuff, so we can do the \<...\> things. */#ifndef Sword /* must be non-zero in some of the tests below... */#define Sword 1#endif#define SYNTAX(c) re_syntax_table[c]#ifdef SYNTAX_TABLEchar *re_syntax_table;#elsestatic char re_syntax_table[256];static voidinit_syntax_once (){ register int c; static int done = 0; if (done) return; bzero (re_syntax_table, sizeof re_syntax_table); for (c = 'a'; c <= 'z'; c++) re_syntax_table[c] = Sword; for (c = 'A'; c <= 'Z'; c++) re_syntax_table[c] = Sword; for (c = '0'; c <= '9'; c++) re_syntax_table[c] = Sword; done = 1;}#endif /* SYNTAX_TABLE */#endif /* not emacs */#include "regex.h"/* Number of failure points to allocate space for initially, when matching. If this number is exceeded, more space is allocated, so it is not a hard limit. */#ifndef NFAILURES#define NFAILURES 80#endif /* NFAILURES *//* width of a byte in bits */#define BYTEWIDTH 8#ifndef SIGN_EXTEND_CHAR#define SIGN_EXTEND_CHAR(x) (x)#endifstatic int obscure_syntax = 0;/* Specify the precise syntax of regexp for compilation. This provides for compatibility for various utilities which historically have different, incompatible syntaxes. The argument SYNTAX is a bit-mask containing the two bits RE_NO_BK_PARENS and RE_NO_BK_VBAR. */intre_set_syntax (syntax) int syntax;{ int ret; ret = obscure_syntax; obscure_syntax = syntax; return ret;}/* re_compile_pattern takes a regular-expression string and converts it into a buffer full of byte commands for matching. PATTERN is the address of the pattern string SIZE is the length of it. BUFP is a struct re_pattern_buffer * which points to the info on where to store the byte commands. This structure contains a char * which points to the actual space, which should have been obtained with malloc. re_compile_pattern may use realloc to grow the buffer space. The number of bytes of commands can be found out by looking in the struct re_pattern_buffer that bufp pointed to, after re_compile_pattern returns.*/#define PATPUSH(ch) (*b++ = (char) (ch))#define PATFETCH(c) \ {if (p == pend) goto end_of_pattern; \ c = * (unsigned char *) p++; \ if (translate) c = translate[c]; }#define PATFETCH_RAW(c) \ {if (p == pend) goto end_of_pattern; \ c = * (unsigned char *) p++; }#define PATUNFETCH p--#define EXTEND_BUFFER \ { char *old_buffer = bufp->buffer; \ if (bufp->allocated == (1<<16)) goto too_big; \ bufp->allocated *= 2; \ if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \ if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \ goto memory_exhausted; \ c = bufp->buffer - old_buffer; \ b += c; \ if (fixup_jump) \ fixup_jump += c; \ if (laststart) \ laststart += c; \ begalt += c; \ if (pending_exact) \ pending_exact += c; \ }static void store_jump (), insert_jump ();char *re_compile_pattern (pattern, size, bufp) char *pattern; int size; struct re_pattern_buffer *bufp;{ register char *b = bufp->buffer; register char *p = pattern; char *pend = pattern + size; register unsigned c, c1; char *p1; unsigned char *translate = (unsigned char *) bufp->translate; /* address of the count-byte of the most recently inserted "exactn" command. This makes it possible to tell whether a new exact-match character can be added to that command or requires a new "exactn" command. */ char *pending_exact = 0; /* address of the place where a forward-jump should go to the end of the containing expression. Each alternative of an "or", except the last, ends with a forward-jump of this sort. */ char *fixup_jump = 0; /* address of start of the most recently finished expression. This tells postfix * where to find the start of its operand. */ char *laststart = 0; /* In processing a repeat, 1 means zero matches is allowed */ char zero_times_ok; /* In processing a repeat, 1 means many matches is allowed */ char many_times_ok; /* address of beginning of regexp, or inside of last \( */ char *begalt = b; /* Stack of information saved by \( and restored by \). Four stack elements are pushed by each \(: First, the value of b. Second, the value of fixup_jump. Third, the value of regnum. Fourth, the value of begalt. */ int stackb[40]; int *stackp = stackb; int *stacke = stackb + 40; int *stackt; /* Counts \('s as they are encountered. Remembered for the matching \), where it becomes the "register number" to put in the stop_memory command */ int regnum = 1; bufp->fastmap_accurate = 0;#ifndef emacs#ifndef SYNTAX_TABLE /* * Initialize the syntax table. */ init_syntax_once();#endif#endif if (bufp->allocated == 0) { bufp->allocated = 28; if (bufp->buffer) /* EXTEND_BUFFER loses when bufp->allocated is 0 */ bufp->buffer = (char *) realloc (bufp->buffer, 28); else /* Caller did not allocate a buffer. Do it for him */ bufp->buffer = (char *) malloc (28); if (!bufp->buffer) goto memory_exhausted; begalt = b = bufp->buffer; } while (p != pend) { if (b - bufp->buffer > bufp->allocated - 10) /* Note that EXTEND_BUFFER clobbers c */ EXTEND_BUFFER; PATFETCH (c); switch (c) { case '$': if (obscure_syntax & RE_TIGHT_VBAR) { if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p != pend) goto normal_char; /* Make operand of last vbar end before this `$'. */ if (fixup_jump) store_jump (fixup_jump, jump, b); fixup_jump = 0; PATPUSH (endline); break; } /* $ means succeed if at end of line, but only in special contexts. If randomly in the middle of a pattern, it is a normal character. */ if (p == pend || *p == '\n' || (obscure_syntax & RE_CONTEXT_INDEP_OPS) || (obscure_syntax & RE_NO_BK_PARENS ? *p == ')' : *p == '\\' && p[1] == ')') || (obscure_syntax & RE_NO_BK_VBAR ? *p == '|' : *p == '\\' && p[1] == '|')) { PATPUSH (endline); break; } goto normal_char; case '^': /* ^ means succeed if at beg of line, but only if no preceding pattern. */ if (laststart && p[-2] != '\n' && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS)) goto normal_char; if (obscure_syntax & RE_TIGHT_VBAR) { if (p != pattern + 1 && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS)) goto normal_char; PATPUSH (begline); begalt = b; } else PATPUSH (begline); break; case '+': case '?': if (obscure_syntax & RE_BK_PLUS_QM) goto normal_char; handle_plus: case '*': /* If there is no previous pattern, char not special. */ if (!laststart && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS)) goto normal_char; /* If there is a sequence of repetition chars, collapse it down to equivalent to just one. */ zero_times_ok = 0; many_times_ok = 0; while (1) { zero_times_ok |= c != '+'; many_times_ok |= c != '?'; if (p == pend) break; PATFETCH (c); if (c == '*') ; else if (!(obscure_syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')) ; else if ((obscure_syntax & RE_BK_PLUS_QM) && c == '\\') { int c1; PATFETCH (c1); if (!(c1 == '+' || c1 == '?')) { PATUNFETCH; PATUNFETCH; break; } c = c1; } else { PATUNFETCH; break; } } /* Star, etc. applied to an empty pattern is equivalent to an empty pattern. */ if (!laststart) break; /* Now we know whether 0 matches is allowed, and whether 2 or more matches is allowed. */ if (many_times_ok) { /* If more than one repetition is allowed, put in a backward jump at the end. */ store_jump (b, maybe_finalize_jump, laststart - 3); b += 3; } insert_jump (on_failure_jump, laststart, b + 3, b); pending_exact = 0; b += 3; if (!zero_times_ok) { /* At least one repetition required: insert before the loop a skip over the initial on-failure-jump instruction */ insert_jump (dummy_failure_jump, laststart, laststart + 6, b); b += 3; } break; case '.': laststart = b; PATPUSH (anychar); break; case '[': while (b - bufp->buffer > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH) /* Note that EXTEND_BUFFER clobbers c */ EXTEND_BUFFER; laststart = b; if (*p == '^') PATPUSH (charset_not), p++; else PATPUSH (charset); p1 = p; PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH); /* Clear the whole map */ bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); /* Read in characters and ranges, setting map bits */ while (1) { PATFETCH (c); if (c == ']' && p != p1 + 1) break; if (*p == '-' && p[1] != ']') { PATFETCH (c1); PATFETCH (c1); while (c <= c1) b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++; } else { b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH); } } /* Discard any bitmap bytes that are all 0 at the end of the map. Decrement the map-length byte too. */ while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) b[-1]--; b += b[-1]; break; case '(': if (! (obscure_syntax & RE_NO_BK_PARENS)) goto normal_char; else goto handle_open; case ')': if (! (obscure_syntax & RE_NO_BK_PARENS)) goto normal_char; else goto handle_close; case '\n': if (! (obscure_syntax & RE_NEWLINE_OR)) goto normal_char; else goto handle_bar; case '|': if (! (obscure_syntax & RE_NO_BK_VBAR)) goto normal_char; else goto handle_bar; case '\\': if (p == pend) goto invalid_pattern; PATFETCH_RAW (c); switch (c) { case '(': if (obscure_syntax & RE_NO_BK_PARENS) goto normal_backsl; handle_open: if (stackp == stacke) goto nesting_too_deep; if (regnum < RE_NREGS) { PATPUSH (start_memory); PATPUSH (regnum); } *stackp++ = b - bufp->buffer; *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0; *stackp++ = regnum++; *stackp++ = begalt - bufp->buffer; fixup_jump = 0; laststart = 0; begalt = b; break; case ')': if (obscure_syntax & RE_NO_BK_PARENS) goto normal_backsl; handle_close: if (stackp == stackb) goto unmatched_close; begalt = *--stackp + bufp->buffer; if (fixup_jump) store_jump (fixup_jump, jump, b); if (stackp[-1] < RE_NREGS) { PATPUSH (stop_memory); PATPUSH (stackp[-1]); } stackp -= 2; fixup_jump = 0; if (*stackp) fixup_jump = *stackp + bufp->buffer - 1; laststart = *--stackp + bufp->buffer; break; case '|': if (obscure_syntax & RE_NO_BK_VBAR) goto normal_backsl; handle_bar: insert_jump (on_failure_jump, begalt, b + 6, b); pending_exact = 0; b += 3; if (fixup_jump) store_jump (fixup_jump, jump, b); fixup_jump = b; b += 3; laststart = 0; begalt = b; break;#ifdef emacs case '=': PATPUSH (at_dot); break; case 's': laststart = b; PATPUSH (syntaxspec); PATFETCH (c); PATPUSH (syntax_spec_code[c]); break; case 'S': laststart = b; PATPUSH (notsyntaxspec); PATFETCH (c); PATPUSH (syntax_spec_code[c]); break;#endif /* emacs */ case 'w': laststart = b; PATPUSH (wordchar); break; case 'W': laststart = b; PATPUSH (notwordchar); break; case '<': PATPUSH (wordbeg); break; case '>': PATPUSH (wordend); break; case 'b': PATPUSH (wordbound); break; case 'B': PATPUSH (notwordbound); break; case '`': PATPUSH (begbuf); break; case '\'': PATPUSH (endbuf); break; case '1': case '2': case '3': case '4': case '5':
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -