📄 dfa.c
字号:
/* dfa.c - deterministic extended regexp routines for GNU
   Copyright (C) 1988 Free Software Foundation, Inc.
                      Written June, 1988 by Mike Haertel
                      Modified July, 1988 by Arthur David Olson
                      to assist BMG speedups

                       NO WARRANTY

  BECAUSE THIS PROGRAM IS LICENSED FREE OF CHARGE, WE PROVIDE ABSOLUTELY
NO WARRANTY, TO THE EXTENT PERMITTED BY APPLICABLE STATE LAW.  EXCEPT
WHEN OTHERWISE STATED IN WRITING, FREE SOFTWARE FOUNDATION, INC,
RICHARD M. STALLMAN AND/OR OTHER PARTIES PROVIDE THIS PROGRAM "AS IS"
WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY
AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE PROGRAM PROVE
DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR
CORRECTION.

  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL RICHARD M.
STALLMAN, THE FREE SOFTWARE FOUNDATION, INC., AND/OR ANY OTHER PARTY
WHO MAY MODIFY AND REDISTRIBUTE THIS PROGRAM AS PERMITTED BELOW, BE
LIABLE TO YOU FOR DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR
OTHER SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR
DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR
A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) THIS
PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.

                GENERAL PUBLIC LICENSE TO COPY

  1. You may copy and distribute verbatim copies of this source file
as you receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy a valid copyright notice "Copyright
(C) 1988 Free Software Foundation, Inc."; and include following the
copyright notice a verbatim copy of the above disclaimer of warranty
and of this License.  You may charge a distribution fee for the
physical act of transferring a copy.

  2. You may modify your copy or copies of this source file or
any portion of it, and copy and distribute such modifications under
the terms of Paragraph 1 above, provided that you also do the following:

    a) cause the modified files to carry prominent notices stating
    that you changed the files and the date of any change; and

    b) cause the whole of any work that you distribute or publish,
    that in whole or in part contains or is a derivative of this
    program or any part thereof, to be licensed at no charge to all
    third parties on terms identical to those contained in this
    License Agreement (except that you may choose to grant more extensive
    warranty protection to some or all third parties, at your option).

    c) You may charge a distribution fee for the physical act of
    transferring a copy, and you may at your option offer warranty
    protection in exchange for a fee.

Mere aggregation of another unrelated program with this program (or its
derivative) on a volume of a storage or distribution medium does not bring
the other program under the scope of these terms.

  3. You may copy and distribute this program or any portion of it in
compiled, executable or object code form under the terms of Paragraphs
1 and 2 above provided that you do the following:

    a) accompany it with the complete corresponding machine-readable
    source code, which must be distributed under the terms of
    Paragraphs 1 and 2 above; or,

    b) accompany it with a written offer, valid for at least three
    years, to give any third party free (except for a nominal
    shipping charge) a complete machine-readable copy of the
    corresponding source code, to be distributed under the terms of
    Paragraphs 1 and 2 above; or,

    c) accompany it with the information you received as to where the
    corresponding source code may be obtained.  (This alternative is
    allowed only for noncommercial distribution and only if you
    received the program in object code or executable form alone.)

For an executable file, complete source code means all the source code for
all modules it contains; but, as a special exception, it need not include
source code for modules which are standard libraries that accompany the
operating system on which the executable file runs.

  4. You may not copy, sublicense, distribute or transfer this program
except as expressly provided under this License Agreement.  Any attempt
otherwise to copy, sublicense, distribute or transfer this program is void and
your rights to use the program under this License agreement shall be
automatically terminated.  However, parties who have received computer
software programs from you with this License Agreement will not have
their licenses terminated so long as such parties remain in full compliance.

  5. If you wish to incorporate parts of this program into other free
programs whose distribution conditions are different, write to the Free
Software Foundation at 675 Mass Ave, Cambridge, MA 02139.  We have not yet
worked out a simple rule that can be stated here, but we will often permit
this.  We will be guided by the two goals of preserving the free status of
all derivatives our free software and of promoting the sharing and reuse of
software.

In other words, you are welcome to use, share and improve this program.
You are forbidden to forbid anyone else to use, share and improve
what you give them.   Help stamp out software-hoarding!
*/#include "awk.h"#include <assert.h>#ifdef setbit /* surprise - setbit and clrbit are macros on NeXT */#undef setbit#endif#ifdef clrbit#undef clrbit#endif#ifdef __STDC__typedef void *ptr_t;#elsetypedef char *ptr_t;#endiftypedef struct { char ** in; char * left; char * right; char * is;} must;static ptr_t xcalloc P((int n, size_t s));static ptr_t xmalloc P((size_t n));static ptr_t xrealloc P((ptr_t p, size_t n));static int tstbit P((int b, _charset c));static void setbit P((int b, _charset c));static void clrbit P((int b, _charset c));static void copyset P((const _charset src, _charset dst));static void zeroset P((_charset s));static void notset P((_charset s));static int equal P((const _charset s1, const _charset s2));static int charset_index P((const _charset s));static _token lex P((void));static void addtok P((_token t));static void atom P((void));static void closure P((void));static void branch P((void));static void regexp P((void));static void copy P((const _position_set *src, _position_set *dst));static void insert P((_position p, _position_set *s));static void merge P((_position_set *s1, _position_set *s2, _position_set *m));static void delete P((_position p, _position_set *s));static int state_index P((struct regexp *r, _position_set *s, int newline, int letter));static void epsclosure P((_position_set *s, struct regexp *r));static void build_state P((int s, struct regexp *r));static void build_state_zero P((struct regexp *r));static char *icatalloc P((char *old, const char *new));static char *icpyalloc P((const char *string));static char *istrstr P((char *lookin, char *lookfor));static void ifree P((char *cp));static void freelist P((char **cpp));static char **enlist P((char **cpp, char *new, size_t len));static char **comsubs P((char *left, char *right));static char **addlists P((char **old, char **new));static char **inboth P((char **left, char **right));static void resetmust P((must *mp));static void regmust P((struct regexp *r));#undef Pstatic 
ptr_txcalloc(n, s) int n; size_t s;{ ptr_t r = calloc(n, s); if (NULL == r) reg_error("Memory exhausted"); /* reg_error does not return */ return r;}static ptr_txmalloc(n) size_t n;{ ptr_t r = malloc(n); assert(n != 0); if (NULL == r) reg_error("Memory exhausted"); return r;}static ptr_txrealloc(p, n) ptr_t p; size_t n;{ ptr_t r = realloc(p, n); assert(n != 0); if (NULL == r) reg_error("Memory exhausted"); return r;}#define CALLOC(p, t, n) ((p) = (t *) xcalloc((n), sizeof (t)))#undef MALLOC#define MALLOC(p, t, n) ((p) = (t *) xmalloc((n) * sizeof (t)))#define REALLOC(p, t, n) ((p) = (t *) xrealloc((ptr_t) (p), (n) * sizeof (t)))/* Reallocate an array of type t if nalloc is too small for index. */#define REALLOC_IF_NECESSARY(p, t, nalloc, index) \ if ((index) >= (nalloc)) \ { \ while ((index) >= (nalloc)) \ (nalloc) *= 2; \ REALLOC(p, t, nalloc); \ }/* Stuff pertaining to charsets. */static inttstbit(b, c) int b; _charset c;{ return c[b / INTBITS] & 1 << b % INTBITS;}static voidsetbit(b, c) int b; _charset c;{ c[b / INTBITS] |= 1 << b % INTBITS;}static voidclrbit(b, c) int b; _charset c;{ c[b / INTBITS] &= ~(1 << b % INTBITS);}static voidcopyset(src, dst) const _charset src; _charset dst;{ int i; for (i = 0; i < _CHARSET_INTS; ++i) dst[i] = src[i];}static voidzeroset(s) _charset s;{ int i; for (i = 0; i < _CHARSET_INTS; ++i) s[i] = 0;}static voidnotset(s) _charset s;{ int i; for (i = 0; i < _CHARSET_INTS; ++i) s[i] = ~s[i];}static intequal(s1, s2) const _charset s1; const _charset s2;{ int i; for (i = 0; i < _CHARSET_INTS; ++i) if (s1[i] != s2[i]) return 0; return 1;}/* A pointer to the current regexp is kept here during parsing. */static struct regexp *reg;/* Find the index of charset s in reg->charsets, or allocate a new charset. 
*/static intcharset_index(s) const _charset s;{ int i; for (i = 0; i < reg->cindex; ++i) if (equal(s, reg->charsets[i])) return i; REALLOC_IF_NECESSARY(reg->charsets, _charset, reg->calloc, reg->cindex); ++reg->cindex; copyset(s, reg->charsets[i]); return i;}/* Syntax bits controlling the behavior of the lexical analyzer. */static syntax_bits, syntax_bits_set;/* Flag for case-folding letters into sets. */static case_fold;/* Entry point to set syntax options. */voidregsyntax(bits, fold) long bits; int fold;{ syntax_bits_set = 1; syntax_bits = bits; case_fold = fold;}/* Lexical analyzer. */static const char *lexstart; /* Pointer to beginning of input string. */static const char *lexptr; /* Pointer to next input character. */static lexleft; /* Number of characters remaining. */static caret_allowed; /* True if backward context allows ^ (meaningful only if RE_CONTEXT_INDEP_OPS is turned off). */static closure_allowed; /* True if backward context allows closures (meaningful only if RE_CONTEXT_INDEP_OPS is turned off). *//* Note that characters become unsigned here. */#define FETCH(c, eoferr) \ { \ if (! lexleft) \ if (eoferr != NULL) \ reg_error(eoferr); \ else \ return _END; \ (c) = (unsigned char) *lexptr++; \ --lexleft; \ }static _tokenlex(){ _token c, c2; int invert; _charset cset; FETCH(c, (char *) 0); switch (c) { case '^': if (! (syntax_bits & RE_CONTEXT_INDEP_OPS) && (!caret_allowed || ((syntax_bits & RE_TIGHT_VBAR) && lexptr - 1 != lexstart))) goto normal_char; caret_allowed = 0; return syntax_bits & RE_TIGHT_VBAR ? _ALLBEGLINE : _BEGLINE; case '$': if (syntax_bits & RE_CONTEXT_INDEP_OPS || !lexleft || (! (syntax_bits & RE_TIGHT_VBAR) && ((syntax_bits & RE_NO_BK_PARENS ? lexleft > 0 && *lexptr == ')' : lexleft > 1 && *lexptr == '\\' && lexptr[1] == ')') || (syntax_bits & RE_NO_BK_VBAR ? lexleft > 0 && *lexptr == '|' : lexleft > 1 && *lexptr == '\\' && lexptr[1] == '|')))) return syntax_bits & RE_TIGHT_VBAR ? 
_ALLENDLINE : _ENDLINE; goto normal_char; case '\\': FETCH(c, "Unfinished \\ quote"); switch (c) { case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': caret_allowed = 0; closure_allowed = 1; return _BACKREF; case '<': caret_allowed = 0; return _BEGWORD; case '>': caret_allowed = 0; return _ENDWORD; case 'b': caret_allowed = 0; return _LIMWORD; case 'B': caret_allowed = 0; return _NOTLIMWORD; case 'w': case 'W': zeroset(cset); for (c2 = 0; c2 < _NOTCHAR; ++c2) if (ISALNUM(c2)) setbit(c2, cset); if (c == 'W') notset(cset); caret_allowed = 0; closure_allowed = 1; return _SET + charset_index(cset); case '?': if (syntax_bits & RE_BK_PLUS_QM) goto qmark; goto normal_char; case '+': if (syntax_bits & RE_BK_PLUS_QM) goto plus; goto normal_char; case '|': if (! (syntax_bits & RE_NO_BK_VBAR)) goto or; goto normal_char; case '(': if (! (syntax_bits & RE_NO_BK_PARENS)) goto lparen; goto normal_char; case ')': if (! (syntax_bits & RE_NO_BK_PARENS)) goto rparen; goto normal_char; default: goto normal_char; } case '?': if (syntax_bits & RE_BK_PLUS_QM) goto normal_char; qmark: if (! (syntax_bits & RE_CONTEXT_INDEP_OPS) && !closure_allowed) goto normal_char; return _QMARK; case '*': if (! (syntax_bits & RE_CONTEXT_INDEP_OPS) && !closure_allowed) goto normal_char; return _STAR; case '+': if (syntax_bits & RE_BK_PLUS_QM) goto normal_char; plus: if (! (syntax_bits & RE_CONTEXT_INDEP_OPS) && !closure_allowed) goto normal_char; return _PLUS; case '|': if (! (syntax_bits & RE_NO_BK_VBAR)) goto normal_char; or: caret_allowed = 1; closure_allowed = 0; return _OR; case '\n': if (! (syntax_bits & RE_NEWLINE_OR)) goto normal_char; goto or; case '(': if (! (syntax_bits & RE_NO_BK_PARENS)) goto normal_char; lparen: caret_allowed = 1; closure_allowed = 0; return _LPAREN; case ')': if (! 
(syntax_bits & RE_NO_BK_PARENS)) goto normal_char; rparen: caret_allowed = 0; closure_allowed = 1; return _RPAREN; case '.': zeroset(cset); notset(cset); clrbit('\n', cset); caret_allowed = 0; closure_allowed = 1; return _SET + charset_index(cset); case '[': zeroset(cset); FETCH(c, "Unbalanced ["); if (c == '^') { FETCH(c, "Unbalanced ["); invert = 1; } else invert = 0; do { FETCH(c2, "Unbalanced ["); if ((syntax_bits & RE_AWK_CLASS_HACK) && c == '\\') { c = c2; FETCH(c2, "Unbalanced ["); } if (c2 == '-') { FETCH(c2, "Unbalanced ["); if (c2 == ']' && (syntax_bits & RE_AWK_CLASS_HACK)) { setbit(c, cset); setbit('-', cset); break; } while (c <= c2) setbit(c++, cset); FETCH(c, "Unbalanced ["); } else { setbit(c, cset); c = c2; } } while (c != ']'); if (invert) notset(cset); caret_allowed = 0; closure_allowed = 1; return _SET + charset_index(cset); default: normal_char: caret_allowed = 0; closure_allowed = 1; if (case_fold && ISALPHA(c)) { zeroset(cset); if (isupper(c)) c = tolower(c); setbit(c, cset); setbit(toupper(c), cset); return _SET + charset_index(cset); } return c; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -