📄 gnuregex.c
字号:
/* It is useful to test things that ``must'' be true when debugging. */#include <assert.h>static int debug = 0;#define DEBUG_STATEMENT(e) e#define DEBUG_PRINT1(x) if (debug) printf (x)#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ if (debug) print_partial_compiled_pattern (s, e)#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ if (debug) print_double_string (w, s1, sz1, s2, sz2)extern void printchar();/* Print the fastmap in human-readable form. */voidprint_fastmap(fastmap) char *fastmap;{ unsigned was_a_range = 0; unsigned i = 0; while (i < (1 << BYTEWIDTH)) { if (fastmap[i++]) { was_a_range = 0; printchar(i - 1); while (i < (1 << BYTEWIDTH) && fastmap[i]) { was_a_range = 1; i++; } if (was_a_range) { printf("-"); printchar(i - 1); } } } putchar('\n');}/* Print a compiled pattern string in human-readable form, starting at * the START pointer into it and ending just before the pointer END. */voidprint_partial_compiled_pattern(start, end) unsigned char *start; unsigned char *end;{ int mcnt, mcnt2; unsigned char *p = start; unsigned char *pend = end; if (start == NULL) { printf("(null)\n"); return; } /* Loop over pattern commands. */ while (p < pend) { switch ((re_opcode_t) * p++) { case no_op: printf("/no_op"); break; case exactn: mcnt = *p++; printf("/exactn/%d", mcnt); do { putchar('/'); printchar(*p++); } while (--mcnt); break; case start_memory: mcnt = *p++; printf("/start_memory/%d/%d", mcnt, *p++); break; case stop_memory: mcnt = *p++; printf("/stop_memory/%d/%d", mcnt, *p++); break; case duplicate: printf("/duplicate/%d", *p++); break; case anychar: printf("/anychar"); break; case charset: case charset_not: { register int c; printf("/charset%s", (re_opcode_t) * (p - 1) == charset_not ? "_not" : ""); assert(p + *p < pend); for (c = 0; c < *p; c++) { unsigned bit; unsigned char map_byte = p[1 + c]; putchar('/'); for (bit = 0; bit < BYTEWIDTH; bit++) if (map_byte & (1 << bit)) printchar(c * BYTEWIDTH + bit); } p += 1 + *p; break; } case begline: printf("/begline"); break; case endline: printf("/endline"); break; case on_failure_jump: extract_number_and_incr(&mcnt, &p); printf("/on_failure_jump/0/%d", mcnt); break; case on_failure_keep_string_jump: extract_number_and_incr(&mcnt, &p); printf("/on_failure_keep_string_jump/0/%d", mcnt); break; case dummy_failure_jump: extract_number_and_incr(&mcnt, &p); printf("/dummy_failure_jump/0/%d", mcnt); break; case push_dummy_failure: printf("/push_dummy_failure"); break; case maybe_pop_jump: extract_number_and_incr(&mcnt, &p); printf("/maybe_pop_jump/0/%d", mcnt); break; case pop_failure_jump: extract_number_and_incr(&mcnt, &p); printf("/pop_failure_jump/0/%d", mcnt); break; case jump_past_alt: extract_number_and_incr(&mcnt, &p); printf("/jump_past_alt/0/%d", mcnt); break; case jump: extract_number_and_incr(&mcnt, &p); printf("/jump/0/%d", mcnt); break; case succeed_n: extract_number_and_incr(&mcnt, &p); extract_number_and_incr(&mcnt2, &p); printf("/succeed_n/0/%d/0/%d", mcnt, mcnt2); break; case jump_n: extract_number_and_incr(&mcnt, &p); extract_number_and_incr(&mcnt2, &p); printf("/jump_n/0/%d/0/%d", mcnt, mcnt2); break; case set_number_at: extract_number_and_incr(&mcnt, &p); extract_number_and_incr(&mcnt2, &p); printf("/set_number_at/0/%d/0/%d", mcnt, mcnt2); break; case wordbound: printf("/wordbound"); break; case notwordbound: printf("/notwordbound"); break; case wordbeg: printf("/wordbeg"); break; case wordend: printf("/wordend");#ifdef emacs case before_dot: printf("/before_dot"); break; case at_dot: printf("/at_dot"); break; case after_dot: printf("/after_dot"); break; case syntaxspec: printf("/syntaxspec"); mcnt = *p++; printf("/%d", mcnt); break; case notsyntaxspec: printf("/notsyntaxspec"); mcnt = *p++; printf("/%d", mcnt); break;#endif /* emacs */ case wordchar: printf("/wordchar"); break; case notwordchar: printf("/notwordchar"); break; case begbuf: printf("/begbuf"); break; case endbuf: printf("/endbuf"); break; default: printf("?%d", *(p - 1)); } } printf("/\n");}voidprint_compiled_pattern(bufp) struct re_pattern_buffer *bufp;{ unsigned char *buffer = bufp->buffer; print_partial_compiled_pattern(buffer, buffer + bufp->used); printf("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated); if (bufp->fastmap_accurate && bufp->fastmap) { printf("fastmap: "); print_fastmap(bufp->fastmap); } printf("re_nsub: %d\t", bufp->re_nsub); printf("regs_alloc: %d\t", bufp->regs_allocated); printf("can_be_null: %d\t", bufp->can_be_null); printf("newline_anchor: %d\n", bufp->newline_anchor); printf("no_sub: %d\t", bufp->no_sub); printf("not_bol: %d\t", bufp->not_bol); printf("not_eol: %d\t", bufp->not_eol); printf("syntax: %d\n", bufp->syntax); /* Perhaps we should print the translate table? */}voidprint_double_string(where, string1, size1, string2, size2) const char *where; const char *string1; const char *string2; int size1; int size2;{ unsigned this_char; if (where == NULL) printf("(null)"); else { if (FIRST_STRING_P(where)) { for (this_char = where - string1; this_char < size1; this_char++) printchar(string1[this_char]); where = string2; } for (this_char = where - string2; this_char < size2; this_char++) printchar(string2[this_char]); }}#else /* not DEBUG */#undef assert#define assert(e)#define DEBUG_STATEMENT(e)#define DEBUG_PRINT1(x)#define DEBUG_PRINT2(x1, x2)#define DEBUG_PRINT3(x1, x2, x3)#define DEBUG_PRINT4(x1, x2, x3, x4)#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)#endif /* not DEBUG *//* Set by `re_set_syntax' to the current regexp syntax to recognize. Can * also be assigned to arbitrarily: each pattern buffer stores its own * syntax, so it can be changed between regex compilations. */reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;/* Specify the precise syntax of regexps for compilation. This provides * for compatibility for various utilities which historically have * different, incompatible syntaxes. * * The argument SYNTAX is a bit mask comprised of the various bits * defined in regex.h. We return the old syntax. */reg_syntax_tre_set_syntax(syntax) reg_syntax_t syntax;{ reg_syntax_t ret = re_syntax_options; re_syntax_options = syntax; return ret;}/* This table gives an error message for each of the error codes listed * in regex.h. Obviously the order here has to be same as there. */static const char *re_error_msg[] ={NULL, /* REG_NOERROR */ "No match", /* REG_NOMATCH */ "Invalid regular expression", /* REG_BADPAT */ "Invalid collation character", /* REG_ECOLLATE */ "Invalid character class name", /* REG_ECTYPE */ "Trailing backslash", /* REG_EESCAPE */ "Invalid back reference", /* REG_ESUBREG */ "Unmatched [ or [^", /* REG_EBRACK */ "Unmatched ( or \\(", /* REG_EPAREN */ "Unmatched \\{", /* REG_EBRACE */ "Invalid content of \\{\\}", /* REG_BADBR */ "Invalid range end", /* REG_ERANGE */ "Memory exhausted", /* REG_ESPACE */ "Invalid preceding regular expression", /* REG_BADRPT */ "Premature end of regular expression", /* REG_EEND */ "Regular expression too big", /* REG_ESIZE */ "Unmatched ) or \\)", /* REG_ERPAREN */};/* Subroutine declarations and macros for regex_compile. */static void store_op1(), store_op2();static void insert_op1(), insert_op2();static boolean at_begline_loc_p(), at_endline_loc_p();static boolean group_in_compile_stack();static reg_errcode_t compile_range();/* Fetch the next character in the uncompiled pattern---translating it * if necessary. Also cast from a signed character in the constant * string passed to us by the user to an unsigned char that we can use * as an array index (in, e.g., `translate'). */#define PATFETCH(c) \ do {if (p == pend) return REG_EEND; \ c = (unsigned char) *p++; \ if (translate) c = translate[c]; \ } while (0)/* Fetch the next character in the uncompiled pattern, with no * translation. */#define PATFETCH_RAW(c) \ do {if (p == pend) return REG_EEND; \ c = (unsigned char) *p++; \ } while (0)/* Go backwards one character in the pattern. */#define PATUNFETCH p--/* If `translate' is non-null, return translate[D], else just D. We * cast the subscript to translate because some data is declared as * `char *', to avoid warnings when a string constant is passed. But * when we use a character as a subscript we must make it unsigned. */#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))/* Macros for outputting the compiled pattern into `buffer'. *//* If the buffer isn't allocated when it comes in, use this. */#define INIT_BUF_SIZE 32/* Make sure we have at least N more bytes of space in buffer. */#define GET_BUFFER_SPACE(n) \ while (b - bufp->buffer + (n) > bufp->allocated) \ EXTEND_BUFFER ()/* Make sure we have one more byte of buffer space and then add C to it. */#define BUF_PUSH(c) \ do { \ GET_BUFFER_SPACE (1); \ *b++ = (unsigned char) (c); \ } while (0)/* Ensure we have two more bytes of buffer space and then append C1 and C2. */#define BUF_PUSH_2(c1, c2) \ do { \ GET_BUFFER_SPACE (2); \ *b++ = (unsigned char) (c1); \ *b++ = (unsigned char) (c2); \ } while (0)/* As with BUF_PUSH_2, except for three bytes. */#define BUF_PUSH_3(c1, c2, c3) \ do { \ GET_BUFFER_SPACE (3); \ *b++ = (unsigned char) (c1); \ *b++ = (unsigned char) (c2); \ *b++ = (unsigned char) (c3); \ } while (0)/* Store a jump with opcode OP at LOC to location TO. We store a * relative address offset by the three bytes the jump itself occupies. */#define STORE_JUMP(op, loc, to) \ store_op1 (op, loc, (to) - (loc) - 3)/* Likewise, for a two-argument jump. */#define STORE_JUMP2(op, loc, to, arg) \ store_op2 (op, loc, (to) - (loc) - 3, arg)/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */#define INSERT_JUMP(op, loc, to) \ insert_op1 (op, loc, (to) - (loc) - 3, b)/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */#define INSERT_JUMP2(op, loc, to, arg) \ insert_op2 (op, loc, (to) - (loc) - 3, arg, b)/* This is not an arbitrary limit: the arguments which represent offsets * into the pattern are two bytes long. So if 2^16 bytes turns out to * be too small, many things would have to change. */#define MAX_BUF_SIZE (1L << 16)/* Extend the buffer by twice its current size via realloc and * reset the pointers that pointed into the old block to point to the
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -