📄 x_regex.c
字号:
#endif /* not EXTRACT_MACROS */#endif /* DEBUG *//* If DEBUG is defined, Regex prints many voluminous messages about what it is doing (if the variable `debug' is nonzero). If linked with the main program in `iregex.c', you can enter patterns and strings interactively. And if linked with the main program in `main.c' and the other test files, you can run the already-written tests. */#ifdef DEBUG/* We use standard I/O for debugging. */#include <stdio.h>/* It is useful to test things that ``must'' be true when debugging. */#include <assert.h>static Int32 debug = 0;#define DEBUG_STATEMENT(e) e#define DEBUG_PRINT1(x) if (debug) printf (x)#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ if (debug) print_partial_compiled_pattern (s, e)#define DEBUG_PRINT_Real64_STRING(w, s1, sz1, s2, sz2) \ if (debug) print_double_string (w, s1, sz1, s2, sz2)extern void printchar ();/* Print the fastmap in human-readable form. */voidprint_fastmap (fastmap) UChar *fastmap;{ Uns32 was_a_range = 0; Uns32 i = 0; while (i < (1 << BYTEWIDTH)) { if (fastmap[i++]) { was_a_range = 0; printchar (i - 1); while (i < (1 << BYTEWIDTH) && fastmap[i]) { was_a_range = 1; i++; } if (was_a_range) { printf ("-"); printchar (i - 1); } } } putchar ('\n'); }/* Print a compiled pattern string in human-readable form, starting at the START pointer into it and ending just before the pointer END. */voidprint_partial_compiled_pattern (start, end) UChar *start; UChar *end;{ Int32 mcnt, mcnt2; UChar *p = start; UChar *pend = end; if (start == (UChar*) NULL) { printf ("(null)\n"); return; } /* Loop over pattern commands. */ while (p < pend) { switch ((re_opcode_t) *p++) { case no_op: printf ("/no_op"); break; case exactn: mcnt = *p++; printf ("/exactn/%d", mcnt); do { putchar ('/'); printchar (*p++); } while (--mcnt); break; case start_memory: mcnt = *p++; printf ("/start_memory/%d/%d", mcnt, *p++); break; case stop_memory: mcnt = *p++; printf ("/stop_memory/%d/%d", mcnt, *p++); break; case duplicate: printf ("/duplicate/%d", *p++); break; case anychar: printf ("/anychar"); break; case charset: case charset_not: { register Int32 c; printf ("/charset%s", (re_opcode_t) *(p - 1) == charset_not ? "_not" : ""); assert (p + *p < pend); for (c = 0; c < *p; c++) { Uns32 bit; UChar map_byte = p[1 + c]; putchar ('/'); for (bit = 0; bit < BYTEWIDTH; bit++) if (map_byte & (1 << bit)) printchar (c * BYTEWIDTH + bit); } p += 1 + *p; break; } case begline: printf ("/begline"); break; case endline: printf ("/endline"); break; case on_failure_jump: extract_number_and_incr (&mcnt, &p); printf ("/on_failure_jump/0/%d", mcnt); break; case on_failure_keep_string_jump: extract_number_and_incr (&mcnt, &p); printf ("/on_failure_keep_string_jump/0/%d", mcnt); break; case dummy_failure_jump: extract_number_and_incr (&mcnt, &p); printf ("/dummy_failure_jump/0/%d", mcnt); break; case push_dummy_failure: printf ("/push_dummy_failure"); break; case maybe_pop_jump: extract_number_and_incr (&mcnt, &p); printf ("/maybe_pop_jump/0/%d", mcnt); break; case pop_failure_jump: extract_number_and_incr (&mcnt, &p); printf ("/pop_failure_jump/0/%d", mcnt); break; case jump_past_alt: extract_number_and_incr (&mcnt, &p); printf ("/jump_past_alt/0/%d", mcnt); break; case jump: extract_number_and_incr (&mcnt, &p); printf ("/jump/0/%d", mcnt); break; case succeed_n: extract_number_and_incr (&mcnt, &p); extract_number_and_incr (&mcnt2, &p); printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2); break; case jump_n: extract_number_and_incr (&mcnt, &p); extract_number_and_incr (&mcnt2, &p); printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2); break; case set_number_at: extract_number_and_incr (&mcnt, &p); extract_number_and_incr (&mcnt2, &p); printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2); break; case wordbound: printf ("/wordbound"); break; case notwordbound: printf ("/notwordbound"); break; case wordbeg: printf ("/wordbeg"); break; case wordend: printf ("/wordend"); #ifdef emacs case before_dot: printf ("/before_dot"); break; case at_dot: printf ("/at_dot"); break; case after_dot: printf ("/after_dot"); break; case syntaxspec: printf ("/syntaxspec"); mcnt = *p++; printf ("/%d", mcnt); break; case notsyntaxspec: printf ("/notsyntaxspec"); mcnt = *p++; printf ("/%d", mcnt); break;#endif /* emacs */ case wordchar: printf ("/wordchar"); break; case notwordchar: printf ("/notwordchar"); break; case begbuf: printf ("/begbuf"); break; case endbuf: printf ("/endbuf"); break; default: printf ("?%d", *(p-1)); } } printf ("/\n");}voidprint_compiled_pattern (bufp) struct re_pattern_buffer *bufp;{ UChar *buffer = bufp->buffer; print_partial_compiled_pattern (buffer, buffer + bufp->used); printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated); if (bufp->fastmap_accurate && bufp->fastmap) { printf ("fastmap: "); print_fastmap (bufp->fastmap); } printf ("re_nsub: %d\t", bufp->re_nsub); printf ("regs_alloc: %d\t", bufp->regs_allocated); printf ("can_be_null: %d\t", bufp->can_be_null); printf ("newline_anchor: %d\n", bufp->newline_anchor); printf ("no_sub: %d\t", bufp->no_sub); printf ("not_bol: %d\t", bufp->not_bol); printf ("not_eol: %d\t", bufp->not_eol); printf ("syntax: %d\n", bufp->syntax); /* Perhaps we should print the translate table? */}voidprint_double_string (where, string1, size1, string2, size2) UChar *where; UChar *string1; UChar *string2; Int32 size1; Int32 size2;{ Uns32 this_char; if (where == (UChar *)NULL) printf ("(null)"); else { if (FIRST_STRING_P (where)) { for (this_char = where - string1; this_char < size1; this_char++) printchar (string1[this_char]); where = string2; } for (this_char = where - string2; this_char < size2; this_char++) printchar (string2[this_char]); }}#else /* not DEBUG */#undef assert#define assert(e)#define DEBUG_STATEMENT(e)#define DEBUG_PRINT1(x)#define DEBUG_PRINT2(x1, x2)#define DEBUG_PRINT3(x1, x2, x3)#define DEBUG_PRINT4(x1, x2, x3, x4)#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)#define DEBUG_PRINT_Real64_STRING(w, s1, sz1, s2, sz2)#endif /* not DEBUG *//* Set by `re_set_syntax' to the current regexp syntax to recognize. Can also be assigned to arbitrarily: each pattern buffer stores its own syntax, so it can be changed between regex compilations. */reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;/* Specify the precise syntax of regexps for compilation. This provides for compatibility for various utilities which historically have different, incompatible syntaxes. The argument SYNTAX is a bit mask comprised of the various bits defined in regex.h. We return the old syntax. */reg_syntax_tre_set_syntax (syntax) reg_syntax_t syntax;{ reg_syntax_t ret = re_syntax_options; re_syntax_options = syntax; return ret;}/* This table gives an error message for each of the error codes listed in regex.h. Obviously the order here has to be same as there. */static struct _re_err_pair_ { UChar *msg; Uns32 num;} re_error_pair[] ={ { (UChar *)NULL, REG_NOERROR, }, { "No match", REG_NOMATCH, }, { "Invalid regular expression", REG_BADPAT, }, { "Invalid collation character", REG_ECOLLATE, }, { "Invalid character class name", REG_ECTYPE, }, { "Trailing backslash", REG_EESCAPE, }, { "Invalid back reference", REG_ESUBREG, }, { "Unmatched [ or [^", REG_EBRACK, }, { "Unmatched ( or \\(", REG_EPAREN, }, { "Unmatched \\{", REG_EBRACE, }, { "Invalid content of \\{\\}", REG_BADBR, }, { "Invalid range end", REG_ERANGE, }, { "Memory exhausted", REG_ESPACE, }, { "Invalid preceding regular expression", REG_BADRPT, }, { "Premature end of regular expression", REG_EEND, }, { "Regular expression too big", REG_ESIZE, }, { "Unmatched ) or \\)", REG_ERPAREN, },};static UChar *re_error_msg(Uns32 eno){ Int32 i; for(i = 0; i < sizeof(re_error_pair) / sizeof(re_error_pair[0]); i++){ if(re_error_pair[i].num == eno) return(re_error_pair[i].msg); } return("Internal error: no message for this error number");}/* Subroutine declarations and macros for regex_compile. */static void store_op1 (), store_op2 ();static void insert_op1 (), insert_op2 ();static boolean at_begline_loc_p (), at_endline_loc_p ();static boolean group_in_compile_stack ();static reg_errcode_t compile_range ();/* Fetch the next character in the uncompiled pattern---translating it if necessary. Also cast from a signed character in the constant string passed to us by the user to an unsigned char that we can use as an array index (in, e.g., `translate'). */#define PATFETCH(c) \ do {if (p == pend) return REG_EEND; \ c = (UChar) *p++; \ if (translate) c = translate[c]; \ } while (0)/* Fetch the next character in the uncompiled pattern, with no translation. */#define PATFETCH_RAW(c) \ do {if (p == pend) return REG_EEND; \ c = (UChar) *p++; \ } while (0)/* Go backwards one character in the pattern. */#define PATUNFETCH p--/* If `translate' is non-null, return translate[D], else just D. We cast the subscript to translate because some data is declared as `char *', to avoid warnings when a string constant is passed. But when we use a character as a subscript we must make it unsigned. */#define TRANSLATE(d) (translate ? translate[(UChar) (d)] : (d))/* Macros for outputting the compiled pattern into `buffer'. *//* If the buffer isn't allocated when it comes in, use this. */#define INIT_BUF_SIZE 32/* Make sure we have at least N more bytes of space in buffer. */#define GET_BUFFER_SPACE(n) \ while (b - bufp->buffer + (n) > bufp->allocated) \ EXTEND_BUFFER ()/* Make sure we have one more byte of buffer space and then add C to it. */#define BUF_PUSH(c) \ do { \ GET_BUFFER_SPACE (1); \ *b++ = (UChar) (c); \ } while (0)/* Ensure we have two more bytes of buffer space and then append C1 and C2. */#define BUF_PUSH_2(c1, c2) \ do { \ GET_BUFFER_SPACE (2); \ *b++ = (UChar) (c1); \ *b++ = (UChar) (c2); \ } while (0)/* As with BUF_PUSH_2, except for three bytes. */#define BUF_PUSH_3(c1, c2, c3) \ do { \ GET_BUFFER_SPACE (3); \ *b++ = (UChar) (c1); \ *b++ = (UChar) (c2); \ *b++ = (UChar) (c3); \ } while (0)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -