📄 regex.c
字号:
length = *workp++; /* the length of equivalence_class */ for (i=0 ; i<length ;) { printf("[="); while(*p != 0) PUT_CHAR((i++,*p++)); i++,p++; printf("=]"); } length = *workp++; /* the length of char_range */ for (i=0 ; i<length ; i++) { wchar_t range_start = *p++; wchar_t range_end = *p++; if (MB_CUR_MAX == 1) printf("%c-%c", (char) range_start, (char) range_end); else printf("%C-%C", (wint_t) range_start, (wint_t) range_end); } length = *workp++; /* the length of char */ for (i=0 ; i<length ; i++) if (MB_CUR_MAX == 1) putchar (*p++); else printf("%C", (wint_t) *p++); putchar (']');#else register int c, last = -100; register int in_range = 0; printf ("/charset [%s", (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); assert (p + *p < pend); for (c = 0; c < 256; c++) if (c / 8 < *p && (p[1 + (c/8)] & (1 << (c % 8)))) { /* Are we starting a range? */ if (last + 1 == c && ! in_range) { putchar ('-'); in_range = 1; } /* Have we broken a range? */ else if (last + 1 != c && in_range) { putchar (last); in_range = 0; } if (! in_range) putchar (c); last = c; } if (in_range) putchar (last); putchar (']'); p += 1 + *p;#endif /* MBS_SUPPORT */ } break; case begline: printf ("/begline"); break; case endline: printf ("/endline"); break; case on_failure_jump: extract_number_and_incr (&mcnt, &p);#ifdef _LIBC printf ("/on_failure_jump to %td", p + mcnt - start);#else printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));#endif break; case on_failure_keep_string_jump: extract_number_and_incr (&mcnt, &p);#ifdef _LIBC printf ("/on_failure_keep_string_jump to %td", p + mcnt - start);#else printf ("/on_failure_keep_string_jump to %ld", (long int) (p + mcnt - start));#endif break; case dummy_failure_jump: extract_number_and_incr (&mcnt, &p);#ifdef _LIBC printf ("/dummy_failure_jump to %td", p + mcnt - start);#else printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));#endif break; case push_dummy_failure: printf ("/push_dummy_failure"); break; case maybe_pop_jump: extract_number_and_incr (&mcnt, &p);#ifdef _LIBC printf ("/maybe_pop_jump to %td", p + mcnt - start);#else printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));#endif break; case pop_failure_jump: extract_number_and_incr (&mcnt, &p);#ifdef _LIBC printf ("/pop_failure_jump to %td", p + mcnt - start);#else printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));#endif break; case jump_past_alt: extract_number_and_incr (&mcnt, &p);#ifdef _LIBC printf ("/jump_past_alt to %td", p + mcnt - start);#else printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));#endif break; case jump: extract_number_and_incr (&mcnt, &p);#ifdef _LIBC printf ("/jump to %td", p + mcnt - start);#else printf ("/jump to %ld", (long int) (p + mcnt - start));#endif break; case succeed_n: extract_number_and_incr (&mcnt, &p); p1 = p + mcnt; extract_number_and_incr (&mcnt2, &p);#ifdef _LIBC printf ("/succeed_n to %td, %d times", p1 - start, mcnt2);#else printf ("/succeed_n to %ld, %d times", (long int) (p1 - start), mcnt2);#endif break; case jump_n: extract_number_and_incr (&mcnt, &p); p1 = p + mcnt; extract_number_and_incr (&mcnt2, &p); printf ("/jump_n to %d, %d times", p1 - start, mcnt2); break; case set_number_at: extract_number_and_incr (&mcnt, &p); p1 = p + mcnt; extract_number_and_incr (&mcnt2, &p);#ifdef _LIBC printf ("/set_number_at location %td to %d", p1 - start, mcnt2);#else printf ("/set_number_at location %ld to %d", (long int) (p1 - start), mcnt2);#endif break; case wordbound: printf ("/wordbound"); break; case notwordbound: printf ("/notwordbound"); break; case wordbeg: printf ("/wordbeg"); break; case wordend: printf ("/wordend"); break;# ifdef emacs case before_dot: printf ("/before_dot"); break; case at_dot: printf ("/at_dot"); break; case after_dot: printf ("/after_dot"); break; case syntaxspec: printf ("/syntaxspec"); mcnt = *p++; printf ("/%d", mcnt); break; case notsyntaxspec: printf ("/notsyntaxspec"); mcnt = *p++; printf ("/%d", mcnt); break;# endif /* emacs */ case wordchar: printf ("/wordchar"); break; case notwordchar: printf ("/notwordchar"); break; case begbuf: printf ("/begbuf"); break; case endbuf: printf ("/endbuf"); break; default: printf ("?%ld", (long int) *(p-1)); } putchar ('\n'); }#ifdef _LIBC printf ("%td:\tend of pattern.\n", p - start);#else printf ("%ld:\tend of pattern.\n", (long int) (p - start));#endif}voidprint_compiled_pattern (bufp) struct re_pattern_buffer *bufp;{ US_CHAR_TYPE *buffer = (US_CHAR_TYPE*) bufp->buffer; print_partial_compiled_pattern (buffer, buffer + bufp->used / sizeof(US_CHAR_TYPE)); printf ("%ld bytes used/%ld bytes allocated.\n", bufp->used, bufp->allocated); if (bufp->fastmap_accurate && bufp->fastmap) { printf ("fastmap: "); print_fastmap (bufp->fastmap); }#ifdef _LIBC printf ("re_nsub: %Zd\t", bufp->re_nsub);#else printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);#endif printf ("regs_alloc: %d\t", bufp->regs_allocated); printf ("can_be_null: %d\t", bufp->can_be_null); printf ("newline_anchor: %d\n", bufp->newline_anchor); printf ("no_sub: %d\t", bufp->no_sub); printf ("not_bol: %d\t", bufp->not_bol); printf ("not_eol: %d\t", bufp->not_eol); printf ("syntax: %lx\n", bufp->syntax); /* Perhaps we should print the translate table? */}voidprint_double_string (where, string1, size1, string2, size2) const CHAR_TYPE *where; const CHAR_TYPE *string1; const CHAR_TYPE *string2; int size1; int size2;{ int this_char; if (where == NULL) printf ("(null)"); else { if (FIRST_STRING_P (where)) { for (this_char = where - string1; this_char < size1; this_char++) PUT_CHAR (string1[this_char]); where = string2; } for (this_char = where - string2; this_char < size2; this_char++) PUT_CHAR (string2[this_char]); }}voidprintchar (c) int c;{ putc (c, stderr);}#else /* not DEBUG */# undef assert# define assert(e)# define DEBUG_STATEMENT(e)# define DEBUG_PRINT1(x)# define DEBUG_PRINT2(x1, x2)# define DEBUG_PRINT3(x1, x2, x3)# define DEBUG_PRINT4(x1, x2, x3, x4)# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)#endif /* not DEBUG */#ifdef MBS_SUPPORT/* This convert a multibyte string to a wide character string. And write their correspondances to offset_buffer(see below) and write whether each wchar_t is binary data to is_binary. This assume invalid multibyte sequences as binary data. We assume offset_buffer and is_binary is already allocated enough space. */static size_t convert_mbs_to_wcs (CHAR_TYPE *dest, const unsigned char* src, size_t len, int *offset_buffer, char *is_binary);static size_tconvert_mbs_to_wcs (dest, src, len, offset_buffer, is_binary) CHAR_TYPE *dest; const unsigned char* src; size_t len; /* the length of multibyte string. */ /* It hold correspondances between src(char string) and dest(wchar_t string) for optimization. e.g. src = "xxxyzz" dest = {'X', 'Y', 'Z'} (each "xxx", "y" and "zz" represent one multibyte character corresponding to 'X', 'Y' and 'Z'.) offset_buffer = {0, 0+3("xxx"), 0+3+1("y"), 0+3+1+2("zz")} = {0, 3, 4, 6} */ int *offset_buffer; char *is_binary;{ wchar_t *pdest = dest; const unsigned char *psrc = src; size_t wc_count = 0; if (MB_CUR_MAX == 1) { /* We don't need conversion. */ for ( ; wc_count < len ; ++wc_count) { *pdest++ = *psrc++; is_binary[wc_count] = FALSE; offset_buffer[wc_count] = wc_count; } offset_buffer[wc_count] = wc_count; } else { /* We need conversion. */ mbstate_t mbs; int consumed; size_t mb_remain = len; size_t mb_count = 0; /* Initialize the conversion state. */ memset (&mbs, 0, sizeof (mbstate_t)); offset_buffer[0] = 0; for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed, psrc += consumed) { consumed = mbrtowc (pdest, psrc, mb_remain, &mbs); if (consumed <= 0) /* failed to convert. maybe src contains binary data. So we consume 1 byte manualy. */ { *pdest = *psrc; consumed = 1; is_binary[wc_count] = TRUE; } else is_binary[wc_count] = FALSE; /* In sjis encoding, we use yen sign as escape character in place of reverse solidus. So we convert 0x5c(yen sign in sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse solidus in UCS2). */ if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5) *pdest = (wchar_t) *psrc; offset_buffer[wc_count + 1] = mb_count += consumed; } } return wc_count;}#endif /* MBS_SUPPORT *//* Set by `re_set_syntax' to the current regexp syntax to recognize. Can also be assigned to arbitrarily: each pattern buffer stores its own syntax, so it can be changed between regex compilations. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -