📄 mbregex.c
字号:
and store it in a memory register. Followed by one byte containing the register number. Register numbers must be in the range 0 through MBRE_NREGS. */ start_paren, /* Place holder at the start of (?:..). */ stop_paren, /* Place holder at the end of (?:..). */ casefold_on, /* Turn on casefold flag. */ casefold_off, /* Turn off casefold flag. */ option_set, /* Turn on multi line match (match with newlines). */ start_nowidth, /* Save string point to the stack. */ stop_nowidth, /* Restore string place at the point start_nowidth. */ pop_and_fail, /* Fail after popping nowidth entry from stack. */ stop_backtrack, /* Restore backtrack stack at the point start_nowidth. */ duplicate, /* Match a duplicate of something remembered. Followed by one byte containing the index of the memory register. */ fail, /* always fails. */ wordchar, /* Matches any word-constituent character. */ notwordchar, /* Matches any char that is not a word-constituent. */ wordbeg, /* Succeeds if at word beginning. */ wordend, /* Succeeds if at word end. */ wordbound, /* Succeeds if at a word boundary. */ notwordbound /* Succeeds if not at a word boundary. */ };/* Number of failure points to allocate space for initially, when matching. If this number is exceeded, more space is allocated, so it is not a hard limit. */#ifndef NFAILURES#define NFAILURES 160#endif/* Store NUMBER in two contiguous bytes starting at DESTINATION. */#define STORE_NUMBER(destination, number) \ do { (destination)[0] = (number) & 0377; \ (destination)[1] = (number) >> 8; } while (0)/* Same as STORE_NUMBER, except increment the destination pointer to the byte after where the number is stored. Watch out that values for DESTINATION such as p + 1 won't work, whereas p will. */#define STORE_NUMBER_AND_INCR(destination, number) \ do { STORE_NUMBER(destination, number); \ (destination) += 2; } while (0)/* Put into DESTINATION a number stored in two contingous bytes starting at SOURCE. */#define EXTRACT_NUMBER(destination, source) \ do { (destination) = *(source) & 0377; \ (destination) += SIGN_EXTEND_CHAR(*(char*)((source) + 1)) << 8; } while (0)/* Same as EXTRACT_NUMBER, except increment the pointer for source to point to second byte of SOURCE. Note that SOURCE has to be a value such as p, not, e.g., p + 1. */#define EXTRACT_NUMBER_AND_INCR(destination, source) \ do { EXTRACT_NUMBER(destination, source); \ (source) += 2; } while (0)/* Specify the precise syntax of regexps for compilation. This provides for compatibility for various utilities which historically have different, incompatible syntaxes. The argument SYNTAX is a bit-mask comprised of the various bits defined in regex.h. */#if 0longre_set_syntax(syntax) long syntax;{ /* obsolete */ return 0;}#endif/* Macros for re_compile_pattern, which is found below these definitions. */#define TRANSLATE_P() ((options&MBRE_OPTION_IGNORECASE) && translate)#define MAY_TRANSLATE() ((bufp->options&(MBRE_OPTION_IGNORECASE|MBRE_MAY_IGNORECASE)) && translate)/* Fetch the next character in the uncompiled pattern---translating it if necessary. Also cast from a signed character in the constant string passed to us by the user to an unsigned char that we can use as an array index (in, e.g., `translate'). */#define PATFETCH(c) \ do {if (p == pend) goto end_of_pattern; \ c = (unsigned char) *p++; \ if (TRANSLATE_P()) c = (unsigned char)translate[c]; \ } while (0)/* Fetch the next character in the uncompiled pattern, with no translation. */#define PATFETCH_RAW(c) \ do {if (p == pend) goto end_of_pattern; \ c = (unsigned char)*p++; \ } while (0)/* Go backwards one character in the pattern. */#define PATUNFETCH p--#define MBC2WC(c, p) \ do { \ if (current_mbctype == MBCTYPE_UTF8) { \ int n = mbclen(c) - 1; \ c &= (1<<(MBRE_BYTEWIDTH-2-n)) - 1; \ while (n--) { \ c = c << 6 | (*p++ & ((1<<6)-1)); \ } \ } \ else { \ c <<= 8; \ c |= (unsigned char)*(p)++; \ } \ } while (0)#define PATFETCH_MBC(c) \ do { \ if (p + mbclen(c) - 1 >= pend) goto end_of_pattern; \ MBC2WC(c, p); \ } while(0)#define WC2MBC1ST(c) \ ((c<0x100)?(c):((current_mbctype != MBCTYPE_UTF8)?(((c)>>8)&0xff):utf8_firstbyte(c)))static unsigned intutf8_firstbyte(c) unsigned long c;{ if (c < 0x80) return c; if (c <= 0x7ff) return ((c>>6)&0xff)|0xc0; if (c <= 0xffff) return ((c>>12)&0xff)|0xe0; if (c <= 0x1fffff) return ((c>>18)&0xff)|0xf0; if (c <= 0x3ffffff) return ((c>>24)&0xff)|0xf8; if (c <= 0x7fffffff) return ((c>>30)&0xff)|0xfc;#if SIZEOF_INT > 4 if (c <= 0xfffffffff) return 0xfe;#else return 0xfe;#endif}#if 0static voidprint_mbc(c) unsigned int c;{ if (current_mbctype == MBCTYPE_UTF8) { if (c < 0x80) printf("%c", c); else if (c <= 0x7ff) printf("%c%c", utf8_firstbyte(c), c&0x3f); else if (c <= 0xffff) printf("%c%c%c", utf8_firstbyte(c), (c>>6)&0x3f, c&0x3f); else if (c <= 0x1fffff) printf("%c%c%c%c", utf8_firstbyte(c), (c>>12)&0x3f, (c>>6)&0x3f, c&0x3f); else if (c <= 0x3ffffff) printf("%c%c%c%c%c", utf8_firstbyte(c), (c>>18)&0x3f, (c>>12)&0x3f, (c>>6)&0x3f, c&0x3f); else if (c <= 0x7fffffff) printf("%c%c%c%c%c%c", utf8_firstbyte(c), (c>>24)&0x3f, (c>>18)&0x3f, (c>>12)&0x3f, (c>>6)&0x3f, c&0x3f); } else if (c < 0xff) { printf("\\%o", c); } else { printf("%c%c", c>>MBRE_BYTEWIDTH, c&0xff); }}#endif/* If the buffer isn't allocated when it comes in, use this. */#define INIT_BUF_SIZE 28/* Make sure we have at least N more bytes of space in buffer. */#define GET_BUFFER_SPACE(n) \ do { \ while (b - bufp->buffer + (size_t)(n) >= (size_t)bufp->allocated) \ EXTEND_BUFFER; \ } while (0)/* Make sure we have one more byte of buffer space and then add CH to it. */#define BUFPUSH(ch) \ do { \ GET_BUFFER_SPACE(1); \ *b++ = (char)(ch); \ } while (0)/* Extend the buffer by twice its current size via reallociation and reset the pointers that pointed into the old allocation to point to the correct places in the new allocation. If extending the buffer results in it being larger than 1 << 16, then flag memory exhausted. */#define EXTEND_BUFFER \ do { char *old_buffer = bufp->buffer; \ if (bufp->allocated == (1L<<16)) goto too_big; \ bufp->allocated *= 2; \ if (bufp->allocated > (1L<<16)) bufp->allocated = (1L<<16); \ bufp->buffer = (char*)xrealloc(bufp->buffer, bufp->allocated); \ if (bufp->buffer == 0) \ goto memory_exhausted; \ b = (b - old_buffer) + bufp->buffer; \ if (fixup_alt_jump) \ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer; \ if (laststart) \ laststart = (laststart - old_buffer) + bufp->buffer; \ begalt = (begalt - old_buffer) + bufp->buffer; \ if (pending_exact) \ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ } while (0)/* Set the bit for character C in a character set list. */#define SET_LIST_BIT(c) \ (b[(unsigned char)(c) / MBRE_BYTEWIDTH] \ |= 1 << ((unsigned char)(c) % MBRE_BYTEWIDTH))/* Get the next unsigned number in the uncompiled pattern. */#define GET_UNSIGNED_NUMBER(num) \ do { if (p != pend) { \ PATFETCH(c); \ while (ISDIGIT(c)) { \ if (num < 0) \ num = 0; \ num = num * 10 + c - '0'; \ if (p == pend) \ break; \ PATFETCH(c); \ } \ } \ } while (0)#define STREQ(s1, s2) ((strcmp(s1, s2) == 0))#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */#define IS_CHAR_CLASS(string) \ (STREQ(string, "alpha") || STREQ(string, "upper") \ || STREQ(string, "lower") || STREQ(string, "digit") \ || STREQ(string, "alnum") || STREQ(string, "xdigit") \ || STREQ(string, "space") || STREQ(string, "print") \ || STREQ(string, "punct") || STREQ(string, "graph") \ || STREQ(string, "cntrl") || STREQ(string, "blank"))#define STORE_MBC(p, c) \ do { \ (p)[0] = (unsigned char)(((c) >>24) & 0xff); \ (p)[1] = (unsigned char)(((c) >>16) & 0xff); \ (p)[2] = (unsigned char)(((c) >> 8) & 0xff); \ (p)[3] = (unsigned char)(((c) >> 0) & 0xff); \ } while (0)#define STORE_MBC_AND_INCR(p, c) \ do { \ *(p)++ = (unsigned char)(((c) >>24) & 0xff); \ *(p)++ = (unsigned char)(((c) >>16) & 0xff); \ *(p)++ = (unsigned char)(((c) >> 8) & 0xff); \ *(p)++ = (unsigned char)(((c) >> 0) & 0xff); \ } while (0)#define EXTRACT_MBC(p) \ ((unsigned int)((unsigned char)(p)[0] << 24 | \ (unsigned char)(p)[1] << 16 | \ (unsigned char)(p)[2] << 8 | \ (unsigned char)(p)[3]))#define EXTRACT_MBC_AND_INCR(p) \ ((unsigned int)((p) += 4, \ (unsigned char)(p)[-4] << 24 | \ (unsigned char)(p)[-3] << 16 | \ (unsigned char)(p)[-2] << 8 | \ (unsigned char)(p)[-1]))#define EXTRACT_UNSIGNED(p) \ ((unsigned char)(p)[0] | (unsigned char)(p)[1] << 8)#define EXTRACT_UNSIGNED_AND_INCR(p) \ ((p) += 2, (unsigned char)(p)[-2] | (unsigned char)(p)[-1] << 8)/* Handle (mb)?charset(_not)?. Structure of mbcharset(_not)? in compiled pattern. struct { unsinged char id; mbcharset(_not)? unsigned char sbc_size; unsigned char sbc_map[sbc_size]; same as charset(_not)? up to here. unsigned short mbc_size; number of intervals. struct { unsigned long beg; beginning of interval. unsigned long end; end of interval. } intervals[mbc_size]; }; */static voidset_list_bits(c1, c2, b) unsigned long c1, c2; unsigned char *b;{ unsigned char sbc_size = b[-1]; unsigned short mbc_size = EXTRACT_UNSIGNED(&b[sbc_size]); unsigned short beg, end, upb; if (c1 > c2) return; b = &b[sbc_size + 2]; for (beg = 0, upb = mbc_size; beg < upb; ) { unsigned short mid = (unsigned short)(beg + upb) >> 1; if ((int)c1 - 1 > (int)EXTRACT_MBC(&b[mid*8+4])) beg = mid + 1; else upb = mid; } for (end = beg, upb = mbc_size; end < upb; ) { unsigned short mid = (unsigned short)(end + upb) >> 1; if ((int)c2 >= (int)EXTRACT_MBC(&b[mid*8]) - 1) end = mid + 1; else upb = mid; } if (beg != end) { if (c1 > EXTRACT_MBC(&b[beg*8])) c1 = EXTRACT_MBC(&b[beg*8]); if (c2 < EXTRACT_MBC(&b[(end - 1)*8+4])) c2 = EXTRACT_MBC(&b[(end - 1)*8+4]); } if (end < mbc_size && end != beg + 1) /* NOTE: memcpy() would not work here. */ memmove(&b[(beg + 1)*8], &b[end*8], (mbc_size - end)*8); STORE_MBC(&b[beg*8 + 0], c1); STORE_MBC(&b[beg*8 + 4], c2); mbc_size += beg - end + 1; STORE_NUMBER(&b[-2], mbc_size);}static intis_in_list(c, b) unsigned long c; const unsigned char *b;{ unsigned short size; unsigned short i, j; size = *b++; if ((int)c / MBRE_BYTEWIDTH < (int)size && b[c / MBRE_BYTEWIDTH] & 1 << c % MBRE_BYTEWIDTH) { return 1; } b += size + 2; size = EXTRACT_UNSIGNED(&b[-2]); if (size == 0) return 0; for (i = 0, j = size; i < j; ) { unsigned short k = (unsigned short)(i + j) >> 1; if (c > EXTRACT_MBC(&b[k*8+4])) i = k + 1; else j = k; } if (i < size && EXTRACT_MBC(&b[i*8]) <= c && ((unsigned char)c != '\n' && (unsigned char)c != '\0')) return 1; return 0;}#if 0static voidprint_partial_compiled_pattern(start, end) unsigned char *start; unsigned char *end;{ int mcnt, mcnt2; unsigned char *p = start; unsigned char *pend = end; if (start == NULL) { printf("(null)\n"); return; } /* Loop over pattern commands. */ while (p < pend) { switch ((enum regexpcode)*p++) { case unused: printf("/unused"); break; case exactn: mcnt = *p++; printf("/exactn/%d", mcnt); do { putchar('/'); printf("%c", *p++); } while (--mcnt); break; case start_memory: mcnt = *p++; printf("/start_memory/%d/%d", mcnt, *p++); break; case stop_memory: mcnt = *p++; printf("/stop_memory/%d/%d", mcnt, *p++); break; case start_paren: printf("/start_paren"); break; case stop_paren: printf("/stop_paren"); break; case casefold_on: printf("/casefold_on"); break; case casefold_off: printf("/casefold_off"); break; case option_set: printf("/option_set/%d", *p++); break; case start_nowidth: EXTRACT_NUMBER_AND_INCR(mcnt, p); printf("/start_nowidth//%d", mcnt); break; case stop_nowidth: printf("/stop_nowidth//"); p += 2; break; case pop_and_fail: printf("/pop_and_fail"); break; case stop_backtrack: printf("/stop_backtrack//"); p += 2; break; case duplicate: printf("/duplicate/%d", *p++); break; case anychar: printf("/anychar"); break; case anychar_repeat: printf("/anychar_repeat"); break; case charset: case charset_not: { register int c; printf("/charset%s", (enum regexpcode)*(p - 1) == charset_not ? "_not" : ""); mcnt = *p++; printf("/%d", mcnt); for (c = 0; c < mcnt; c++) { unsigned bit; unsigned char map_byte = p[c]; putchar ('/'); for (bit = 0; bit < MBRE_BYTEWIDTH; bit++) if (map_byte & (1 << bit)) printf("%c", c * MBRE_BYTEWIDTH + bit); } p += mcnt; mcnt = EXTRACT_UNSIGNED_AND_INCR(p);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -