📄 pcre_internal.h

📁 Scheme跨平台编译器
💻 H
📖 第 1 页 / 共 4 页
字号:
offset limits the size of the compiled regex to around 64K, which is big enoughfor almost everybody. However, I received a request for an even bigger limit.For this reason, and also to make the code easier to maintain, the storing andloading of offsets from the byte string is now handled by the macros that aredefined here.The macros are controlled by the value of LINK_SIZE. This defaults to 2 inthe config.h file, but can be overridden by using -D on the command line. Thisis automated on Unix systems via the "configure" command. */#if LINK_SIZE == 2#define PUT(a,n,d)   \  (a[n] = (d) >> 8), \  (a[(n)+1] = (d) & 255)#define GET(a,n) \  (((a)[n] << 8) | (a)[(n)+1])#define MAX_PATTERN_SIZE (1 << 16)#elif LINK_SIZE == 3#define PUT(a,n,d)       \  (a[n] = (d) >> 16),    \  (a[(n)+1] = (d) >> 8), \  (a[(n)+2] = (d) & 255)#define GET(a,n) \  (((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2])#define MAX_PATTERN_SIZE (1 << 24)#elif LINK_SIZE == 4#define PUT(a,n,d)        \  (a[n] = (d) >> 24),     \  (a[(n)+1] = (d) >> 16), \  (a[(n)+2] = (d) >> 8),  \  (a[(n)+3] = (d) & 255)#define GET(a,n) \  (((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3])#define MAX_PATTERN_SIZE (1 << 30)   /* Keep it positive */#else#error LINK_SIZE must be either 2, 3, or 4#endif/* Convenience macro defined in terms of the others */#define PUTINC(a,n,d)   PUT(a,n,d), a += LINK_SIZE/* PCRE uses some other 2-byte quantities that do not change when the size ofoffsets changes. There are used for repeat counts and for other things such ascapturing parenthesis numbers in back references. */#define PUT2(a,n,d)   \  a[n] = (d) >> 8; \  a[(n)+1] = (d) & 255#define GET2(a,n) \  (((a)[n] << 8) | (a)[(n)+1])#define PUT2INC(a,n,d)  PUT2(a,n,d), a += 2/* When UTF-8 encoding is being used, a character is no longer just a singlebyte. The macros for character handling generate simple sequences when used inbyte-mode, and more complicated ones for UTF-8 characters. BACKCHAR shouldnever be called in byte mode. To make sure it can never even appear when UTF-8support is omitted, we don't even define it. */#ifndef SUPPORT_UTF8#define NEXTCHAR(p) p++;#define GETCHAR(c, eptr) c = *eptr;#define GETCHARTEST(c, eptr) c = *eptr;#define GETCHARINC(c, eptr) c = *eptr++;#define GETCHARINCTEST(c, eptr) c = *eptr++;#define GETCHARLEN(c, eptr, len) c = *eptr;/* #define BACKCHAR(eptr) */#else   /* SUPPORT_UTF8 *//* Advance a character pointer one byte in non-UTF-8 mode and by one characterin UTF-8 mode. */#define NEXTCHAR(p) \  p++; \  if (utf8) { while((*p & 0xc0) == 0x80) p++; }/* Get the next UTF-8 character, not advancing the pointer. This is called whenwe know we are in UTF-8 mode. */#define GETCHAR(c, eptr) \  c = *eptr; \  if (c >= 0xc0) \    { \    int gcii; \    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \    int gcss = 6*gcaa; \    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \    for (gcii = 1; gcii <= gcaa; gcii++) \      { \      gcss -= 6; \      c |= (eptr[gcii] & 0x3f) << gcss; \      } \    }/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing thepointer. */#define GETCHARTEST(c, eptr) \  c = *eptr; \  if (utf8 && c >= 0xc0) \    { \    int gcii; \    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \    int gcss = 6*gcaa; \    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \    for (gcii = 1; gcii <= gcaa; gcii++) \      { \      gcss -= 6; \      c |= (eptr[gcii] & 0x3f) << gcss; \      } \    }/* Get the next UTF-8 character, advancing the pointer. This is called when weknow we are in UTF-8 mode. */#define GETCHARINC(c, eptr) \  c = *eptr++; \  if (c >= 0xc0) \    { \    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \    int gcss = 6*gcaa; \    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \    while (gcaa-- > 0) \      { \      gcss -= 6; \      c |= (*eptr++ & 0x3f) << gcss; \      } \    }/* Get the next character, testing for UTF-8 mode, and advancing the pointer */#define GETCHARINCTEST(c, eptr) \  c = *eptr++; \  if (utf8 && c >= 0xc0) \    { \    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \    int gcss = 6*gcaa; \    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \    while (gcaa-- > 0) \      { \      gcss -= 6; \      c |= (*eptr++ & 0x3f) << gcss; \      } \    }/* Get the next UTF-8 character, not advancing the pointer, incrementing lengthif there are extra bytes. This is called when we know we are in UTF-8 mode. */#define GETCHARLEN(c, eptr, len) \  c = *eptr; \  if (c >= 0xc0) \    { \    int gcii; \    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \    int gcss = 6*gcaa; \    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \    for (gcii = 1; gcii <= gcaa; gcii++) \      { \      gcss -= 6; \      c |= (eptr[gcii] & 0x3f) << gcss; \      } \    len += gcaa; \    }/* If the pointer is not at the start of a character, move it back untilit is. This is called only in UTF-8 mode - we don't put a test within the macrobecause almost all calls are already within a block of UTF-8 only code. */#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--#endif/* In case there is no definition of offsetof() provided - though any properStandard C system should have one. */#ifndef offsetof#define offsetof(p_type,field) ((size_t)&(((p_type *)0)->field))#endif/* These are the public options that can change during matching. */#define PCRE_IMS (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL)/* Private flags containing information about the compiled regex. They used tolive at the top end of the options word, but that got almost full, so now theyare in a 16-bit flags word. */#define PCRE_NOPARTIAL     0x0001  /* can't use partial with this regex */#define PCRE_FIRSTSET      0x0002  /* first_byte is set */#define PCRE_REQCHSET      0x0004  /* req_byte is set */#define PCRE_STARTLINE     0x0008  /* start after \n for multiline */#define PCRE_JCHANGED      0x0010  /* j option used in regex */#define PCRE_HASCRORLF     0x0020  /* explicit \r or \n in pattern *//* Options for the "extra" block produced by pcre_study(). */#define PCRE_STUDY_MAPPED   0x01     /* a map of starting chars exists *//* Masks for identifying the public options that are permitted at compiletime, run time, or study time, respectively. */#define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY| \                           PCRE_NEWLINE_ANYCRLF)#define PUBLIC_OPTIONS \  (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \   PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \   PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \   PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \   PCRE_JAVASCRIPT_COMPAT)#define PUBLIC_EXEC_OPTIONS \  (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \   PCRE_PARTIAL|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)#define PUBLIC_DFA_EXEC_OPTIONS \  (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \   PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART|PCRE_NEWLINE_BITS| \   PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)#define PUBLIC_STUDY_OPTIONS 0   /* None defined *//* Magic number to provide a small check against being handed junk. Also usedto detect whether a pattern was compiled on a host of different endianness. */#define MAGIC_NUMBER  0x50435245UL   /* 'PCRE' *//* Negative values for the firstchar and reqchar variables */#define REQ_UNSET (-2)#define REQ_NONE  (-1)/* The maximum remaining length of subject we are prepared to search for areq_byte match. */#define REQ_BYTE_MAX 1000/* Flags added to firstbyte or reqbyte; a "non-literal" item is either avariable-length repeat, or a anything other than literal characters. */#define REQ_CASELESS 0x0100    /* indicates caselessness */#define REQ_VARY     0x0200    /* reqbyte followed non-literal item *//* Miscellaneous definitions */typedef int BOOL;#define FALSE   0#define TRUE    1/* Escape items that are just an encoding of a particular data value. */#ifndef ESC_e#define ESC_e 27#endif#ifndef ESC_f#define ESC_f '\f'#endif#ifndef ESC_n#define ESC_n '\n'
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -