pcre_internal.h
来自「Ubuntu packages of security software。 相」· C头文件 代码 · 共 1,122 行 · 第 1/3 页
H
1,122 行
/* #define BACKCHAR(eptr) */

#else   /* SUPPORT_UTF8 */

/* Usage notes for the GETCHAR family of macros below:

  . Each macro expands to an assignment statement followed by an "if"
    statement. It is neither a single expression nor a do { } while (0) block,
    so it must not be used as the sole body of an unbraced if/else or loop.

  . The eptr argument is evaluated more than once, and c is both read and
    written, so neither argument should have side effects.

  . The number of additional bytes and the mask for the lead byte are looked up
    in _pcre_utf8_table4 and _pcre_utf8_table3, which are not declared in this
    part of the file.

  . The macros do not check that the additional bytes exist or are valid
    continuation bytes. */

/* Get the next UTF-8 character, not advancing the pointer. This is called when
we know we are in UTF-8 mode. */

#define GETCHAR(c, eptr) \
  c = *eptr; \
  if (c >= 0xc0) \
    { \
    int gcii; \
    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
    int gcss = 6*gcaa; \
    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
    for (gcii = 1; gcii <= gcaa; gcii++) \
      { \
      gcss -= 6; \
      c |= (eptr[gcii] & 0x3f) << gcss; \
      } \
    }

/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
pointer. The test uses a variable named utf8 that must be visible at the point
of expansion. */

#define GETCHARTEST(c, eptr) \
  c = *eptr; \
  if (utf8 && c >= 0xc0) \
    { \
    int gcii; \
    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
    int gcss = 6*gcaa; \
    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
    for (gcii = 1; gcii <= gcaa; gcii++) \
      { \
      gcss -= 6; \
      c |= (eptr[gcii] & 0x3f) << gcss; \
      } \
    }

/* Get the next UTF-8 character, advancing the pointer. This is called when we
know we are in UTF-8 mode. On exit eptr points just past the last byte that was
consumed. */

#define GETCHARINC(c, eptr) \
  c = *eptr++; \
  if (c >= 0xc0) \
    { \
    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
    int gcss = 6*gcaa; \
    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
    while (gcaa-- > 0) \
      { \
      gcss -= 6; \
      c |= (*eptr++ & 0x3f) << gcss; \
      } \
    }

/* Get the next character, testing for UTF-8 mode, and advancing the pointer.
The test uses a variable named utf8 that must be visible at the point of
expansion. */

#define GETCHARINCTEST(c, eptr) \
  c = *eptr++; \
  if (utf8 && c >= 0xc0) \
    { \
    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
    int gcss = 6*gcaa; \
    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
    while (gcaa-- > 0) \
      { \
      gcss -= 6; \
      c |= (*eptr++ & 0x3f) << gcss; \
      } \
    }

/* Get the next UTF-8 character, not advancing the pointer, incrementing length
if there are extra bytes. This is called when we know we are in UTF-8 mode.
*/#define GETCHARLEN(c, eptr, len) \ c = *eptr; \ if (c >= 0xc0) \ { \ int gcii; \ int gcaa = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ \ int gcss = 6*gcaa; \ c = (c & _pcre_utf8_table3[gcaa]) << gcss; \ for (gcii = 1; gcii <= gcaa; gcii++) \ { \ gcss -= 6; \ c |= (eptr[gcii] & 0x3f) << gcss; \ } \ len += gcaa; \ }/* If the pointer is not at the start of a character, move it back untilit is. This is called only in UTF-8 mode - we don't put a test within the macrobecause almost all calls are already within a block of UTF-8 only code. */#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--#endif/* In case there is no definition of offsetof() provided - though any properStandard C system should have one. */#ifndef offsetof#define offsetof(p_type,field) ((size_t)&(((p_type *)0)->field))#endif/* These are the public options that can change during matching. */#define PCRE_IMS (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL)/* Private flags containing information about the compiled regex. They used tolive at the top end of the options word, but that got almost full, so now theyare in a 16-bit flags word. */#define PCRE_NOPARTIAL 0x0001 /* can't use partial with this regex */#define PCRE_FIRSTSET 0x0002 /* first_byte is set */#define PCRE_REQCHSET 0x0004 /* req_byte is set */#define PCRE_STARTLINE 0x0008 /* start after \n for multiline */#define PCRE_JCHANGED 0x0010 /* j option used in regex */#define PCRE_HASCRORLF 0x0020 /* explicit \r or \n in pattern *//* Options for the "extra" block produced by pcre_study(). */#define PCRE_STUDY_MAPPED 0x01 /* a map of starting chars exists *//* Masks for identifying the public options that are permitted at compiletime, run time, or study time, respectively. 
*/#define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY| \ PCRE_NEWLINE_ANYCRLF)#define PUBLIC_OPTIONS \ (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \ PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \ PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)#define PUBLIC_EXEC_OPTIONS \ (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \ PCRE_PARTIAL|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)#define PUBLIC_DFA_EXEC_OPTIONS \ (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \ PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART|PCRE_NEWLINE_BITS| \ PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)#define PUBLIC_STUDY_OPTIONS 0 /* None defined *//* Magic number to provide a small check against being handed junk. Also usedto detect whether a pattern was compiled on a host of different endianness. */#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' *//* Negative values for the firstchar and reqchar variables */#define REQ_UNSET (-2)#define REQ_NONE (-1)/* The maximum remaining length of subject we are prepared to search for areq_byte match. */#define REQ_BYTE_MAX 1000/* Flags added to firstbyte or reqbyte; a "non-literal" item is either avariable-length repeat, or a anything other than literal characters. */#define REQ_CASELESS 0x0100 /* indicates caselessness */#define REQ_VARY 0x0200 /* reqbyte followed non-literal item *//* Miscellaneous definitions */typedef int BOOL;#define FALSE 0#define TRUE 1/* Escape items that are just an encoding of a particular data value. */#ifndef ESC_e#define ESC_e 27#endif#ifndef ESC_f#define ESC_f '\f'#endif#ifndef ESC_n#define ESC_n '\n'#endif#ifndef ESC_r#define ESC_r '\r'#endif/* We can't officially use ESC_t because it is a POSIX reserved identifier(presumably because of all the others like size_t). 
*/#ifndef ESC_tee#define ESC_tee '\t'#endif/* Codes for different types of Unicode property */#define PT_ANY 0 /* Any property - matches all chars */#define PT_LAMP 1 /* L& - the union of Lu, Ll, Lt */#define PT_GC 2 /* General characteristic (e.g. L) */#define PT_PC 3 /* Particular characteristic (e.g. Lu) */#define PT_SC 4 /* Script (e.g. Han) *//* Flag bits and data types for the extended class (OP_XCLASS) for classes thatcontain UTF-8 characters with values greater than 255. */#define XCL_NOT 0x01 /* Flag: this is a negative class */#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */#define XCL_END 0 /* Marks end of individual items */#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */#define XCL_RANGE 2 /* A range (two multibyte chars) follows */#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) *//* These are escaped items that aren't just an encoding of a particular datavalue such as \n. They must have non-zero values, as check_escape() returnstheir negation. Also, they must appear in the same order as in the opcodedefinitions below, up to ESC_z. There's a dummy for OP_ANY because itcorresponds to "." rather than an escape sequence. The final one must beESC_REF as subsequent values are used for backreferences (\1, \2, \3, etc).There are two tests in the code for an escape greater than ESC_b and less thanESC_Z to detect the types that may be repeated. These are the types thatconsume characters. If any new escapes are put in between that don't consume acharacter, that code will have to change. */enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W, ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H, ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_k, ESC_REF };/* Opcode table: Starting from 1 (i.e. 
after OP_END), the values up toOP_EOD must correspond in order to the list of escapes immediately above.*** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitionsthat follow must also be updated to match. There is also a table called"coptable" in pcre_dfa_exec.c that must be updated. */enum { OP_END, /* 0 End of pattern */ /* Values corresponding to backslashed metacharacters */ OP_SOD, /* 1 Start of data: \A */ OP_SOM, /* 2 Start of match (subject + offset): \G */ OP_SET_SOM, /* 3 Set start of match (\K) */ OP_NOT_WORD_BOUNDARY, /* 4 \B */ OP_WORD_BOUNDARY, /* 5 \b */ OP_NOT_DIGIT, /* 6 \D */ OP_DIGIT, /* 7 \d */ OP_NOT_WHITESPACE, /* 8 \S */ OP_WHITESPACE, /* 9 \s */ OP_NOT_WORDCHAR, /* 10 \W */ OP_WORDCHAR, /* 11 \w */ OP_ANY, /* 12 Match any character */ OP_ANYBYTE, /* 13 Match any byte (\C); different to OP_ANY for UTF-8 */ OP_NOTPROP, /* 14 \P (not Unicode property) */ OP_PROP, /* 15 \p (Unicode property) */ OP_ANYNL, /* 16 \R (any newline sequence) */ OP_NOT_HSPACE, /* 17 \H (not horizontal whitespace) */ OP_HSPACE, /* 18 \h (horizontal whitespace) */ OP_NOT_VSPACE, /* 19 \V (not vertical whitespace) */ OP_VSPACE, /* 20 \v (vertical whitespace) */ OP_EXTUNI, /* 21 \X (extended Unicode sequence */ OP_EODN, /* 22 End of data or \n at end of data: \Z. */ OP_EOD, /* 23 End of data: \z */ OP_OPT, /* 24 Set runtime options */ OP_CIRC, /* 25 Start of line - varies with multiline switch */ OP_DOLL, /* 26 End of line - varies with multiline switch */ OP_CHAR, /* 27 Match one character, casefully */ OP_CHARNC, /* 28 Match one character, caselessly */ OP_NOT, /* 29 Match one character, not the following one */ OP_STAR, /* 30 The maximizing and minimizing versions of */ OP_MINSTAR, /* 31 these six opcodes must come in pairs, with */ OP_PLUS, /* 32 the minimizing one second. 
*/ OP_MINPLUS, /* 33 This first set applies to single characters.*/ OP_QUERY, /* 34 */ OP_MINQUERY, /* 35 */ OP_UPTO, /* 36 From 0 to n matches */ OP_MINUPTO, /* 37 */ OP_EXACT, /* 38 Exactly n matches */ OP_POSSTAR, /* 39 Possessified star */ OP_POSPLUS, /* 40 Possessified plus */ OP_POSQUERY, /* 41 Posesssified query */ OP_POSUPTO, /* 42 Possessified upto */ OP_NOTSTAR, /* 43 The maximizing and minimizing versions of */ OP_NOTMINSTAR, /* 44 these six opcodes must come in pairs, with */ OP_NOTPLUS, /* 45 the minimizing one second. They must be in */ OP_NOTMINPLUS, /* 46 exactly the same order as those above. */ OP_NOTQUERY, /* 47 This set applies to "not" single characters. */ OP_NOTMINQUERY, /* 48 */ OP_NOTUPTO, /* 49 From 0 to n matches */ OP_NOTMINUPTO, /* 50 */ OP_NOTEXACT, /* 51 Exactly n matches */ OP_NOTPOSSTAR, /* 52 Possessified versions */ OP_NOTPOSPLUS, /* 53 */ OP_NOTPOSQUERY, /* 54 */ OP_NOTPOSUPTO, /* 55 */ OP_TYPESTAR, /* 56 The maximizing and minimizing versions of */ OP_TYPEMINSTAR, /* 57 these six opcodes must come in pairs, with */ OP_TYPEPLUS, /* 58 the minimizing one second. These codes must */ OP_TYPEMINPLUS, /* 59 be in exactly the same order as those above. */ OP_TYPEQUERY, /* 60 This set applies to character types such as \d */ OP_TYPEMINQUERY, /* 61 */ OP_TYPEUPTO, /* 62 From 0 to n matches */ OP_TYPEMINUPTO, /* 63 */ OP_TYPEEXACT, /* 64 Exactly n matches */ OP_TYPEPOSSTAR, /* 65 Possessified versions */ OP_TYPEPOSPLUS, /* 66 */ OP_TYPEPOSQUERY, /* 67 */ OP_TYPEPOSUPTO, /* 68 */ OP_CRSTAR, /* 69 The maximizing and minimizing versions of */ OP_CRMINSTAR, /* 70 all these opcodes must come in pairs, with */ OP_CRPLUS, /* 71 the minimizing one second. These codes must */ OP_CRMINPLUS, /* 72 be in exactly the same order as those above. */ OP_CRQUERY, /* 73 These are for character classes and back refs */ OP_CRMINQUERY, /* 74 */ OP_CRRANGE, /* 75 These are different to the three sets above. 
*/ OP_CRMINRANGE, /* 76 */ OP_CLASS, /* 77 Match a character class, chars < 256 only */ OP_NCLASS, /* 78 Same, but the bitmap was created from a negative class - the difference is relevant only when a UTF-8 character > 255 is encountered. */ OP_XCLASS, /* 79 Extended class for handling UTF-8 chars within the class. This does both positive and negative. */ OP_REF, /* 80 Match a back reference */ OP_RECURSE, /* 81 Match a numbered subpattern (possibly recursive) */ OP_CALLOUT, /* 82 Call out to external function if provided */ OP_ALT, /* 83 Start of alternation */ OP_KET, /* 84 End of group that doesn't have an unbounded repeat */ OP_KETRMAX, /* 85 These two must remain together and in this */ OP_KETRMIN, /* 86 order. They are for groups the repeat for ever. */ /* The assertions must come before BRA, CBRA, ONCE, and COND.*/ OP_ASSERT, /* 87 Positive lookahead */ OP_ASSERT_NOT, /* 88 Negative lookahead */ OP_ASSERTBACK, /* 89 Positive lookbehind */ OP_ASSERTBACK_NOT, /* 90 Negative lookbehind */ OP_REVERSE, /* 91 Move pointer back - used in lookbehind assertions */ /* ONCE, BRA, CBRA, and COND must come after the assertions, with ONCE first, as there's a test for >= ONCE for a subpattern that isn't an assertion. */ OP_ONCE, /* 92 Atomic group */ OP_BRA, /* 93 Start of non-capturing bracket */ OP_CBRA, /* 94 Start of capturing bracket */ OP_COND, /* 95 Conditional group */ /* These three must follow the previous three, in the same order. There's a
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?