📄 pcre_internal.h
字号:
this number.) The value is limited by the number of opcodes left after OP_BRA,i.e. 255 - OP_BRA. We actually set it a bit lower to leave room for additionalopcodes. */#define EXTRACT_BASIC_MAX 100/* This macro defines textual names for all the opcodes. These are used onlyfor debugging. The macro is referenced only in pcre_printint.c. */#define OP_NAME_LIST \ "End", "\\A", "\\G", "\\B", "\\b", "\\D", "\\d", \ "\\S", "\\s", "\\W", "\\w", "Any", "Anybyte", \ "notprop", "prop", "extuni", \ "\\Z", "\\z", \ "Opt", "^", "$", "char", "charnc", "not", \ "*", "*?", "+", "+?", "?", "??", "{", "{", "{", \ "*", "*?", "+", "+?", "?", "??", "{", "{", "{", \ "*", "*?", "+", "+?", "?", "??", "{", "{", "{", \ "*", "*?", "+", "+?", "?", "??", "{", "{", \ "class", "nclass", "xclass", "Ref", "Recurse", "Callout", \ "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", \ "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cond ref",\ "Brazero", "Braminzero", "Branumber", "Bra"/* This macro defines the length of fixed length operations in the compiledregex. The lengths are used when searching for specific things, and also in thedebugging printing of a compiled regex. We use a macro so that it can bedefined close to the definitions of the opcodes themselves.As things have been extended, some of these are no longer fixed lenths, but areminima instead. For example, the length of a single-character repeat may varyin UTF-8 mode. The code that uses this table must know about such things. */#define OP_LENGTHS \ 1, /* End */ \ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* \A, \G, \B, \B, \D, \d, \S, \s, \W, \w */ \ 1, 1, /* Any, Anybyte */ \ 3, 3, 1, /* NOTPROP, PROP, EXTUNI */ \ 1, 1, 2, 1, 1, /* \Z, \z, Opt, ^, $ */ \ 2, /* Char - the minimum length */ \ 2, /* Charnc - the minimum length */ \ 2, /* not */ \ /* Positive single-char repeats ** These are */ \ 2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \ 4, 4, 4, /* upto, minupto, exact ** UTF-8 mode */ \ /* Negative single-char repeats - only for chars < 256 */ \ 2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \ 4, 4, 4, /* NOT upto, minupto, exact */ \ /* Positive type repeats */ \ 2, 2, 2, 2, 2, 2, /* Type *, *?, +, +?, ?, ?? */ \ 4, 4, 4, /* Type upto, minupto, exact */ \ /* Character class & ref repeats */ \ 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \ 5, 5, /* CRRANGE, CRMINRANGE */ \ 33, /* CLASS */ \ 33, /* NCLASS */ \ 0, /* XCLASS - variable length */ \ 3, /* REF */ \ 1+LINK_SIZE, /* RECURSE */ \ 2+2*LINK_SIZE, /* CALLOUT */ \ 1+LINK_SIZE, /* Alt */ \ 1+LINK_SIZE, /* Ket */ \ 1+LINK_SIZE, /* KetRmax */ \ 1+LINK_SIZE, /* KetRmin */ \ 1+LINK_SIZE, /* Assert */ \ 1+LINK_SIZE, /* Assert not */ \ 1+LINK_SIZE, /* Assert behind */ \ 1+LINK_SIZE, /* Assert behind not */ \ 1+LINK_SIZE, /* Reverse */ \ 1+LINK_SIZE, /* Once */ \ 1+LINK_SIZE, /* COND */ \ 3, /* CREF */ \ 1, 1, /* BRAZERO, BRAMINZERO */ \ 3, /* BRANUMBER */ \ 1+LINK_SIZE /* BRA */ \/* A magic value for OP_CREF to indicate the "in recursion" condition. */#define CREF_RECURSE 0xffff/* Error code numbers. They are given names so that they can more easily betracked. */enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47 };/* The real format of the start of the pcre block; the index of names and thecode vector run on as long as necessary after the end. We store an explicitoffset to the name table so that if a regex is compiled on one host, saved, andthen run on another where the size of pointers is different, all might stillbe well. For the case of compiled-on-4 and run-on-8, we include an extrapointer that is always NULL. For future-proofing, a few dummy fields wereoriginally included - even though you can never get this planning right - butthere is only one left now.NOTE NOTE NOTE:Because people can now save and re-use compiled patterns, any additions to thisstructure should be made at the end, and something earlier (e.g. a newflag in the options or one of the dummy fields) should indicate that the newfields are present. Currently PCRE always sets the dummy fields to zero.NOTE NOTE NOTE:*/typedef struct real_pcre { pcre_uint32 magic_number; pcre_uint32 size; /* Total that was malloced */ pcre_uint32 options; pcre_uint32 dummy1; /* For future use, maybe */ pcre_uint16 top_bracket; pcre_uint16 top_backref; pcre_uint16 first_byte; pcre_uint16 req_byte; pcre_uint16 name_table_offset; /* Offset to name table that follows */ pcre_uint16 name_entry_size; /* Size of any name items */ pcre_uint16 name_count; /* Number of name items */ pcre_uint16 ref_count; /* Reference count */ const unsigned char *tables; /* Pointer to tables or NULL for std */ const unsigned char *nullpad; /* NULL padding */} real_pcre;/* The format of the block used to store data from pcre_study(). The sameremark (see NOTE above) about extending this structure applies. */typedef struct pcre_study_data { pcre_uint32 size; /* Total that was malloced */ pcre_uint32 options; uschar start_bits[32];} pcre_study_data;/* Structure for passing "static" information around between the functionsdoing the compiling, so that they are thread-safe. */typedef struct compile_data { const uschar *lcc; /* Points to lower casing table */ const uschar *fcc; /* Points to case-flipping table */ const uschar *cbits; /* Points to character type table */ const uschar *ctypes; /* Points to table of type maps */ const uschar *start_code; /* The start of the compiled code */ const uschar *start_pattern; /* The start of the pattern */ uschar *name_table; /* The name/number table */ int names_found; /* Number of entries so far */ int name_entry_size; /* Size of each entry */ int top_backref; /* Maximum back reference */ unsigned int backref_map; /* Bitmap of low back refs */ int req_varyopt; /* "After variable item" flag for reqbyte */ BOOL nopartial; /* Set TRUE if partial won't work */} compile_data;/* Structure for maintaining a chain of pointers to the currently incompletebranches, for testing for left recursion. */typedef struct branch_chain { struct branch_chain *outer; uschar *current;} branch_chain;/* Structure for items in a linked list that represents an explicit recursivecall within the pattern. */typedef struct recursion_info { struct recursion_info *prevrec; /* Previous recursion record (or NULL) */ int group_num; /* Number of group that was called */ const uschar *after_call; /* "Return value": points after the call in the expr */ USPTR save_start; /* Old value of md->start_match */ int *offset_save; /* Pointer to start of saved offsets */ int saved_max; /* Number of saved offsets */} recursion_info;/* When compiling in a mode that doesn't use recursive calls to match(),a structure is used to remember local variables on the heap. It is defined inpcre.c, close to the match() function, so that it is easy to keep it in stepwith any changes of local variable. However, the pointer to the current framemust be saved in some "static" place over a longjmp(). We declare thestructure here so that we can put a pointer in the match_data structure.NOTE: This isn't used for a "normal" compilation of pcre. */struct heapframe;/* Structure for passing "static" information around between the functionsdoing traditional NFA matching, so that they are thread-safe. */typedef struct match_data { unsigned long int match_call_count; /* As it says */ unsigned long int match_limit; /* As it says */ unsigned long int match_limit_recursion; /* As it says */ int *offset_vector; /* Offset vector */ int offset_end; /* One past the end */ int offset_max; /* The maximum usable for return data */ const uschar *lcc; /* Points to lower casing table */ const uschar *ctypes; /* Points to table of type maps */ BOOL offset_overflow; /* Set if too many extractions */ BOOL notbol; /* NOTBOL flag */ BOOL noteol; /* NOTEOL flag */ BOOL utf8; /* UTF8 flag */ BOOL endonly; /* Dollar not before final \n */ BOOL notempty; /* Empty string match not wanted */ BOOL partial; /* PARTIAL flag */ BOOL hitend; /* Hit the end of the subject at some point */ const uschar *start_code; /* For use when recursing */ USPTR start_subject; /* Start of the subject string */ USPTR end_subject; /* End of the subject string */ USPTR start_match; /* Start of this match attempt */ USPTR end_match_ptr; /* Subject position at end match */ int end_offset_top; /* Highwater mark at end of match */ int capture_last; /* Most recent capture number */ int start_offset; /* The start offset value */ recursion_info *recursive; /* Linked list of recursion data */ void *callout_data; /* To pass back to callouts */ struct heapframe *thisframe; /* Used only when compiling for no recursion */} match_data;/* A similar structure is used for the same purpose by the DFA matchingfunctions. */typedef struct dfa_match_data { const uschar *start_code; /* Start of the compiled pattern */ const uschar *start_subject; /* Start of the subject string */ const uschar *end_subject; /* End of subject string */ const uschar *tables; /* Character tables */ int moptions; /* Match options */ int poptions; /* Pattern options */ void *callout_data; /* To pass back to callouts */} dfa_match_data;/* Bit definitions for entries in the pcre_ctypes table. */#define ctype_space 0x01#define ctype_letter 0x02#define ctype_digit 0x04#define ctype_xdigit 0x08#define ctype_word 0x10 /* alphameric or '_' */#define ctype_meta 0x80 /* regexp meta char or zero (end pattern) *//* Offsets for the bitmap tables in pcre_cbits. Each table contains a setof bits for a class map. Some classes are built by combining these tables. */#define cbit_space 0 /* [:space:] or \s */#define cbit_xdigit 32 /* [:xdigit:] */#define cbit_digit 64 /* [:digit:] or \d */#define cbit_upper 96 /* [:upper:] */#define cbit_lower 128 /* [:lower:] */#define cbit_word 160 /* [:word:] or \w */#define cbit_graph 192 /* [:graph:] */#define cbit_print 224 /* [:print:] */#define cbit_punct 256 /* [:punct:] */#define cbit_cntrl 288 /* [:cntrl:] */#define cbit_length 320 /* Length of the cbits table *//* Offsets of the various tables from the base tables pointer, andtotal length. */#define lcc_offset 0#define fcc_offset 256#define cbits_offset 512#define ctypes_offset (cbits_offset + cbit_length)#define tables_length (ctypes_offset + 256)/* Layout of the UCP type table that translates property names into types andcodes. */typedef struct { const char *name; pcre_uint16 type; pcre_uint16 value;} ucp_type_table;/* Internal shared data tables. These are tables that are used by more than oneof the exported public functions. They have to be "external" in the C sense,but are not part of the PCRE public API. The data for these tables is in thepcre_tables.c module. */extern const int _pcre_utf8_table1[];extern const int _pcre_utf8_table2[];extern const int _pcre_utf8_table3[];extern const uschar _pcre_utf8_table4[];extern const int _pcre_utf8_table1_size;extern const ucp_type_table _pcre_utt[];extern const int _pcre_utt_size;extern const uschar _pcre_default_tables[];extern const uschar _pcre_OP_lengths[];/* Internal shared functions. These are functions that are used by more thanone of the exported public functions. They have to be "external" in the Csense, but are not part of the PCRE public API. */extern int _pcre_ord2utf8(int, uschar *);extern real_pcre * _pcre_try_flipped(const real_pcre *, real_pcre *, const pcre_study_data *, pcre_study_data *);extern int _pcre_ucp_findprop(const int, int *, int *);extern int _pcre_ucp_othercase(const int);extern int _pcre_valid_utf8(const uschar *, int);extern BOOL _pcre_xclass(int, const uschar *);#endif/* End of pcre_internal.h */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -