⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pcre_internal.h

📁 技术管理量不要让站长把时间都花费在为您修正说明上。压缩包解压
💻 H
📖 第 1 页 / 共 3 页
字号:
this number.) The value is limited by the number of opcodes left after OP_BRA,i.e. 255 - OP_BRA. We actually set it a bit lower to leave room for additionalopcodes. */#define EXTRACT_BASIC_MAX  100/* This macro defines textual names for all the opcodes. These are used onlyfor debugging. The macro is referenced only in pcre_printint.c. */#define OP_NAME_LIST \  "End", "\\A", "\\G", "\\B", "\\b", "\\D", "\\d",                \  "\\S", "\\s", "\\W", "\\w", "Any", "Anybyte",                   \  "notprop", "prop", "extuni",                                    \  "\\Z", "\\z",                                                   \  "Opt", "^", "$", "char", "charnc", "not",                       \  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \  "*", "*?", "+", "+?", "?", "??", "{", "{",                      \  "class", "nclass", "xclass", "Ref", "Recurse", "Callout",       \  "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",     \  "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cond ref",\  "Brazero", "Braminzero", "Branumber", "Bra"/* This macro defines the length of fixed length operations in the compiledregex. The lengths are used when searching for specific things, and also in thedebugging printing of a compiled regex. We use a macro so that it can bedefined close to the definitions of the opcodes themselves.As things have been extended, some of these are no longer fixed lenths, but areminima instead. For example, the length of a single-character repeat may varyin UTF-8 mode. The code that uses this table must know about such things. */#define OP_LENGTHS \  1,                             /* End                                    */ \  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* \A, \G, \B, \B, \D, \d, \S, \s, \W, \w */ \  1, 1,                          /* Any, Anybyte                           */ \  3, 3, 1,                       /* NOTPROP, PROP, EXTUNI                  */ \  1, 1, 2, 1, 1,                 /* \Z, \z, Opt, ^, $                      */ \  2,                             /* Char  - the minimum length             */ \  2,                             /* Charnc  - the minimum length           */ \  2,                             /* not                                    */ \  /* Positive single-char repeats                            ** These are  */ \  2, 2, 2, 2, 2, 2,              /* *, *?, +, +?, ?, ??      ** minima in  */ \  4, 4, 4,                       /* upto, minupto, exact     ** UTF-8 mode */ \  /* Negative single-char repeats - only for chars < 256                   */ \  2, 2, 2, 2, 2, 2,              /* NOT *, *?, +, +?, ?, ??                */ \  4, 4, 4,                       /* NOT upto, minupto, exact               */ \  /* Positive type repeats                                                 */ \  2, 2, 2, 2, 2, 2,              /* Type *, *?, +, +?, ?, ??               */ \  4, 4, 4,                       /* Type upto, minupto, exact              */ \  /* Character class & ref repeats                                         */ \  1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */ \  5, 5,                          /* CRRANGE, CRMINRANGE                    */ \ 33,                             /* CLASS                                  */ \ 33,                             /* NCLASS                                 */ \  0,                             /* XCLASS - variable length               */ \  3,                             /* REF                                    */ \  1+LINK_SIZE,                   /* RECURSE                                */ \  2+2*LINK_SIZE,                 /* CALLOUT                                */ \  1+LINK_SIZE,                   /* Alt                                    */ \  1+LINK_SIZE,                   /* Ket                                    */ \  1+LINK_SIZE,                   /* KetRmax                                */ \  1+LINK_SIZE,                   /* KetRmin                                */ \  1+LINK_SIZE,                   /* Assert                                 */ \  1+LINK_SIZE,                   /* Assert not                             */ \  1+LINK_SIZE,                   /* Assert behind                          */ \  1+LINK_SIZE,                   /* Assert behind not                      */ \  1+LINK_SIZE,                   /* Reverse                                */ \  1+LINK_SIZE,                   /* Once                                   */ \  1+LINK_SIZE,                   /* COND                                   */ \  3,                             /* CREF                                   */ \  1, 1,                          /* BRAZERO, BRAMINZERO                    */ \  3,                             /* BRANUMBER                              */ \  1+LINK_SIZE                    /* BRA                                    */ \/* A magic value for OP_CREF to indicate the "in recursion" condition. */#define CREF_RECURSE  0xffff/* Error code numbers. They are given names so that they can more easily betracked. */enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,       ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19,       ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29,       ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,       ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47 };/* The real format of the start of the pcre block; the index of names and thecode vector run on as long as necessary after the end. We store an explicitoffset to the name table so that if a regex is compiled on one host, saved, andthen run on another where the size of pointers is different, all might stillbe well. For the case of compiled-on-4 and run-on-8, we include an extrapointer that is always NULL. For future-proofing, a few dummy fields wereoriginally included - even though you can never get this planning right - butthere is only one left now.NOTE NOTE NOTE:Because people can now save and re-use compiled patterns, any additions to thisstructure should be made at the end, and something earlier (e.g. a newflag in the options or one of the dummy fields) should indicate that the newfields are present. Currently PCRE always sets the dummy fields to zero.NOTE NOTE NOTE:*/typedef struct real_pcre {  pcre_uint32 magic_number;  pcre_uint32 size;               /* Total that was malloced */  pcre_uint32 options;  pcre_uint32 dummy1;             /* For future use, maybe */  pcre_uint16 top_bracket;  pcre_uint16 top_backref;  pcre_uint16 first_byte;  pcre_uint16 req_byte;  pcre_uint16 name_table_offset;  /* Offset to name table that follows */  pcre_uint16 name_entry_size;    /* Size of any name items */  pcre_uint16 name_count;         /* Number of name items */  pcre_uint16 ref_count;          /* Reference count */  const unsigned char *tables;    /* Pointer to tables or NULL for std */  const unsigned char *nullpad;   /* NULL padding */} real_pcre;/* The format of the block used to store data from pcre_study(). The sameremark (see NOTE above) about extending this structure applies. */typedef struct pcre_study_data {  pcre_uint32 size;               /* Total that was malloced */  pcre_uint32 options;  uschar start_bits[32];} pcre_study_data;/* Structure for passing "static" information around between the functionsdoing the compiling, so that they are thread-safe. */typedef struct compile_data {  const uschar *lcc;            /* Points to lower casing table */  const uschar *fcc;            /* Points to case-flipping table */  const uschar *cbits;          /* Points to character type table */  const uschar *ctypes;         /* Points to table of type maps */  const uschar *start_code;     /* The start of the compiled code */  const uschar *start_pattern;  /* The start of the pattern */  uschar *name_table;           /* The name/number table */  int  names_found;             /* Number of entries so far */  int  name_entry_size;         /* Size of each entry */  int  top_backref;             /* Maximum back reference */  unsigned int backref_map;     /* Bitmap of low back refs */  int  req_varyopt;             /* "After variable item" flag for reqbyte */  BOOL nopartial;               /* Set TRUE if partial won't work */} compile_data;/* Structure for maintaining a chain of pointers to the currently incompletebranches, for testing for left recursion. */typedef struct branch_chain {  struct branch_chain *outer;  uschar *current;} branch_chain;/* Structure for items in a linked list that represents an explicit recursivecall within the pattern. */typedef struct recursion_info {  struct recursion_info *prevrec; /* Previous recursion record (or NULL) */  int group_num;                /* Number of group that was called */  const uschar *after_call;     /* "Return value": points after the call in the expr */  USPTR save_start;             /* Old value of md->start_match */  int *offset_save;             /* Pointer to start of saved offsets */  int saved_max;                /* Number of saved offsets */} recursion_info;/* When compiling in a mode that doesn't use recursive calls to match(),a structure is used to remember local variables on the heap. It is defined inpcre.c, close to the match() function, so that it is easy to keep it in stepwith any changes of local variable. However, the pointer to the current framemust be saved in some "static" place over a longjmp(). We declare thestructure here so that we can put a pointer in the match_data structure.NOTE: This isn't used for a "normal" compilation of pcre. */struct heapframe;/* Structure for passing "static" information around between the functionsdoing traditional NFA matching, so that they are thread-safe. */typedef struct match_data {  unsigned long int match_call_count;      /* As it says */  unsigned long int match_limit;           /* As it says */  unsigned long int match_limit_recursion; /* As it says */  int   *offset_vector;         /* Offset vector */  int    offset_end;            /* One past the end */  int    offset_max;            /* The maximum usable for return data */  const uschar *lcc;            /* Points to lower casing table */  const uschar *ctypes;         /* Points to table of type maps */  BOOL   offset_overflow;       /* Set if too many extractions */  BOOL   notbol;                /* NOTBOL flag */  BOOL   noteol;                /* NOTEOL flag */  BOOL   utf8;                  /* UTF8 flag */  BOOL   endonly;               /* Dollar not before final \n */  BOOL   notempty;              /* Empty string match not wanted */  BOOL   partial;               /* PARTIAL flag */  BOOL   hitend;                /* Hit the end of the subject at some point */  const uschar *start_code;     /* For use when recursing */  USPTR  start_subject;         /* Start of the subject string */  USPTR  end_subject;           /* End of the subject string */  USPTR  start_match;           /* Start of this match attempt */  USPTR  end_match_ptr;         /* Subject position at end match */  int    end_offset_top;        /* Highwater mark at end of match */  int    capture_last;          /* Most recent capture number */  int    start_offset;          /* The start offset value */  recursion_info *recursive;    /* Linked list of recursion data */  void  *callout_data;          /* To pass back to callouts */  struct heapframe *thisframe;  /* Used only when compiling for no recursion */} match_data;/* A similar structure is used for the same purpose by the DFA matchingfunctions. */typedef struct dfa_match_data {  const uschar *start_code;     /* Start of the compiled pattern */  const uschar *start_subject;  /* Start of the subject string */  const uschar *end_subject;    /* End of subject string */  const uschar *tables;         /* Character tables */  int   moptions;               /* Match options */  int   poptions;               /* Pattern options */  void  *callout_data;          /* To pass back to callouts */} dfa_match_data;/* Bit definitions for entries in the pcre_ctypes table. */#define ctype_space   0x01#define ctype_letter  0x02#define ctype_digit   0x04#define ctype_xdigit  0x08#define ctype_word    0x10   /* alphameric or '_' */#define ctype_meta    0x80   /* regexp meta char or zero (end pattern) *//* Offsets for the bitmap tables in pcre_cbits. Each table contains a setof bits for a class map. Some classes are built by combining these tables. */#define cbit_space     0      /* [:space:] or \s */#define cbit_xdigit   32      /* [:xdigit:] */#define cbit_digit    64      /* [:digit:] or \d */#define cbit_upper    96      /* [:upper:] */#define cbit_lower   128      /* [:lower:] */#define cbit_word    160      /* [:word:] or \w */#define cbit_graph   192      /* [:graph:] */#define cbit_print   224      /* [:print:] */#define cbit_punct   256      /* [:punct:] */#define cbit_cntrl   288      /* [:cntrl:] */#define cbit_length  320      /* Length of the cbits table *//* Offsets of the various tables from the base tables pointer, andtotal length. */#define lcc_offset      0#define fcc_offset    256#define cbits_offset  512#define ctypes_offset (cbits_offset + cbit_length)#define tables_length (ctypes_offset + 256)/* Layout of the UCP type table that translates property names into types andcodes. */typedef struct {  const char *name;  pcre_uint16 type;  pcre_uint16 value;} ucp_type_table;/* Internal shared data tables. These are tables that are used by more than oneof the exported public functions. They have to be "external" in the C sense,but are not part of the PCRE public API. The data for these tables is in thepcre_tables.c module. */extern const int    _pcre_utf8_table1[];extern const int    _pcre_utf8_table2[];extern const int    _pcre_utf8_table3[];extern const uschar _pcre_utf8_table4[];extern const int    _pcre_utf8_table1_size;extern const ucp_type_table _pcre_utt[];extern const int _pcre_utt_size;extern const uschar _pcre_default_tables[];extern const uschar _pcre_OP_lengths[];/* Internal shared functions. These are functions that are used by more thanone of the exported public functions. They have to be "external" in the Csense, but are not part of the PCRE public API. */extern int         _pcre_ord2utf8(int, uschar *);extern real_pcre * _pcre_try_flipped(const real_pcre *, real_pcre *,                     const pcre_study_data *, pcre_study_data *);extern int         _pcre_ucp_findprop(const int, int *, int *);extern int         _pcre_ucp_othercase(const int);extern int         _pcre_valid_utf8(const uschar *, int);extern BOOL        _pcre_xclass(int, const uschar *);#endif/* End of pcre_internal.h */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -