⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pcre_compile.c

📁 PHP v6.0 For Linux 运行环境:Win9X/ WinME/ WinNT/ Win2K/ WinXP
💻 C
字号:
/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
           Copyright (c) 1997-2006 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BELIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, ORCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OFSUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESSINTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER INCONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THEPOSSIBILITY OF SUCH DAMAGE.-----------------------------------------------------------------------------*//* This module contains the external function pcre_compile(), along withsupporting internal functions that are not used by other modules. */#include "pcre_internal.h"/* When DEBUG is defined, we need the pcre_printint() function, which is alsoused by pcretest. DEBUG is not defined when building a production library. */#ifdef DEBUG#include "pcre_printint.src"#endif/**************************************************      Code parameters and static tables         **************************************************//* Maximum number of items on the nested bracket stacks at compile time. Thisapplies to the nesting of all kinds of parentheses. It does not limitun-nested, non-capturing parentheses. This number can be made bigger ifnecessary - it is used to dimension one int and one unsigned char vector atcompile time. */#define BRASTACK_SIZE 200/* Table for handling escaped characters in the range '0'-'z'. Positive returnsare simple data values; negative values are for special things like \d and soon. Zero means further processing is needed (for things like \x), or the escapeis invalid. */#if !EBCDIC   /* This is the "normal" table for ASCII systems */static const short int escapes[] = {     0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */     0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? 
*/   '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E,      0, -ESC_G,   /* @ - G */     0,      0,      0,      0,      0,      0,      0,      0,   /* H - O */-ESC_P, -ESC_Q,      0, -ESC_S,      0,      0,      0, -ESC_W,   /* P - W */-ESC_X,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */   '`',      7, -ESC_b,      0, -ESC_d,  ESC_e,  ESC_f,      0,   /* ` - g */     0,      0,      0,      0,      0,      0,  ESC_n,      0,   /* h - o */-ESC_p,      0,  ESC_r, -ESC_s,  ESC_tee,    0,      0, -ESC_w,   /* p - w */     0,      0, -ESC_z                                            /* x - z */};#else         /* This is the "abnormal" table for EBCDIC systems */static const short int escapes[] = {/*  48 */     0,     0,      0,     '.',    '<',   '(',    '+',    '|',/*  50 */   '&',     0,      0,       0,      0,     0,      0,      0,/*  58 */     0,     0,    '!',     '$',    '*',   ')',    ';',    '~',/*  60 */   '-',   '/',      0,       0,      0,     0,      0,      0,/*  68 */     0,     0,    '|',     ',',    '%',   '_',    '>',    '?',/*  70 */     0,     0,      0,       0,      0,     0,      0,      0,/*  78 */     0,   '`',    ':',     '#',    '@',  '\'',    '=',    '"',/*  80 */     0,     7, -ESC_b,       0, -ESC_d, ESC_e,  ESC_f,      0,/*  88 */     0,     0,      0,     '{',      0,     0,      0,      0,/*  90 */     0,     0,      0,     'l',      0, ESC_n,      0, -ESC_p,/*  98 */     0, ESC_r,      0,     '}',      0,     0,      0,      0,/*  A0 */     0,   '~', -ESC_s, ESC_tee,      0,     0, -ESC_w,      0,/*  A8 */     0,-ESC_z,      0,       0,      0,   '[',      0,      0,/*  B0 */     0,     0,      0,       0,      0,     0,      0,      0,/*  B8 */     0,     0,      0,       0,      0,   ']',    '=',    '-',/*  C0 */   '{',-ESC_A, -ESC_B,  -ESC_C, -ESC_D,-ESC_E,      0, -ESC_G,/*  C8 */     0,     0,      0,       0,      0,     0,      0,      0,/*  D0 */   '}',     0,      0,       0,      0,     0,      0, 
-ESC_P,/*  D8 */-ESC_Q,     0,      0,       0,      0,     0,      0,      0,/*  E0 */  '\\',     0, -ESC_S,       0,      0,     0, -ESC_W, -ESC_X,/*  E8 */     0,-ESC_Z,      0,       0,      0,     0,      0,      0,/*  F0 */     0,     0,      0,       0,      0,     0,      0,      0,/*  F8 */     0,     0,      0,       0,      0,     0,      0,      0};#endif/* Tables of names of POSIX character classes and their lengths. The list isterminated by a zero length entry. The first three must be alpha, lower, upper,as this is assumed for handling case independence. */static const char *const posix_names[] = {  "alpha", "lower", "upper",  "alnum", "ascii", "blank", "cntrl", "digit", "graph",  "print", "punct", "space", "word",  "xdigit" };static const uschar posix_name_lengths[] = {  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };/* Table of class bit maps for each POSIX class. Each class is formed from abase map, with an optional addition or removal of another map. Then, for someclasses, there is some additional tweaking: for [:blank:] the vertical spacecharacters are removed, and for [:alpha:] and [:alnum:] the underscorecharacter is removed. The triples in the table consist of the base map offset,second map offset or -1 if no second map, and a non-negative value for mapaddition or a negative value for map subtraction (if there are two maps). Theabsolute value of the third field has these meanings: 0 => no tweaking, 1 =>remove vertical space characters, 2 => remove underscore. 
*/static const int posix_class_maps[] = {  cbit_word,  cbit_digit, -2,             /* alpha */  cbit_lower, -1,          0,             /* lower */  cbit_upper, -1,          0,             /* upper */  cbit_word,  -1,          2,             /* alnum - word without underscore */  cbit_print, cbit_cntrl,  0,             /* ascii */  cbit_space, -1,          1,             /* blank - a GNU extension */  cbit_cntrl, -1,          0,             /* cntrl */  cbit_digit, -1,          0,             /* digit */  cbit_graph, -1,          0,             /* graph */  cbit_print, -1,          0,             /* print */  cbit_punct, -1,          0,             /* punct */  cbit_space, -1,          0,             /* space */  cbit_word,  -1,          0,             /* word - a Perl extension */  cbit_xdigit,-1,          0              /* xdigit */};/* The texts of compile-time error messages. These are "char *" because theyare passed to the outside world. */static const char *error_texts[] = {  "no error",  "\\ at end of pattern",  "\\c at end of pattern",  "unrecognized character follows \\",  "numbers out of order in {} quantifier",  /* 5 */  "number too big in {} quantifier",  "missing terminating ] for character class",  "invalid escape sequence in character class",  "range out of order in character class",  "nothing to repeat",  /* 10 */  "operand of unlimited repeat could match the empty string",  "internal error: unexpected repeat",  "unrecognized character after (?",  "POSIX named classes are supported only within a class",  "missing )",  /* 15 */  "reference to non-existent subpattern",  "erroffset passed as NULL",  "unknown option bit(s) set",  "missing ) after comment",  "parentheses nested too deeply",  /* 20 */  "regular expression too large",  "failed to get memory",  "unmatched parentheses",  "internal error: code overflow",  "unrecognized character after (?<",  /* 25 */  "lookbehind assertion is not fixed length",  "malformed number after (?(",  "conditional 
group contains more than two branches",  "assertion expected after (?(",  "(?R or (?digits must be followed by )",  /* 30 */  "unknown POSIX class name",  "POSIX collating elements are not supported",  "this version of PCRE is not compiled with PCRE_UTF8 support",  "spare error",  "character value in \\x{...} sequence is too large",  /* 35 */  "invalid condition (?(0)",  "\\C not allowed in lookbehind assertion",  "PCRE does not support \\L, \\l, \\N, \\U, or \\u",  "number after (?C is > 255",  "closing ) for (?C expected",  /* 40 */  "recursive call could loop indefinitely",  "unrecognized character after (?P",  "syntax error after (?P",  "two named groups have the same name",  "invalid UTF-8 string",  /* 45 */  "support for \\P, \\p, and \\X has not been compiled",  "malformed \\P or \\p sequence",  "unknown property name after \\P or \\p"};/* Table to identify digits and hex digits. This is used when compilingpatterns. Note that the tables in chartables are dependent on the locale, andmay mark arbitrary characters as digits - but the PCRE compiling code expectsto handle only 0-9, a-z, and A-Z as digits when compiling. That is why we havea private table here. It costs 256 bytes, but it is a lot faster than doingcharacter value tests (at least in some simple cases I timed), and in someapplications one wants PCRE to compile efficiently as well as matchefficiently.For convenience, we use the same bit definitions as in chartables:  0x04   decimal digit  0x08   hexadecimal digitThen we can use ctype_digit and ctype_xdigit in the code. 
*/#if !EBCDIC    /* This is the "normal" case, for ASCII systems */static const unsigned char digitab[] =  {  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   8- 15 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - '  */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  ( - /  */  0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /*  0 - 7  */  0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00, /*  8 - ?  */  0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /*  @ - G  */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  H - O  */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  P - W  */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  X - _  */  0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /*  ` - g  */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  h - o  */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  p - w  */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  x -127 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */#else          /* This is the "abnormal" case, for EBCDIC systems */static const unsigned char 
digitab[] =  {  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   8- 15    */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 10 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31    */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  32- 39 20 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  40- 47    */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  48- 55 30 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  56- 63    */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - 71 40 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  72- |     */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 50 */  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  88- 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -